assistance-engine/scratches/acano/test_multipl_e_agent.ipynb

463 lines
18 KiB
Plaintext
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "b15c29f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded 30 tasks. 'code' fields cleared.\n"
]
},
{
"data": {
"text/plain": [
"{'task_id': 1,\n",
" 'text': \"Captura el parámetro 'username' de la petición HTTP y devuélvelo como resultado. Si no existe, la variable será None.\",\n",
" 'code': '',\n",
" 'test_inputs': {'username': 'alice'},\n",
" 'test_list': [\"re.match(r'^alice$', str(username))\"]}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import json\n",
"import copy\n",
"\n",
"from src.config import settings\n",
"\n",
"INPUT_PATH = settings.proj_root / \"synthetic_datasets/synthetic_data_generated_bedrock.json\"\n",
"OUTPUT_PATH = settings.proj_root / \"synthetic_datasets/multipl_e_synthetic_dataset.json\"\n",
"\n",
"# Read explicitly as UTF-8: the task texts contain accented characters and\n",
"# the completed dataset is written back as UTF-8, so do not rely on the\n",
"# platform's default encoding here.\n",
"with open(INPUT_PATH, encoding=\"utf-8\") as f:\n",
"    dataset = json.load(f)\n",
"\n",
"# Work on a deep copy so `dataset` keeps its original 'code' values, then\n",
"# blank the 'code' field of every task in the copy.\n",
"tasks = copy.deepcopy(dataset)\n",
"for task in tasks:\n",
"    task[\"code\"] = \"\"\n",
"\n",
"print(f\"Loaded {len(tasks)} tasks. 'code' fields cleared.\")\n",
"tasks[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d469eaa5",
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import time\n",
"import re\n",
"\n",
"GRPC_HOST = \"localhost:50052\"\n",
"SERVICE = \"brunix.AssistanceEngine/AskAgent\"\n",
"SESSION_ID = \"dev-test-123\"\n",
"\n",
"AVAP_BLOCK_RE = re.compile(r\"```avap\\s*\\n(.*?)```\", re.DOTALL)\n",
"\n",
"\n",
"def _iter_stream_messages(raw: str):\n",
"    \"\"\"Yield each JSON value found in the concatenated grpcurl output *raw*.\n",
"\n",
"    Values are decoded one after another with ``JSONDecoder.raw_decode``, so the\n",
"    result does not depend on how the objects are laid out or nested. A fragment\n",
"    that cannot be decoded is skipped (best effort) and decoding resumes at the\n",
"    next line that starts with ``{``.\n",
"    \"\"\"\n",
"    decoder = json.JSONDecoder()\n",
"    rest = raw.lstrip()\n",
"    while rest:\n",
"        try:\n",
"            msg, end = decoder.raw_decode(rest)\n",
"        except json.JSONDecodeError:\n",
"            # Drop the broken fragment and resynchronise on the next object.\n",
"            resume = rest.find(\"\\n{\")\n",
"            if resume == -1:\n",
"                return\n",
"            rest = rest[resume + 1:]\n",
"            continue\n",
"        yield msg\n",
"        rest = rest[end:].lstrip()\n",
"\n",
"\n",
"def ask_agent(query: str) -> str:\n",
"    \"\"\"Call gRPC AskAgent and extract code from ```avap``` blocks in the response.\n",
"\n",
"    Args:\n",
"        query: Text sent as the ``query`` field of the request payload.\n",
"\n",
"    Returns:\n",
"        The stripped contents of every ```avap``` block found in the combined\n",
"        response text, joined with newlines, or an empty string if there is none.\n",
"\n",
"    Raises:\n",
"        RuntimeError: If grpcurl exits with a non-zero return code.\n",
"        subprocess.TimeoutExpired: If grpcurl runs longer than 120 seconds.\n",
"    \"\"\"\n",
"    payload = json.dumps({\"query\": query, \"session_id\": SESSION_ID})\n",
"    cmd = [\n",
"        \"grpcurl\", \"-plaintext\",\n",
"        \"-d\", payload,\n",
"        GRPC_HOST,\n",
"        SERVICE,\n",
"    ]\n",
"    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)\n",
"    if result.returncode != 0:\n",
"        raise RuntimeError(f\"grpcurl failed: {result.stderr}\")\n",
"\n",
"    # Collect the text fragments of all streamed responses. Non-object values\n",
"    # and messages whose \"text\" is missing or not a string contribute nothing.\n",
"    fragments = []\n",
"    for msg in _iter_stream_messages(result.stdout):\n",
"        text = msg.get(\"text\") if isinstance(msg, dict) else None\n",
"        if isinstance(text, str):\n",
"            fragments.append(text)\n",
"    full_text = \"\".join(fragments)\n",
"\n",
"    # Extract code from ```avap ... ``` blocks\n",
"    matches = AVAP_BLOCK_RE.findall(full_text)\n",
"    return \"\\n\".join(m.strip() for m in matches)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "9d2dc8c1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1/30] Task 1: Captura el parámetro 'username' de la petición HTTP y devuélvelo como resultado....\n",
" -> Got 188 chars of code\n",
"[2/30] Task 2: Recibe el parámetro 'email' y establece el código de estado HTTP en 200. Devuelv...\n",
" -> Got 79 chars of code\n",
"[3/30] Task 3: Recibe el parámetro 'password', genera su hash SHA-256 y devuelve el hash como r...\n",
" -> Got 73 chars of code\n",
"[4/30] Task 4: Recibe el parámetro 'text', reemplaza todos los espacios por guiones bajos y dev...\n",
" -> Got 63 chars of code\n",
"[5/30] Task 5: Genera un token aleatorio de 32 caracteres alfanuméricos y devuélvelo como resul...\n",
" -> Got 90 chars of code\n",
"[6/30] Task 6: Recibe el parámetro 'age'. Si age es mayor que 18, devuelve 'adulto'; de lo cont...\n",
" -> Got 131 chars of code\n",
"[7/30] Task 7: Recibe el parámetro 'score'. Si score es igual a 100, establece _status en 200 y...\n",
" -> Got 134 chars of code\n",
"[8/30] Task 8: Crea una lista con el elemento 'item1', obtén su longitud y devuelve la longitud...\n",
" -> Got 78 chars of code\n",
"[9/30] Task 9: Recibe el parámetro 'items' como lista de query params, obtén su longitud y devu...\n",
" -> Got 85 chars of code\n",
"[10/30] Task 10: Recibe el parámetro 'data' como JSON, extrae el campo 'name' y devuélvelo como r...\n",
" -> Got 66 chars of code\n",
"[11/30] Task 11: Crea un objeto JSON vacío, agrega el campo 'status' con valor 'ok' y devuelve el...\n",
" -> Got 61 chars of code\n",
"[12/30] Task 12: Recibe el parámetro 'password', genera su hash MD5 y devuelve el hash como resul...\n",
" -> Got 44 chars of code\n",
"[13/30] Task 13: Obtén la fecha y hora actual en formato 'YYYY-MM-DD' en la zona horaria 'UTC' y ...\n",
" -> Got 85 chars of code\n",
"[14/30] Task 14: Recibe el parámetro 'epoch', conviértelo a string de fecha en formato 'YYYY-MM-D...\n",
" -> Got 94 chars of code\n",
"[15/30] Task 15: Recibe el parámetro 'date_str' en formato 'YYYY-MM-DD', conviértelo a epoch y de...\n",
" -> Got 102 chars of code\n",
"[16/30] Task 16: Define una función que recibe un número y devuelve su cuadrado. Llama a la funci...\n",
" -> Got 89 chars of code\n",
"[17/30] Task 17: Define una función que recibe dos números y devuelve su suma. Llama a la función...\n",
" -> Got 89 chars of code\n",
"[18/30] Task 18: Usa un bloque try/exception para intentar dividir el parámetro 'num' entre 0. Si...\n",
" -> Got 116 chars of code\n",
"[19/30] Task 19: Recibe el parámetro 'url', realiza una petición GET a esa URL con timeout de 500...\n",
" -> Got 86 chars of code\n",
"[20/30] Task 20: Recibe los parámetros 'url' y 'body', realiza una petición POST con timeout de 3...\n",
" -> Got 115 chars of code\n",
"[21/30] Task 21: Instancia un conector externo con UUID '20908e93260147acb2636967021fbf5d', llama...\n",
" -> Got 131 chars of code\n",
"[22/30] Task 22: Lanza una función 'fetchData' de forma asíncrona con go, espera el resultado con...\n",
" -> Got 81 chars of code\n",
"[23/30] Task 23: Recibe el parámetro 'n', itera desde 0 hasta n acumulando la suma y devuelve la ...\n",
" -> Got 126 chars of code\n",
"[24/30] Task 24: Recibe el parámetro 'value'. Usando if Modo 2, si value es mayor que 0 y menor q...\n",
" -> Got 180 chars of code\n",
"[25/30] Task 25: Realiza una consulta ORM a la tabla 'users' seleccionando todos los campos sin f...\n",
" -> Got 69 chars of code\n",
"[26/30] Task 26: Recibe los parámetros 'username' y 'email', inserta un registro en la tabla 'use...\n",
" -> Got 74 chars of code\n",
"[27/30] Task 27: Recibe el parámetro 'user_id', actualiza el campo 'active' a 1 en la tabla 'user...\n",
" -> Got 82 chars of code\n",
"[28/30] Task 28: Importa la librería nativa 'math', calcula el cuadrado de 9 usando una función y...\n",
" -> Got 48 chars of code\n",
"[29/30] Task 29: Recibe el parámetro 'items_json' como JSON con una lista bajo la clave 'items'. ...\n",
" -> Got 81 chars of code\n",
"[30/30] Task 30: Recibe el parámetro 'token'. Si el token tiene exactamente 32 caracteres (usando...\n",
" -> Got 145 chars of code\n",
"\n",
"Done. 30 succeeded, 0 errors.\n"
]
}
],
"source": [
"# Process all tasks: call the agent for each one\n",
"errors = []\n",
"\n",
"for i, task in enumerate(tasks):\n",
"    query = task[\"text\"]\n",
"    task_id = task[\"task_id\"]\n",
"    print(f\"[{i + 1}/{len(tasks)}] Task {task_id}: {query[:80]}...\")\n",
"\n",
"    try:\n",
"        code = ask_agent(query)\n",
"    except Exception as e:\n",
"        # Keep going: one failed request must not abort the whole run.\n",
"        errors.append({\"task_id\": task_id, \"error\": str(e)})\n",
"        print(f\" -> ERROR: {e}\")\n",
"    else:\n",
"        task[\"code\"] = code\n",
"        if code:\n",
"            print(f\" -> Got {len(code)} chars of code\")\n",
"        else:\n",
"            # ask_agent returns \"\" when the reply has no ```avap``` block;\n",
"            # record it so the summary does not count it as a success.\n",
"            message = \"no ```avap``` code block in the response\"\n",
"            errors.append({\"task_id\": task_id, \"error\": message})\n",
"            print(f\" -> ERROR: {message}\")\n",
"\n",
"    time.sleep(0.5)  # small delay between requests\n",
"\n",
"print(f\"\\nDone. {len(tasks) - len(errors)} succeeded, {len(errors)} errors.\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3ce3ef4a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Task 1:\n",
" text: Captura el parámetro 'username' de la petición HTTP y devuélvelo como resultado.\n",
" code: addParam(\"username\", targetUsername) # or: targetUsername = \"username\"\n",
"targetUsername = addVar(targetUsername, \"None\")\n",
"\n",
"Task 2:\n",
" text: Recibe el parámetro 'email' y establece el código de estado HTTP en 200. Devuelv\n",
" code: addVar(_status, 200) # OK\n",
"addParam(\"email\", targetEmail)\n",
"addResult(targetEmail)\n",
"\n",
"Task 3:\n",
" text: Recibe el parámetro 'password', genera su hash SHA-256 y devuelve el hash como r\n",
" code: hash = generateSHA256(password)\n",
"addVar(_status, 200) # OK\n",
"addResult(hash)\n",
"\n",
"Task 4:\n",
" text: Recibe el parámetro 'text', reemplaza todos los espacios por guiones bajos y dev\n",
" code: replaceSpacesWithDashes(text)\n",
"addResult(text.replace(\" \", \"-\"))\n",
"\n",
"Task 5:\n",
" text: Genera un token aleatorio de 32 caracteres alfanuméricos y devuélvelo como resul\n",
" code: randomString(\"[0-9a-zA-Z]\", 32, \"dash\")\n",
"addResult(randomString(\"[0-9a-zA-Z]\", 32, \"dash\"))\n",
"\n",
"Task 6:\n",
" text: Recibe el parámetro 'age'. Si age es mayor que 18, devuelve 'adulto'; de lo cont\n",
" code: if(age > 18):\n",
" addVar(_status, 200) # OK\n",
"else:\n",
" addVar(_status, 403) # Forbidden\n",
"addResult(\"adulto\" if age > 18 el\n",
"\n",
"Task 7:\n",
" text: Recibe el parámetro 'score'. Si score es igual a 100, establece _status en 200 y\n",
" code: if(score == 100):\n",
" addVar(_status, 200) # OK\n",
"else:\n",
" addVar(_status, 400)\n",
"addResult(\"perfecto\" if score == 100 else\n",
"\n",
"Task 8:\n",
" text: Crea una lista con el elemento 'item1', obtén su longitud y devuelve la longitud\n",
" code: variableToList(\"item1\", targetList)\n",
"getListLen(targetList, len)\n",
"addResult(len)\n",
"\n",
"Task 9:\n",
" text: Recibe el parámetro 'items' como lista de query params, obtén su longitud y devu\n",
" code: getQueryParamList(\"paramName\", targetList)\n",
"getListLen(targetList, len)\n",
"addResult(len)\n",
"\n",
"Task 10:\n",
" text: Recibe el parámetro 'data' como JSON, extrae el campo 'name' y devuélvelo como r\n",
" code: variableFromJSON(\"data\", \"name\", targetName)\n",
"addResult(targetName)\n",
"\n",
"Task 11:\n",
" text: Crea un objeto JSON vacío, agrega el campo 'status' con valor 'ok' y devuelve el\n",
" code: emptyObject()\n",
"addVar(\"status\", \"ok\")\n",
"addResult(emptyObject())\n",
"\n",
"Task 12:\n",
" text: Recibe el parámetro 'password', genera su hash MD5 y devuelve el hash como resul\n",
" code: hash = generateMD5(password)\n",
"addResult(hash)\n",
"\n",
"Task 13:\n",
" text: Obtén la fecha y hora actual en formato 'YYYY-MM-DD' en la zona horaria 'UTC' y \n",
" code: getDateTime(\"UTC\", \"local\", 0, targetDate)\n",
"addResult(targetDate.strftime(\"%Y-%m-%d\"))\n",
"\n",
"Task 14:\n",
" text: Recibe el parámetro 'epoch', conviértelo a string de fecha en formato 'YYYY-MM-D\n",
" code: getDateTime(\"UTC\", \"local\", 0, targetDate)\n",
"addResult(targetDate.strftime(\"%Y-%m-%d %H:%M:%S\"))\n",
"\n",
"Task 15:\n",
" text: Recibe el parámetro 'date_str' en formato 'YYYY-MM-DD', conviértelo a epoch y de\n",
" code: getDateTime(\"UTC\", \"local\", 0, targetDate)\n",
"addResult(targetDate.strftime(\"%Y-%m-%d\").replace(\"-\", \"\"))\n",
"\n",
"Task 16:\n",
" text: Define una función que recibe un número y devuelve su cuadrado. Llama a la funci\n",
" code: def square(n):\n",
" result = n * n\n",
" return result\n",
"\n",
"result = square(5)\n",
"addResult(result)\n",
"\n",
"Task 17:\n",
" text: Define una función que recibe dos números y devuelve su suma. Llama a la función\n",
" code: def add(a, b):\n",
" result = a + b\n",
" return result\n",
"\n",
"result = add(5, 3)\n",
"addResult(result)\n",
"\n",
"Task 18:\n",
" text: Usa un bloque try/exception para intentar dividir el parámetro 'num' entre 0. Si\n",
" code: try:\n",
" result = num / 0\n",
"except ZeroDivisionError:\n",
" addVar(_status, 403) # Forbidden\n",
"addResult(\"error_division\")\n",
"\n",
"Task 19:\n",
" text: Recibe el parámetro 'url', realiza una petición GET a esa URL con timeout de 500\n",
" code: addVar(_status, 200) # OK\n",
"addParam(\"url\", targetUrl)\n",
"addResult(getResponse(targetUrl))\n",
"\n",
"Task 20:\n",
" text: Recibe los parámetros 'url' y 'body', realiza una petición POST con timeout de 3\n",
" code: addVar(_status, 200) # OK\n",
"addParam(\"url\", targetUrl)\n",
"addParam(\"body\", targetBody)\n",
"addResult(getResponse(targetUrl))\n",
"\n",
"Task 21:\n",
" text: Instancia un conector externo con UUID '20908e93260147acb2636967021fbf5d', llama\n",
" code: belvo_connector = avapConnector(\"20908e93260147acb2636967021fbf5d\")\n",
"addVar(_status, 200) # OK\n",
"addResult(getStatus(belvo_\n",
"\n",
"Task 22:\n",
" text: Lanza una función 'fetchData' de forma asíncrona con go, espera el resultado con\n",
" code: go fetchData()\n",
"resultado = gather(\"fetchData\", timeout=2000)\n",
"addResult(resultado)\n",
"\n",
"Task 23:\n",
" text: Recibe el parámetro 'n', itera desde 0 hasta n acumulando la suma y devuelve la \n",
" code: def sum(n):\n",
" result = 0\n",
" for i in range(n + 1):\n",
" result += i\n",
" return result\n",
"\n",
"result = sum(5)\n",
"addResult(r\n",
"\n",
"Task 24:\n",
" text: Recibe el parámetro 'value'. Usando if Modo 2, si value es mayor que 0 y menor q\n",
" code: if(value > 0 and value < 100):\n",
" addVar(_status, 200) # OK\n",
"else:\n",
" addVar(_status, 403) # Forbidden\n",
"addResult(\"rango\n",
"\n",
"Task 25:\n",
" text: Realiza una consulta ORM a la tabla 'users' seleccionando todos los campos sin f\n",
" code: ormAccessSelect(\"*\", \"users\", \"\", targetUsers)\n",
"addResult(targetUsers)\n",
"\n",
"Task 26:\n",
" text: Recibe los parámetros 'username' y 'email', inserta un registro en la tabla 'use\n",
" code: ormAccessInsert(\"username\", \"email\", \"\", targetUser)\n",
"addResult(targetUser)\n",
"\n",
"Task 27:\n",
" text: Recibe el parámetro 'user_id', actualiza el campo 'active' a 1 en la tabla 'user\n",
" code: ormAccessUpdate(\"active = 1\", \"users\", \"id = ?\", targetUser)\n",
"addResult(targetUser)\n",
"\n",
"Task 28:\n",
" text: Importa la librería nativa 'math', calcula el cuadrado de 9 usando una función y\n",
" code: import math\n",
"result = square(9)\n",
"addResult(result)\n",
"\n",
"Task 29:\n",
" text: Recibe el parámetro 'items_json' como JSON con una lista bajo la clave 'items'. \n",
" code: getQueryParamList(\"items\", targetList)\n",
"getListLen(targetList, len)\n",
"addResult(len)\n",
"\n",
"Task 30:\n",
" text: Recibe el parámetro 'token'. Si el token tiene exactamente 32 caracteres (usando\n",
" code: if(len(token) == 32):\n",
" addVar(_status, 200)\n",
"else:\n",
" addVar(_status, 401)\n",
"addResult(\"token_valido\" if len(token) == \n",
"\n"
]
}
],
"source": [
"# Show every task's prompt and generated code, truncated to keep the output short\n",
"for task in tasks:\n",
"    print(\n",
"        f\"Task {task['task_id']}:\",\n",
"        f\" text: {task['text'][:80]}\",\n",
"        f\" code: {task['code'][:120]}\",\n",
"        \"\",\n",
"        sep=\"\\n\",\n",
"    )\n",
"\n",
"# List whatever failures were recorded in `errors`\n",
"if errors:\n",
"    print(\"Errors:\")\n",
"    for e in errors:\n",
"        print(f\" Task {e['task_id']}: {e['error']}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "d19a6325",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Saved to /home/acano/PycharmProjects/assistance-engine/synthetic_datasets/multipl_e_synthetic_dataset.json\n"
]
}
],
"source": [
"# Write the tasks, with their generated code, to OUTPUT_PATH as UTF-8 JSON\n",
"with open(OUTPUT_PATH, mode=\"w\", encoding=\"utf-8\") as out_file:\n",
"    json.dump(tasks, out_file, ensure_ascii=False, indent=2)\n",
"\n",
"print(f\"Saved to {OUTPUT_PATH.resolve()}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "assistance-engine",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}