diff --git a/docs/samples/hello_world.avap b/docs/samples/hello_world.avap index 2322564..6d5b8cc 100644 --- a/docs/samples/hello_world.avap +++ b/docs/samples/hello_world.avap @@ -1,3 +1,4 @@ -addParam("Alberto",name) -result = "Hello," + name +registerEndpoint("/hello_world","GET",[],"HELLO_WORLD",main,result) +addVar(name,"Alberto") +result = "Hello," + name addResult(result) \ No newline at end of file diff --git a/ingestion/code/BNF/n01_BNF.txt b/ingestion/code/BNF/n01_BNF.txt index 2763ab2..c10303b 100644 --- a/ingestion/code/BNF/n01_BNF.txt +++ b/ingestion/code/BNF/n01_BNF.txt @@ -1,7 +1,13 @@ ::= ( | )* ::= [ ] [ | ] | ( | ) - ::= /* Retorno de carro / Salto de línea (\n o \r\n) */ + ::= /(\r\n|\n)/ + + ::= "///" + ::= "//" + ::= "/*" "*/" + ::= [^\r\n]* + ::= /* Cualquier secuencia de caracteres que no contenga la subcadena "*/" */ ::= | @@ -20,23 +26,15 @@ ::= "=" -/* Llamada a función global (sin receptor de objeto) */ ::= "(" [] ")" -/* Llamada a método sobre un objeto conector (con receptor) */ ::= "=" "." "(" [] ")" ::= | ::= "registerEndpoint(" "," "," "," "," "," ")" -/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor). - Si ambos argumentos son identificadores, el valor del segundo se asigna al primero. - No está permitido pasar dos literales como argumentos. */ ::= "addVar(" "," ")" ::= | | "$" -/* Restricción semántica: al menos uno de los dos debe ser */ - ::= [a-zA-Z_] [a-zA-Z0-9_]* + ::= /[A-Za-z_][A-Za-z0-9_]*/ -/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope: - _status — código HTTP de respuesta (ej. 
addVar(_status, 401) o _status = 404) */ ::= "_status" \ No newline at end of file diff --git a/scratches/pseco/evaluation/parser/parser v1.py b/scratches/pseco/evaluation/parser/parser v1.py new file mode 100644 index 0000000..80602b2 --- /dev/null +++ b/scratches/pseco/evaluation/parser/parser v1.py @@ -0,0 +1,33 @@ +import subprocess + +CONTAINER_ID = "637a559cbf1d" +AVAP_BIN = "/opt/AVAP_Language_Server/avap.bin" + +def run_avap_file(avap_file_in_container: str): + cmd = [ + "docker", + "exec", + CONTAINER_ID, + "sh", + "-lc", + f'{AVAP_BIN} "{avap_file_in_container}"' + ] + + result = subprocess.run( + cmd, + capture_output=True, + text=True + ) + + return result + +result = run_avap_file("/opt/AVAP_Language_Server/sample/hello_world.avap") + +print("Return code:", result.returncode) +print("STDOUT:") +print(result.stdout) +print("STDERR:") +print(result.stderr) +print("RAW OUTPUT:") +print(repr(result.stdout)) +print(result.stdout.splitlines()) \ No newline at end of file diff --git a/scratches/pseco/evaluation/parser/parser v2.py b/scratches/pseco/evaluation/parser/parser v2.py new file mode 100644 index 0000000..b81cfb1 --- /dev/null +++ b/scratches/pseco/evaluation/parser/parser v2.py @@ -0,0 +1,27 @@ +import requests +import json +from pathlib import Path + +# curl "localhost:80/hello_world" + +avap_code = Path( + "/home/pseco/VsCodeProjects/assistance-engine/docs/samples/hola_mundo.avap" +).read_text(encoding="utf-8") + +response = requests.request( + "GET", + "http://localhost:9000/", + data=avap_code.encode("utf-8"), + headers={"Content-Type": "text/plain; charset=utf-8", "Accept": "application/json"}, + timeout=30, +) + +print("STATUS:", response.status_code) +print("RAW TEXT:") +print("HEADERS:", dict(response.headers)) +print("BODY:", response.text) +try: + print("\nJSON FORMATEADO:") + print(json.dumps(response.json(), indent=2, ensure_ascii=False)) +except Exception: + print("\nLa respuesta no es JSON válido") diff --git 
a/scratches/pseco/evaluation/parser/parser v3.py b/scratches/pseco/evaluation/parser/parser v3.py new file mode 100644 index 0000000..e69de29 diff --git a/scratches/pseco/ingestion/Code Ingestion/n02 BNF Check.ipynb b/scratches/pseco/ingestion/Code Ingestion/n02 BNF Check.ipynb index b72736f..279b9cb 100644 --- a/scratches/pseco/ingestion/Code Ingestion/n02 BNF Check.ipynb +++ b/scratches/pseco/ingestion/Code Ingestion/n02 BNF Check.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 7, + "execution_count": 67, "id": "5b646fb1", "metadata": {}, "outputs": [ @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 68, "id": "274d6d68", "metadata": {}, "outputs": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 69, "id": "0a8abbfa", "metadata": {}, "outputs": [], @@ -64,20 +64,85 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 81, "id": "26927d0c", "metadata": {}, "outputs": [], "source": [ + "import re\n", + "\n", "def bnf_to_lark(bnf_text):\n", - " text = re.sub(r\"<([^>]+)>\", r\"\\1\", bnf_text) # remove <>\n", - " text = text.replace(\"::=\", \":\")\n", - " return text" + " # 1. ELIMINAR COMENTARIOS HUMANOS (/* ... */) COMPLETAMENTE\n", + " # Limpiamos cualquier texto entre /* y */ antes de procesar el BNF\n", + " text = re.sub(r\"/\\*.*?\\*/\", \"\", bnf_text, flags=re.DOTALL)\n", + "\n", + " # 2. TRANSFORMACIÓN ESTRUCTURAL\n", + " text = re.sub(r\"<([^>]+)>\", r\"\\1\", text) # Quitar < >\n", + " text = text.replace(\"::=\", \":\") # Cambiar ::= por :\n", + "\n", + " # 3. 
LIMPIEZA DE LÍNEAS RESIDUALES\n", + " # Eliminamos líneas que quedaron vacías o solo con texto descriptivo\n", + " lines = []\n", + " for line in text.split('\\n'):\n", + " line = line.strip()\n", + " # Solo conservamos líneas que parezcan reglas (tengan :) o sean parte de una definición\n", + " if \":\" in line or \"|\" in line or line.startswith(\" \") or line == \"\":\n", + " lines.append(line)\n", + " text = \"\\n\".join(lines)\n", + "\n", + " # 4. FORZAR MAYÚSCULAS PARA TERMINALES (LEXER)\n", + " # Lark LALR necesita tokens en MAYÚSCULAS\n", + " terminals = [\n", + " 'identifier', 'stringliteral', 'eol', 'doc_comment', \n", + " 'line_comment', 'block_comment', 'any_text', 'any_content', 'number'\n", + " ]\n", + " for t in terminals:\n", + " text = re.sub(rf'\\b{t}\\b', t.upper(), text)\n", + "\n", + " # 5. INYECTAR DEFINICIONES LARK VÁLIDAS\n", + " # Sustituimos definiciones rotas por las correctas al final\n", + " text = re.sub(r\"EOL\\s*:.*\", \"\", text)\n", + " text = re.sub(r\"ANY_TEXT\\s*:.*\", \"\", text)\n", + " text = re.sub(r\"ANY_CONTENT\\s*:.*\", \"\", text)\n", + " text = re.sub(r\"IDENTIFIER\\s*:.*\", \"\", text)\n", + "\n", + " footer = r\"\"\"\n", + "// --- TERMINALES DEFINITIVOS ---\n", + "IDENTIFIER: /[a-zA-Z_][a-zA-Z0-9_]*/\n", + "STRINGLITERAL: /\"([^\"\\\\\\\\]|\\\\\\\\.)*\"/\n", + "NUMBER: /\\d+(\\.\\d+)?/\n", + "EOL: /\\r?\\n/\n", + "DOC_COMMENT.2: \"///\" /[^\\r\\n]*/\n", + "LINE_COMMENT: \"//\" /[^\\r\\n]*/\n", + "BLOCK_COMMENT: \"/*\" /(.|\\n)*?/ \"*/\"\n", + "\n", + "// --- REGLAS DE APOYO ---\n", + "expression: IDENTIFIER | STRINGLITERAL | NUMBER | list_display\n", + "literal: STRINGLITERAL | NUMBER\n", + "argument_list: expression ( \",\" expression )*\n", + "list_display: \"[\" [argument_list] \"]\"\n", + "\n", + "// --- STUBS PARA REGLAS MENCIONADAS ---\n", + "function_decl: \"def\" IDENTIFIER \"(\" [argument_list] \")\" \":\" EOL\n", + "return_stmt: \"return\" [expression]\n", + "io_command: \"print\" \"(\" expression \")\"\n", 
+ "control_flow: \"if\" expression \":\" EOL\n", + "async_command: \"async\" statement\n", + "connector_cmd: \"connect\" \"(\" \")\"\n", + "db_command: \"db\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\n", + "http_command: \"http\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\n", + "util_command: \"util\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\n", + "modularity_cmd: \"import\" IDENTIFIER\n", + "\n", + "%import common.WS_INLINE\n", + "%ignore WS_INLINE\n", + "\"\"\"\n", + " return text.strip() + \"\\n\" + footer.strip()\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 71, "id": "89be8bf6", "metadata": {}, "outputs": [], @@ -172,35 +237,13 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "bde351ba", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/home/pseco/VsCodeProjects/assistance-engine/scratches/pseco/ingestion/Code Ingestion\n", - "True\n" - ] - } - ], - "source": [ - "from pathlib import Path\n", - "print(Path.cwd())\n", - "print(Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\").exists())" - ] - }, - { - "cell_type": "code", - "execution_count": 13, + "execution_count": 77, "id": "c66842c7", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", - "# Load BNF grammar from file\n", "bnf_path = Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\")\n", "if not bnf_path.exists():\n", " raise FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n", @@ -210,8 +253,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "id": "07bb32cb", + "execution_count": 78, + "id": "ebf4aaac", "metadata": {}, "outputs": [], "source": [ @@ -220,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 79, "id": "8122b603", "metadata": {}, "outputs": [ @@ -228,10 +271,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "('program : ( line | block_comment )*\\n'\n", - " 'line : [ statement ] [ 
line_comment | doc_comment ] EOL\\n'\n", - " ' | ( line_comment | doc_comment ) EOL\\n'\n", - " 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n", + "('program : ( line | BLOCK_COMMENT )*\\n'\n", + " 'line : [ statement ] [ LINE_COMMENT | DOC_COMMENT ] EOL\\n'\n", + " ' | ( LINE_COMMENT | DOC_COMMENT ) EOL\\n'\n", + " 'EOL\\n'\n", + " '\\n'\n", + " 'DOC_COMMENT : \"///\" ANY_TEXT\\n'\n", + " 'LINE_COMMENT : \"//\" ANY_TEXT\\n'\n", + " 'BLOCK_COMMENT : \"/*\" ANY_CONTENT \"*/\"\\n'\n", + " 'ANY_TEXT\\n'\n", + " 'ANY_CONTENT\\n'\n", " '\\n'\n", " 'statement : assignment\\n'\n", " ' | method_call_stmt\\n'\n", @@ -248,35 +297,55 @@ " ' | util_command\\n'\n", " ' | modularity_cmd\\n'\n", " '\\n'\n", - " 'assignment : identifier \"=\" expression\\n'\n", + " 'assignment : IDENTIFIER \"=\" expression\\n'\n", " '\\n'\n", - " '/* Llamada a función global (sin receptor de objeto) */\\n'\n", - " 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n", + " 'function_call_stmt : IDENTIFIER \"(\" [argument_list] \")\"\\n'\n", " '\\n'\n", - " '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n", - " 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n", + " 'method_call_stmt : IDENTIFIER \"=\" IDENTIFIER \".\" IDENTIFIER \"(\" '\n", " '[argument_list] \")\"\\n'\n", " '\\n'\n", " 'system_command : register_cmd | addvar_cmd\\n'\n", - " 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n", - " 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n", - " '/* addVar asigna un valor a una variable. Acepta (valor, variable) o '\n", - " '(variable, valor).\\n'\n", - " ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n", - " 'al primero.\\n'\n", - " ' No está permitido pasar dos literales como argumentos. 
*/\\n'\n", + " 'register_cmd : \"registerEndpoint(\" STRINGLITERAL \",\" STRINGLITERAL \",\" '\n", + " 'list_display \",\" STRINGLITERAL \",\" IDENTIFIER \",\" IDENTIFIER \")\"\\n'\n", " 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n", - " 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n", - " '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n", - " 'identifier */\\n'\n", + " 'addvar_arg : IDENTIFIER | literal | \"$\" IDENTIFIER\\n'\n", " '\\n'\n", - " 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n", + " 'IDENTIFIER\\n'\n", " '\\n'\n", - " '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n", - " 'scope:\\n'\n", - " ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n", - " '404) */\\n'\n", - " 'system_variable : \"_status\"')\n" + " 'system_variable : \"_status\"\\n'\n", + " '// --- TERMINALES (LEXER) ---\\n'\n", + " 'IDENTIFIER: /[a-zA-Z_][a-zA-Z0-9_]*/\\n'\n", + " 'STRINGLITERAL: /\"[^\"\\\\r\\\\n]*\"/\\n'\n", + " 'NUMBER: /\\\\d+(\\\\.\\\\d+)?/\\n'\n", + " 'EOL: /\\\\r?\\\\n/\\n'\n", + " 'DOC_COMMENT.2: \"///\" ANY_TEXT\\n'\n", + " 'LINE_COMMENT: \"//\" ANY_TEXT\\n'\n", + " 'BLOCK_COMMENT: \"/*\" ANY_CONTENT \"*/\"\\n'\n", + " 'ANY_TEXT: /[^\\\\r\\\\n]*/\\n'\n", + " 'ANY_CONTENT: /(.|\\\\n)*?/(?=\\\\*/|$)\\n'\n", + " '\\n'\n", + " '// --- REGLAS DE SOPORTE (STUBS) ---\\n'\n", + " '// Estas reglas deben existir para que el parser no dé error de \"Undefined '\n", + " 'Rule\"\\n'\n", + " 'expression: IDENTIFIER | STRINGLITERAL | NUMBER | list_display\\n'\n", + " 'literal: STRINGLITERAL | NUMBER\\n'\n", + " 'argument_list: expression ( \",\" expression )*\\n'\n", + " 'list_display: \"[\" [argument_list] \"]\"\\n'\n", + " '\\n'\n", + " '// Stubs para los comandos que mencionas pero no defines en el BNF\\n'\n", + " 'function_decl: \"def\" IDENTIFIER \"(\" [argument_list] \")\" \":\" EOL\\n'\n", + " 'return_stmt: \"return\" [expression]\\n'\n", + " 'io_command: 
\"print\" \"(\" expression \")\"\\n'\n", + " 'control_flow: \"if\" expression \":\" EOL\\n'\n", + " 'async_command: \"async\" statement\\n'\n", + " 'connector_cmd: \"connect\" \"(\" \")\"\\n'\n", + " 'db_command: \"db\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\\n'\n", + " 'http_command: \"http\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\\n'\n", + " 'util_command: \"util\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\\n'\n", + " 'modularity_cmd: \"import\" IDENTIFIER\\n'\n", + " '\\n'\n", + " '%import common.WS_INLINE\\n'\n", + " '%ignore WS_INLINE')\n" ] } ], @@ -286,39 +355,27 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 80, "id": "993a3d63", "metadata": {}, "outputs": [ { - "ename": "GrammarError", - "evalue": "Unexpected input at line 4 column 52 in : \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n", - "output_type": "error", - "traceback": [ - "\u001b[31m---------------------------------------------------------------------------\u001b[39m", - "\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, 
start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n", - "\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n", - 
"\nDuring handling of the above exception, another exception occurred:\n", - "\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m 
\u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> \u001b[39m\u001b[32m1185\u001b[39m tree = \u001b[43m_parse_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrammar_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1187\u001b[39m imports = {}\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stmt \u001b[38;5;129;01min\u001b[39;00m tree.children:\n", - "\u001b[36mFile 
\u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:955\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 954\u001b[39m context = e.get_context(text)\n\u001b[32m--> \u001b[39m\u001b[32m955\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GrammarError(\u001b[33m\"\u001b[39m\u001b[33mUnexpected input at line \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m column \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 956\u001b[39m (e.line, e.column, name, context))\n\u001b[32m 957\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedToken \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 958\u001b[39m context = e.get_context(text)\n", - "\u001b[31mGrammarError\u001b[39m: Unexpected input at line 4 column 52 in : \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n" + "name": "stdout", + "output_type": "stream", + "text": [ + "Unexpected token Token('_NL', '\\n\\n') at line 4, column 4.\n", + "Expected one of: \n", + "\t* _COLON\n", + "\t* _DOT\n", + "\n" ] } ], "source": [ - "parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")" + "try:\n", + " parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")\n", + "except Exception as e:\n", + " print(e) # shows which symbol is missing and where" ] }, { diff --git a/scripts/pipelines/flows/mbap_tester.py b/scripts/pipelines/flows/mbap_tester.py new file mode 100644 index 0000000..f398544 --- /dev/null +++ b/scripts/pipelines/flows/mbap_tester.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python3 +""" +Simple MBAP test harness. 
+- Loads `data/raw/code/mbap.json` +- For each entry with `code` and `test_list`, runs a heuristic mock AVAP executor +- Compares returned `result` against each test's expected value and prints a summary + +Note: This is a heuristic mock executor — it supports many common AVAP idioms but +is not a full AVAP interpreter. It aims to be useful for quick verification. +""" + +import json +import re +import sys +import hashlib +import random +import string +from pathlib import Path + +MBAP_PATH = Path(__file__).resolve().parents[1] / "data" / "raw" / "code" / "mbap.json" + + +def load_mbap(path: Path): + with open(path, "r", encoding="utf-8") as f: + return json.load(f) + + +def transform_avap_to_python(code: str) -> str: + lines = code.splitlines() + out_lines = [] + for raw in lines: + line = raw.strip() + if not line: + continue + # remove addParam(...) calls + if re.match(r'addParam\(', line): + continue + # getListLen(var, lenvar) -> lenvar = len(var) + m = re.match(r'getListLen\(([^,]+),\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append(f"{m.group(2).strip()} = len({m.group(1).strip()})") + continue + # itemFromList(list, idx, var) -> var = list[idx] + m = re.match(r'itemFromList\(([^,]+),\s*([^,]+),\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append(f"{m.group(3).strip()} = {m.group(1).strip()}[{m.group(2).strip()}]") + continue + # AddVariableToJSON(idx, value, result) -> result.append(value) + m = re.match(r'AddVariableToJSON\(([^,]+),\s*([^,]+),\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append(f"{m.group(3).strip()}.append({m.group(2).strip()})") + continue + # startLoop(i, s, e) -> for i in range(s, e+1): + m = re.match(r'startLoop\(([^,]+),\s*([^,]+),\s*([^,]+)\)', line) + if m: + out_lines.append(f"for {m.group(1).strip()} in range({m.group(2).strip()}, {m.group(3).strip()}+1):") + continue + # simple else/end tokens + if line.startswith('else()'): + out_lines.append('else:') + continue + if 
line.startswith('endLoop') or line == 'end()': + continue + # if(a, b, "op") + m = re.match(r'if\(([^,]*),\s*([^,]*),\s*"([^"]+)"\)', line) + if m: + a = m.group(1).strip() + b = m.group(2).strip() + op = m.group(3).strip() + if op in ('==', '!=', '>', '<', '>=', '<='): + out_lines.append(f"if {a} {op} {b}:") + else: + # treat as raw expression + out_lines.append(f"if {op}:") + continue + # addResult(x) -> output = x + m = re.match(r'addResult\(([^)]+)\)', line) + if m: + out_lines.append(f"output = {m.group(1).strip()}") + continue + # encodeSHA256(src, dst) + m = re.match(r'encodeSHA256\(([^,]+),\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append("import hashlib") + out_lines.append(f"{m.group(2)} = hashlib.sha256(str({m.group(1)}).encode()).hexdigest()") + continue + # encodeMD5 + m = re.match(r'encodeMD5\(([^,]+),\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append("import hashlib") + out_lines.append(f"{m.group(2)} = hashlib.md5(str({m.group(1)}).encode()).hexdigest()") + continue + # randomString(len, token) + m = re.match(r'randomString\(([^,]+),\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append(f"{m.group(2)} = ''.join(__random.choice(__letters) for _ in range(int({m.group(1)})))") + continue + # getRegex(text, pattern, matches) -> use python re.findall + m = re.match(r'getRegex\(([^,]+),\s*"([^"]+)",\s*([A-Za-z_][A-Za-z0-9_]*)\)', line) + if m: + out_lines.append(f"{m.group(3)} = re.findall(r'{m.group(2)}', str({m.group(1)}))") + continue + # RequestGet/Post -> mock + if line.startswith('RequestGet(') or line.startswith('RequestPost('): + out_lines.append("response = {'_mock': 'response'}") + out_lines.append("output = response") + continue + # function foo(...) { -> def foo(...): + m = re.match(r'function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(([^)]*)\)\s*\{', line) + if m: + args = m.group(2).strip() + out_lines.append(f"def {m.group(1)}({args}):") + continue + # return(...) -> return ... 
+ m = re.match(r'return\(([^)]+)\)', line) + if m: + out_lines.append(f"return {m.group(1).strip()}") + continue + # simple replacements to make many lines valid python-ish + line = line.replace('&&', 'and').replace('||', 'or') + # remove trailing semicolons if any + if line.endswith(';'): + line = line[:-1] + out_lines.append(line) + # Ensure we always expose 'output' variable at the end if not set + py = '\n'.join(out_lines) + guard = "\nif 'output' not in locals():\n output = locals().get('result', None)\n" + return py + guard + + +def mock_execute(code: str, inputs: dict): + """Heuristic mock executor: + - Creates a restricted locals dict with provided `inputs` + - Transforms AVAP-ish code into Python-ish code + - Executes it and returns {'result': output} or {'error': msg} + """ + # prepare locals + globs = {"__builtins__": {"__import__": __import__, "len": len, "range": range, "int": int, "str": str, "locals": locals, "isinstance": isinstance}} + locals_: dict = {} + # expose helpers + locals_['re'] = re + locals_['hashlib'] = hashlib + locals_['__random'] = random + locals_['__letters'] = string.ascii_letters + string.digits + # copy input params into locals + if isinstance(inputs, dict): + for k, v in inputs.items(): + locals_[k] = v + # transform + py = transform_avap_to_python(code) + try: + exec(py, globs, locals_) + output = locals_.get('output', None) + return {'result': output} + except Exception as e: + return {'error': str(e), 'transformed': py} + + +def canonical_expected(test_item): + """Try to extract expected value from a test item. + Accepts several common shapes: {'input':..., 'expected':...}, + {'in':..., 'out':...}, or simple dicts. 
+ """ + if isinstance(test_item, dict): + if 'expected' in test_item: + return test_item['expected'] + if 'output' in test_item: + return test_item['output'] + if 'result' in test_item: + return test_item['result'] + # some datasets embed expected as the last item + if 'expected_result' in test_item: + return test_item['expected_result'] + return None + + +def canonical_input(test_item): + if isinstance(test_item, dict): + if 'input' in test_item: + return test_item['input'] + if 'in' in test_item: + return test_item['in'] + # if dict and contains params keys other than expected, assume it's the input itself + # heuristics: if contains keys other than 'expected'/'output' treat as params + keys = set(test_item.keys()) + if not keys.intersection({'expected','output','result','description'}): + return test_item + return {} + + +def run_all(path: Path): + data = load_mbap(path) + total = 0 + passed = 0 + for entry in data: + task_id = entry.get('task_id') + code = entry.get('code') + tests = entry.get('test_list') or [] + if not code: + continue + print(f"Task {task_id}: processing {len(tests)} tests") + for ti, test_item in enumerate(tests): + total += 1 + inp = canonical_input(test_item) + expected = canonical_expected(test_item) + res = mock_execute(code, inp) + if 'error' in res: + print(f" test #{ti+1}: ERROR -> {res['error']}") + # optionally show transformed code for debugging + # print(res.get('transformed')) + continue + got = res.get('result') + ok = (expected is None) or (got == expected) + if ok: + passed += 1 + status = 'PASS' + else: + status = 'FAIL' + print(f" test #{ti+1}: {status} | expected={expected!r} got={got!r}") + print(f"\nSummary: passed {passed}/{total} tests") + + +if __name__ == '__main__': + p = MBAP_PATH + if not p.exists(): + print(f"mbap.json not found at {p}") + sys.exit(1) + run_all(p)