|
|
|
|
@ -2,7 +2,7 @@
|
|
|
|
|
"cells": [
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"execution_count": 67,
|
|
|
|
|
"id": "5b646fb1",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
@ -21,7 +21,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"execution_count": 68,
|
|
|
|
|
"id": "274d6d68",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
@ -40,7 +40,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 22,
|
|
|
|
|
"execution_count": 69,
|
|
|
|
|
"id": "0a8abbfa",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
@ -64,20 +64,85 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"execution_count": 81,
|
|
|
|
|
"id": "26927d0c",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import re\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"def bnf_to_lark(bnf_text):\n",
|
|
|
|
|
" text = re.sub(r\"<([^>]+)>\", r\"\\1\", bnf_text) # remove <>\n",
|
|
|
|
|
" text = text.replace(\"::=\", \":\")\n",
|
|
|
|
|
" return text"
|
|
|
|
|
" # 1. ELIMINAR COMENTARIOS HUMANOS (/* ... */) COMPLETAMENTE\n",
|
|
|
|
|
" # Limpiamos cualquier texto entre /* y */ antes de procesar el BNF\n",
|
|
|
|
|
" text = re.sub(r\"/\\*.*?\\*/\", \"\", bnf_text, flags=re.DOTALL)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # 2. TRANSFORMACIÓN ESTRUCTURAL\n",
|
|
|
|
|
" text = re.sub(r\"<([^>]+)>\", r\"\\1\", text) # Quitar < >\n",
|
|
|
|
|
" text = text.replace(\"::=\", \":\") # Cambiar ::= por :\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # 3. LIMPIEZA DE LÍNEAS RESIDUALES\n",
|
|
|
|
|
" # Eliminamos líneas que quedaron vacías o solo con texto descriptivo\n",
|
|
|
|
|
" lines = []\n",
|
|
|
|
|
" for line in text.split('\\n'):\n",
|
|
|
|
|
" line = line.strip()\n",
|
|
|
|
|
" # Solo conservamos líneas que parezcan reglas (tengan :) o sean parte de una definición\n",
|
|
|
|
|
" if \":\" in line or \"|\" in line or line.startswith(\" \") or line == \"\":\n",
|
|
|
|
|
" lines.append(line)\n",
|
|
|
|
|
" text = \"\\n\".join(lines)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # 4. FORZAR MAYÚSCULAS PARA TERMINALES (LEXER)\n",
|
|
|
|
|
" # Lark LALR necesita tokens en MAYÚSCULAS\n",
|
|
|
|
|
" terminals = [\n",
|
|
|
|
|
" 'identifier', 'stringliteral', 'eol', 'doc_comment', \n",
|
|
|
|
|
" 'line_comment', 'block_comment', 'any_text', 'any_content', 'number'\n",
|
|
|
|
|
" ]\n",
|
|
|
|
|
" for t in terminals:\n",
|
|
|
|
|
" text = re.sub(rf'\\b{t}\\b', t.upper(), text)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" # 5. INYECTAR DEFINICIONES LARK VÁLIDAS\n",
|
|
|
|
|
" # Sustituimos definiciones rotas por las correctas al final\n",
|
|
|
|
|
" text = re.sub(r\"EOL\\s*:.*\", \"\", text)\n",
|
|
|
|
|
" text = re.sub(r\"ANY_TEXT\\s*:.*\", \"\", text)\n",
|
|
|
|
|
" text = re.sub(r\"ANY_CONTENT\\s*:.*\", \"\", text)\n",
|
|
|
|
|
" text = re.sub(r\"IDENTIFIER\\s*:.*\", \"\", text)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
" footer = r\"\"\"\n",
|
|
|
|
|
"// --- TERMINALES DEFINITIVOS ---\n",
|
|
|
|
|
"IDENTIFIER: /[a-zA-Z_][a-zA-Z0-9_]*/\n",
|
|
|
|
|
"STRINGLITERAL: /\"([^\"\\\\\\\\]|\\\\\\\\.)*\"/\n",
|
|
|
|
|
"NUMBER: /\\d+(\\.\\d+)?/\n",
|
|
|
|
|
"EOL: /\\r?\\n/\n",
|
|
|
|
|
"DOC_COMMENT.2: \"///\" /[^\\r\\n]*/\n",
|
|
|
|
|
"LINE_COMMENT: \"//\" /[^\\r\\n]*/\n",
|
|
|
|
|
"BLOCK_COMMENT: \"/*\" /(.|\\n)*?/ \"*/\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"// --- REGLAS DE APOYO ---\n",
|
|
|
|
|
"expression: IDENTIFIER | STRINGLITERAL | NUMBER | list_display\n",
|
|
|
|
|
"literal: STRINGLITERAL | NUMBER\n",
|
|
|
|
|
"argument_list: expression ( \",\" expression )*\n",
|
|
|
|
|
"list_display: \"[\" [argument_list] \"]\"\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"// --- STUBS PARA REGLAS MENCIONADAS ---\n",
|
|
|
|
|
"function_decl: \"def\" IDENTIFIER \"(\" [argument_list] \")\" \":\" EOL\n",
|
|
|
|
|
"return_stmt: \"return\" [expression]\n",
|
|
|
|
|
"io_command: \"print\" \"(\" expression \")\"\n",
|
|
|
|
|
"control_flow: \"if\" expression \":\" EOL\n",
|
|
|
|
|
"async_command: \"async\" statement\n",
|
|
|
|
|
"connector_cmd: \"connect\" \"(\" \")\"\n",
|
|
|
|
|
"db_command: \"db\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\n",
|
|
|
|
|
"http_command: \"http\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\n",
|
|
|
|
|
"util_command: \"util\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\n",
|
|
|
|
|
"modularity_cmd: \"import\" IDENTIFIER\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"%import common.WS_INLINE\n",
|
|
|
|
|
"%ignore WS_INLINE\n",
|
|
|
|
|
"\"\"\"\n",
|
|
|
|
|
" return text.strip() + \"\\n\" + footer.strip()\n"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 11,
|
|
|
|
|
"execution_count": 71,
|
|
|
|
|
"id": "89be8bf6",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
@ -172,35 +237,13 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 12,
|
|
|
|
|
"id": "bde351ba",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"/home/pseco/VsCodeProjects/assistance-engine/scratches/pseco/ingestion/Code Ingestion\n",
|
|
|
|
|
"True\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"from pathlib import Path\n",
|
|
|
|
|
"print(Path.cwd())\n",
|
|
|
|
|
"print(Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\").exists())"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 13,
|
|
|
|
|
"execution_count": 77,
|
|
|
|
|
"id": "c66842c7",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from pathlib import Path\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"# Load BNF grammar from file\n",
|
|
|
|
|
"bnf_path = Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\")\n",
|
|
|
|
|
"if not bnf_path.exists():\n",
|
|
|
|
|
" raise FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n",
|
|
|
|
|
@ -210,8 +253,8 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"id": "07bb32cb",
|
|
|
|
|
"execution_count": 78,
|
|
|
|
|
"id": "ebf4aaac",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
@ -220,7 +263,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 24,
|
|
|
|
|
"execution_count": 79,
|
|
|
|
|
"id": "8122b603",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
@ -228,10 +271,16 @@
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"('program : ( line | block_comment )*\\n'\n",
|
|
|
|
|
" 'line : [ statement ] [ line_comment | doc_comment ] EOL\\n'\n",
|
|
|
|
|
" ' | ( line_comment | doc_comment ) EOL\\n'\n",
|
|
|
|
|
" 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n",
|
|
|
|
|
"('program : ( line | BLOCK_COMMENT )*\\n'\n",
|
|
|
|
|
" 'line : [ statement ] [ LINE_COMMENT | DOC_COMMENT ] EOL\\n'\n",
|
|
|
|
|
" ' | ( LINE_COMMENT | DOC_COMMENT ) EOL\\n'\n",
|
|
|
|
|
" 'EOL\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" 'DOC_COMMENT : \"///\" ANY_TEXT\\n'\n",
|
|
|
|
|
" 'LINE_COMMENT : \"//\" ANY_TEXT\\n'\n",
|
|
|
|
|
" 'BLOCK_COMMENT : \"/*\" ANY_CONTENT \"*/\"\\n'\n",
|
|
|
|
|
" 'ANY_TEXT\\n'\n",
|
|
|
|
|
" 'ANY_CONTENT\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" 'statement : assignment\\n'\n",
|
|
|
|
|
" ' | method_call_stmt\\n'\n",
|
|
|
|
|
@ -248,35 +297,55 @@
|
|
|
|
|
" ' | util_command\\n'\n",
|
|
|
|
|
" ' | modularity_cmd\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" 'assignment : identifier \"=\" expression\\n'\n",
|
|
|
|
|
" 'assignment : IDENTIFIER \"=\" expression\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" '/* Llamada a función global (sin receptor de objeto) */\\n'\n",
|
|
|
|
|
" 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n",
|
|
|
|
|
" 'function_call_stmt : IDENTIFIER \"(\" [argument_list] \")\"\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n",
|
|
|
|
|
" 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n",
|
|
|
|
|
" 'method_call_stmt : IDENTIFIER \"=\" IDENTIFIER \".\" IDENTIFIER \"(\" '\n",
|
|
|
|
|
" '[argument_list] \")\"\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" 'system_command : register_cmd | addvar_cmd\\n'\n",
|
|
|
|
|
" 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n",
|
|
|
|
|
" 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n",
|
|
|
|
|
" '/* addVar asigna un valor a una variable. Acepta (valor, variable) o '\n",
|
|
|
|
|
" '(variable, valor).\\n'\n",
|
|
|
|
|
" ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n",
|
|
|
|
|
" 'al primero.\\n'\n",
|
|
|
|
|
" ' No está permitido pasar dos literales como argumentos. */\\n'\n",
|
|
|
|
|
" 'register_cmd : \"registerEndpoint(\" STRINGLITERAL \",\" STRINGLITERAL \",\" '\n",
|
|
|
|
|
" 'list_display \",\" STRINGLITERAL \",\" IDENTIFIER \",\" IDENTIFIER \")\"\\n'\n",
|
|
|
|
|
" 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n",
|
|
|
|
|
" 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n",
|
|
|
|
|
" '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n",
|
|
|
|
|
" 'identifier */\\n'\n",
|
|
|
|
|
" 'addvar_arg : IDENTIFIER | literal | \"$\" IDENTIFIER\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n",
|
|
|
|
|
" 'IDENTIFIER\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n",
|
|
|
|
|
" 'scope:\\n'\n",
|
|
|
|
|
" ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n",
|
|
|
|
|
" '404) */\\n'\n",
|
|
|
|
|
" 'system_variable : \"_status\"')\n"
|
|
|
|
|
" 'system_variable : \"_status\"\\n'\n",
|
|
|
|
|
" '// --- TERMINALES (LEXER) ---\\n'\n",
|
|
|
|
|
" 'IDENTIFIER: /[a-zA-Z_][a-zA-Z0-9_]*/\\n'\n",
|
|
|
|
|
" 'STRINGLITERAL: /\"[^\"\\\\r\\\\n]*\"/\\n'\n",
|
|
|
|
|
" 'NUMBER: /\\\\d+(\\\\.\\\\d+)?/\\n'\n",
|
|
|
|
|
" 'EOL: /\\\\r?\\\\n/\\n'\n",
|
|
|
|
|
" 'DOC_COMMENT.2: \"///\" ANY_TEXT\\n'\n",
|
|
|
|
|
" 'LINE_COMMENT: \"//\" ANY_TEXT\\n'\n",
|
|
|
|
|
" 'BLOCK_COMMENT: \"/*\" ANY_CONTENT \"*/\"\\n'\n",
|
|
|
|
|
" 'ANY_TEXT: /[^\\\\r\\\\n]*/\\n'\n",
|
|
|
|
|
" 'ANY_CONTENT: /(.|\\\\n)*?/(?=\\\\*/|$)\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" '// --- REGLAS DE SOPORTE (STUBS) ---\\n'\n",
|
|
|
|
|
" '// Estas reglas deben existir para que el parser no dé error de \"Undefined '\n",
|
|
|
|
|
" 'Rule\"\\n'\n",
|
|
|
|
|
" 'expression: IDENTIFIER | STRINGLITERAL | NUMBER | list_display\\n'\n",
|
|
|
|
|
" 'literal: STRINGLITERAL | NUMBER\\n'\n",
|
|
|
|
|
" 'argument_list: expression ( \",\" expression )*\\n'\n",
|
|
|
|
|
" 'list_display: \"[\" [argument_list] \"]\"\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" '// Stubs para los comandos que mencionas pero no defines en el BNF\\n'\n",
|
|
|
|
|
" 'function_decl: \"def\" IDENTIFIER \"(\" [argument_list] \")\" \":\" EOL\\n'\n",
|
|
|
|
|
" 'return_stmt: \"return\" [expression]\\n'\n",
|
|
|
|
|
" 'io_command: \"print\" \"(\" expression \")\"\\n'\n",
|
|
|
|
|
" 'control_flow: \"if\" expression \":\" EOL\\n'\n",
|
|
|
|
|
" 'async_command: \"async\" statement\\n'\n",
|
|
|
|
|
" 'connector_cmd: \"connect\" \"(\" \")\"\\n'\n",
|
|
|
|
|
" 'db_command: \"db\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\\n'\n",
|
|
|
|
|
" 'http_command: \"http\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\\n'\n",
|
|
|
|
|
" 'util_command: \"util\" \".\" IDENTIFIER \"(\" [argument_list] \")\"\\n'\n",
|
|
|
|
|
" 'modularity_cmd: \"import\" IDENTIFIER\\n'\n",
|
|
|
|
|
" '\\n'\n",
|
|
|
|
|
" '%import common.WS_INLINE\\n'\n",
|
|
|
|
|
" '%ignore WS_INLINE')\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
@ -286,39 +355,27 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 26,
|
|
|
|
|
"execution_count": 80,
|
|
|
|
|
"id": "993a3d63",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
|
"ename": "GrammarError",
|
|
|
|
|
"evalue": "Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n",
|
|
|
|
|
"output_type": "error",
|
|
|
|
|
"traceback": [
|
|
|
|
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
|
|
|
"\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m<END-OF-FILE>\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n",
|
|
|
|
|
"\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n",
|
|
|
|
|
"\nDuring handling of the above exception, another exception occurred:\n",
|
|
|
|
|
"\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)",
|
|
|
|
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m<?>\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> \u001b[39m\u001b[32m1185\u001b[39m tree = \u001b[43m_parse_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrammar_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1187\u001b[39m imports = {}\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stmt \u001b[38;5;129;01min\u001b[39;00m tree.children:\n",
|
|
|
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:955\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 954\u001b[39m context = e.get_context(text)\n\u001b[32m--> \u001b[39m\u001b[32m955\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GrammarError(\u001b[33m\"\u001b[39m\u001b[33mUnexpected input at line \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m column \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 956\u001b[39m (e.line, e.column, name, context))\n\u001b[32m 957\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedToken \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 958\u001b[39m context = e.get_context(text)\n",
|
|
|
|
|
"\u001b[31mGrammarError\u001b[39m: Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n"
|
|
|
|
|
"name": "stdout",
|
|
|
|
|
"output_type": "stream",
|
|
|
|
|
"text": [
|
|
|
|
|
"Unexpected token Token('_NL', '\\n\\n') at line 4, column 4.\n",
|
|
|
|
|
"Expected one of: \n",
|
|
|
|
|
"\t* _COLON\n",
|
|
|
|
|
"\t* _DOT\n",
|
|
|
|
|
"\n"
|
|
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"source": [
|
|
|
|
|
"parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")"
|
|
|
|
|
"try:\n",
|
|
|
|
|
" parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")\n",
|
|
|
|
|
"except Exception as e:\n",
|
|
|
|
|
" print(e) # shows which symbol is missing and where"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
|