diff --git a/docs/samples/hello_world.avap b/docs/samples/hello_world.avap new file mode 100644 index 0000000..2322564 --- /dev/null +++ b/docs/samples/hello_world.avap @@ -0,0 +1,3 @@ +addParam("Alberto",name) +result = "Hello," + name +addResult(result) \ No newline at end of file diff --git a/ingestion/code/n01_BNF.txt b/ingestion/code/n01_BNF.txt deleted file mode 100644 index 2763ab2..0000000 --- a/ingestion/code/n01_BNF.txt +++ /dev/null @@ -1,42 +0,0 @@ - ::= ( | )* - ::= [ ] [ | ] - | ( | ) - ::= /* Retorno de carro / Salto de línea (\n o \r\n) */ - - ::= - | - | - | - | - | - | - | - | - | - | - | - | - | - - ::= "=" - -/* Llamada a función global (sin receptor de objeto) */ - ::= "(" [] ")" - -/* Llamada a método sobre un objeto conector (con receptor) */ - ::= "=" "." "(" [] ")" - - ::= | - ::= "registerEndpoint(" "," "," "," "," "," ")" -/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor). - Si ambos argumentos son identificadores, el valor del segundo se asigna al primero. - No está permitido pasar dos literales como argumentos. */ - ::= "addVar(" "," ")" - ::= | | "$" -/* Restricción semántica: al menos uno de los dos debe ser */ - - ::= [a-zA-Z_] [a-zA-Z0-9_]* - -/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope: - _status — código HTTP de respuesta (ej. 
addVar(_status, 401) o _status = 404) */ - ::= "_status" \ No newline at end of file diff --git a/ingestion/code/n02_BNF.txt b/ingestion/code/n02_BNF.txt deleted file mode 100644 index dcb404a..0000000 --- a/ingestion/code/n02_BNF.txt +++ /dev/null @@ -1,5 +0,0 @@ - ::= | | | - ::= "addParam(" "," ")" - ::= "getListLen(" "," ")" - ::= "getQueryParamList(" "," ")" - ::= "addResult(" ")" \ No newline at end of file diff --git a/ingestion/code/n03_BNF.txt b/ingestion/code/n03_BNF.txt deleted file mode 100644 index 651582d..0000000 --- a/ingestion/code/n03_BNF.txt +++ /dev/null @@ -1,28 +0,0 @@ - ::= | | - - ::= "if(" ")" - - [ "else()" ] - "end()" - -/* if() soporta dos modos: - Modo 1 — comparación estructurada: los dos primeros argumentos deben ser - identificadores simples o literales, nunca expresiones de acceso. - Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']), - debe asignarse previamente a una variable. - Modo 2 — expresión libre: None, None, expresión compleja como string */ - ::= "," "," - | "None" "," "None" "," - ::= | - - ::= "startLoop(" "," "," ")" - - "endLoop()" - - ::= "try()" - - "exception(" ")" - - "end()" - - ::= * \ No newline at end of file diff --git a/ingestion/code/n04_BNF.txt b/ingestion/code/n04_BNF.txt deleted file mode 100644 index 253a0a8..0000000 --- a/ingestion/code/n04_BNF.txt +++ /dev/null @@ -1,3 +0,0 @@ - ::= | - ::= "=" "go" "(" [] ")" - ::= "=" "gather(" ["," ] ")" \ No newline at end of file diff --git a/ingestion/code/n05_BNF.txt b/ingestion/code/n05_BNF.txt deleted file mode 100644 index 2563bca..0000000 --- a/ingestion/code/n05_BNF.txt +++ /dev/null @@ -1,25 +0,0 @@ -/* Instanciación de conector de terceros y llamada a sus métodos dinámicos */ - ::= | - ::= "=" "avapConnector(" ")" - ::= [ "=" ] "." 
"(" [] ")" - -/* Cliente HTTP con Timeout Obligatorio */ - ::= | - ::= "RequestPost(" "," "," "," "," "," ")" - ::= "RequestGet(" "," "," "," "," ")" - -/* ORM y Persistencia (Estandarizado con tableName) */ - ::= | | | | | - ::= "ormDirect(" "," ")" - ::= "ormCheckTable(" "," ")" - ::= "ormCreateTable(" "," "," "," ")" - -/* ormAccessSelect(fields, tableName, selector, varTarget) */ - ::= "ormAccessSelect(" "," "," [] "," ")" - ::= "*" | - -/* ormAccessInsert(fieldsValues, tableName, varTarget) */ - ::= "ormAccessInsert(" "," "," ")" - -/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */ - ::= "ormAccessUpdate(" "," "," "," "," ")" \ No newline at end of file diff --git a/ingestion/code/n06_BNF.txt b/ingestion/code/n06_BNF.txt deleted file mode 100644 index bbb341b..0000000 --- a/ingestion/code/n06_BNF.txt +++ /dev/null @@ -1,29 +0,0 @@ -/* [CORRECCIÓN] Todas las subreglas de están ahora completamente expandidas. */ - ::= | | | | | | - -/* Manipulación de listas y JSON */ - ::= "variableToList(" "," ")" - | "itemFromList(" "," "," ")" - | "variableFromJSON(" "," "," ")" - | "AddVariableToJSON(" "," "," ")" - -/* Criptografía */ - ::= "encodeSHA256(" "," ")" - | "encodeMD5(" "," ")" - -/* Expresiones regulares */ - ::= "getRegex(" "," "," ")" - - ::= "getDateTime(" "," "," "," ")" -/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */ - - ::= "stampToDatetime(" "," "," "," ")" -/* Argumentos: epoch_origen, formato, timedelta, destino */ - | "getTimeStamp(" "," "," "," ")" -/* Argumentos: fecha_string, formato_entrada, timedelta, destino */ - - ::= "randomString(" "," ")" -/* Argumentos: longitud, destino */ - - ::= "replace(" "," "," "," ")" -/* Argumentos: origen, patron_busqueda, reemplazo, destino */ \ No newline at end of file diff --git a/ingestion/code/n07_BNF.txt b/ingestion/code/n07_BNF.txt deleted file mode 100644 index 630a99f..0000000 --- a/ingestion/code/n07_BNF.txt +++ /dev/null @@ -1,9 +0,0 @@ -/* Nota: las 
funciones utilizan llaves {} como delimitadores de bloque por decisión - arquitectónica explícita, diferenciándose de las estructuras de control (if, loop, try) - que usan palabras clave de cierre (end(), endLoop()). Ambos patrones coexisten - en la gramática y el parser los distingue por el token de apertura. */ - ::= "function" "(" [] ")" "{" - - "}" - ::= ("," )* - ::= "return(" [] ")" \ No newline at end of file diff --git a/ingestion/code/n08_BNF.txt b/ingestion/code/n08_BNF.txt deleted file mode 100644 index e42159f..0000000 --- a/ingestion/code/n08_BNF.txt +++ /dev/null @@ -1,3 +0,0 @@ - ::= | - ::= "include" " " - ::= "import" " " ( "<" ">" | ) \ No newline at end of file diff --git a/ingestion/code/n09_BNF.txt b/ingestion/code/n09_BNF.txt deleted file mode 100644 index 90c683e..0000000 --- a/ingestion/code/n09_BNF.txt +++ /dev/null @@ -1,62 +0,0 @@ -/* Jerarquía de Expresiones (Precedencia de menor a mayor) */ - ::= - ::= ( "or" )* - ::= ( "and" )* - ::= "not" | - - ::= ( )* - ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is" - - ::= ( ( "+" | "-" ) )* - ::= ( ( "*" | "/" | "%" ) )* - ::= ( "+" | "-" ) | - ::= [ "**" ] - -/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones) - La regla cubre también el acceso a métodos de objetos conector - (conector.metodo(...)) y el acceso por clave a sus resultados (resultado["key"]) */ - ::= - | "." - | "[" "]" - | "[" [] ":" [] [":" []] "]" - | "(" [] ")" - - ::= - | "$" - | - | "(" ")" - | - | - -/* Estructuras de Datos, Comprensiones y Argumentos */ - ::= "[" [] "]" - | "[" "for" "in" [] "]" - ::= "if" - ::= "{" [] "}" - ::= ( "," )* - ::= ":" - ::= ( "," )* - -/* Tipo numérico unificado */ - ::= | - -/* Literales (Tipos de Datos Primitivos Soportados) */ - ::= | | | "None" - ::= "True" | "False" - ::= [0-9]+ - ::= [0-9]+ "." [0-9]* | "." 
import re

# Expected shape of the value produced by docs/samples/hello_world.avap:
# the literal prefix "Hello," followed by one or more ASCII letters.
GREETING_PATTERN = re.compile(r'^Hello,[A-Za-z]+$')


def matches_greeting(text):
    """Return True when ``text`` is exactly ``Hello,<letters>``.

    ``fullmatch`` is used on purpose: with ``re.match`` the trailing ``$``
    also matches just before a final newline, so a value ending in a
    newline character was accepted as valid.

    Args:
        text: The string produced by the executed sample.

    Returns:
        bool: True only if the whole string matches the pattern.
    """
    return GREETING_PATTERN.fullmatch(text) is not None


# Value the sample is expected to return for the parameter "Alberto".
result = "Hello,Alberto"

# Match object on success, None otherwise (same contract as before).
result_regex = GREETING_PATTERN.fullmatch(result)

if result_regex:
    print("Result matches BNF")
else:
    # Make a failed check visible instead of printing nothing.
    print(f"Result does not match expected pattern: {result!r}")
b/scratches/pseco/ingestion/Code Ingestion/n02 BNF Check.ipynb new file mode 100644 index 0000000..b72736f --- /dev/null +++ b/scratches/pseco/ingestion/Code Ingestion/n02 BNF Check.ipynb @@ -0,0 +1,450 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "id": "5b646fb1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n", + "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "! uv pip install bnf" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "274d6d68", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n", + "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "! 
# Lexical pieces of a BNF text that need individual treatment. Alternatives
# are tried left to right at every position, so a block comment or a quoted
# terminal is consumed as a whole before any "<...>" or "::=" inside it can
# be seen. Quoted terminals are limited to a single line so that a lone quote
# inside a character class such as [^"\\] does not swallow following rules.
_BNF_TOKEN_RE = re.compile(
    r"""
      (?P<comment>/\*.*?\*/)
    | (?P<string>"(?:\\.|[^"\\\n])*"|'(?:\\.|[^'\\\n])*')
    | <(?P<name>[^<>\s]+)>
    | (?P<define>::=)
    """,
    re.DOTALL | re.VERBOSE,
)

# Rule names that chunk_blocks / chunk_avap_code report as block chunks.
_BLOCK_RULES = ("if_block", "loop_block", "try_block", "go_async_block", "function_block")


def bnf_to_lark(bnf_text):
    """Rewrite BNF notation into Lark-style rule notation.

    Transformations applied:
      * ``<name>``      -> ``name`` (names without whitespace only)
      * ``::=``         -> ``:``
      * ``/* ... */``   -> removed (the unconverted comments made
        ``Lark(...)`` fail with ``GrammarError``)
      * quoted terminals are copied verbatim, so ``"<"``, ``">"`` and
        ``"::="`` are no longer corrupted by the two rewrites above.

    Only the notation is rewritten. Character classes, empty productions and
    rule-name casing are left as they are, so the result is not guaranteed to
    be a loadable Lark grammar. A quote inside a character class that is
    followed by another quote on the same line is still read as a terminal.

    Args:
        bnf_text: BNF grammar source.

    Returns:
        str: The rewritten grammar text.
    """
    def _convert(match):
        kind = match.lastgroup
        if kind == "comment":
            return ""
        if kind == "name":
            return match.group("name")
        if kind == "define":
            return ":"
        # Quoted terminal: keep exactly as written.
        return match.group(0)

    return _BNF_TOKEN_RE.sub(_convert, bnf_text)


@dataclass
class Chunk:
    """One retrievable piece of source code produced by the chunkers."""

    # Source text of the chunk with surrounding whitespace stripped.
    text: str
    # "line" for atomic statements, otherwise the grammar rule of the block.
    kind: str
    # Extra attributes; the chunkers always set "granularity" and, for
    # atomic lines, "command".
    metadata: Dict[str, Any]


def _span(node: "Tree") -> Optional[Tuple[int, int]]:
    """Return ``(start_pos, end_pos)`` of ``node`` or None if unavailable.

    The offsets are read from ``node.meta``. Lark only fills them in when the
    parser is created with ``propagate_positions=True``; without that option
    this returns None for every node and the chunkers yield nothing.
    """
    meta = node.meta
    start = getattr(meta, "start_pos", None)
    end = getattr(meta, "end_pos", None)
    if start is None or end is None:
        return None
    return start, end


def _iter_trees(t: "Tree"):
    """Yield ``t`` and all of its sub-trees in pre-order, skipping tokens.

    Implemented with an explicit stack so deeply nested parse trees cannot
    hit Python's recursion limit.
    """
    pending = [t]
    while pending:
        node = pending.pop()
        yield node
        # Reverse so the left-most child is popped (and yielded) first.
        pending.extend(c for c in reversed(node.children) if isinstance(c, Tree))


def _cmd_name(line: str) -> Optional[str]:
    """Return the identifier directly preceding the first ``(`` at the start
    of ``line`` (leading whitespace allowed), or None if the line does not
    begin with a call-like form."""
    found = re.match(r"^\s*([A-Za-z_][A-Za-z0-9_]*)\s*\(", line)
    return found.group(1) if found else None


def _collect_chunks(tree: "Tree", code: str) -> Tuple[List[Chunk], List[Chunk]]:
    """Walk ``tree`` once and return ``(block_chunks, line_chunks)``.

    Nodes without position information or with whitespace-only text are
    skipped. Each list keeps pre-order traversal order.
    """
    blocks: List[Chunk] = []
    lines: List[Chunk] = []

    for node in _iter_trees(tree):
        rule = node.data
        is_line = rule == "stmt_line"
        if not is_line and rule not in _BLOCK_RULES:
            continue

        span = _span(node)
        if span is None:
            continue
        start, end = span
        text = code[start:end].strip()
        if not text:
            continue

        if is_line:
            lines.append(
                Chunk(
                    text=text,
                    kind="line",
                    metadata={
                        "granularity": "atomic",
                        "command": _cmd_name(text)
                    }
                )
            )
        else:
            blocks.append(
                Chunk(
                    text=text,
                    kind=rule,
                    metadata={"granularity": "block"}
                )
            )
    return blocks, lines


def chunk_atomic_lines(code: str) -> List[Chunk]:
    """Return one ``kind="line"`` chunk per ``stmt_line`` node in ``code``.

    Uses the module-level ``parser``, which must be defined before calling.
    """
    tree = parser.parse(code)
    return _collect_chunks(tree, code)[1]


def chunk_blocks(code: str) -> List[Chunk]:
    """Return one chunk per block node (if/loop/try/go_async/function).

    Uses the module-level ``parser``, which must be defined before calling.
    """
    tree = parser.parse(code)
    return _collect_chunks(tree, code)[0]


def chunk_avap_code(code: str) -> List[Chunk]:
    """Return all block chunks followed by all atomic line chunks.

    ``code`` is passed to the parser untouched (no lstrip) so the offsets in
    the parse tree index into the original string. The input is parsed once
    and both chunk kinds are gathered in a single traversal.
    """
    tree = parser.parse(code)
    blocks, lines = _collect_chunks(tree, code)
    return blocks + lines
FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n", + "\n", + "bnf_grammar: str = bnf_path.read_text(encoding=\"utf-8\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "07bb32cb", + "metadata": {}, + "outputs": [], + "source": [ + "lark_bnf = bnf_to_lark(bnf_grammar)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "8122b603", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('program : ( line | block_comment )*\\n'\n", + " 'line : [ statement ] [ line_comment | doc_comment ] EOL\\n'\n", + " ' | ( line_comment | doc_comment ) EOL\\n'\n", + " 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n", + " '\\n'\n", + " 'statement : assignment\\n'\n", + " ' | method_call_stmt\\n'\n", + " ' | function_call_stmt\\n'\n", + " ' | function_decl\\n'\n", + " ' | return_stmt\\n'\n", + " ' | system_command\\n'\n", + " ' | io_command\\n'\n", + " ' | control_flow\\n'\n", + " ' | async_command\\n'\n", + " ' | connector_cmd\\n'\n", + " ' | db_command\\n'\n", + " ' | http_command\\n'\n", + " ' | util_command\\n'\n", + " ' | modularity_cmd\\n'\n", + " '\\n'\n", + " 'assignment : identifier \"=\" expression\\n'\n", + " '\\n'\n", + " '/* Llamada a función global (sin receptor de objeto) */\\n'\n", + " 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n", + " '\\n'\n", + " '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n", + " 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n", + " '[argument_list] \")\"\\n'\n", + " '\\n'\n", + " 'system_command : register_cmd | addvar_cmd\\n'\n", + " 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n", + " 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n", + " '/* addVar asigna un valor a una variable. 
Acepta (valor, variable) o '\n", + " '(variable, valor).\\n'\n", + " ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n", + " 'al primero.\\n'\n", + " ' No está permitido pasar dos literales como argumentos. */\\n'\n", + " 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n", + " 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n", + " '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n", + " 'identifier */\\n'\n", + " '\\n'\n", + " 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n", + " '\\n'\n", + " '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n", + " 'scope:\\n'\n", + " ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n", + " '404) */\\n'\n", + " 'system_variable : \"_status\"')\n" + ] + } + ], + "source": [ + "pprint.PrettyPrinter().pprint(lark_bnf)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "993a3d63", + "metadata": {}, + "outputs": [ + { + "ename": "GrammarError", + "evalue": "Unexpected input at line 4 column 52 in : \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m 
\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m 
\u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n", + "\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* 
Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> 
# Build the LALR parser from the converted grammar text.
#  - start="program": the converted grammar's top rule is `program`; it does
#    not define a rule called `start`.
#  - propagate_positions=True: the chunking helpers read start_pos / end_pos
#    from node.meta, which Lark only fills in when this option is enabled.
# NOTE(review): the converted text may still contain constructs Lark rejects
# (e.g. bare character classes) — confirm the grammar loads after conversion.
parser = Lark(lark_bnf, parser="lalr", start="program", propagate_positions=True)
# --- EBNF check ---------------------------------------------------------
# NOTE(review): ebnf_grammar / ebnf_parse are not imported by any cell visible
# in this notebook (the import cell only binds bnf_grammar / bnf_parse) —
# confirm where they come from; on a fresh kernel this cell would raise
# NameError.
ebnf_text = r"""
assign ::= name '=' num ;
name ::= 'a' | 'b' | 'c' ;
num ::= [0-9] ;
"""

ebnf_grammar(ebnf_text)
print("BNF:", ebnf_parse("a=7"))

# --- Lark check, EBNF style ---------------------------------------------
# Kept under its own names: the chunking helpers read the global `parser`,
# so rebinding it to this toy grammar would silently change what they parse,
# and reusing `ebnf_text` would overwrite the EBNF sample above.
lark_text = r"""
start: assign

assign: name "=" num
name: "a" | "b" | "c"
num: DIGIT

DIGIT: /[0-9]/

%ignore " "
"""

toy_parser = Lark(lark_text)

print(toy_parser.parse("a=7"))