Working on scratches: BNF and parsing

This commit is contained in:
pseco 2026-03-11 12:28:35 +01:00
parent 3ac432567b
commit d04c149e66
12 changed files with 494 additions and 212 deletions

View File

@ -0,0 +1,3 @@
// Sample script: calls addParam with the string "Alberto" and the variable `name`,
// concatenates "Hello," with `name` into `result`, and passes `result` to addResult.
// NOTE(review): whether "Alberto" is a parameter key or a literal value is not visible here — confirm.
addParam("Alberto",name)
result = "Hello," + name
addResult(result)

View File

@ -1,42 +0,0 @@
/* Top-level structure: a program is any sequence of lines and block comments.
A line is an optional statement, an optional trailing comment, and an end-of-line. */
<program> ::= ( <line> | <block_comment> )*
<line> ::= [ <statement> ] [ <line_comment> | <doc_comment> ] <EOL>
| ( <line_comment> | <doc_comment> ) <EOL>
<EOL> ::= /* Carriage return / line feed (\n or \r\n) */
/* NOTE(review): <EOL> has no formal production here, only the descriptive comment above. */
<statement> ::= <assignment>
| <method_call_stmt>
| <function_call_stmt>
| <function_decl>
| <return_stmt>
| <system_command>
| <io_command>
| <control_flow>
| <async_command>
| <connector_cmd>
| <db_command>
| <http_command>
| <util_command>
| <modularity_cmd>
<assignment> ::= <identifier> "=" <expression>
/* Global function call (no object receiver) */
<function_call_stmt> ::= <identifier> "(" [<argument_list>] ")"
/* Method call on a connector object (with receiver) */
<method_call_stmt> ::= <identifier> "=" <identifier> "." <identifier> "(" [<argument_list>] ")"
<system_command> ::= <register_cmd> | <addvar_cmd>
<register_cmd> ::= "registerEndpoint(" <stringliteral> "," <stringliteral> "," <list_display> "," <stringliteral> "," <identifier> "," <identifier> ")"
/* addVar assigns a value to a variable. It accepts (value, variable) or (variable, value).
If both arguments are identifiers, the value of the second is assigned to the first.
Passing two literals as arguments is not allowed. */
<addvar_cmd> ::= "addVar(" <addvar_arg> "," <addvar_arg> ")"
<addvar_arg> ::= <identifier> | <literal> | "$" <identifier>
/* Semantic restriction: at least one of the two <addvar_arg> must be an <identifier> */
<identifier> ::= [a-zA-Z_] [a-zA-Z0-9_]*
/* Reserved system variables — accessible and assignable from any scope:
_status — HTTP response code (e.g. addVar(_status, 401) or _status = 404)
NOTE(review): no rule in this fragment references <system_variable>, and "_status" also matches <identifier>. */
<system_variable> ::= "_status"

View File

@ -1,5 +0,0 @@
/* I/O commands. Every form ends with an <identifier> argument
(presumably the variable the command reads or writes — confirm against the runtime).
addParam and getQueryParamList take a string literal first; getListLen takes two identifiers. */
<io_command> ::= <addparam_cmd> | <getlistlen_cmd> | <addresult_cmd> | <getparamlist_cmd>
<addparam_cmd> ::= "addParam(" <stringliteral> "," <identifier> ")"
<getlistlen_cmd> ::= "getListLen(" <identifier> "," <identifier> ")"
<getparamlist_cmd> ::= "getQueryParamList(" <stringliteral> "," <identifier> ")"
<addresult_cmd> ::= "addResult(" <identifier> ")"

View File

@ -1,28 +0,0 @@
/* Control-flow statements: conditional, loop and try/exception.
if and try blocks are closed by "end()"; loops are closed by "endLoop()". */
<control_flow> ::= <if_stmt> | <loop_stmt> | <try_stmt>
<if_stmt> ::= "if(" <if_condition> ")" <EOL>
<block>
[ "else()" <EOL> <block> ]
"end()" <EOL>
/* if() supports two modes:
Mode 1 — structured comparison: the first two arguments must be
simple identifiers or literals, never access expressions.
If a value extracted from a structure (e.g. dict['key']) needs to be compared,
it must first be assigned to a variable.
Mode 2 — free expression: None, None, complex expression as a string */
<if_condition> ::= <if_atom> "," <if_atom> "," <stringliteral>
| "None" "," "None" "," <stringliteral>
<if_atom> ::= <identifier> | <literal>
<loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
<block>
"endLoop()" <EOL>
<try_stmt> ::= "try()" <EOL>
<block>
"exception(" <identifier> ")" <EOL>
<block>
"end()" <EOL>
/* A block is zero or more lines. */
<block> ::= <line>*

View File

@ -1,3 +0,0 @@
/* Asynchronous commands. Both forms assign to an <identifier> on the left of "=".
go: the keyword "go" followed by a call-shaped <identifier> "(" ... ")".
gather: takes an <identifier> and an optional second <expression> argument. */
<async_command> ::= <go_stmt> | <gather_stmt>
<go_stmt> ::= <identifier> "=" "go" <identifier> "(" [<argument_list>] ")"
<gather_stmt> ::= <identifier> "=" "gather(" <identifier> ["," <expression>] ")"

View File

@ -1,25 +0,0 @@
/* Third-party connector instantiation and calls to its dynamic methods */
<connector_cmd> ::= <connector_instantiation> | <connector_method_call>
<connector_instantiation> ::= <identifier> "=" "avapConnector(" <stringliteral> ")"
<connector_method_call> ::= [ <identifier> "=" ] <identifier> "." <identifier> "(" [<argument_list>] ")"
/* HTTP client with mandatory timeout */
<http_command> ::= <req_post_cmd> | <req_get_cmd>
<req_post_cmd> ::= "RequestPost(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
<req_get_cmd> ::= "RequestGet(" <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
/* ORM and persistence (standardized with tableName) */
<db_command> ::= <orm_direct> | <orm_check> | <orm_create> | <orm_select> | <orm_insert> | <orm_update>
<orm_direct> ::= "ormDirect(" <expression> "," <identifier> ")"
<orm_check> ::= "ormCheckTable(" <expression> "," <identifier> ")"
<orm_create> ::= "ormCreateTable(" <expression> "," <expression> "," <expression> "," <identifier> ")"
/* ormAccessSelect(fields, tableName, selector, varTarget) */
<orm_select> ::= "ormAccessSelect(" <orm_fields> "," <expression> "," [<expression>] "," <identifier> ")"
<orm_fields> ::= "*" | <expression>
/* ormAccessInsert(fieldsValues, tableName, varTarget) */
<orm_insert> ::= "ormAccessInsert(" <expression> "," <expression> "," <identifier> ")"
/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */
<orm_update> ::= "ormAccessUpdate(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> ")"

View File

@ -1,29 +0,0 @@
/* [FIX] All subrules of <util_command> are now fully expanded.
Each "Arguments:" comment below describes the production or alternative directly above it. */
<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
/* List and JSON manipulation */
<json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")"
| "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
| "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
| "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
/* Cryptography */
<crypto_cmd> ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")"
| "encodeMD5(" <identifier_or_string> "," <identifier> ")"
/* Regular expressions */
<regex_cmd> ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")"
<datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
/* Arguments: output_format, source_epoch, time_zone, target */
<stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
/* Arguments: source_epoch, format, timedelta, target */
| "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
/* Arguments: date_string, input_format, timedelta, target */
<string_cmd> ::= "randomString(" <expression> "," <identifier> ")"
/* Arguments: length, target */
<replace_cmd> ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
/* Arguments: source, search_pattern, replacement, target */

View File

@ -1,9 +0,0 @@
/* Note: functions use braces {} as block delimiters by explicit architectural
decision, unlike the control structures (if, loop, try),
which use closing keywords (end(), endLoop()). Both patterns coexist
in the grammar and the parser tells them apart by the opening token. */
<function_decl> ::= "function" <identifier> "(" [<param_list>] ")" "{" <EOL>
<block>
"}" <EOL>
<param_list> ::= <identifier> ("," <identifier>)*
<return_stmt> ::= "return(" [<expression>] ")"

View File

@ -1,3 +0,0 @@
/* Modularity commands. Each keyword is followed by exactly one literal space.
include takes a string literal; import takes either an <identifier> wrapped in
angle brackets or a string literal. */
<modularity_cmd> ::= <include_stmt> | <import_stmt>
<include_stmt> ::= "include" " " <stringliteral>
<import_stmt> ::= "import" " " ( "<" <identifier> ">" | <stringliteral> )

View File

@ -1,62 +0,0 @@
/* Expression hierarchy (precedence from lowest to highest) */
<expression> ::= <logical_or>
<logical_or> ::= <logical_and> ( "or" <logical_and> )*
<logical_and> ::= <logical_not> ( "and" <logical_not> )*
<logical_not> ::= "not" <logical_not> | <comparison>
<comparison> ::= <arithmetic> ( <comp_op> <arithmetic> )*
<comp_op> ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is"
<arithmetic> ::= <term> ( ( "+" | "-" ) <term> )*
<term> ::= <factor> ( ( "*" | "/" | "%" ) <factor> )*
<factor> ::= ( "+" | "-" ) <factor> | <power>
<power> ::= <primary> [ "**" <factor> ]
/* Primaries and atoms (accesses, casts, slicing, methods and functions)
The <primary> rule also covers method access on connector objects
(connector.method(...)) and key access on their results (result["key"]) */
<primary> ::= <atom>
| <primary> "." <identifier>
| <primary> "[" <expression> "]"
| <primary> "[" [<expression>] ":" [<expression>] [":" [<expression>]] "]"
| <primary> "(" [<argument_list>] ")"
<atom> ::= <identifier>
| "$" <identifier>
| <literal>
| "(" <expression> ")"
| <list_display>
| <dict_display>
/* Data structures, comprehensions and arguments */
<list_display> ::= "[" [<argument_list>] "]"
| "[" <expression> "for" <identifier> "in" <expression> [<if_clause>] "]"
<if_clause> ::= "if" <expression>
<dict_display> ::= "{" [<key_datum_list>] "}"
<key_datum_list> ::= <key_datum> ( "," <key_datum> )*
<key_datum> ::= <expression> ":" <expression>
<argument_list> ::= <expression> ( "," <expression> )*
/* Unified numeric type */
<number> ::= <floatnumber> | <integer>
/* Literals (supported primitive data types) */
<literal> ::= <stringliteral> | <number> | <boolean> | "None"
<boolean> ::= "True" | "False"
<integer> ::= [0-9]+
<floatnumber> ::= [0-9]+ "." [0-9]* | "." [0-9]+
/* Text strings with escape-sequence support */
<stringliteral> ::= "\"" <text_double> "\"" | "'" <text_single> "'"
<escape_sequence> ::= "\\" ( "\"" | "'" | "\\" | "n" | "t" | "r" | "0" )
<text_double> ::= ( [^"\\] | <escape_sequence> )*
<text_single> ::= ( [^'\\] | <escape_sequence> )*
<identifier_or_string> ::= <identifier> | <stringliteral>
/* Comment rules for the lexer
The lexer applies longest-match: /// must be evaluated BEFORE // */
<doc_comment> ::= "///" <any_text>
<line_comment> ::= "//" <any_text>
<block_comment> ::= "/*" <any_content> "*/"
<any_text> ::= [^\r\n]*
<any_content> ::= /* Any sequence of characters that does not contain the substring "*/" */
/* NOTE(review): <any_content> has no formal production, only the descriptive comment above; the quoted
terminator inside that comment would end it early under the block-comment rule given here — confirm intent. */

View File

@ -1,5 +1,15 @@
{ {
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "e784361f",
"metadata": {},
"outputs": [],
"source": [
"import re"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
@ -171,16 +181,41 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"id": "d0a3051f",
"metadata": {},
"outputs": [],
"source": [
"result = \"Hello,Alberto\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a10a1017", "id": "a10a1017",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"bnf_text = r\"\"\"\n", "result_regex = re.match(r'^Hello,[A-Za-z]+$', result)"
"<assign> ::= <name> <num>\n", ]
"<name> ::= a | b | c\n", },
"<num> ::= [0-9]\n", {
"\"\"\"" "cell_type": "code",
"execution_count": 7,
"id": "06cd296f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Result matches BNF\n"
]
}
],
"source": [
"if result_regex:\n",
" print(\"Result matches BNF\")"
] ]
}, },
{ {

View File

@ -0,0 +1,450 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 7,
"id": "5b646fb1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"! uv pip install bnf"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "274d6d68",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
]
}
],
"source": [
"! uv pip install ebnf"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "0a8abbfa",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from dataclasses import dataclass\n",
"import pprint\n",
"from typing import Any, Dict, List, Optional, Tuple\n",
"from lark import Tree, Lark\n",
"from bnf import grammar as bnf_grammar, parse as bnf_parse\n",
"from src.config import settings"
]
},
{
"cell_type": "markdown",
"id": "baa779f3",
"metadata": {},
"source": [
"# Functions"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "26927d0c",
"metadata": {},
"outputs": [],
"source": [
"def bnf_to_lark(bnf_text):\n",
" text = re.sub(r\"<([^>]+)>\", r\"\\1\", bnf_text) # remove <>\n",
" text = text.replace(\"::=\", \":\")\n",
" return text"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "89be8bf6",
"metadata": {},
"outputs": [],
"source": [
"@dataclass\n",
"class Chunk:\n",
" text: str\n",
" kind: str\n",
" metadata: Dict[str, Any]\n",
"\n",
"def _span(node: Tree) -> Optional[Tuple[int, int]]:\n",
" m = node.meta\n",
" s = getattr(m, \"start_pos\", None)\n",
" e = getattr(m, \"end_pos\", None)\n",
" if s is None or e is None:\n",
" return None\n",
" return s, e\n",
"\n",
"def _iter_trees(t: Tree):\n",
" yield t\n",
" for c in t.children:\n",
" if isinstance(c, Tree):\n",
" yield from _iter_trees(c)\n",
"\n",
"def _cmd_name(line: str) -> Optional[str]:\n",
" m = re.match(r\"^\\s*([A-Za-z_][A-Za-z0-9_]*)\\s*\\(\", line)\n",
" return m.group(1) if m else None\n",
"\n",
"def chunk_atomic_lines(code: str) -> List[Chunk]:\n",
" tree = parser.parse(code)\n",
" chunks: List[Chunk] = []\n",
"\n",
" for node in _iter_trees(tree):\n",
" if node.data == \"stmt_line\":\n",
" sp = _span(node)\n",
" if not sp:\n",
" continue\n",
" s, e = sp\n",
" text = code[s:e].strip()\n",
" if not text:\n",
" continue\n",
"\n",
" chunks.append(\n",
" Chunk(\n",
" text=text,\n",
" kind=\"line\",\n",
" metadata={\n",
" \"granularity\": \"atomic\",\n",
" \"command\": _cmd_name(text)\n",
" }\n",
" )\n",
" )\n",
" return chunks\n",
"\n",
"def chunk_blocks(code: str) -> List[Chunk]:\n",
" tree = parser.parse(code)\n",
" chunks: List[Chunk] = []\n",
"\n",
" for node in _iter_trees(tree):\n",
" if node.data in (\"if_block\", \"loop_block\", \"try_block\", \"go_async_block\", \"function_block\"):\n",
" sp = _span(node)\n",
" if not sp:\n",
" continue\n",
" s, e = sp\n",
" text = code[s:e].strip()\n",
" if not text:\n",
" continue\n",
"\n",
" chunks.append(\n",
" Chunk(\n",
" text=text,\n",
" kind=node.data,\n",
" metadata={\"granularity\": \"block\"}\n",
" )\n",
" )\n",
" return chunks\n",
"\n",
"def chunk_avap_code(code: str) -> List[Chunk]:\n",
" # Keep original offsets: do NOT lstrip. Grammar already accepts leading _NL.\n",
" blocks = chunk_blocks(code)\n",
" lines = chunk_atomic_lines(code)\n",
" return blocks + lines"
]
},
{
"cell_type": "markdown",
"id": "23a92e13",
"metadata": {},
"source": [
"# BNF to Lark"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "bde351ba",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"/home/pseco/VsCodeProjects/assistance-engine/scratches/pseco/ingestion/Code Ingestion\n",
"True\n"
]
}
],
"source": [
"from pathlib import Path\n",
"print(Path.cwd())\n",
"print(Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\").exists())"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "c66842c7",
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"# Load BNF grammar from file\n",
"bnf_path = Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\")\n",
"if not bnf_path.exists():\n",
" raise FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n",
"\n",
"bnf_grammar: str = bnf_path.read_text(encoding=\"utf-8\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "07bb32cb",
"metadata": {},
"outputs": [],
"source": [
"lark_bnf = bnf_to_lark(bnf_grammar)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "8122b603",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('program : ( line | block_comment )*\\n'\n",
" 'line : [ statement ] [ line_comment | doc_comment ] EOL\\n'\n",
" ' | ( line_comment | doc_comment ) EOL\\n'\n",
" 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n",
" '\\n'\n",
" 'statement : assignment\\n'\n",
" ' | method_call_stmt\\n'\n",
" ' | function_call_stmt\\n'\n",
" ' | function_decl\\n'\n",
" ' | return_stmt\\n'\n",
" ' | system_command\\n'\n",
" ' | io_command\\n'\n",
" ' | control_flow\\n'\n",
" ' | async_command\\n'\n",
" ' | connector_cmd\\n'\n",
" ' | db_command\\n'\n",
" ' | http_command\\n'\n",
" ' | util_command\\n'\n",
" ' | modularity_cmd\\n'\n",
" '\\n'\n",
" 'assignment : identifier \"=\" expression\\n'\n",
" '\\n'\n",
" '/* Llamada a función global (sin receptor de objeto) */\\n'\n",
" 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n",
" '\\n'\n",
" '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n",
" 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n",
" '[argument_list] \")\"\\n'\n",
" '\\n'\n",
" 'system_command : register_cmd | addvar_cmd\\n'\n",
" 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n",
" 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n",
" '/* addVar asigna un valor a una variable. Acepta (valor, variable) o '\n",
" '(variable, valor).\\n'\n",
" ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n",
" 'al primero.\\n'\n",
" ' No está permitido pasar dos literales como argumentos. */\\n'\n",
" 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n",
" 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n",
" '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n",
" 'identifier */\\n'\n",
" '\\n'\n",
" 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n",
" '\\n'\n",
" '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n",
" 'scope:\\n'\n",
" ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n",
" '404) */\\n'\n",
" 'system_variable : \"_status\"')\n"
]
}
],
"source": [
"pprint.PrettyPrinter().pprint(lark_bnf)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "993a3d63",
"metadata": {},
"outputs": [
{
"ename": "GrammarError",
"evalue": "Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m<END-OF-FILE>\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n",
"\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n",
"\nDuring handling of the above exception, another exception occurred:\n",
"\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m<?>\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> \u001b[39m\u001b[32m1185\u001b[39m tree = \u001b[43m_parse_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrammar_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1187\u001b[39m imports = {}\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stmt \u001b[38;5;129;01min\u001b[39;00m tree.children:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:955\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 954\u001b[39m context = e.get_context(text)\n\u001b[32m--> \u001b[39m\u001b[32m955\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GrammarError(\u001b[33m\"\u001b[39m\u001b[33mUnexpected input at line \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m column \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 956\u001b[39m (e.line, e.column, name, context))\n\u001b[32m 957\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedToken \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 958\u001b[39m context = e.get_context(text)\n",
"\u001b[31mGrammarError\u001b[39m: Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n"
]
}
],
"source": [
"parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")"
]
},
{
"cell_type": "markdown",
"id": "49953efd",
"metadata": {},
"source": [
"# BNF conversion to EBNF"
]
},
{
"cell_type": "markdown",
"id": "32dbc2c5",
"metadata": {},
"source": [
"# EBNF Check"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "37968906",
"metadata": {},
"outputs": [],
"source": [
"ebnf_text = r\"\"\"\n",
"assign ::= name '=' num ;\n",
"name ::= 'a' | 'b' | 'c' ;\n",
"num ::= [0-9] ;\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "b234f2c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"BNF: True\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Generating LALR tables\n"
]
}
],
"source": [
"ebnf_grammar(ebnf_text)\n",
"print(\"BNF:\", ebnf_parse(\"a=7\"))"
]
},
{
"cell_type": "markdown",
"id": "66fb8fee",
"metadata": {},
"source": [
"# Lark check EBNF Style"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "08e53ccb",
"metadata": {},
"outputs": [],
"source": [
"ebnf_text = r\"\"\"\n",
"start: assign\n",
"\n",
"assign: name \"=\" num\n",
"name: \"a\" | \"b\" | \"c\"\n",
"num: DIGIT\n",
"\n",
"DIGIT: /[0-9]/\n",
"\n",
"%ignore \" \"\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "52935608",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'assign'), [Tree(Token('RULE', 'name'), []), Tree(Token('RULE', 'num'), [Token('DIGIT', '7')])])])\n"
]
}
],
"source": [
"parser = Lark(ebnf_text)\n",
"\n",
"print(parser.parse(\"a=7\"))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "assistance-engine",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}