Working on scratches BNF and parsing
This commit is contained in:
parent
3ac432567b
commit
d04c149e66
|
|
@ -0,0 +1,3 @@
|
|||
addParam("Alberto",name)
|
||||
result = "Hello," + name
|
||||
addResult(result)
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
<program> ::= ( <line> | <block_comment> )*
|
||||
<line> ::= [ <statement> ] [ <line_comment> | <doc_comment> ] <EOL>
|
||||
| ( <line_comment> | <doc_comment> ) <EOL>
|
||||
<EOL> ::= /* Retorno de carro / Salto de línea (\n o \r\n) */
|
||||
|
||||
<statement> ::= <assignment>
|
||||
| <method_call_stmt>
|
||||
| <function_call_stmt>
|
||||
| <function_decl>
|
||||
| <return_stmt>
|
||||
| <system_command>
|
||||
| <io_command>
|
||||
| <control_flow>
|
||||
| <async_command>
|
||||
| <connector_cmd>
|
||||
| <db_command>
|
||||
| <http_command>
|
||||
| <util_command>
|
||||
| <modularity_cmd>
|
||||
|
||||
<assignment> ::= <identifier> "=" <expression>
|
||||
|
||||
/* Llamada a función global (sin receptor de objeto) */
|
||||
<function_call_stmt> ::= <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
/* Llamada a método sobre un objeto conector (con receptor) */
|
||||
<method_call_stmt> ::= <identifier> "=" <identifier> "." <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
<system_command> ::= <register_cmd> | <addvar_cmd>
|
||||
<register_cmd> ::= "registerEndpoint(" <stringliteral> "," <stringliteral> "," <list_display> "," <stringliteral> "," <identifier> "," <identifier> ")"
|
||||
/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor).
|
||||
Si ambos argumentos son identificadores, el valor del segundo se asigna al primero.
|
||||
No está permitido pasar dos literales como argumentos. */
|
||||
<addvar_cmd> ::= "addVar(" <addvar_arg> "," <addvar_arg> ")"
|
||||
<addvar_arg> ::= <identifier> | <literal> | "$" <identifier>
|
||||
/* Restricción semántica: al menos uno de los dos <addvar_arg> debe ser <identifier> */
|
||||
|
||||
<identifier> ::= [a-zA-Z_] [a-zA-Z0-9_]*
|
||||
|
||||
/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope:
|
||||
_status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = 404) */
|
||||
<system_variable> ::= "_status"
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
<io_command> ::= <addparam_cmd> | <getlistlen_cmd> | <addresult_cmd> | <getparamlist_cmd>
|
||||
<addparam_cmd> ::= "addParam(" <stringliteral> "," <identifier> ")"
|
||||
<getlistlen_cmd> ::= "getListLen(" <identifier> "," <identifier> ")"
|
||||
<getparamlist_cmd> ::= "getQueryParamList(" <stringliteral> "," <identifier> ")"
|
||||
<addresult_cmd> ::= "addResult(" <identifier> ")"
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
<control_flow> ::= <if_stmt> | <loop_stmt> | <try_stmt>
|
||||
|
||||
<if_stmt> ::= "if(" <if_condition> ")" <EOL>
|
||||
<block>
|
||||
[ "else()" <EOL> <block> ]
|
||||
"end()" <EOL>
|
||||
|
||||
/* if() soporta dos modos:
|
||||
Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
|
||||
identificadores simples o literales, nunca expresiones de acceso.
|
||||
Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']),
|
||||
debe asignarse previamente a una variable.
|
||||
Modo 2 — expresión libre: None, None, expresión compleja como string */
|
||||
<if_condition> ::= <if_atom> "," <if_atom> "," <stringliteral>
|
||||
| "None" "," "None" "," <stringliteral>
|
||||
<if_atom> ::= <identifier> | <literal>
|
||||
|
||||
<loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
|
||||
<block>
|
||||
"endLoop()" <EOL>
|
||||
|
||||
<try_stmt> ::= "try()" <EOL>
|
||||
<block>
|
||||
"exception(" <identifier> ")" <EOL>
|
||||
<block>
|
||||
"end()" <EOL>
|
||||
|
||||
<block> ::= <line>*
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<async_command> ::= <go_stmt> | <gather_stmt>
|
||||
<go_stmt> ::= <identifier> "=" "go" <identifier> "(" [<argument_list>] ")"
|
||||
<gather_stmt> ::= <identifier> "=" "gather(" <identifier> ["," <expression>] ")"
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
/* Instanciación de conector de terceros y llamada a sus métodos dinámicos */
|
||||
<connector_cmd> ::= <connector_instantiation> | <connector_method_call>
|
||||
<connector_instantiation> ::= <identifier> "=" "avapConnector(" <stringliteral> ")"
|
||||
<connector_method_call> ::= [ <identifier> "=" ] <identifier> "." <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
/* Cliente HTTP con Timeout Obligatorio */
|
||||
<http_command> ::= <req_post_cmd> | <req_get_cmd>
|
||||
<req_post_cmd> ::= "RequestPost(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
|
||||
<req_get_cmd> ::= "RequestGet(" <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
|
||||
|
||||
/* ORM y Persistencia (Estandarizado con tableName) */
|
||||
<db_command> ::= <orm_direct> | <orm_check> | <orm_create> | <orm_select> | <orm_insert> | <orm_update>
|
||||
<orm_direct> ::= "ormDirect(" <expression> "," <identifier> ")"
|
||||
<orm_check> ::= "ormCheckTable(" <expression> "," <identifier> ")"
|
||||
<orm_create> ::= "ormCreateTable(" <expression> "," <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* ormAccessSelect(fields, tableName, selector, varTarget) */
|
||||
<orm_select> ::= "ormAccessSelect(" <orm_fields> "," <expression> "," [<expression>] "," <identifier> ")"
|
||||
<orm_fields> ::= "*" | <expression>
|
||||
|
||||
/* ormAccessInsert(fieldsValues, tableName, varTarget) */
|
||||
<orm_insert> ::= "ormAccessInsert(" <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */
|
||||
<orm_update> ::= "ormAccessUpdate(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> ")"
|
||||
|
|
@ -1,29 +0,0 @@
|
|||
/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */
|
||||
<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
|
||||
|
||||
/* Manipulación de listas y JSON */
|
||||
<json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")"
|
||||
| "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
|
||||
| "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
|
||||
| "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* Criptografía */
|
||||
<crypto_cmd> ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")"
|
||||
| "encodeMD5(" <identifier_or_string> "," <identifier> ")"
|
||||
|
||||
/* Expresiones regulares */
|
||||
<regex_cmd> ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")"
|
||||
|
||||
<datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
|
||||
/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */
|
||||
|
||||
<stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
|
||||
/* Argumentos: epoch_origen, formato, timedelta, destino */
|
||||
| "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
|
||||
/* Argumentos: fecha_string, formato_entrada, timedelta, destino */
|
||||
|
||||
<string_cmd> ::= "randomString(" <expression> "," <identifier> ")"
|
||||
/* Argumentos: longitud, destino */
|
||||
|
||||
<replace_cmd> ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
|
||||
/* Argumentos: origen, patron_busqueda, reemplazo, destino */
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
/* Nota: las funciones utilizan llaves {} como delimitadores de bloque por decisión
|
||||
arquitectónica explícita, diferenciándose de las estructuras de control (if, loop, try)
|
||||
que usan palabras clave de cierre (end(), endLoop()). Ambos patrones coexisten
|
||||
en la gramática y el parser los distingue por el token de apertura. */
|
||||
<function_decl> ::= "function" <identifier> "(" [<param_list>] ")" "{" <EOL>
|
||||
<block>
|
||||
"}" <EOL>
|
||||
<param_list> ::= <identifier> ("," <identifier>)*
|
||||
<return_stmt> ::= "return(" [<expression>] ")"
|
||||
|
|
@ -1,3 +0,0 @@
|
|||
<modularity_cmd> ::= <include_stmt> | <import_stmt>
|
||||
<include_stmt> ::= "include" " " <stringliteral>
|
||||
<import_stmt> ::= "import" " " ( "<" <identifier> ">" | <stringliteral> )
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
/* Jerarquía de Expresiones (Precedencia de menor a mayor) */
|
||||
<expression> ::= <logical_or>
|
||||
<logical_or> ::= <logical_and> ( "or" <logical_and> )*
|
||||
<logical_and> ::= <logical_not> ( "and" <logical_not> )*
|
||||
<logical_not> ::= "not" <logical_not> | <comparison>
|
||||
|
||||
<comparison> ::= <arithmetic> ( <comp_op> <arithmetic> )*
|
||||
<comp_op> ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is"
|
||||
|
||||
<arithmetic> ::= <term> ( ( "+" | "-" ) <term> )*
|
||||
<term> ::= <factor> ( ( "*" | "/" | "%" ) <factor> )*
|
||||
<factor> ::= ( "+" | "-" ) <factor> | <power>
|
||||
<power> ::= <primary> [ "**" <factor> ]
|
||||
|
||||
/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)
|
||||
La regla <primary> cubre también el acceso a métodos de objetos conector
|
||||
(conector.metodo(...)) y el acceso por clave a sus resultados (resultado["key"]) */
|
||||
<primary> ::= <atom>
|
||||
| <primary> "." <identifier>
|
||||
| <primary> "[" <expression> "]"
|
||||
| <primary> "[" [<expression>] ":" [<expression>] [":" [<expression>]] "]"
|
||||
| <primary> "(" [<argument_list>] ")"
|
||||
|
||||
<atom> ::= <identifier>
|
||||
| "$" <identifier>
|
||||
| <literal>
|
||||
| "(" <expression> ")"
|
||||
| <list_display>
|
||||
| <dict_display>
|
||||
|
||||
/* Estructuras de Datos, Comprensiones y Argumentos */
|
||||
<list_display> ::= "[" [<argument_list>] "]"
|
||||
| "[" <expression> "for" <identifier> "in" <expression> [<if_clause>] "]"
|
||||
<if_clause> ::= "if" <expression>
|
||||
<dict_display> ::= "{" [<key_datum_list>] "}"
|
||||
<key_datum_list> ::= <key_datum> ( "," <key_datum> )*
|
||||
<key_datum> ::= <expression> ":" <expression>
|
||||
<argument_list> ::= <expression> ( "," <expression> )*
|
||||
|
||||
/* Tipo numérico unificado */
|
||||
<number> ::= <floatnumber> | <integer>
|
||||
|
||||
/* Literales (Tipos de Datos Primitivos Soportados) */
|
||||
<literal> ::= <stringliteral> | <number> | <boolean> | "None"
|
||||
<boolean> ::= "True" | "False"
|
||||
<integer> ::= [0-9]+
|
||||
<floatnumber> ::= [0-9]+ "." [0-9]* | "." [0-9]+
|
||||
|
||||
/* Cadenas de Texto con soporte de secuencias de escape */
|
||||
<stringliteral> ::= "\"" <text_double> "\"" | "'" <text_single> "'"
|
||||
<escape_sequence> ::= "\\" ( "\"" | "'" | "\\" | "n" | "t" | "r" | "0" )
|
||||
<text_double> ::= ( [^"\\] | <escape_sequence> )*
|
||||
<text_single> ::= ( [^'\\] | <escape_sequence> )*
|
||||
<identifier_or_string> ::= <identifier> | <stringliteral>
|
||||
|
||||
/* Reglas de Comentarios para el Lexer
|
||||
El lexer aplica longest-match: /// debe evaluarse ANTES que // */
|
||||
<doc_comment> ::= "///" <any_text>
|
||||
<line_comment> ::= "//" <any_text>
|
||||
<block_comment> ::= "/*" <any_content> "*/"
|
||||
<any_text> ::= [^\r\n]*
|
||||
<any_content> ::= /* Cualquier secuencia de caracteres que no contenga la subcadena "*/" */
|
||||
|
|
@ -1,5 +1,15 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "e784361f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
|
|
@ -171,16 +181,41 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 2,
|
||||
"id": "d0a3051f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = \"Hello,Alberto\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "a10a1017",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"bnf_text = r\"\"\"\n",
|
||||
"<assign> ::= <name> <num>\n",
|
||||
"<name> ::= a | b | c\n",
|
||||
"<num> ::= [0-9]\n",
|
||||
"\"\"\""
|
||||
"result_regex = re.match(r'^Hello,[A-Za-z]+$', result)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "06cd296f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Result matches BNF\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"if result_regex:\n",
|
||||
" print(\"Result matches BNF\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -0,0 +1,450 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "5b646fb1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
|
||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"! uv pip install bnf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "274d6d68",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
|
||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"! uv pip install ebnf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "0a8abbfa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"from dataclasses import dataclass\n",
|
||||
"import pprint\n",
|
||||
"from typing import Any, Dict, List, Optional, Tuple\n",
|
||||
"from lark import Tree, Lark\n",
|
||||
"from bnf import grammar as bnf_grammar, parse as bnf_parse\n",
|
||||
"from src.config import settings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "baa779f3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Functions"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "26927d0c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def bnf_to_lark(bnf_text):\n",
|
||||
" text = re.sub(r\"<([^>]+)>\", r\"\\1\", bnf_text) # remove <>\n",
|
||||
" text = text.replace(\"::=\", \":\")\n",
|
||||
" return text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "89be8bf6",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@dataclass\n",
|
||||
"class Chunk:\n",
|
||||
" text: str\n",
|
||||
" kind: str\n",
|
||||
" metadata: Dict[str, Any]\n",
|
||||
"\n",
|
||||
"def _span(node: Tree) -> Optional[Tuple[int, int]]:\n",
|
||||
" m = node.meta\n",
|
||||
" s = getattr(m, \"start_pos\", None)\n",
|
||||
" e = getattr(m, \"end_pos\", None)\n",
|
||||
" if s is None or e is None:\n",
|
||||
" return None\n",
|
||||
" return s, e\n",
|
||||
"\n",
|
||||
"def _iter_trees(t: Tree):\n",
|
||||
" yield t\n",
|
||||
" for c in t.children:\n",
|
||||
" if isinstance(c, Tree):\n",
|
||||
" yield from _iter_trees(c)\n",
|
||||
"\n",
|
||||
"def _cmd_name(line: str) -> Optional[str]:\n",
|
||||
" m = re.match(r\"^\\s*([A-Za-z_][A-Za-z0-9_]*)\\s*\\(\", line)\n",
|
||||
" return m.group(1) if m else None\n",
|
||||
"\n",
|
||||
"def chunk_atomic_lines(code: str) -> List[Chunk]:\n",
|
||||
" tree = parser.parse(code)\n",
|
||||
" chunks: List[Chunk] = []\n",
|
||||
"\n",
|
||||
" for node in _iter_trees(tree):\n",
|
||||
" if node.data == \"stmt_line\":\n",
|
||||
" sp = _span(node)\n",
|
||||
" if not sp:\n",
|
||||
" continue\n",
|
||||
" s, e = sp\n",
|
||||
" text = code[s:e].strip()\n",
|
||||
" if not text:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" chunks.append(\n",
|
||||
" Chunk(\n",
|
||||
" text=text,\n",
|
||||
" kind=\"line\",\n",
|
||||
" metadata={\n",
|
||||
" \"granularity\": \"atomic\",\n",
|
||||
" \"command\": _cmd_name(text)\n",
|
||||
" }\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" return chunks\n",
|
||||
"\n",
|
||||
"def chunk_blocks(code: str) -> List[Chunk]:\n",
|
||||
" tree = parser.parse(code)\n",
|
||||
" chunks: List[Chunk] = []\n",
|
||||
"\n",
|
||||
" for node in _iter_trees(tree):\n",
|
||||
" if node.data in (\"if_block\", \"loop_block\", \"try_block\", \"go_async_block\", \"function_block\"):\n",
|
||||
" sp = _span(node)\n",
|
||||
" if not sp:\n",
|
||||
" continue\n",
|
||||
" s, e = sp\n",
|
||||
" text = code[s:e].strip()\n",
|
||||
" if not text:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" chunks.append(\n",
|
||||
" Chunk(\n",
|
||||
" text=text,\n",
|
||||
" kind=node.data,\n",
|
||||
" metadata={\"granularity\": \"block\"}\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" return chunks\n",
|
||||
"\n",
|
||||
"def chunk_avap_code(code: str) -> List[Chunk]:\n",
|
||||
" # Keep original offsets: do NOT lstrip. Grammar already accepts leading _NL.\n",
|
||||
" blocks = chunk_blocks(code)\n",
|
||||
" lines = chunk_atomic_lines(code)\n",
|
||||
" return blocks + lines"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "23a92e13",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BNF to Lark"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "bde351ba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/home/pseco/VsCodeProjects/assistance-engine/scratches/pseco/ingestion/Code Ingestion\n",
|
||||
"True\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"print(Path.cwd())\n",
|
||||
"print(Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\").exists())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "c66842c7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"# Load BNF grammar from file\n",
|
||||
"bnf_path = Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\")\n",
|
||||
"if not bnf_path.exists():\n",
|
||||
" raise FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n",
|
||||
"\n",
|
||||
"bnf_grammar: str = bnf_path.read_text(encoding=\"utf-8\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"id": "07bb32cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lark_bnf = bnf_to_lark(bnf_grammar)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "8122b603",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"('program : ( line | block_comment )*\\n'\n",
|
||||
" 'line : [ statement ] [ line_comment | doc_comment ] EOL\\n'\n",
|
||||
" ' | ( line_comment | doc_comment ) EOL\\n'\n",
|
||||
" 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'statement : assignment\\n'\n",
|
||||
" ' | method_call_stmt\\n'\n",
|
||||
" ' | function_call_stmt\\n'\n",
|
||||
" ' | function_decl\\n'\n",
|
||||
" ' | return_stmt\\n'\n",
|
||||
" ' | system_command\\n'\n",
|
||||
" ' | io_command\\n'\n",
|
||||
" ' | control_flow\\n'\n",
|
||||
" ' | async_command\\n'\n",
|
||||
" ' | connector_cmd\\n'\n",
|
||||
" ' | db_command\\n'\n",
|
||||
" ' | http_command\\n'\n",
|
||||
" ' | util_command\\n'\n",
|
||||
" ' | modularity_cmd\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'assignment : identifier \"=\" expression\\n'\n",
|
||||
" '\\n'\n",
|
||||
" '/* Llamada a función global (sin receptor de objeto) */\\n'\n",
|
||||
" 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n",
|
||||
" '\\n'\n",
|
||||
" '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n",
|
||||
" 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n",
|
||||
" '[argument_list] \")\"\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'system_command : register_cmd | addvar_cmd\\n'\n",
|
||||
" 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n",
|
||||
" 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n",
|
||||
" '/* addVar asigna un valor a una variable. Acepta (valor, variable) o '\n",
|
||||
" '(variable, valor).\\n'\n",
|
||||
" ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n",
|
||||
" 'al primero.\\n'\n",
|
||||
" ' No está permitido pasar dos literales como argumentos. */\\n'\n",
|
||||
" 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n",
|
||||
" 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n",
|
||||
" '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n",
|
||||
" 'identifier */\\n'\n",
|
||||
" '\\n'\n",
|
||||
" 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n",
|
||||
" '\\n'\n",
|
||||
" '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n",
|
||||
" 'scope:\\n'\n",
|
||||
" ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n",
|
||||
" '404) */\\n'\n",
|
||||
" 'system_variable : \"_status\"')\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"pprint.PrettyPrinter().pprint(lark_bnf)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "993a3d63",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "GrammarError",
|
||||
"evalue": "Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m<END-OF-FILE>\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n",
|
||||
"\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n",
|
||||
"\nDuring handling of the above exception, another exception occurred:\n",
|
||||
"\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m<?>\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> \u001b[39m\u001b[32m1185\u001b[39m tree = \u001b[43m_parse_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrammar_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1187\u001b[39m imports = {}\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stmt \u001b[38;5;129;01min\u001b[39;00m tree.children:\n",
|
||||
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:955\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 954\u001b[39m context = e.get_context(text)\n\u001b[32m--> \u001b[39m\u001b[32m955\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GrammarError(\u001b[33m\"\u001b[39m\u001b[33mUnexpected input at line \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m column \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 956\u001b[39m (e.line, e.column, name, context))\n\u001b[32m 957\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedToken \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 958\u001b[39m context = e.get_context(text)\n",
|
||||
"\u001b[31mGrammarError\u001b[39m: Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "49953efd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# BNF conversion to EBNF"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "32dbc2c5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# EBNF Check"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"id": "37968906",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ebnf_text = r\"\"\"\n",
|
||||
"assign ::= name '=' num ;\n",
|
||||
"name ::= 'a' | 'b' | 'c' ;\n",
|
||||
"num ::= [0-9] ;\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 64,
|
||||
"id": "b234f2c4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"BNF: True\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Generating LALR tables\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ebnf_grammar(ebnf_text)\n",
|
||||
"print(\"BNF:\", ebnf_parse(\"a=7\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66fb8fee",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Lark check EBNF Style"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 54,
|
||||
"id": "08e53ccb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ebnf_text = r\"\"\"\n",
|
||||
"start: assign\n",
|
||||
"\n",
|
||||
"assign: name \"=\" num\n",
|
||||
"name: \"a\" | \"b\" | \"c\"\n",
|
||||
"num: DIGIT\n",
|
||||
"\n",
|
||||
"DIGIT: /[0-9]/\n",
|
||||
"\n",
|
||||
"%ignore \" \"\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "52935608",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'assign'), [Tree(Token('RULE', 'name'), []), Tree(Token('RULE', 'num'), [Token('DIGIT', '7')])])])\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"parser = Lark(ebnf_text)\n",
|
||||
"\n",
|
||||
"print(parser.parse(\"a=7\"))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "assistance-engine",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.12.11"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Loading…
Reference in New Issue