451 lines
24 KiB
Plaintext
451 lines
24 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "5b646fb1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
|
|
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! uv pip install bnf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "274d6d68",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
|
|
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"! uv pip install ebnf"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"id": "0a8abbfa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import re\n",
|
|
"from dataclasses import dataclass\n",
|
|
"import pprint\n",
|
|
"from typing import Any, Dict, List, Optional, Tuple\n",
|
|
"from lark import Tree, Lark\n",
|
|
"from bnf import grammar as bnf_grammar, parse as bnf_parse\n",
|
|
"from src.config import settings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "baa779f3",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "26927d0c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def bnf_to_lark(bnf_text):\n",
|
|
" text = re.sub(r\"<([^>]+)>\", r\"\\1\", bnf_text) # remove <>\n",
|
|
" text = text.replace(\"::=\", \":\")\n",
|
|
" return text"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "89be8bf6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"@dataclass\n",
|
|
"class Chunk:\n",
|
|
" text: str\n",
|
|
" kind: str\n",
|
|
" metadata: Dict[str, Any]\n",
|
|
"\n",
|
|
"def _span(node: Tree) -> Optional[Tuple[int, int]]:\n",
|
|
" m = node.meta\n",
|
|
" s = getattr(m, \"start_pos\", None)\n",
|
|
" e = getattr(m, \"end_pos\", None)\n",
|
|
" if s is None or e is None:\n",
|
|
" return None\n",
|
|
" return s, e\n",
|
|
"\n",
|
|
"def _iter_trees(t: Tree):\n",
|
|
" yield t\n",
|
|
" for c in t.children:\n",
|
|
" if isinstance(c, Tree):\n",
|
|
" yield from _iter_trees(c)\n",
|
|
"\n",
|
|
"def _cmd_name(line: str) -> Optional[str]:\n",
|
|
" m = re.match(r\"^\\s*([A-Za-z_][A-Za-z0-9_]*)\\s*\\(\", line)\n",
|
|
" return m.group(1) if m else None\n",
|
|
"\n",
|
|
"def chunk_atomic_lines(code: str) -> List[Chunk]:\n",
|
|
" tree = parser.parse(code)\n",
|
|
" chunks: List[Chunk] = []\n",
|
|
"\n",
|
|
" for node in _iter_trees(tree):\n",
|
|
" if node.data == \"stmt_line\":\n",
|
|
" sp = _span(node)\n",
|
|
" if not sp:\n",
|
|
" continue\n",
|
|
" s, e = sp\n",
|
|
" text = code[s:e].strip()\n",
|
|
" if not text:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" chunks.append(\n",
|
|
" Chunk(\n",
|
|
" text=text,\n",
|
|
" kind=\"line\",\n",
|
|
" metadata={\n",
|
|
" \"granularity\": \"atomic\",\n",
|
|
" \"command\": _cmd_name(text)\n",
|
|
" }\n",
|
|
" )\n",
|
|
" )\n",
|
|
" return chunks\n",
|
|
"\n",
|
|
"def chunk_blocks(code: str) -> List[Chunk]:\n",
|
|
" tree = parser.parse(code)\n",
|
|
" chunks: List[Chunk] = []\n",
|
|
"\n",
|
|
" for node in _iter_trees(tree):\n",
|
|
" if node.data in (\"if_block\", \"loop_block\", \"try_block\", \"go_async_block\", \"function_block\"):\n",
|
|
" sp = _span(node)\n",
|
|
" if not sp:\n",
|
|
" continue\n",
|
|
" s, e = sp\n",
|
|
" text = code[s:e].strip()\n",
|
|
" if not text:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" chunks.append(\n",
|
|
" Chunk(\n",
|
|
" text=text,\n",
|
|
" kind=node.data,\n",
|
|
" metadata={\"granularity\": \"block\"}\n",
|
|
" )\n",
|
|
" )\n",
|
|
" return chunks\n",
|
|
"\n",
|
|
"def chunk_avap_code(code: str) -> List[Chunk]:\n",
|
|
" # Keep original offsets: do NOT lstrip. Grammar already accepts leading _NL.\n",
|
|
" blocks = chunk_blocks(code)\n",
|
|
" lines = chunk_atomic_lines(code)\n",
|
|
" return blocks + lines"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "23a92e13",
|
|
"metadata": {},
|
|
"source": [
|
|
"# BNF to Lark"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "bde351ba",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/home/pseco/VsCodeProjects/assistance-engine/scratches/pseco/ingestion/Code Ingestion\n",
|
|
"True\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from pathlib import Path\n",
|
|
"print(Path.cwd())\n",
|
|
"print(Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\").exists())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "c66842c7",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"# Load BNF grammar from file\n",
|
|
"bnf_path = Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\")\n",
|
|
"if not bnf_path.exists():\n",
|
|
" raise FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n",
|
|
"\n",
|
|
"bnf_grammar: str = bnf_path.read_text(encoding=\"utf-8\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "07bb32cb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"lark_bnf = bnf_to_lark(bnf_grammar)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"id": "8122b603",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"('program : ( line | block_comment )*\\n'\n",
|
|
" 'line : [ statement ] [ line_comment | doc_comment ] EOL\\n'\n",
|
|
" ' | ( line_comment | doc_comment ) EOL\\n'\n",
|
|
" 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n",
|
|
" '\\n'\n",
|
|
" 'statement : assignment\\n'\n",
|
|
" ' | method_call_stmt\\n'\n",
|
|
" ' | function_call_stmt\\n'\n",
|
|
" ' | function_decl\\n'\n",
|
|
" ' | return_stmt\\n'\n",
|
|
" ' | system_command\\n'\n",
|
|
" ' | io_command\\n'\n",
|
|
" ' | control_flow\\n'\n",
|
|
" ' | async_command\\n'\n",
|
|
" ' | connector_cmd\\n'\n",
|
|
" ' | db_command\\n'\n",
|
|
" ' | http_command\\n'\n",
|
|
" ' | util_command\\n'\n",
|
|
" ' | modularity_cmd\\n'\n",
|
|
" '\\n'\n",
|
|
" 'assignment : identifier \"=\" expression\\n'\n",
|
|
" '\\n'\n",
|
|
" '/* Llamada a función global (sin receptor de objeto) */\\n'\n",
|
|
" 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n",
|
|
" '\\n'\n",
|
|
" '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n",
|
|
" 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n",
|
|
" '[argument_list] \")\"\\n'\n",
|
|
" '\\n'\n",
|
|
" 'system_command : register_cmd | addvar_cmd\\n'\n",
|
|
" 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n",
|
|
" 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n",
|
|
" '/* addVar asigna un valor a una variable. Acepta (valor, variable) o '\n",
|
|
" '(variable, valor).\\n'\n",
|
|
" ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n",
|
|
" 'al primero.\\n'\n",
|
|
" ' No está permitido pasar dos literales como argumentos. */\\n'\n",
|
|
" 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n",
|
|
" 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n",
|
|
" '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n",
|
|
" 'identifier */\\n'\n",
|
|
" '\\n'\n",
|
|
" 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n",
|
|
" '\\n'\n",
|
|
" '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n",
|
|
" 'scope:\\n'\n",
|
|
" ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n",
|
|
" '404) */\\n'\n",
|
|
" 'system_variable : \"_status\"')\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pprint.PrettyPrinter().pprint(lark_bnf)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 26,
|
|
"id": "993a3d63",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "GrammarError",
|
|
"evalue": "Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
|
"\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m<END-OF-FILE>\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n",
|
|
"\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n",
|
|
"\nDuring handling of the above exception, another exception occurred:\n",
|
|
"\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)",
|
|
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m<?>\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> \u001b[39m\u001b[32m1185\u001b[39m tree = \u001b[43m_parse_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrammar_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1187\u001b[39m imports = {}\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stmt \u001b[38;5;129;01min\u001b[39;00m tree.children:\n",
|
|
"\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:955\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 954\u001b[39m context = e.get_context(text)\n\u001b[32m--> \u001b[39m\u001b[32m955\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GrammarError(\u001b[33m\"\u001b[39m\u001b[33mUnexpected input at line \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m column \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 956\u001b[39m (e.line, e.column, name, context))\n\u001b[32m 957\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedToken \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 958\u001b[39m context = e.get_context(text)\n",
|
|
"\u001b[31mGrammarError\u001b[39m: Unexpected input at line 4 column 52 in <string>: \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "49953efd",
|
|
"metadata": {},
|
|
"source": [
|
|
"# BNF conversion to EBNF"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "32dbc2c5",
|
|
"metadata": {},
|
|
"source": [
|
|
"# EBNF Check"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 63,
|
|
"id": "37968906",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ebnf_text = r\"\"\"\n",
|
|
"assign ::= name '=' num ;\n",
|
|
"name ::= 'a' | 'b' | 'c' ;\n",
|
|
"num ::= [0-9] ;\n",
|
|
"\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 64,
|
|
"id": "b234f2c4",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"BNF: True\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Generating LALR tables\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"ebnf_grammar(ebnf_text)\n",
|
|
"print(\"BNF:\", ebnf_parse(\"a=7\"))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "66fb8fee",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Lark check EBNF Style"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 54,
|
|
"id": "08e53ccb",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"ebnf_text = r\"\"\"\n",
|
|
"start: assign\n",
|
|
"\n",
|
|
"assign: name \"=\" num\n",
|
|
"name: \"a\" | \"b\" | \"c\"\n",
|
|
"num: DIGIT\n",
|
|
"\n",
|
|
"DIGIT: /[0-9]/\n",
|
|
"\n",
|
|
"%ignore \" \"\n",
|
|
"\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 55,
|
|
"id": "52935608",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'assign'), [Tree(Token('RULE', 'name'), []), Tree(Token('RULE', 'num'), [Token('DIGIT', '7')])])])\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"parser = Lark(ebnf_text)\n",
|
|
"\n",
|
|
"print(parser.parse(\"a=7\"))"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "assistance-engine",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|