{ "cells": [ { "cell_type": "code", "execution_count": 7, "id": "5b646fb1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n", "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n" ] } ], "source": [ "! uv pip install bnf" ] }, { "cell_type": "code", "execution_count": 8, "id": "274d6d68", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n", "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n" ] } ], "source": [ "! uv pip install ebnf" ] }, { "cell_type": "code", "execution_count": 22, "id": "0a8abbfa", "metadata": {}, "outputs": [], "source": [ "import re\n", "from dataclasses import dataclass\n", "import pprint\n", "from typing import Any, Dict, List, Optional, Tuple\n", "from lark import Tree, Lark\n", "from bnf import grammar as bnf_grammar, parse as bnf_parse\n", "from src.config import settings" ] }, { "cell_type": "markdown", "id": "baa779f3", "metadata": {}, "source": [ "# Functions" ] }, { "cell_type": "code", "execution_count": 10, "id": "26927d0c", "metadata": {}, "outputs": [], "source": [ "def bnf_to_lark(bnf_text):\n", " text = re.sub(r\"<([^>]+)>\", r\"\\1\", bnf_text) # remove <>\n", " text = text.replace(\"::=\", \":\")\n", " return text" ] }, { "cell_type": "code", "execution_count": 11, "id": "89be8bf6", "metadata": {}, "outputs": [], "source": [ "@dataclass\n", "class Chunk:\n", " text: str\n", " kind: str\n", " metadata: Dict[str, Any]\n", "\n", "def _span(node: Tree) -> Optional[Tuple[int, int]]:\n", " m = node.meta\n", " s = getattr(m, \"start_pos\", None)\n", " e = getattr(m, \"end_pos\", None)\n", " if s is None or e is None:\n", " return None\n", " return s, e\n", "\n", "def _iter_trees(t: Tree):\n", " yield t\n", " for c in t.children:\n", " if isinstance(c, Tree):\n", " yield from _iter_trees(c)\n", "\n", "def _cmd_name(line: str) -> Optional[str]:\n", " m = re.match(r\"^\\s*([A-Za-z_][A-Za-z0-9_]*)\\s*\\(\", line)\n", " return m.group(1) if m else None\n", "\n", "def chunk_atomic_lines(code: str) -> List[Chunk]:\n", " tree = parser.parse(code)\n", " chunks: List[Chunk] = []\n", "\n", " for node in _iter_trees(tree):\n", " if node.data == \"stmt_line\":\n", " sp = _span(node)\n", " if not sp:\n", " continue\n", " s, e = sp\n", " text = code[s:e].strip()\n", " if not text:\n", " continue\n", "\n", " chunks.append(\n", " Chunk(\n", " text=text,\n", " kind=\"line\",\n", " metadata={\n", " \"granularity\": \"atomic\",\n", " \"command\": _cmd_name(text)\n", " }\n", " )\n", " )\n", " return chunks\n", "\n", "def chunk_blocks(code: str) -> List[Chunk]:\n", " tree = parser.parse(code)\n", " chunks: List[Chunk] = []\n", "\n", " for node in _iter_trees(tree):\n", " if node.data in (\"if_block\", \"loop_block\", \"try_block\", \"go_async_block\", \"function_block\"):\n", " sp = _span(node)\n", " if not sp:\n", " continue\n", " s, e = sp\n", " text = code[s:e].strip()\n", " if not text:\n", " continue\n", "\n", " chunks.append(\n", " Chunk(\n", " text=text,\n", " kind=node.data,\n", " metadata={\"granularity\": \"block\"}\n", " )\n", " )\n", " return chunks\n", "\n", "def chunk_avap_code(code: str) -> List[Chunk]:\n", " # Keep original offsets: do NOT lstrip. Grammar already accepts leading _NL.\n", " blocks = chunk_blocks(code)\n", " lines = chunk_atomic_lines(code)\n", " return blocks + lines" ] }, { "cell_type": "markdown", "id": "23a92e13", "metadata": {}, "source": [ "# BNF to Lark" ] }, { "cell_type": "code", "execution_count": 12, "id": "bde351ba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/home/pseco/VsCodeProjects/assistance-engine/scratches/pseco/ingestion/Code Ingestion\n", "True\n" ] } ], "source": [ "from pathlib import Path\n", "print(Path.cwd())\n", "print(Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\").exists())" ] }, { "cell_type": "code", "execution_count": 13, "id": "c66842c7", "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "\n", "# Load BNF grammar from file\n", "bnf_path = Path(settings.proj_root / \"ingestion/code/BNF/n01_BNF.txt\")\n", "if not bnf_path.exists():\n", " raise FileNotFoundError(f\"BNF file not found: {bnf_path}\")\n", "\n", "bnf_grammar: str = bnf_path.read_text(encoding=\"utf-8\")" ] }, { "cell_type": "code", "execution_count": 19, "id": "07bb32cb", "metadata": {}, "outputs": [], "source": [ "lark_bnf = bnf_to_lark(bnf_grammar)" ] }, { "cell_type": "code", "execution_count": 24, "id": "8122b603", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('program : ( line | block_comment )*\\n'\n", " 'line : [ statement ] [ line_comment | doc_comment ] EOL\\n'\n", " ' | ( line_comment | doc_comment ) EOL\\n'\n", " 'EOL : /* Retorno de carro / Salto de línea (\\\\n o \\\\r\\\\n) */\\n'\n", " '\\n'\n", " 'statement : assignment\\n'\n", " ' | method_call_stmt\\n'\n", " ' | function_call_stmt\\n'\n", " ' | function_decl\\n'\n", " ' | return_stmt\\n'\n", " ' | system_command\\n'\n", " ' | io_command\\n'\n", " ' | control_flow\\n'\n", " ' | async_command\\n'\n", " ' | connector_cmd\\n'\n", " ' | db_command\\n'\n", " ' | http_command\\n'\n", " ' | util_command\\n'\n", " ' | modularity_cmd\\n'\n", " '\\n'\n", " 'assignment : identifier \"=\" expression\\n'\n", " '\\n'\n", " '/* Llamada a función global (sin receptor de objeto) */\\n'\n", " 'function_call_stmt : identifier \"(\" [argument_list] \")\"\\n'\n", " '\\n'\n", " '/* Llamada a método sobre un objeto conector (con receptor) */\\n'\n", " 'method_call_stmt : identifier \"=\" identifier \".\" identifier \"(\" '\n", " '[argument_list] \")\"\\n'\n", " '\\n'\n", " 'system_command : register_cmd | addvar_cmd\\n'\n", " 'register_cmd : \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" '\n", " 'list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\\n'\n", " '/* addVar asigna un valor a una variable. Acepta (valor, variable) o '\n", " '(variable, valor).\\n'\n", " ' Si ambos argumentos son identificadores, el valor del segundo se asigna '\n", " 'al primero.\\n'\n", " ' No está permitido pasar dos literales como argumentos. */\\n'\n", " 'addvar_cmd : \"addVar(\" addvar_arg \",\" addvar_arg \")\"\\n'\n", " 'addvar_arg : identifier | literal | \"$\" identifier\\n'\n", " '/* Restricción semántica: al menos uno de los dos addvar_arg debe ser '\n", " 'identifier */\\n'\n", " '\\n'\n", " 'identifier : [a-zA-Z_] [a-zA-Z0-9_]*\\n'\n", " '\\n'\n", " '/* Variables de sistema reservadas — accesibles y asignables desde cualquier '\n", " 'scope:\\n'\n", " ' _status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = '\n", " '404) */\\n'\n", " 'system_variable : \"_status\"')\n" ] } ], "source": [ "pprint.PrettyPrinter().pprint(lark_bnf)" ] }, { "cell_type": "code", "execution_count": 26, "id": "993a3d63", "metadata": {}, "outputs": [ { "ename": "GrammarError", "evalue": "Unexpected input at line 4 column 52 in : \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mUnexpectedCharacters\u001b[39m Traceback (most recent call last)", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:952\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m tree = \u001b[43m_get_parser\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parser_frontends.py:106\u001b[39m, in \u001b[36mParsingFrontend.parse\u001b[39m\u001b[34m(self, text, start, on_error)\u001b[39m\n\u001b[32m 105\u001b[39m kw = {} \u001b[38;5;28;01mif\u001b[39;00m on_error \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m {\u001b[33m'\u001b[39m\u001b[33mon_error\u001b[39m\u001b[33m'\u001b[39m: on_error}\n\u001b[32m--> \u001b[39m\u001b[32m106\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mchosen_start\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:41\u001b[39m, in \u001b[36mLALR_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, on_error)\u001b[39m\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m41\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparser\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 42\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedInput \u001b[38;5;28;01mas\u001b[39;00m e:\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:171\u001b[39m, in \u001b[36m_Parser.parse\u001b[39m\u001b[34m(self, lexer, start, value_stack, state_stack, start_interactive)\u001b[39m\n\u001b[32m 170\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m InteractiveParser(\u001b[38;5;28mself\u001b[39m, parser_state, parser_state.lexer)\n\u001b[32m--> \u001b[39m\u001b[32m171\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mparse_from_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:188\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 187\u001b[39m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m188\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 189\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/parsers/lalr_parser.py:178\u001b[39m, in \u001b[36m_Parser.parse_from_state\u001b[39m\u001b[34m(self, state)\u001b[39m\n\u001b[32m 177\u001b[39m token = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m178\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlexer\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlex\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfeed_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:388\u001b[39m, in \u001b[36mTraditionalLexer.lex\u001b[39m\u001b[34m(self, state, parser_state)\u001b[39m\n\u001b[32m 387\u001b[39m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m388\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mnext_token\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser_state\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lexer.py:398\u001b[39m, in \u001b[36mTraditionalLexer.next_token\u001b[39m\u001b[34m(self, lex_state, parser_state)\u001b[39m\n\u001b[32m 397\u001b[39m allowed = {\u001b[33m\"\u001b[39m\u001b[33m\u001b[39m\u001b[33m\"\u001b[39m}\n\u001b[32m--> \u001b[39m\u001b[32m398\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,\n\u001b[32m 399\u001b[39m allowed=allowed, token_history=lex_state.last_token \u001b[38;5;129;01mand\u001b[39;00m [lex_state.last_token],\n\u001b[32m 400\u001b[39m state=parser_state, terminals_by_name=\u001b[38;5;28mself\u001b[39m.terminals_by_name)\n\u001b[32m 402\u001b[39m value, type_ = res\n", "\u001b[31mUnexpectedCharacters\u001b[39m: No terminal matches 'í' in the current parser context, at line 4 col 52\n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\nExpected one of: \n\t* TERMINAL\n\t* _IGNORE\n\t* _IMPORT\n\t* OP\n\t* _RBRACE\n\t* _TO\n\t* _COMMA\n\t* _OVERRIDE\n\t* _RPAR\n\t* STRING\n\t* RULE\n\t* _DECLARE\n\t* _LBRACE\n\t* _DOTDOT\n\t* _EXTEND\n\t* _LPAR\n\t* _NL\n\t* _RBRA\n\t* NUMBER\n\t* _LBRA\n\t* _COLON\n\t* REGEXP\n\t* _NL_OR\n\t* _DOT\n\t* TILDE\n\t* _OR\n\nPrevious tokens: Token('RULE', 'l')\n", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[31mGrammarError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[26]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m parser = \u001b[43mLark\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlark_bnf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparser\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlalr\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstart\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/lark.py:300\u001b[39m, in \u001b[36mLark.__init__\u001b[39m\u001b[34m(self, grammar, **options)\u001b[39m\n\u001b[32m 296\u001b[39m \u001b[38;5;28mself\u001b[39m.options = old_options\n\u001b[32m 299\u001b[39m \u001b[38;5;66;03m# Parse the grammar file and compose the grammars\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m300\u001b[39m \u001b[38;5;28mself\u001b[39m.grammar, used_files = \u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msource_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mimport_paths\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mkeep_all_tokens\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 301\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 302\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(grammar, Grammar)\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1352\u001b[39m, in \u001b[36mload_grammar\u001b[39m\u001b[34m(grammar, source, import_paths, global_keep_all_tokens)\u001b[39m\n\u001b[32m 1350\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(grammar, source, import_paths, global_keep_all_tokens):\n\u001b[32m 1351\u001b[39m builder = GrammarBuilder(global_keep_all_tokens, import_paths)\n\u001b[32m-> \u001b[39m\u001b[32m1352\u001b[39m \u001b[43mbuilder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msource\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1353\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m builder.build(), builder.used_files\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:1185\u001b[39m, in \u001b[36mGrammarBuilder.load_grammar\u001b[39m\u001b[34m(self, grammar_text, grammar_name, mangle)\u001b[39m\n\u001b[32m 1184\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload_grammar\u001b[39m(\u001b[38;5;28mself\u001b[39m, grammar_text, grammar_name=\u001b[33m\"\u001b[39m\u001b[33m\u001b[39m\u001b[33m\"\u001b[39m, mangle=\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m-> \u001b[39m\u001b[32m1185\u001b[39m tree = \u001b[43m_parse_grammar\u001b[49m\u001b[43m(\u001b[49m\u001b[43mgrammar_text\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgrammar_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1187\u001b[39m imports = {}\n\u001b[32m 1188\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m stmt \u001b[38;5;129;01min\u001b[39;00m tree.children:\n", "\u001b[36mFile \u001b[39m\u001b[32m~/VsCodeProjects/assistance-engine/.venv/lib/python3.12/site-packages/lark/load_grammar.py:955\u001b[39m, in \u001b[36m_parse_grammar\u001b[39m\u001b[34m(text, name, start)\u001b[39m\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedCharacters \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 954\u001b[39m context = e.get_context(text)\n\u001b[32m--> \u001b[39m\u001b[32m955\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m GrammarError(\u001b[33m\"\u001b[39m\u001b[33mUnexpected input at line \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m column \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[33m in \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m: \u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[33m\"\u001b[39m %\n\u001b[32m 956\u001b[39m (e.line, e.column, name, context))\n\u001b[32m 957\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m UnexpectedToken \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 958\u001b[39m context = e.get_context(text)\n", "\u001b[31mGrammarError\u001b[39m: Unexpected input at line 4 column 52 in : \n\n : /* Retorno de carro / Salto de línea (\\n o \\r\\n) */\n ^\n" ] } ], "source": [ "parser = Lark(lark_bnf, parser=\"lalr\", start=\"start\")" ] }, { "cell_type": "markdown", "id": "49953efd", "metadata": {}, "source": [ "# BNF conversion to EBNF" ] }, { "cell_type": "markdown", "id": "32dbc2c5", "metadata": {}, "source": [ "# EBNF Check" ] }, { "cell_type": "code", "execution_count": 63, "id": "37968906", "metadata": {}, "outputs": [], "source": [ "ebnf_text = r\"\"\"\n", "assign ::= name '=' num ;\n", "name ::= 'a' | 'b' | 'c' ;\n", "num ::= [0-9] ;\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 64, "id": "b234f2c4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BNF: True\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Generating LALR tables\n" ] } ], "source": [ "ebnf_grammar(ebnf_text)\n", "print(\"BNF:\", ebnf_parse(\"a=7\"))" ] }, { "cell_type": "markdown", "id": "66fb8fee", "metadata": {}, "source": [ "# Lark check EBNF Style" ] }, { "cell_type": "code", "execution_count": 54, "id": "08e53ccb", "metadata": {}, "outputs": [], "source": [ "ebnf_text = r\"\"\"\n", "start: assign\n", "\n", "assign: name \"=\" num\n", "name: \"a\" | \"b\" | \"c\"\n", "num: DIGIT\n", "\n", "DIGIT: /[0-9]/\n", "\n", "%ignore \" \"\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 55, "id": "52935608", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tree(Token('RULE', 'start'), [Tree(Token('RULE', 'assign'), [Tree(Token('RULE', 'name'), []), Tree(Token('RULE', 'num'), [Token('DIGIT', '7')])])])\n" ] } ], "source": [ "parser = Lark(ebnf_text)\n", "\n", "print(parser.parse(\"a=7\"))" ] } ], "metadata": { "kernelspec": { "display_name": "assistance-engine", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.11" } }, "nbformat": 4, "nbformat_minor": 5 }