{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b646fb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "! uv pip install bnf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "274d6d68",
   "metadata": {},
   "outputs": [],
   "source": [
    "! uv pip install ebnf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a8abbfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pprint\n",
    "import re\n",
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "from typing import Any, Dict, List, Optional, Tuple\n",
    "\n",
    "from bnf import grammar as bnf_grammar, parse as bnf_parse\n",
    "from lark import Lark, Tree\n",
    "\n",
    "from src.config import settings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "baa779f3",
   "metadata": {},
   "source": [
    "# Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c1e4f2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_avap_source(path: Path) -> str:\n",
    "    \"\"\"Return the text of `path` with line endings normalised to LF.\n",
    "\n",
    "    The file is decoded as UTF-8; if that raises UnicodeDecodeError it is\n",
    "    read again as latin-1. A final newline is appended when missing because\n",
    "    every `line` alternative in the grammar ends with an EOL token.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        text = path.read_text(encoding=\"utf-8\")\n",
    "    except UnicodeDecodeError:\n",
    "        # latin-1 maps every byte to a character, so decoding cannot fail here.\n",
    "        text = path.read_text(encoding=\"latin-1\")\n",
    "    text = text.replace(\"\\r\\n\", \"\\n\").replace(\"\\r\", \"\\n\")\n",
    "    if not text.endswith(\"\\n\"):\n",
    "        text += \"\\n\"\n",
    "    return text\n",
    "\n",
    "\n",
    "def parse_avap_file(parser: Lark, path: Path) -> Dict[str, Any]:\n",
    "    \"\"\"Parse one .avap file and return a result record instead of raising.\n",
    "\n",
    "    Args:\n",
    "        parser: Lark parser built from the AVAP grammar.\n",
    "        path: File to read and parse.\n",
    "\n",
    "    Returns:\n",
    "        On success a dict with keys 'ast', 'tree' (pretty-printed AST) and\n",
    "        'status' == 'success'. On any exception a dict with keys 'error'\n",
    "        (the exception message), 'error_type' (its class name) and\n",
    "        'status' == 'error', so one bad file does not abort a batch run.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        ast = parser.parse(read_avap_source(path))\n",
    "        return {\"ast\": ast, \"tree\": ast.pretty(), \"status\": \"success\"}\n",
    "    except Exception as exc:\n",
    "        return {\n",
    "            \"error\": str(exc),\n",
    "            \"error_type\": type(exc).__name__,\n",
    "            \"status\": \"error\",\n",
    "        }"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23a92e13",
   "metadata": {},
   "source": [
    "# BNF to Lark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93d2db25",
   "metadata": {},
   "outputs": [],
   "source": [
    "grammar = r\"\"\"\n",
    "start: program\n",
    " \n",
    "program: (line | BLOCK_COMMENT)*\n",
    " \n",
    "line: statement comment? EOL\n",
    "\n",
    " | comment EOL\n",
    "\n",
    " | EOL\n",
    " \n",
    "comment: DOC_COMMENT | LINE_COMMENT\n",
    " \n",
    "EOL: /\\r?\\n/\n",
    " \n",
    "DOC_COMMENT.2: /\\/\\/\\/[^\\r\\n]*/\n",
    "\n",
    "LINE_COMMENT.1: /\\/\\/[^\\r\\n]*/\n",
    "\n",
    "BLOCK_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//\n",
    " \n",
    "statement: assignment\n",
    "\n",
    " | function_decl\n",
    "\n",
    " | return_stmt\n",
    "\n",
    " | system_command\n",
    "\n",
    " | io_command\n",
    "\n",
    " | control_flow\n",
    "\n",
    " | async_command\n",
    "\n",
    " | connector_cmd\n",
    "\n",
    " | db_command\n",
    "\n",
    " | http_command\n",
    "\n",
    " | util_command\n",
    "\n",
    " | modularity_cmd\n",
    "\n",
    " | call_stmt\n",
    " \n",
    "assignment: identifier \"=\" expression\n",
    " \n",
    "call_stmt: identifier \"(\" argument_list? \")\"\n",
    "\n",
    " | identifier \"=\" identifier \".\" identifier \"(\" argument_list? \")\"\n",
    "\n",
    " | identifier \".\" identifier \"(\" argument_list? \")\"\n",
    " \n",
    "system_command: register_cmd\n",
    "\n",
    " | addvar_cmd\n",
    " \n",
    "register_cmd: \"registerEndpoint\" \"(\" stringliteral \",\" stringliteral \",\" list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\n",
    " \n",
    "addvar_cmd: \"addVar\" \"(\" addvar_arg \",\" addvar_arg \")\"\n",
    "\n",
    "addvar_arg: identifier\n",
    "\n",
    " | literal\n",
    "\n",
    " | \"$\" identifier\n",
    " \n",
    "identifier: IDENTIFIER\n",
    "\n",
    "system_variable: \"_status\"\n",
    " \n",
    "io_command: addparam_cmd\n",
    "\n",
    " | getlistlen_cmd\n",
    "\n",
    " | addresult_cmd\n",
    "\n",
    " | getparamlist_cmd\n",
    " \n",
    "addparam_cmd: \"addParam\" \"(\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "getlistlen_cmd: \"getListLen\" \"(\" identifier \",\" identifier \")\"\n",
    "\n",
    "getparamlist_cmd: \"getQueryParamList\" \"(\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "addresult_cmd: \"addResult\" \"(\" identifier \")\"\n",
    " \n",
    "control_flow: if_stmt\n",
    "\n",
    " | loop_stmt\n",
    "\n",
    " | try_stmt\n",
    " \n",
    "if_stmt: \"if\" \"(\" if_condition \")\" EOL block (\"else\" \"(\" \")\" EOL block)? \"end\" \"(\" \")\" EOL\n",
    " \n",
    "if_condition: if_atom \",\" if_atom \",\" stringliteral\n",
    "\n",
    " | \"None\" \",\" \"None\" \",\" stringliteral\n",
    " \n",
    "if_atom: identifier\n",
    "\n",
    " | literal\n",
    " \n",
    "loop_stmt: \"startLoop\" \"(\" identifier \",\" expression \",\" expression \")\" EOL block \"endLoop\" \"(\" \")\" EOL\n",
    " \n",
    "try_stmt: \"try\" \"(\" \")\" EOL block \"exception\" \"(\" identifier \")\" EOL block \"end\" \"(\" \")\" EOL\n",
    " \n",
    "block: line*\n",
    " \n",
    "async_command: go_stmt\n",
    "\n",
    " | gather_stmt\n",
    " \n",
    "go_stmt: identifier \"=\" \"go\" identifier \"(\" argument_list? \")\"\n",
    "\n",
    "gather_stmt: identifier \"=\" \"gather\" \"(\" identifier (\",\" expression)? \")\"\n",
    " \n",
    "connector_cmd: connector_instantiation\n",
    " \n",
    "connector_instantiation: identifier \"=\" \"avapConnector\" \"(\" stringliteral \")\"\n",
    " \n",
    "http_command: req_post_cmd\n",
    "\n",
    " | req_get_cmd\n",
    " \n",
    "req_post_cmd: \"RequestPost\" \"(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
    "\n",
    "req_get_cmd: \"RequestGet\" \"(\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
    " \n",
    "db_command: orm_direct\n",
    "\n",
    " | orm_check\n",
    "\n",
    " | orm_create\n",
    "\n",
    " | orm_select\n",
    "\n",
    " | orm_insert\n",
    "\n",
    " | orm_update\n",
    " \n",
    "orm_direct: \"ormDirect\" \"(\" expression \",\" identifier \")\"\n",
    "\n",
    "orm_check: \"ormCheckTable\" \"(\" expression \",\" identifier \")\"\n",
    "\n",
    "orm_create: \"ormCreateTable\" \"(\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
    " \n",
    "orm_select: \"ormAccessSelect\" \"(\" orm_fields \",\" expression (\",\" expression)? \",\" identifier \")\"\n",
    "\n",
    "orm_fields: \"*\"\n",
    "\n",
    " | expression\n",
    " \n",
    "orm_insert: \"ormAccessInsert\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
    " \n",
    "orm_update: \"ormAccessUpdate\" \"(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
    " \n",
    "util_command: json_list_cmd\n",
    "\n",
    " | crypto_cmd\n",
    "\n",
    " | regex_cmd\n",
    "\n",
    " | datetime_cmd\n",
    "\n",
    " | stamp_cmd\n",
    "\n",
    " | string_cmd\n",
    "\n",
    " | replace_cmd\n",
    " \n",
    "json_list_cmd: \"variableToList\" \"(\" expression \",\" identifier \")\"\n",
    "\n",
    " | \"itemFromList\" \"(\" identifier \",\" expression \",\" identifier \")\"\n",
    "\n",
    " | \"variableFromJSON\" \"(\" identifier \",\" expression \",\" identifier \")\"\n",
    "\n",
    " | \"AddVariableToJSON\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
    " \n",
    "crypto_cmd: \"encodeSHA256\" \"(\" identifier_or_string \",\" identifier \")\"\n",
    "\n",
    " | \"encodeMD5\" \"(\" identifier_or_string \",\" identifier \")\"\n",
    " \n",
    "regex_cmd: \"getRegex\" \"(\" identifier \",\" stringliteral \",\" identifier \")\"\n",
    " \n",
    "datetime_cmd: \"getDateTime\" \"(\" stringliteral \",\" expression \",\" stringliteral \",\" identifier \")\"\n",
    " \n",
    "stamp_cmd: \"stampToDatetime\" \"(\" expression \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
    "\n",
    " | \"getTimeStamp\" \"(\" stringliteral \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
    " \n",
    "string_cmd: \"randomString\" \"(\" expression \",\" identifier \")\"\n",
    " \n",
    "replace_cmd: \"replace\" \"(\" identifier_or_string \",\" stringliteral \",\" stringliteral \",\" identifier \")\"\n",
    " \n",
    "function_decl: \"function\" identifier \"(\" param_list? \")\" \"{\" EOL block \"}\" EOL\n",
    " \n",
    "param_list: identifier (\",\" identifier)*\n",
    " \n",
    "return_stmt: \"return\" \"(\" expression? \")\"\n",
    " \n",
    "modularity_cmd: include_stmt\n",
    "\n",
    " | import_stmt\n",
    " \n",
    "include_stmt: \"include\" stringliteral\n",
    "\n",
    "import_stmt: \"import\" (\"<\" identifier \">\" | stringliteral)\n",
    " \n",
    "?expression: logical_or\n",
    " \n",
    "?logical_or: logical_and (\"or\" logical_and)*\n",
    "\n",
    "?logical_and: logical_not (\"and\" logical_not)*\n",
    "\n",
    "?logical_not: \"not\" logical_not\n",
    "\n",
    " | comparison\n",
    " \n",
    "?comparison: arithmetic (comp_op arithmetic)*\n",
    "\n",
    "comp_op: \"==\" | \"!=\" | \"<\" | \">\" | \"<=\" | \">=\" | \"in\" | \"is\"\n",
    " \n",
    "?arithmetic: term ((\"+\" | \"-\") term)*\n",
    "\n",
    "?term: factor ((\"*\" | \"/\" | \"%\") factor)*\n",
    "\n",
    "?factor: (\"+\" | \"-\") factor\n",
    "\n",
    " | power\n",
    " \n",
    "?power: primary (\"**\" factor)?\n",
    " \n",
    "?primary: atom postfix*\n",
    " \n",
    "postfix: \".\" identifier\n",
    "\n",
    " | \"[\" expression \"]\"\n",
    "\n",
    " | \"[\" expression? \":\" expression? (\":\" expression?)? \"]\"\n",
    "\n",
    " | \"(\" argument_list? \")\"\n",
    " \n",
    "?atom: identifier\n",
    "\n",
    " | \"$\" identifier\n",
    "\n",
    " | literal\n",
    "\n",
    " | \"(\" expression \")\"\n",
    "\n",
    " | list_display\n",
    "\n",
    " | dict_display\n",
    " \n",
    "list_display: \"[\" argument_list? \"]\"\n",
    "\n",
    " | \"[\" expression \"for\" identifier \"in\" expression if_clause? \"]\"\n",
    " \n",
    "if_clause: \"if\" expression\n",
    " \n",
    "dict_display: \"{\" key_datum_list? \"}\"\n",
    "\n",
    "key_datum_list: key_datum (\",\" key_datum)*\n",
    "\n",
    "key_datum: expression \":\" expression\n",
    " \n",
    "argument_list: expression (\",\" expression)*\n",
    " \n",
    "number: FLOATNUMBER\n",
    "\n",
    " | INTEGER\n",
    " \n",
    "literal: stringliteral\n",
    "\n",
    " | number\n",
    "\n",
    " | boolean\n",
    "\n",
    " | \"None\"\n",
    " \n",
    "boolean: \"True\" | \"False\"\n",
    " \n",
    "INTEGER: /[0-9]+/\n",
    "\n",
    "FLOATNUMBER: /(?:[0-9]+\\.[0-9]*|\\.[0-9]+)/\n",
    " \n",
    "stringliteral: STRING_DOUBLE\n",
    "\n",
    " | STRING_SINGLE\n",
    " \n",
    "STRING_DOUBLE: /\"([^\"\\\\]|\\\\[\"'\\\\ntr0])*\"/\n",
    "\n",
    "STRING_SINGLE: /'([^'\\\\]|\\\\[\"'\\\\ntr0])*'/\n",
    " \n",
    "identifier_or_string: identifier\n",
    "\n",
    " | stringliteral\n",
    " \n",
    "IDENTIFIER: /[A-Za-z_][A-Za-z0-9_]*/\n",
    " \n",
    "%ignore /[ \\t]+/\n",
    " \n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0cab2125",
   "metadata": {},
   "outputs": [],
   "source": [
    "code = \"\"\"\n",
    "addVar(mensaje, \"Hola mundo desde AVAP\")\n",
    "addResult(mensaje)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a266b2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory holding the .avap samples. Set AVAP_SAMPLES_DIR to point at another\n",
    "# checkout; the default is the path this notebook was originally written with.\n",
    "folder = os.environ.get(\n",
    "    \"AVAP_SAMPLES_DIR\",\n",
    "    \"/home/pseco/VsCodeProjects/assistance-engine/docs/samples/\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "522bdb3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "parser = Lark(grammar, parser=\"lalr\", propagate_positions=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3aa8026",
   "metadata": {},
   "outputs": [],
   "source": [
    "folder_path = Path(folder)\n",
    "\n",
    "# Parse every sample file; sorted() keeps the report order stable between runs.\n",
    "parsed_files = {\n",
    "    file_path.name: parse_avap_file(parser, file_path)\n",
    "    for file_path in sorted(folder_path.glob(\"*.avap\"))\n",
    "}\n",
    "\n",
    "# Summary, with the first line of each failure so it can be diagnosed.\n",
    "n_ok = sum(result[\"status\"] == \"success\" for result in parsed_files.values())\n",
    "n_failed = sum(result[\"status\"] == \"error\" for result in parsed_files.values())\n",
    "print(f\"Parsed {n_ok} files successfully\")\n",
    "print(f\"Failed to parse {n_failed} files\")\n",
    "print(\"\\nFiles processed:\")\n",
    "for filename, result in parsed_files.items():\n",
    "    print(f\"  - {filename}: {result['status']}\")\n",
    "    if result[\"status\"] == \"error\":\n",
    "        # Parser messages can span several lines; the first one is enough here.\n",
    "        reason = (result[\"error\"].strip().splitlines() or [\"\"])[0]\n",
    "        print(f\"      {result['error_type']}: {reason}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35b34377",
   "metadata": {},
   "outputs": [],
   "source": [
    "tree = parser.parse(code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "025b46a7",
   "metadata": {},
   "outputs": [],
   "source": [
    "print(tree.pretty())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "assistance-engine",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}