{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a8abbfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "from dataclasses import dataclass\n",
    "import pprint\n",
    "from pathlib import Path\n",
    "from typing import Any, Dict, List, Optional, Tuple\n",
    "from lark import Tree, Lark\n",
    "from src.config import settings\n",
    "from lark import Lark"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "baa779f3",
   "metadata": {},
   "source": [
    "# Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7f2d9e10",
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_avap_file(file_path: Path, parser: Lark) -> Dict[str, Any]:\n",
    "    \"\"\"Parse one AVAP source file and return a result record.\n",
    "\n",
    "    The file is decoded as UTF-8, falling back to latin-1 if that fails, and\n",
    "    its line endings are normalised to LF before it is handed to ``parser``.\n",
    "\n",
    "    Args:\n",
    "        file_path: Path of the file to read.\n",
    "        parser: Lark parser used to parse the file content.\n",
    "\n",
    "    Returns:\n",
    "        On success: {\"ast\": <Lark tree>, \"tree\": <pretty-printed tree>,\n",
    "        \"status\": \"success\"}.\n",
    "        On failure: {\"error\": <exception message>, \"status\": \"error\"}; the\n",
    "        message is prefixed with \"Encoding error: \" when the failure happened\n",
    "        after falling back to latin-1.\n",
    "    \"\"\"\n",
    "    error_prefix = \"\"\n",
    "    try:\n",
    "        try:\n",
    "            file_content = file_path.read_text(encoding=\"utf-8\")\n",
    "        except UnicodeDecodeError:\n",
    "            # latin-1 is more permissive; flag later failures as encoding-related.\n",
    "            error_prefix = \"Encoding error: \"\n",
    "            file_content = file_path.read_text(encoding=\"latin-1\")\n",
    "        # The grammar's EOL terminal accepts \\r?\\n but not a lone \\r.\n",
    "        file_content = file_content.replace('\\r\\n', '\\n').replace('\\r', '\\n')\n",
    "        ast = parser.parse(file_content)\n",
    "        return {\"ast\": ast, \"tree\": ast.pretty(), \"status\": \"success\"}\n",
    "    except Exception as exc:\n",
    "        # Deliberately broad: one bad sample must not abort the whole batch.\n",
    "        # The message is recorded so the failure can be inspected afterwards.\n",
    "        return {\"error\": f\"{error_prefix}{exc}\", \"status\": \"error\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23a92e13",
   "metadata": {},
   "source": [
    "# BNF to Lark\n",
    "\n",
    "Lark grammar for AVAP, compiled with the LALR parser further down.\n",
    "\n",
    "Lark drops anonymous string tokens (`\"+\"`, `\"not\"`, `\"True\"`, ...) from the tree, and rules prefixed with `?` are inlined when a single child remains. Rules whose literal carries meaning are therefore marked `!` (keep tokens): `comp_op`, `boolean`, `addvar_arg`, `logical_not`, `arithmetic`, `term` and `factor`. The `$identifier` atom is aliased to `var_ref`. Without these markers `not x`, `-x` and `$x` all collapse to `x`, and `a < b` produces the same tree as `a > b`.\n",
    "\n",
    "**TODO:** `json_list_cmd`, `crypto_cmd` and `postfix` still do not record which alternative matched (for example `encodeSHA256` and `encodeMD5` give identical trees). Give each alternative an alias (`-> name`) once the tree consumers are known."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "93d2db25",
   "metadata": {},
   "outputs": [],
   "source": [
    "grammar = r\"\"\"\n",
    "start: program\n",
    "\n",
    "program: separator* line_or_comment (separator+ line_or_comment)* separator*\n",
    "\n",
    "?line_or_comment: simple_stmt comment?\n",
    "    | compound_stmt\n",
    "    | comment\n",
    "    | BLOCK_COMMENT\n",
    "\n",
    "?separator: EOL+\n",
    "\n",
    "comment: DOC_COMMENT | LINE_COMMENT\n",
    "\n",
    "EOL: /\\r?\\n/\n",
    "\n",
    "DOC_COMMENT.2: /\\/\\/\\/[^\\r\\n]*/\n",
    "LINE_COMMENT.1: /\\/\\/[^\\r\\n]*/\n",
    "BLOCK_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//\n",
    "\n",
    "?simple_stmt: assignment\n",
    "    | return_stmt\n",
    "    | system_command\n",
    "    | io_command\n",
    "    | async_command\n",
    "    | connector_cmd\n",
    "    | db_command\n",
    "    | http_command\n",
    "    | util_command\n",
    "    | modularity_cmd\n",
    "    | call_stmt\n",
    "\n",
    "?compound_stmt: function_decl\n",
    "    | if_stmt\n",
    "    | loop_stmt\n",
    "    | try_stmt\n",
    "\n",
    "assignment: identifier \"=\" expression\n",
    "\n",
    "call_stmt: identifier \"(\" argument_list? \")\"\n",
    "    | identifier \"=\" identifier \".\" identifier \"(\" argument_list? \")\"\n",
    "    | identifier \".\" identifier \"(\" argument_list? \")\"\n",
    "\n",
    "system_command: register_cmd\n",
    "    | addvar_cmd\n",
    "\n",
    "register_cmd: \"registerEndpoint\" \"(\" stringliteral \",\" stringliteral \",\" list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\n",
    "\n",
    "addvar_cmd: \"addVar\" \"(\" addvar_arg \",\" addvar_arg \")\"\n",
    "\n",
    "!addvar_arg: identifier\n",
    "    | literal\n",
    "    | \"$\" identifier\n",
    "\n",
    "identifier: IDENTIFIER\n",
    "\n",
    "system_variable: \"_status\"\n",
    "\n",
    "io_command: addparam_cmd\n",
    "    | getlistlen_cmd\n",
    "    | addresult\n",
    "    | getparamlist_cmd\n",
    "\n",
    "addparam_cmd: \"addParam\" \"(\" stringliteral \",\" identifier \")\"\n",
    "getlistlen_cmd: \"getListLen\" \"(\" identifier \",\" identifier \")\"\n",
    "getparamlist_cmd: \"getQueryParamList\" \"(\" stringliteral \",\" identifier \")\"\n",
    "addresult: \"addResult\" \"(\" identifier \")\"\n",
    "\n",
    "if_stmt: \"if\" \"(\" if_condition \")\" separator block (\"else\" \"(\" \")\" separator block)? \"end\" \"(\" \")\"\n",
    "\n",
    "if_condition: if_atom \",\" if_atom \",\" stringliteral\n",
    "    | \"None\" \",\" \"None\" \",\" stringliteral\n",
    "\n",
    "if_atom: identifier\n",
    "    | literal\n",
    "\n",
    "loop_stmt: \"startLoop\" \"(\" identifier \",\" expression \",\" expression \")\" separator block \"endLoop\" \"(\" \")\"\n",
    "\n",
    "try_stmt: \"try\" \"(\" \")\" separator block \"exception\" \"(\" identifier \")\" separator block \"end\" \"(\" \")\"\n",
    "\n",
    "block: separator* line_or_comment (separator+ line_or_comment)* separator*\n",
    "\n",
    "async_command: go_stmt\n",
    "    | gather_stmt\n",
    "\n",
    "go_stmt: identifier \"=\" \"go\" identifier \"(\" argument_list? \")\"\n",
    "gather_stmt: identifier \"=\" \"gather\" \"(\" identifier (\",\" expression)? \")\"\n",
    "\n",
    "connector_cmd: connector_instantiation\n",
    "\n",
    "connector_instantiation: identifier \"=\" \"avapConnector\" \"(\" stringliteral \")\"\n",
    "\n",
    "http_command: req_post_cmd\n",
    "    | req_get_cmd\n",
    "\n",
    "req_post_cmd: \"RequestPost\" \"(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
    "req_get_cmd: \"RequestGet\" \"(\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
    "\n",
    "db_command: orm_direct\n",
    "    | orm_check\n",
    "    | orm_create\n",
    "    | orm_select\n",
    "    | orm_insert\n",
    "    | orm_update\n",
    "\n",
    "orm_direct: \"ormDirect\" \"(\" expression \",\" identifier \")\"\n",
    "orm_check: \"ormCheckTable\" \"(\" expression \",\" identifier \")\"\n",
    "orm_create: \"ormCreateTable\" \"(\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
    "\n",
    "orm_select: \"ormAccessSelect\" \"(\" orm_fields \",\" expression (\",\" expression)? \",\" identifier \")\"\n",
    "\n",
    "orm_fields: \"*\"\n",
    "    | expression\n",
    "\n",
    "orm_insert: \"ormAccessInsert\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
    "orm_update: \"ormAccessUpdate\" \"(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
    "\n",
    "util_command: json_list_cmd\n",
    "    | crypto_cmd\n",
    "    | regex_cmd\n",
    "    | datetime_cmd\n",
    "    | stamp_cmd\n",
    "    | string_cmd\n",
    "    | replace_cmd\n",
    "\n",
    "json_list_cmd: \"variableToList\" \"(\" expression \",\" identifier \")\"\n",
    "    | \"itemFromList\" \"(\" identifier \",\" expression \",\" identifier \")\"\n",
    "    | \"variableFromJSON\" \"(\" identifier \",\" expression \",\" identifier \")\"\n",
    "    | \"AddVariableToJSON\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
    "\n",
    "crypto_cmd: \"encodeSHA256\" \"(\" identifier_or_string \",\" identifier \")\"\n",
    "    | \"encodeMD5\" \"(\" identifier_or_string \",\" identifier \")\"\n",
    "\n",
    "regex_cmd: \"getRegex\" \"(\" identifier \",\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "datetime_cmd: \"getDateTime\" \"(\" stringliteral \",\" expression \",\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "stamp_cmd: \"stampToDatetime\" \"(\" expression \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
    "    | \"getTimeStamp\" \"(\" stringliteral \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
    "\n",
    "string_cmd: \"randomString\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
    "\n",
    "replace_cmd: \"replace\" \"(\" identifier_or_string \",\" stringliteral \",\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "function_decl: \"function\" identifier \"(\" param_list? \")\" \"{\" separator block \"}\"\n",
    "\n",
    "param_list: identifier (\",\" identifier)*\n",
    "\n",
    "return_stmt: \"return\" \"(\" expression? \")\"\n",
    "\n",
    "modularity_cmd: include_stmt\n",
    "    | import_stmt\n",
    "\n",
    "include_stmt: \"include\" stringliteral\n",
    "import_stmt: \"import\" (\"<\" identifier \">\" | stringliteral)\n",
    "\n",
    "?expression: logical_or\n",
    "\n",
    "?logical_or: logical_and (\"or\" logical_and)*\n",
    "?logical_and: logical_not (\"and\" logical_not)*\n",
    "\n",
    "!?logical_not: \"not\" logical_not\n",
    "    | comparison\n",
    "\n",
    "?comparison: arithmetic (comp_op arithmetic)*\n",
    "\n",
    "!comp_op: \"==\" | \"!=\" | \"<\" | \">\" | \"<=\" | \">=\" | \"in\" | \"is\"\n",
    "\n",
    "!?arithmetic: term ((\"+\" | \"-\") term)*\n",
    "!?term: factor ((\"*\" | \"/\" | \"%\") factor)*\n",
    "\n",
    "!?factor: (\"+\" | \"-\") factor\n",
    "    | power\n",
    "\n",
    "?power: primary (\"**\" factor)?\n",
    "\n",
    "?primary: atom postfix*\n",
    "\n",
    "postfix: \".\" identifier\n",
    "    | \"[\" expression \"]\"\n",
    "    | \"[\" expression? \":\" expression? (\":\" expression?)? \"]\"\n",
    "    | \"(\" argument_list? \")\"\n",
    "\n",
    "?atom: identifier\n",
    "    | \"$\" identifier -> var_ref\n",
    "    | literal\n",
    "    | \"(\" expression \")\"\n",
    "    | list_display\n",
    "    | dict_display\n",
    "\n",
    "list_display: \"[\" argument_list? \"]\"\n",
    "    | \"[\" expression \"for\" identifier \"in\" expression if_clause? \"]\"\n",
    "\n",
    "if_clause: \"if\" expression\n",
    "\n",
    "dict_display: \"{\" key_datum_list? \"}\"\n",
    "\n",
    "key_datum_list: key_datum (\",\" key_datum)*\n",
    "key_datum: expression \":\" expression\n",
    "\n",
    "argument_list: expression (\",\" expression)*\n",
    "\n",
    "number: FLOATNUMBER\n",
    "    | INTEGER\n",
    "\n",
    "literal: stringliteral\n",
    "    | number\n",
    "    | boolean\n",
    "    | \"None\"\n",
    "\n",
    "!boolean: \"True\" | \"False\"\n",
    "\n",
    "INTEGER: /[0-9]+/\n",
    "FLOATNUMBER: /(?:[0-9]+\\.[0-9]*|\\.[0-9]+)/\n",
    "\n",
    "stringliteral: STRING_DOUBLE\n",
    "    | STRING_SINGLE\n",
    "\n",
    "STRING_DOUBLE: /\"([^\"\\\\]|\\\\.)*\"/\n",
    "STRING_SINGLE: /'([^'\\\\]|\\\\.)*'/\n",
    "\n",
    "identifier_or_string: identifier\n",
    "    | stringliteral\n",
    "\n",
    "IDENTIFIER: /[A-Za-z_][A-Za-z0-9_]*/\n",
    "\n",
    "%ignore /[ \\t]+/\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95267b2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative string terminals, kept for reference (not used by the grammar above).\n",
    "# They accept a backslash only when it is followed by one of: \" ' \\ n t r 0\n",
    "# STRING_DOUBLE: /\"([^\"\\\\]|\\\\[\"'\\\\ntr0])*\"/\n",
    "# STRING_SINGLE: /'([^'\\\\]|\\\\[\"'\\\\ntr0])*'/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0cab2125",
   "metadata": {},
   "outputs": [],
   "source": [
    "code = \"\"\"\n",
    "addVar(mensaje, \"Hola mundo desde AVAP\")\n",
    "addResult(mensaje)\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a266b2a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Set the AVAP_SAMPLES_DIR environment variable on machines where the default\n",
    "# checkout location below does not exist.\n",
    "folder = os.environ.get(\n",
    "    \"AVAP_SAMPLES_DIR\",\n",
    "    \"/home/pseco/VsCodeProjects/assistance-engine/docs/samples/\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "522bdb3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "parser = Lark(grammar, parser=\"lalr\", propagate_positions=True, start=\"program\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a9b3c5d7",
   "metadata": {},
   "source": [
    "## Parse the sample files\n",
    "\n",
    "Every `*.avap` file in `folder` is parsed; failures are recorded instead of raised so the whole batch is always processed.\n",
    "\n",
    "**Last recorded run.** The output previously saved with this step (execution count 23, older than the grammar cell's 29) reported 26 files parsed and 7 failures: `if_desigualdad.avap`, `manejo_error_sql_critico.avap`, `captura_de_listas_multiples.avap`, `expresion_compleja.avap`, `validacion_in_pertenece_a_lista.avap`, `try_catch_request.avap`, `else_estandar.avap`. Re-run the cells below for current numbers and the actual error messages."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d3aa8026",
   "metadata": {},
   "outputs": [],
   "source": [
    "folder_path = Path(folder)\n",
    "if not folder_path.is_dir():\n",
    "    # Without this check a wrong path silently reports \"Parsed 0 files\".\n",
    "    raise FileNotFoundError(f\"AVAP samples folder not found: {folder_path}\")\n",
    "\n",
    "# Sorted so the result order (and the report below) is stable across runs.\n",
    "parsed_files = {\n",
    "    file_path.name: parse_avap_file(file_path, parser)\n",
    "    for file_path in sorted(folder_path.glob(\"*.avap\"))\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4e1a7b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "succeeded = [name for name, result in parsed_files.items() if result[\"status\"] == \"success\"]\n",
    "failed = {name: result[\"error\"] for name, result in parsed_files.items() if result[\"status\"] == \"error\"}\n",
    "\n",
    "print(f\"Parsed {len(succeeded)} files successfully\")\n",
    "print(f\"Failed to parse {len(failed)} files\")\n",
    "if failed:\n",
    "    print(\"\\nFiles that failed to parse:\")\n",
    "    for filename, message in failed.items():\n",
    "        # One line per file here; the full message stays in parsed_files.\n",
    "        first_line = message.splitlines()[0] if message.strip() else \"<no message>\"\n",
    "        print(f\"  - {filename}: {first_line}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35b34377",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full error text per failing sample (the summary prints only the first line).\n",
    "# Successful ASTs are left out on purpose: displaying every tree floods the output.\n",
    "{name: result[\"error\"] for name, result in parsed_files.items() if result[\"status\"] == \"error\"}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e2f4a6c8",
   "metadata": {},
   "source": [
    "## Smoke test\n",
    "\n",
    "Parse the inline `code` sample defined above and display the resulting tree."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9e6c0fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "parser.parse(code)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "assistance-engine",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}