447 lines
15 KiB
Plaintext
447 lines
15 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "0a8abbfa",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"import os\n",
"import pprint\n",
"import re\n",
"from dataclasses import dataclass\n",
"from pathlib import Path\n",
"from typing import Any, Dict, List, Optional, Tuple\n",
"\n",
"from lark import Tree, Lark\n",
"from lark import Lark\n",
"\n",
"from src.config import settings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "baa779f3",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Functions"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "23a92e13",
|
|
"metadata": {},
|
|
"source": [
|
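"# BNF to Lark\n",
"\n",
"The next cell defines the AVAP grammar as a Lark grammar string. The cells after it build an LALR parser from that grammar and run it over the sample `.avap` files."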
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 29,
|
|
"id": "93d2db25",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"grammar = r\"\"\"\n",
|
|
"start: program\n",
|
|
"\n",
|
|
"program: separator* line_or_comment (separator+ line_or_comment)* separator*\n",
|
|
"\n",
|
|
"?line_or_comment: simple_stmt comment?\n",
|
|
" | compound_stmt\n",
|
|
" | comment\n",
|
|
" | BLOCK_COMMENT\n",
|
|
"\n",
|
|
"?separator: EOL+\n",
|
|
"\n",
|
|
"comment: DOC_COMMENT | LINE_COMMENT\n",
|
|
"\n",
|
|
"EOL: /\\r?\\n/\n",
|
|
"\n",
|
|
"DOC_COMMENT.2: /\\/\\/\\/[^\\r\\n]*/\n",
|
|
"LINE_COMMENT.1: /\\/\\/[^\\r\\n]*/\n",
|
|
"BLOCK_COMMENT: /\\/\\*[\\s\\S]*?\\*\\//\n",
|
|
"\n",
|
|
"?simple_stmt: assignment\n",
|
|
" | return_stmt\n",
|
|
" | system_command\n",
|
|
" | io_command\n",
|
|
" | async_command\n",
|
|
" | connector_cmd\n",
|
|
" | db_command\n",
|
|
" | http_command\n",
|
|
" | util_command\n",
|
|
" | modularity_cmd\n",
|
|
" | call_stmt\n",
|
|
"\n",
|
|
"?compound_stmt: function_decl\n",
|
|
" | if_stmt\n",
|
|
" | loop_stmt\n",
|
|
" | try_stmt\n",
|
|
"\n",
|
|
"assignment: identifier \"=\" expression\n",
|
|
"\n",
|
|
"call_stmt: identifier \"(\" argument_list? \")\"\n",
|
|
" | identifier \"=\" identifier \".\" identifier \"(\" argument_list? \")\"\n",
|
|
" | identifier \".\" identifier \"(\" argument_list? \")\"\n",
|
|
"\n",
|
|
"system_command: register_cmd\n",
|
|
" | addvar_cmd\n",
|
|
"\n",
|
|
"register_cmd: \"registerEndpoint\" \"(\" stringliteral \",\" stringliteral \",\" list_display \",\" stringliteral \",\" identifier \",\" identifier \")\"\n",
|
|
"\n",
|
|
"addvar_cmd: \"addVar\" \"(\" addvar_arg \",\" addvar_arg \")\"\n",
|
|
"\n",
|
|
"addvar_arg: identifier\n",
|
|
" | literal\n",
|
|
" | \"$\" identifier\n",
|
|
"\n",
|
|
"identifier: IDENTIFIER\n",
|
|
"\n",
|
|
"system_variable: \"_status\"\n",
|
|
"\n",
|
|
"io_command: addparam_cmd\n",
|
|
" | getlistlen_cmd\n",
|
|
" | addresult\n",
|
|
" | getparamlist_cmd\n",
|
|
"\n",
|
|
"addparam_cmd: \"addParam\" \"(\" stringliteral \",\" identifier \")\"\n",
|
|
"getlistlen_cmd: \"getListLen\" \"(\" identifier \",\" identifier \")\"\n",
|
|
"getparamlist_cmd: \"getQueryParamList\" \"(\" stringliteral \",\" identifier \")\"\n",
|
|
"addresult: \"addResult\" \"(\" identifier \")\"\n",
|
|
"\n",
|
|
"if_stmt: \"if\" \"(\" if_condition \")\" separator block (\"else\" \"(\" \")\" separator block)? \"end\" \"(\" \")\"\n",
|
|
"\n",
|
|
"if_condition: if_atom \",\" if_atom \",\" stringliteral\n",
|
|
" | \"None\" \",\" \"None\" \",\" stringliteral\n",
|
|
"\n",
|
|
"if_atom: identifier\n",
|
|
" | literal\n",
|
|
"\n",
|
|
"loop_stmt: \"startLoop\" \"(\" identifier \",\" expression \",\" expression \")\" separator block \"endLoop\" \"(\" \")\"\n",
|
|
"\n",
|
|
"try_stmt: \"try\" \"(\" \")\" separator block \"exception\" \"(\" identifier \")\" separator block \"end\" \"(\" \")\"\n",
|
|
"\n",
|
|
"block: separator* line_or_comment (separator+ line_or_comment)* separator*\n",
|
|
"\n",
|
|
"async_command: go_stmt\n",
|
|
" | gather_stmt\n",
|
|
"\n",
|
|
"go_stmt: identifier \"=\" \"go\" identifier \"(\" argument_list? \")\"\n",
|
|
"gather_stmt: identifier \"=\" \"gather\" \"(\" identifier (\",\" expression)? \")\"\n",
|
|
"\n",
|
|
"connector_cmd: connector_instantiation\n",
|
|
"\n",
|
|
"connector_instantiation: identifier \"=\" \"avapConnector\" \"(\" stringliteral \")\"\n",
|
|
"\n",
|
|
"http_command: req_post_cmd\n",
|
|
" | req_get_cmd\n",
|
|
"\n",
|
|
"req_post_cmd: \"RequestPost\" \"(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
|
|
"req_get_cmd: \"RequestGet\" \"(\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
|
|
"\n",
|
|
"db_command: orm_direct\n",
|
|
" | orm_check\n",
|
|
" | orm_create\n",
|
|
" | orm_select\n",
|
|
" | orm_insert\n",
|
|
" | orm_update\n",
|
|
"\n",
|
|
"orm_direct: \"ormDirect\" \"(\" expression \",\" identifier \")\"\n",
|
|
"orm_check: \"ormCheckTable\" \"(\" expression \",\" identifier \")\"\n",
|
|
"orm_create: \"ormCreateTable\" \"(\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
|
|
"\n",
|
|
"orm_select: \"ormAccessSelect\" \"(\" orm_fields \",\" expression (\",\" expression)? \",\" identifier \")\"\n",
|
|
"\n",
|
|
"orm_fields: \"*\"\n",
|
|
" | expression\n",
|
|
"\n",
|
|
"orm_insert: \"ormAccessInsert\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
|
|
"orm_update: \"ormAccessUpdate\" \"(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
|
|
"\n",
|
|
"util_command: json_list_cmd\n",
|
|
" | crypto_cmd\n",
|
|
" | regex_cmd\n",
|
|
" | datetime_cmd\n",
|
|
" | stamp_cmd\n",
|
|
" | string_cmd\n",
|
|
" | replace_cmd\n",
|
|
"\n",
|
|
"json_list_cmd: \"variableToList\" \"(\" expression \",\" identifier \")\"\n",
|
|
" | \"itemFromList\" \"(\" identifier \",\" expression \",\" identifier \")\"\n",
|
|
" | \"variableFromJSON\" \"(\" identifier \",\" expression \",\" identifier \")\"\n",
|
|
" | \"AddVariableToJSON\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
|
|
"\n",
|
|
"crypto_cmd: \"encodeSHA256\" \"(\" identifier_or_string \",\" identifier \")\"\n",
|
|
" | \"encodeMD5\" \"(\" identifier_or_string \",\" identifier \")\"\n",
|
|
"\n",
|
|
"regex_cmd: \"getRegex\" \"(\" identifier \",\" stringliteral \",\" identifier \")\"\n",
|
|
"\n",
|
|
"datetime_cmd: \"getDateTime\" \"(\" stringliteral \",\" expression \",\" stringliteral \",\" identifier \")\"\n",
|
|
"\n",
|
|
"stamp_cmd: \"stampToDatetime\" \"(\" expression \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
|
|
" | \"getTimeStamp\" \"(\" stringliteral \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
|
|
"\n",
|
|
"string_cmd: \"randomString\" \"(\" expression \",\" expression \",\" identifier \")\"\n",
|
|
"\n",
|
|
"replace_cmd: \"replace\" \"(\" identifier_or_string \",\" stringliteral \",\" stringliteral \",\" identifier \")\"\n",
|
|
"\n",
|
|
"function_decl: \"function\" identifier \"(\" param_list? \")\" \"{\" separator block \"}\"\n",
|
|
"\n",
|
|
"param_list: identifier (\",\" identifier)*\n",
|
|
"\n",
|
|
"return_stmt: \"return\" \"(\" expression? \")\"\n",
|
|
"\n",
|
|
"modularity_cmd: include_stmt\n",
|
|
" | import_stmt\n",
|
|
"\n",
|
|
"include_stmt: \"include\" stringliteral\n",
|
|
"import_stmt: \"import\" (\"<\" identifier \">\" | stringliteral)\n",
|
|
"\n",
|
|
"?expression: logical_or\n",
|
|
"\n",
|
|
"?logical_or: logical_and (\"or\" logical_and)*\n",
|
|
"?logical_and: logical_not (\"and\" logical_not)*\n",
|
|
"\n",
|
|
"?logical_not: \"not\" logical_not\n",
|
|
" | comparison\n",
|
|
"\n",
|
|
"?comparison: arithmetic (comp_op arithmetic)*\n",
|
|
"\n",
|
|
"comp_op: \"==\" | \"!=\" | \"<\" | \">\" | \"<=\" | \">=\" | \"in\" | \"is\"\n",
|
|
"\n",
|
|
"?arithmetic: term ((\"+\" | \"-\") term)*\n",
|
|
"?term: factor ((\"*\" | \"/\" | \"%\") factor)*\n",
|
|
"\n",
|
|
"?factor: (\"+\" | \"-\") factor\n",
|
|
" | power\n",
|
|
"\n",
|
|
"?power: primary (\"**\" factor)?\n",
|
|
"\n",
|
|
"?primary: atom postfix*\n",
|
|
"\n",
|
|
"postfix: \".\" identifier\n",
|
|
" | \"[\" expression \"]\"\n",
|
|
" | \"[\" expression? \":\" expression? (\":\" expression?)? \"]\"\n",
|
|
" | \"(\" argument_list? \")\"\n",
|
|
"\n",
|
|
"?atom: identifier\n",
|
|
" | \"$\" identifier\n",
|
|
" | literal\n",
|
|
" | \"(\" expression \")\"\n",
|
|
" | list_display\n",
|
|
" | dict_display\n",
|
|
"\n",
|
|
"list_display: \"[\" argument_list? \"]\"\n",
|
|
" | \"[\" expression \"for\" identifier \"in\" expression if_clause? \"]\"\n",
|
|
"\n",
|
|
"if_clause: \"if\" expression\n",
|
|
"\n",
|
|
"dict_display: \"{\" key_datum_list? \"}\"\n",
|
|
"\n",
|
|
"key_datum_list: key_datum (\",\" key_datum)*\n",
|
|
"key_datum: expression \":\" expression\n",
|
|
"\n",
|
|
"argument_list: expression (\",\" expression)*\n",
|
|
"\n",
|
|
"number: FLOATNUMBER\n",
|
|
" | INTEGER\n",
|
|
"\n",
|
|
"literal: stringliteral\n",
|
|
" | number\n",
|
|
" | boolean\n",
|
|
" | \"None\"\n",
|
|
"\n",
|
|
"boolean: \"True\" | \"False\"\n",
|
|
"\n",
|
|
"INTEGER: /[0-9]+/\n",
|
|
"FLOATNUMBER: /(?:[0-9]+\\.[0-9]*|\\.[0-9]+)/\n",
|
|
"\n",
|
|
"stringliteral: STRING_DOUBLE\n",
|
|
" | STRING_SINGLE\n",
|
|
"\n",
|
|
"STRING_DOUBLE: /\"([^\"\\\\]|\\\\.)*\"/\n",
|
|
"STRING_SINGLE: /'([^'\\\\]|\\\\.)*'/\n",
|
|
"\n",
|
|
"identifier_or_string: identifier\n",
|
|
" | stringliteral\n",
|
|
"\n",
|
|
"IDENTIFIER: /[A-Za-z_][A-Za-z0-9_]*/\n",
|
|
"\n",
|
|
"%ignore /[ \\t]+/\n",
|
|
"\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 30,
|
|
"id": "95267b2a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
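"# Stricter variant of the two string terminals from the grammar cell, kept for reference:\n",
"# a backslash may only be followed by one of  \" ' \\ n t r 0  (the active grammar accepts\n",
"# any character after a backslash).\n",
"# STRING_DOUBLE: /\"([^\"\\\\]|\\\\[\"'\\\\ntr0])*\"/\n",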
|
|
"# STRING_SINGLE: /'([^'\\\\]|\\\\[\"'\\\\ntr0])*'/"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 31,
|
|
"id": "0cab2125",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"code = \"\"\"\n",
|
|
"addVar(mensaje, \"Hola mundo desde AVAP\")\n",
|
|
"addResult(mensaje)\n",
|
|
"\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"id": "6a266b2a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"folder = \"/home/pseco/VsCodeProjects/assistance-engine/docs/samples/\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"id": "522bdb3b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Compile the grammar once with the LALR algorithm; `program` is the entry rule and\n",
"# position information is propagated onto the tree nodes.\n",
"parser = Lark(\n",
"    grammar,\n",
"    start=\"program\",\n",
"    parser=\"lalr\",\n",
"    propagate_positions=True,\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"id": "d3aa8026",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Parsed 26 files successfully\n",
|
|
"Failed to parse 7 files\n",
|
|
"\n",
|
|
"Files processed:\n",
|
|
" - if_desigualdad.avap: error\n",
|
|
" - manejo_error_sql_critico.avap: error\n",
|
|
" - captura_de_listas_multiples.avap: error\n",
|
|
" - expresion_compleja.avap: error\n",
|
|
" - validacion_in_pertenece_a_lista.avap: error\n",
|
|
" - try_catch_request.avap: error\n",
|
|
" - else_estandar.avap: error\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
"def _read_avap_source(path: Path) -> str:\n",
"    \"\"\"Return the text of ``path`` with all line endings normalised to LF.\n",
"\n",
"    The file is decoded as UTF-8; if that fails it is re-read as latin-1,\n",
"    which maps every byte to a character and so cannot fail to decode.\n",
"    \"\"\"\n",
"    try:\n",
"        text = path.read_text(encoding=\"utf-8\")\n",
"    except UnicodeDecodeError:\n",
"        text = path.read_text(encoding=\"latin-1\")\n",
"    return text.replace('\\r\\n', '\\n').replace('\\r', '\\n')\n",
"\n",
"\n",
"def _parse_avap_file(path: Path) -> Dict[str, Any]:\n",
"    \"\"\"Parse one sample file and return its ``parsed_files`` entry.\n",
"\n",
"    On success the entry holds ``ast`` (the Lark tree), ``tree`` (its\n",
"    pretty-printed form) and ``status == \"success\"``. If reading or parsing\n",
"    raises, the entry holds ``error`` (the exception text) and\n",
"    ``status == \"error\"``.\n",
"    \"\"\"\n",
"    try:\n",
"        ast = parser.parse(_read_avap_source(path))\n",
"        return {\"ast\": ast, \"tree\": ast.pretty(), \"status\": \"success\"}\n",
"    except Exception as exc:  # best effort: one bad sample must not abort the batch\n",
"        return {\"error\": str(exc), \"status\": \"error\"}\n",
"\n",
"\n",
"folder_path = Path(folder)\n",
"if not folder_path.is_dir():\n",
"    # Without this hint a wrong path would just report zero parsed files.\n",
"    print(f\"Warning: sample folder not found: {folder_path}\")\n",
"\n",
"# Parse every .avap file; sorted so the report order is the same on every run.\n",
"parsed_files = {\n",
"    file_path.name: _parse_avap_file(file_path)\n",
"    for file_path in sorted(folder_path.glob(\"*.avap\"))\n",
"}\n",
"\n",
"# Summary: counts first, then one line per failed file with its error message.\n",
"failed_files = {\n",
"    name: entry for name, entry in parsed_files.items() if entry[\"status\"] == \"error\"\n",
"}\n",
"print(f\"Parsed {len(parsed_files) - len(failed_files)} files successfully\")\n",
"print(f\"Failed to parse {len(failed_files)} files\")\n",
"if failed_files:\n",
"    print(\"\\nFailed files:\")\n",
"    for filename, result in failed_files.items():\n",
"        # Error messages can span several lines; show only the first one here.\n",
"        summary = next(iter(result[\"error\"].splitlines()), \"\")\n",
"        print(f\"  - {filename}: {summary}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "35b34377",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Show only the per-file outcome; the full entries hold whole parse trees plus their\n",
"# pretty-printed text and would flood the cell output.\n",
"{name: entry[\"status\"] for name, entry in parsed_files.items()}"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"id": "e9e6c0fb",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Tree('program', [Token('EOL', '\\n'), Tree('system_command', [Tree('addvar_cmd', [Tree('addvar_arg', [Tree('identifier', [Token('IDENTIFIER', 'mensaje')])]), Tree('addvar_arg', [Tree('literal', [Tree('stringliteral', [Token('STRING_DOUBLE', '\"Hola mundo desde AVAP\"')])])])])]), Token('EOL', '\\n'), Tree('io_command', [Tree('addresult', [Tree('identifier', [Token('IDENTIFIER', 'mensaje')])])]), Token('EOL', '\\n')])"
|
|
]
|
|
},
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"# Parse the inline sample snippet and display the resulting tree.\n",
"sample_tree = parser.parse(code)\n",
"sample_tree"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "assistance-engine",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|