BNF extraction pipeline from avap.md
This commit is contained in:
parent
cd3922abbd
commit
3ac432567b
|
|
@ -0,0 +1,42 @@
|
|||
<program> ::= ( <line> | <block_comment> )*
|
||||
<line> ::= [ <statement> ] [ <line_comment> | <doc_comment> ] <EOL>
|
||||
| ( <line_comment> | <doc_comment> ) <EOL>
|
||||
<EOL> ::= /* Retorno de carro / Salto de línea (\n o \r\n) */
|
||||
|
||||
<statement> ::= <assignment>
|
||||
| <method_call_stmt>
|
||||
| <function_call_stmt>
|
||||
| <function_decl>
|
||||
| <return_stmt>
|
||||
| <system_command>
|
||||
| <io_command>
|
||||
| <control_flow>
|
||||
| <async_command>
|
||||
| <connector_cmd>
|
||||
| <db_command>
|
||||
| <http_command>
|
||||
| <util_command>
|
||||
| <modularity_cmd>
|
||||
|
||||
<assignment> ::= <identifier> "=" <expression>
|
||||
|
||||
/* Llamada a función global (sin receptor de objeto) */
|
||||
<function_call_stmt> ::= <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
/* Llamada a método sobre un objeto conector (con receptor) */
|
||||
<method_call_stmt> ::= <identifier> "=" <identifier> "." <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
<system_command> ::= <register_cmd> | <addvar_cmd>
|
||||
<register_cmd> ::= "registerEndpoint(" <stringliteral> "," <stringliteral> "," <list_display> "," <stringliteral> "," <identifier> "," <identifier> ")"
|
||||
/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor).
|
||||
Si ambos argumentos son identificadores, el valor del segundo se asigna al primero.
|
||||
No está permitido pasar dos literales como argumentos. */
|
||||
<addvar_cmd> ::= "addVar(" <addvar_arg> "," <addvar_arg> ")"
|
||||
<addvar_arg> ::= <identifier> | <literal> | "$" <identifier>
|
||||
/* Restricción semántica: al menos uno de los dos <addvar_arg> debe ser <identifier> */
|
||||
|
||||
<identifier> ::= [a-zA-Z_] [a-zA-Z0-9_]*
|
||||
|
||||
/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope:
|
||||
_status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = 404) */
|
||||
<system_variable> ::= "_status"
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
<io_command> ::= <addparam_cmd> | <getlistlen_cmd> | <addresult_cmd> | <getparamlist_cmd>
|
||||
<addparam_cmd> ::= "addParam(" <stringliteral> "," <identifier> ")"
|
||||
<getlistlen_cmd> ::= "getListLen(" <identifier> "," <identifier> ")"
|
||||
<getparamlist_cmd> ::= "getQueryParamList(" <stringliteral> "," <identifier> ")"
|
||||
<addresult_cmd> ::= "addResult(" <identifier> ")"
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
<control_flow> ::= <if_stmt> | <loop_stmt> | <try_stmt>
|
||||
|
||||
<if_stmt> ::= "if(" <if_condition> ")" <EOL>
|
||||
<block>
|
||||
[ "else()" <EOL> <block> ]
|
||||
"end()" <EOL>
|
||||
|
||||
/* if() soporta dos modos:
|
||||
Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
|
||||
identificadores simples o literales, nunca expresiones de acceso.
|
||||
Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']),
|
||||
debe asignarse previamente a una variable.
|
||||
Modo 2 — expresión libre: None, None, expresión compleja como string */
|
||||
<if_condition> ::= <if_atom> "," <if_atom> "," <stringliteral>
|
||||
| "None" "," "None" "," <stringliteral>
|
||||
<if_atom> ::= <identifier> | <literal>
|
||||
|
||||
<loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
|
||||
<block>
|
||||
"endLoop()" <EOL>
|
||||
|
||||
<try_stmt> ::= "try()" <EOL>
|
||||
<block>
|
||||
"exception(" <identifier> ")" <EOL>
|
||||
<block>
|
||||
"end()" <EOL>
|
||||
|
||||
<block> ::= <line>*
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
<async_command> ::= <go_stmt> | <gather_stmt>
|
||||
<go_stmt> ::= <identifier> "=" "go" <identifier> "(" [<argument_list>] ")"
|
||||
<gather_stmt> ::= <identifier> "=" "gather(" <identifier> ["," <expression>] ")"
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
/* Instanciación de conector de terceros y llamada a sus métodos dinámicos */
|
||||
<connector_cmd> ::= <connector_instantiation> | <connector_method_call>
|
||||
<connector_instantiation> ::= <identifier> "=" "avapConnector(" <stringliteral> ")"
|
||||
<connector_method_call> ::= [ <identifier> "=" ] <identifier> "." <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
/* Cliente HTTP con Timeout Obligatorio */
|
||||
<http_command> ::= <req_post_cmd> | <req_get_cmd>
|
||||
<req_post_cmd> ::= "RequestPost(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
|
||||
<req_get_cmd> ::= "RequestGet(" <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
|
||||
|
||||
/* ORM y Persistencia (Estandarizado con tableName) */
|
||||
<db_command> ::= <orm_direct> | <orm_check> | <orm_create> | <orm_select> | <orm_insert> | <orm_update>
|
||||
<orm_direct> ::= "ormDirect(" <expression> "," <identifier> ")"
|
||||
<orm_check> ::= "ormCheckTable(" <expression> "," <identifier> ")"
|
||||
<orm_create> ::= "ormCreateTable(" <expression> "," <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* ormAccessSelect(fields, tableName, selector, varTarget) */
|
||||
<orm_select> ::= "ormAccessSelect(" <orm_fields> "," <expression> "," [<expression>] "," <identifier> ")"
|
||||
<orm_fields> ::= "*" | <expression>
|
||||
|
||||
/* ormAccessInsert(fieldsValues, tableName, varTarget) */
|
||||
<orm_insert> ::= "ormAccessInsert(" <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */
|
||||
<orm_update> ::= "ormAccessUpdate(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> ")"
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */
|
||||
<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
|
||||
|
||||
/* Manipulación de listas y JSON */
|
||||
<json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")"
|
||||
| "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
|
||||
| "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
|
||||
| "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* Criptografía */
|
||||
<crypto_cmd> ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")"
|
||||
| "encodeMD5(" <identifier_or_string> "," <identifier> ")"
|
||||
|
||||
/* Expresiones regulares */
|
||||
<regex_cmd> ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")"
|
||||
|
||||
<datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
|
||||
/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */
|
||||
|
||||
<stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
|
||||
/* Argumentos: epoch_origen, formato, timedelta, destino */
|
||||
| "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
|
||||
/* Argumentos: fecha_string, formato_entrada, timedelta, destino */
|
||||
|
||||
<string_cmd> ::= "randomString(" <expression> "," <identifier> ")"
|
||||
/* Argumentos: longitud, destino */
|
||||
|
||||
<replace_cmd> ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
|
||||
/* Argumentos: origen, patron_busqueda, reemplazo, destino */
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
/* Nota: las funciones utilizan llaves {} como delimitadores de bloque por decisión
|
||||
arquitectónica explícita, diferenciándose de las estructuras de control (if, loop, try)
|
||||
que usan palabras clave de cierre (end(), endLoop()). Ambos patrones coexisten
|
||||
en la gramática y el parser los distingue por el token de apertura. */
|
||||
<function_decl> ::= "function" <identifier> "(" [<param_list>] ")" "{" <EOL>
|
||||
<block>
|
||||
"}" <EOL>
|
||||
<param_list> ::= <identifier> ("," <identifier>)*
|
||||
<return_stmt> ::= "return(" [<expression>] ")"
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
<modularity_cmd> ::= <include_stmt> | <import_stmt>
|
||||
<include_stmt> ::= "include" " " <stringliteral>
|
||||
<import_stmt> ::= "import" " " ( "<" <identifier> ">" | <stringliteral> )
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/* Jerarquía de Expresiones (Precedencia de menor a mayor) */
|
||||
<expression> ::= <logical_or>
|
||||
<logical_or> ::= <logical_and> ( "or" <logical_and> )*
|
||||
<logical_and> ::= <logical_not> ( "and" <logical_not> )*
|
||||
<logical_not> ::= "not" <logical_not> | <comparison>
|
||||
|
||||
<comparison> ::= <arithmetic> ( <comp_op> <arithmetic> )*
|
||||
<comp_op> ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is"
|
||||
|
||||
<arithmetic> ::= <term> ( ( "+" | "-" ) <term> )*
|
||||
<term> ::= <factor> ( ( "*" | "/" | "%" ) <factor> )*
|
||||
<factor> ::= ( "+" | "-" ) <factor> | <power>
|
||||
<power> ::= <primary> [ "**" <factor> ]
|
||||
|
||||
/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)
|
||||
La regla <primary> cubre también el acceso a métodos de objetos conector
|
||||
(conector.metodo(...)) y el acceso por clave a sus resultados (resultado["key"]) */
|
||||
<primary> ::= <atom>
|
||||
| <primary> "." <identifier>
|
||||
| <primary> "[" <expression> "]"
|
||||
| <primary> "[" [<expression>] ":" [<expression>] [":" [<expression>]] "]"
|
||||
| <primary> "(" [<argument_list>] ")"
|
||||
|
||||
<atom> ::= <identifier>
|
||||
| "$" <identifier>
|
||||
| <literal>
|
||||
| "(" <expression> ")"
|
||||
| <list_display>
|
||||
| <dict_display>
|
||||
|
||||
/* Estructuras de Datos, Comprensiones y Argumentos */
|
||||
<list_display> ::= "[" [<argument_list>] "]"
|
||||
| "[" <expression> "for" <identifier> "in" <expression> [<if_clause>] "]"
|
||||
<if_clause> ::= "if" <expression>
|
||||
<dict_display> ::= "{" [<key_datum_list>] "}"
|
||||
<key_datum_list> ::= <key_datum> ( "," <key_datum> )*
|
||||
<key_datum> ::= <expression> ":" <expression>
|
||||
<argument_list> ::= <expression> ( "," <expression> )*
|
||||
|
||||
/* Tipo numérico unificado */
|
||||
<number> ::= <floatnumber> | <integer>
|
||||
|
||||
/* Literales (Tipos de Datos Primitivos Soportados) */
|
||||
<literal> ::= <stringliteral> | <number> | <boolean> | "None"
|
||||
<boolean> ::= "True" | "False"
|
||||
<integer> ::= [0-9]+
|
||||
<floatnumber> ::= [0-9]+ "." [0-9]* | "." [0-9]+
|
||||
|
||||
/* Cadenas de Texto con soporte de secuencias de escape */
|
||||
<stringliteral> ::= "\"" <text_double> "\"" | "'" <text_single> "'"
|
||||
<escape_sequence> ::= "\\" ( "\"" | "'" | "\\" | "n" | "t" | "r" | "0" )
|
||||
<text_double> ::= ( [^"\\] | <escape_sequence> )*
|
||||
<text_single> ::= ( [^'\\] | <escape_sequence> )*
|
||||
<identifier_or_string> ::= <identifier> | <stringliteral>
|
||||
|
||||
/* Reglas de Comentarios para el Lexer
|
||||
El lexer aplica longest-match: /// debe evaluarse ANTES que // */
|
||||
<doc_comment> ::= "///" <any_text>
|
||||
<line_comment> ::= "//" <any_text>
|
||||
<block_comment> ::= "/*" <any_content> "*/"
|
||||
<any_text> ::= [^\r\n]*
|
||||
<any_content> ::= /* Cualquier secuencia de caracteres que no contenga la subcadena "*/" */
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
<program> ::= ( <line> | <block_comment> )*
|
||||
<line> ::= [ <statement> ] [ <line_comment> | <doc_comment> ] <EOL>
|
||||
| ( <line_comment> | <doc_comment> ) <EOL>
|
||||
<EOL> ::= /* Retorno de carro / Salto de línea (\n o \r\n) */
|
||||
|
||||
<statement> ::= <assignment>
|
||||
| <method_call_stmt>
|
||||
| <function_call_stmt>
|
||||
| <function_decl>
|
||||
| <return_stmt>
|
||||
| <system_command>
|
||||
| <io_command>
|
||||
| <control_flow>
|
||||
| <async_command>
|
||||
| <connector_cmd>
|
||||
| <db_command>
|
||||
| <http_command>
|
||||
| <util_command>
|
||||
| <modularity_cmd>
|
||||
|
||||
<assignment> ::= <identifier> "=" <expression>
|
||||
|
||||
/* Llamada a función global (sin receptor de objeto) */
|
||||
<function_call_stmt> ::= <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
/* Llamada a método sobre un objeto conector (con receptor) */
|
||||
<method_call_stmt> ::= <identifier> "=" <identifier> "." <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
<system_command> ::= <register_cmd> | <addvar_cmd>
|
||||
<register_cmd> ::= "registerEndpoint(" <stringliteral> "," <stringliteral> "," <list_display> "," <stringliteral> "," <identifier> "," <identifier> ")"
|
||||
/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor).
|
||||
Si ambos argumentos son identificadores, el valor del segundo se asigna al primero.
|
||||
No está permitido pasar dos literales como argumentos. */
|
||||
<addvar_cmd> ::= "addVar(" <addvar_arg> "," <addvar_arg> ")"
|
||||
<addvar_arg> ::= <identifier> | <literal> | "$" <identifier>
|
||||
/* Restricción semántica: al menos uno de los dos <addvar_arg> debe ser <identifier> */
|
||||
|
||||
<identifier> ::= [a-zA-Z_] [a-zA-Z0-9_]*
|
||||
|
||||
/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope:
|
||||
_status — código HTTP de respuesta (ej. addVar(_status, 401) o _status = 404) */
|
||||
<system_variable> ::= "_status"
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
<io_command> ::= <addparam_cmd> | <getlistlen_cmd> | <addresult_cmd> | <getparamlist_cmd>
|
||||
<addparam_cmd> ::= "addParam(" <stringliteral> "," <identifier> ")"
|
||||
<getlistlen_cmd> ::= "getListLen(" <identifier> "," <identifier> ")"
|
||||
<getparamlist_cmd> ::= "getQueryParamList(" <stringliteral> "," <identifier> ")"
|
||||
<addresult_cmd> ::= "addResult(" <identifier> ")"
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
<control_flow> ::= <if_stmt> | <loop_stmt> | <try_stmt>
|
||||
|
||||
<if_stmt> ::= "if(" <if_condition> ")" <EOL>
|
||||
<block>
|
||||
[ "else()" <EOL> <block> ]
|
||||
"end()" <EOL>
|
||||
|
||||
/* if() soporta dos modos:
|
||||
Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
|
||||
identificadores simples o literales, nunca expresiones de acceso.
|
||||
Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']),
|
||||
debe asignarse previamente a una variable.
|
||||
Modo 2 — expresión libre: None, None, expresión compleja como string */
|
||||
<if_condition> ::= <if_atom> "," <if_atom> "," <stringliteral>
|
||||
| "None" "," "None" "," <stringliteral>
|
||||
<if_atom> ::= <identifier> | <literal>
|
||||
|
||||
<loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
|
||||
<block>
|
||||
"endLoop()" <EOL>
|
||||
|
||||
<try_stmt> ::= "try()" <EOL>
|
||||
<block>
|
||||
"exception(" <identifier> ")" <EOL>
|
||||
<block>
|
||||
"end()" <EOL>
|
||||
|
||||
<block> ::= <line>*
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
<async_command> ::= <go_stmt> | <gather_stmt>
|
||||
<go_stmt> ::= <identifier> "=" "go" <identifier> "(" [<argument_list>] ")"
|
||||
<gather_stmt> ::= <identifier> "=" "gather(" <identifier> ["," <expression>] ")"
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
/* Instanciación de conector de terceros y llamada a sus métodos dinámicos */
|
||||
<connector_cmd> ::= <connector_instantiation> | <connector_method_call>
|
||||
<connector_instantiation> ::= <identifier> "=" "avapConnector(" <stringliteral> ")"
|
||||
<connector_method_call> ::= [ <identifier> "=" ] <identifier> "." <identifier> "(" [<argument_list>] ")"
|
||||
|
||||
/* Cliente HTTP con Timeout Obligatorio */
|
||||
<http_command> ::= <req_post_cmd> | <req_get_cmd>
|
||||
<req_post_cmd> ::= "RequestPost(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
|
||||
<req_get_cmd> ::= "RequestGet(" <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
|
||||
|
||||
/* ORM y Persistencia (Estandarizado con tableName) */
|
||||
<db_command> ::= <orm_direct> | <orm_check> | <orm_create> | <orm_select> | <orm_insert> | <orm_update>
|
||||
<orm_direct> ::= "ormDirect(" <expression> "," <identifier> ")"
|
||||
<orm_check> ::= "ormCheckTable(" <expression> "," <identifier> ")"
|
||||
<orm_create> ::= "ormCreateTable(" <expression> "," <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* ormAccessSelect(fields, tableName, selector, varTarget) */
|
||||
<orm_select> ::= "ormAccessSelect(" <orm_fields> "," <expression> "," [<expression>] "," <identifier> ")"
|
||||
<orm_fields> ::= "*" | <expression>
|
||||
|
||||
/* ormAccessInsert(fieldsValues, tableName, varTarget) */
|
||||
<orm_insert> ::= "ormAccessInsert(" <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */
|
||||
<orm_update> ::= "ormAccessUpdate(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> ")"
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */
|
||||
<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
|
||||
|
||||
/* Manipulación de listas y JSON */
|
||||
<json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")"
|
||||
| "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
|
||||
| "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
|
||||
| "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
|
||||
|
||||
/* Criptografía */
|
||||
<crypto_cmd> ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")"
|
||||
| "encodeMD5(" <identifier_or_string> "," <identifier> ")"
|
||||
|
||||
/* Expresiones regulares */
|
||||
<regex_cmd> ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")"
|
||||
|
||||
<datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
|
||||
/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */
|
||||
|
||||
<stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
|
||||
/* Argumentos: epoch_origen, formato, timedelta, destino */
|
||||
| "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
|
||||
/* Argumentos: fecha_string, formato_entrada, timedelta, destino */
|
||||
|
||||
<string_cmd> ::= "randomString(" <expression> "," <identifier> ")"
|
||||
/* Argumentos: longitud, destino */
|
||||
|
||||
<replace_cmd> ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
|
||||
/* Argumentos: origen, patron_busqueda, reemplazo, destino */
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
/* Nota: las funciones utilizan llaves {} como delimitadores de bloque por decisión
|
||||
arquitectónica explícita, diferenciándose de las estructuras de control (if, loop, try)
|
||||
que usan palabras clave de cierre (end(), endLoop()). Ambos patrones coexisten
|
||||
en la gramática y el parser los distingue por el token de apertura. */
|
||||
<function_decl> ::= "function" <identifier> "(" [<param_list>] ")" "{" <EOL>
|
||||
<block>
|
||||
"}" <EOL>
|
||||
<param_list> ::= <identifier> ("," <identifier>)*
|
||||
<return_stmt> ::= "return(" [<expression>] ")"
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
<modularity_cmd> ::= <include_stmt> | <import_stmt>
|
||||
<include_stmt> ::= "include" " " <stringliteral>
|
||||
<import_stmt> ::= "import" " " ( "<" <identifier> ">" | <stringliteral> )
|
||||
|
|
@ -0,0 +1,62 @@
|
|||
/* Jerarquía de Expresiones (Precedencia de menor a mayor) */
|
||||
<expression> ::= <logical_or>
|
||||
<logical_or> ::= <logical_and> ( "or" <logical_and> )*
|
||||
<logical_and> ::= <logical_not> ( "and" <logical_not> )*
|
||||
<logical_not> ::= "not" <logical_not> | <comparison>
|
||||
|
||||
<comparison> ::= <arithmetic> ( <comp_op> <arithmetic> )*
|
||||
<comp_op> ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is"
|
||||
|
||||
<arithmetic> ::= <term> ( ( "+" | "-" ) <term> )*
|
||||
<term> ::= <factor> ( ( "*" | "/" | "%" ) <factor> )*
|
||||
<factor> ::= ( "+" | "-" ) <factor> | <power>
|
||||
<power> ::= <primary> [ "**" <factor> ]
|
||||
|
||||
/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)
|
||||
La regla <primary> cubre también el acceso a métodos de objetos conector
|
||||
(conector.metodo(...)) y el acceso por clave a sus resultados (resultado["key"]) */
|
||||
<primary> ::= <atom>
|
||||
| <primary> "." <identifier>
|
||||
| <primary> "[" <expression> "]"
|
||||
| <primary> "[" [<expression>] ":" [<expression>] [":" [<expression>]] "]"
|
||||
| <primary> "(" [<argument_list>] ")"
|
||||
|
||||
<atom> ::= <identifier>
|
||||
| "$" <identifier>
|
||||
| <literal>
|
||||
| "(" <expression> ")"
|
||||
| <list_display>
|
||||
| <dict_display>
|
||||
|
||||
/* Estructuras de Datos, Comprensiones y Argumentos */
|
||||
<list_display> ::= "[" [<argument_list>] "]"
|
||||
| "[" <expression> "for" <identifier> "in" <expression> [<if_clause>] "]"
|
||||
<if_clause> ::= "if" <expression>
|
||||
<dict_display> ::= "{" [<key_datum_list>] "}"
|
||||
<key_datum_list> ::= <key_datum> ( "," <key_datum> )*
|
||||
<key_datum> ::= <expression> ":" <expression>
|
||||
<argument_list> ::= <expression> ( "," <expression> )*
|
||||
|
||||
/* Tipo numérico unificado */
|
||||
<number> ::= <floatnumber> | <integer>
|
||||
|
||||
/* Literales (Tipos de Datos Primitivos Soportados) */
|
||||
<literal> ::= <stringliteral> | <number> | <boolean> | "None"
|
||||
<boolean> ::= "True" | "False"
|
||||
<integer> ::= [0-9]+
|
||||
<floatnumber> ::= [0-9]+ "." [0-9]* | "." [0-9]+
|
||||
|
||||
/* Cadenas de Texto con soporte de secuencias de escape */
|
||||
<stringliteral> ::= "\"" <text_double> "\"" | "'" <text_single> "'"
|
||||
<escape_sequence> ::= "\\" ( "\"" | "'" | "\\" | "n" | "t" | "r" | "0" )
|
||||
<text_double> ::= ( [^"\\] | <escape_sequence> )*
|
||||
<text_single> ::= ( [^'\\] | <escape_sequence> )*
|
||||
<identifier_or_string> ::= <identifier> | <stringliteral>
|
||||
|
||||
/* Reglas de Comentarios para el Lexer
|
||||
El lexer aplica longest-match: /// debe evaluarse ANTES que // */
|
||||
<doc_comment> ::= "///" <any_text>
|
||||
<line_comment> ::= "//" <any_text>
|
||||
<block_comment> ::= "/*" <any_content> "*/"
|
||||
<any_text> ::= [^\r\n]*
|
||||
<any_content> ::= /* Cualquier secuencia de caracteres que no contenga la subcadena "*/" */
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "0a8abbfa",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
|
@ -24,7 +24,6 @@
|
|||
"from dataclasses import dataclass\n",
|
||||
"from pathlib import Path\n",
|
||||
"from typing import Any, Dict, List, Optional, Tuple\n",
|
||||
"# from bnf import grammar\n",
|
||||
"import nltk\n",
|
||||
"from elasticsearch import Elasticsearch\n",
|
||||
"from langchain_core.documents import Document\n",
|
||||
|
|
@ -185,7 +184,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "26ad9c81",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
|
@ -209,7 +208,7 @@
|
|||
}
|
||||
],
|
||||
"source": [
|
||||
"grammar_ = (DATA_DIR / \"raw\" / \"code\" / \"BNF_v1.txt\").read_text(\n",
|
||||
"grammar_ = (settings.data_path / \"raw\" / \"code\" / \"BNF_v1.txt\").read_text(\n",
|
||||
" encoding=\"utf-8\"\n",
|
||||
")\n",
|
||||
"grammar(grammar_)"
|
||||
|
|
|
|||
|
|
@ -2,24 +2,55 @@
|
|||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 1,
|
||||
"id": "5b646fb1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
|
||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"! uv pip install bnf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "274d6d68",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
|
||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"! uv pip install ebnf"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "0a8abbfa",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"from dataclasses import dataclass\n",
|
||||
"\n",
|
||||
"from typing import Any, Dict, List, Optional, Tuple\n",
|
||||
"\n",
|
||||
"from lark import Tree, Lark\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"from bnf import grammar as bnf_grammar, parse as bnf_parse\n",
|
||||
"from ebnf import grammar as ebnf_grammar, parse as ebnf_parse\n",
|
||||
"\n",
|
||||
"from src.config import DATA_DIR"
|
||||
"from src.config import settings"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
|
|||
|
|
@ -0,0 +1,244 @@
|
|||
"""
|
||||
Generator for BNF specification files from AVAP documentation.
|
||||
|
||||
This script extracts BNF specifications from the AVAP Language Reference Manual (LRM)
|
||||
and generates individual text files for each BNF section.
|
||||
|
||||
Output format: nXX_BNF.txt (where XX is the section number zero-padded to two digits, e.g. n01_BNF.txt)
|
||||
Default output directory: ingestion/code/BNF/
|
||||
Default markdown source: docs/LRM/avap.md
|
||||
|
||||
USAGE EXAMPLES:
|
||||
|
||||
Use default configuration:
|
||||
python scripts/pipelines/flows/bnf_files_generator.py
|
||||
|
||||
Customize input and output paths:
|
||||
python scripts/pipelines/flows/bnf_files_generator.py --markdown docs/LRM/avap.md --output ingestion/code
|
||||
python scripts/pipelines/flows/bnf_files_generator.py -m docs/LRM/avap.md -o ingestion/code
|
||||
|
||||
OPTIONS:
|
||||
--markdown, -m: Path to the AVAP markdown file (relative to project root)
|
||||
--output, -o: Output directory for BNF files (relative to project root)
|
||||
"""
|
||||
|
||||
import re
|
||||
import typer
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
app = typer.Typer()
|
||||
|
||||
|
||||
class BNFExtractor:
    """Extract BNF specifications from AVAP markdown documentation.

    The extractor looks for headers of the form
    ``### Especificación BNF (Sección <ROMAN>)`` and, for each one, takes the
    first fenced ``bnf`` code block that appears before the next such header.
    Each extracted grammar is written to its own ``nXX_BNF.txt`` file, where
    ``XX`` is the section number zero-padded to two digits.
    """

    # Section header; group 1 captures the Roman numeral of the section.
    _HEADER_RE = re.compile(r"### Especificación BNF \(Sección ([IVXLCDM]+)\)")

    # Fenced ``bnf`` block; DOTALL lets the lazy body span several lines.
    _CODE_BLOCK_RE = re.compile(r"```bnf\n(.*?)```", re.DOTALL)

    # Value of each Roman numeral symbol.
    _ROMAN_VALUES = {
        'I': 1, 'V': 5, 'X': 10, 'L': 50,
        'C': 100, 'D': 500, 'M': 1000,
    }

    def __init__(self, markdown_file: Path, output_dir: Path):
        """
        Initialize BNF extractor.

        Args:
            markdown_file: Path to the AVAP markdown file
            output_dir: Directory where BNF files will be saved
        """
        self.markdown_file = markdown_file
        self.output_dir = output_dir
        # Filled by extract_bnf_sections(): (section_number, title, bnf_content).
        self.bnf_sections: List[Tuple[int, str, str]] = []

    @staticmethod
    def _roman_to_int(roman: str) -> int:
        """
        Convert Roman numerals to integers.

        Args:
            roman: Roman numeral string (e.g., 'I', 'IV', 'IX', 'XII')

        Returns:
            Integer value of the Roman numeral. Characters that are not Roman
            numeral symbols contribute 0; an empty string yields 0.
        """
        total = 0
        prev_value = 0

        # Scan right to left: a symbol smaller than the one to its right is
        # subtractive (the I in IV), otherwise it is additive.
        for char in reversed(roman):
            value = BNFExtractor._ROMAN_VALUES.get(char, 0)
            if value < prev_value:
                total -= value
            else:
                total += value
            prev_value = value

        return total

    @staticmethod
    def _section_filename(section_number: int) -> str:
        """Return the output file name for a section (e.g. 1 -> 'n01_BNF.txt')."""
        return f"n{section_number:02d}_BNF.txt"

    def read_markdown_file(self) -> str:
        """Read the markdown file content as UTF-8 text."""
        with open(self.markdown_file, "r", encoding="utf-8") as f:
            return f.read()

    def extract_bnf_sections(self, content: str) -> List[Tuple[int, str, str]]:
        """
        Extract all BNF specifications from markdown content.

        Pattern: ### Especificación BNF (Sección I)
                 ```bnf
                 ... BNF content ...
                 ```

        A header only owns a code block located between itself and the next
        BNF header (or the end of the document). Headers without such a block
        are skipped instead of borrowing the block of a later section.

        Args:
            content: Markdown file content

        Returns:
            List of tuples: (section_number, section_title, bnf_content),
            in document order. The same list is stored in ``self.bnf_sections``.
        """
        headers = list(self._HEADER_RE.finditer(content))
        bnf_sections: List[Tuple[int, str, str]] = []

        for index, header in enumerate(headers):
            # The search window ends where the next header begins, so a section
            # that lacks a fenced block cannot pick up its neighbour's grammar.
            if index + 1 < len(headers):
                window_end = headers[index + 1].start()
            else:
                window_end = len(content)

            code_match = self._CODE_BLOCK_RE.search(content, header.end(), window_end)
            if code_match is None:
                continue

            roman_numeral = header.group(1)
            section_number = self._roman_to_int(roman_numeral)
            section_title = f"Especificación BNF - Sección {roman_numeral}"
            bnf_sections.append(
                (section_number, section_title, code_match.group(1).strip())
            )

        self.bnf_sections = bnf_sections
        return bnf_sections

    def format_bnf_file_content(self, section_number: int, title: str, bnf_content: str) -> str:
        """
        Format BNF content for file output.

        Args:
            section_number: Section number (currently unused)
            title: Section title (currently unused)
            bnf_content: Raw BNF grammar content

        Returns:
            BNF content without additional formatting
        """
        return bnf_content

    def save_bnf_files(self) -> int:
        """
        Save extracted BNF sections to individual files.

        File naming convention: nXX_BNF.txt with a two-digit, zero-padded
        section number (e.g., n01_BNF.txt, n02_BNF.txt, etc.). The output
        directory is created if it does not exist.

        Returns:
            Number of files written (one per entry in ``self.bnf_sections``)
        """
        # Ensure output directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)

        files_created = 0

        for section_number, title, bnf_content in self.bnf_sections:
            filepath = self.output_dir / self._section_filename(section_number)

            formatted_content = self.format_bnf_file_content(
                section_number, title, bnf_content
            )

            with open(filepath, "w", encoding="utf-8") as f:
                f.write(formatted_content)

            print(f"Created: {filepath}")
            files_created += 1

        return files_created

    def generate(self) -> Tuple[int, List[str]]:
        """
        Execute the complete BNF extraction and file generation process.

        Reads the markdown file, extracts the BNF sections, writes one file
        per section and reports progress on standard output.

        Returns:
            Tuple of (number_of_files_created, list_of_file_paths)
        """
        print(f"Reading markdown file: {self.markdown_file}")
        content = self.read_markdown_file()

        print("Extracting BNF specifications...")
        bnf_sections = self.extract_bnf_sections(content)

        print(f"Found {len(bnf_sections)} BNF sections:")
        for _, title, _ in bnf_sections:
            print(f"  - {title}")

        print(f"\nSaving BNF files to: {self.output_dir}")
        files_created = self.save_bnf_files()

        # Rebuild the paths with the same naming helper used when saving.
        file_paths = [
            str(self.output_dir / self._section_filename(section_number))
            for section_number, _, _ in bnf_sections
        ]

        return files_created, file_paths
|
||||
|
||||
|
||||
@app.command()
def main(
    markdown_file: str = typer.Option(
        "docs/LRM/avap.md",
        "--markdown",
        "-m",
        help="Path to AVAP markdown file (relative to project root)"
    ),
    output_dir: str = typer.Option(
        "ingestion/code/BNF/",
        "--output",
        "-o",
        help="Output directory for BNF files (relative to project root)"
    )
):
    """Extract BNF specifications from AVAP documentation.

    Default behavior:
    - Reads from: docs/LRM/avap.md
    - Writes to: ingestion/code/BNF/
    """
    # Get project root directory (scripts/pipelines/flows -> project root):
    # three levels above the directory that contains this script.
    script_dir = Path(__file__).parent
    project_root = script_dir.parent.parent.parent

    # Both options are joined onto the project root.
    markdown_path = project_root / markdown_file
    output_path = project_root / output_dir

    # Require a regular file: a directory would pass a plain exists() check
    # and then fail with a traceback when the extractor tries to open it.
    if not markdown_path.is_file():
        typer.echo(f"Error: Markdown file not found: {markdown_path}", err=True)
        raise typer.Exit(code=1)

    # Create extractor and generate files
    extractor = BNFExtractor(markdown_path, output_path)
    files_created, file_paths = extractor.generate()

    separator = '=' * 80
    print(f"\n{separator}")
    print("BNF extraction complete!")
    print(f"Total files created: {files_created}")
    print(f"Output directory: {output_path}")
    print(separator)
|
||||
|
||||
|
||||
# Run the Typer CLI only when this file is executed as a script, so that
# importing the module does not trigger command-line parsing.
if __name__ == "__main__":
    app()
|
||||
Loading…
Reference in New Issue