BNF extraction pipeline from avap.md

2026-03-11 11:29:19 +01:00 · 2026-03-11 11:29:19 +01:00 · 3ac432567b
parent cd3922abbd
commit 3ac432567b
21 changed files with 699 additions and 13 deletions
--- a/ingestion/code/BNF/n01_BNF.txt
+++ b/ingestion/code/BNF/n01_BNF.txt
@ -0,0 +1,42 @@
+<program>          ::= ( <line> | <block_comment> )*
+<line>             ::= [ <statement> ] [ <line_comment> | <doc_comment> ] <EOL>
+                     | ( <line_comment> | <doc_comment> ) <EOL>
+<EOL>              ::= /* Retorno de carro / Salto de línea (\n o \r\n) */
+
+<statement>        ::= <assignment>
+                     | <method_call_stmt>
+                     | <function_call_stmt>
+                     | <function_decl>
+                     | <return_stmt>
+                     | <system_command>
+                     | <io_command>
+                     | <control_flow>
+                     | <async_command>
+                     | <connector_cmd>
+                     | <db_command>
+                     | <http_command>
+                     | <util_command>
+                     | <modularity_cmd>
+
+<assignment>       ::= <identifier> "=" <expression>
+
+/* Llamada a función global (sin receptor de objeto) */
+<function_call_stmt> ::= <identifier> "(" [<argument_list>] ")"
+
+/* Llamada a método sobre un objeto conector (con receptor) */
+<method_call_stmt> ::= <identifier> "=" <identifier> "." <identifier> "(" [<argument_list>] ")"
+
+<system_command>   ::= <register_cmd> | <addvar_cmd>
+<register_cmd>     ::= "registerEndpoint(" <stringliteral> "," <stringliteral> "," <list_display> "," <stringliteral> "," <identifier> "," <identifier> ")"
+/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor).
+   Si ambos argumentos son identificadores, el valor del segundo se asigna al primero.
+   No está permitido pasar dos literales como argumentos. */
+<addvar_cmd>       ::= "addVar(" <addvar_arg> "," <addvar_arg> ")"
+<addvar_arg>       ::= <identifier> | <literal> | "$" <identifier>
+/* Restricción semántica: al menos uno de los dos <addvar_arg> debe ser <identifier> */
+
+<identifier>       ::= [a-zA-Z_] [a-zA-Z0-9_]*
+
+/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope:
+   _status  — código HTTP de respuesta (ej. addVar(_status, 401) o _status = 404) */
+<system_variable>  ::= "_status"
--- a/ingestion/code/BNF/n02_BNF.txt
+++ b/ingestion/code/BNF/n02_BNF.txt
@ -0,0 +1,5 @@
+<io_command>       ::= <addparam_cmd> | <getlistlen_cmd> | <addresult_cmd> | <getparamlist_cmd>
+<addparam_cmd>     ::= "addParam(" <stringliteral> "," <identifier> ")"
+<getlistlen_cmd>   ::= "getListLen(" <identifier> "," <identifier> ")"
+<getparamlist_cmd> ::= "getQueryParamList(" <stringliteral> "," <identifier> ")"
+<addresult_cmd>    ::= "addResult(" <identifier> ")"
--- a/ingestion/code/BNF/n03_BNF.txt
+++ b/ingestion/code/BNF/n03_BNF.txt
@ -0,0 +1,28 @@
+<control_flow>    ::= <if_stmt> | <loop_stmt> | <try_stmt>
+
+<if_stmt>         ::= "if(" <if_condition> ")" <EOL>
+                        <block>
+                      [ "else()" <EOL> <block> ]
+                      "end()" <EOL>
+
+/* if() soporta dos modos:
+   Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
+             identificadores simples o literales, nunca expresiones de acceso.
+             Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']),
+             debe asignarse previamente a una variable.
+   Modo 2 — expresión libre: None, None, expresión compleja como string */
+<if_condition>    ::= <if_atom> "," <if_atom> "," <stringliteral>
+                    | "None" "," "None" "," <stringliteral>
+<if_atom>         ::= <identifier> | <literal>
+
+<loop_stmt>       ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
+                        <block>
+                      "endLoop()" <EOL>
+
+<try_stmt>        ::= "try()" <EOL>
+                        <block>
+                      "exception(" <identifier> ")" <EOL>
+                        <block>
+                      "end()" <EOL>
+
+<block>           ::= <line>*
--- a/ingestion/code/BNF/n04_BNF.txt
+++ b/ingestion/code/BNF/n04_BNF.txt
@ -0,0 +1,3 @@
+<async_command>   ::= <go_stmt> | <gather_stmt>
+<go_stmt>         ::= <identifier> "=" "go" <identifier> "(" [<argument_list>] ")"
+<gather_stmt>     ::= <identifier> "=" "gather(" <identifier> ["," <expression>] ")"
--- a/ingestion/code/BNF/n05_BNF.txt
+++ b/ingestion/code/BNF/n05_BNF.txt
@ -0,0 +1,25 @@
+/* Instanciación de conector de terceros y llamada a sus métodos dinámicos */
+<connector_cmd>        ::= <connector_instantiation> | <connector_method_call>
+<connector_instantiation> ::= <identifier> "=" "avapConnector(" <stringliteral> ")"
+<connector_method_call>   ::= [ <identifier> "=" ] <identifier> "." <identifier> "(" [<argument_list>] ")"
+
+/* Cliente HTTP con Timeout Obligatorio */
+<http_command>    ::= <req_post_cmd> | <req_get_cmd>
+<req_post_cmd>    ::= "RequestPost(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
+<req_get_cmd>     ::= "RequestGet(" <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
+
+/* ORM y Persistencia (Estandarizado con tableName) */
+<db_command>      ::= <orm_direct> | <orm_check> | <orm_create> | <orm_select> | <orm_insert> | <orm_update>
+<orm_direct>      ::= "ormDirect(" <expression> "," <identifier> ")"
+<orm_check>       ::= "ormCheckTable(" <expression> "," <identifier> ")"
+<orm_create>      ::= "ormCreateTable(" <expression> "," <expression> "," <expression> "," <identifier> ")"
+
+/* ormAccessSelect(fields, tableName, selector, varTarget) */
+<orm_select>      ::= "ormAccessSelect(" <orm_fields> "," <expression> "," [<expression>] "," <identifier> ")"
+<orm_fields>      ::= "*" | <expression>
+
+/* ormAccessInsert(fieldsValues, tableName, varTarget) */
+<orm_insert>      ::= "ormAccessInsert(" <expression> "," <expression> "," <identifier> ")"
+
+/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */
+<orm_update>      ::= "ormAccessUpdate(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> ")"
--- a/ingestion/code/BNF/n06_BNF.txt
+++ b/ingestion/code/BNF/n06_BNF.txt
@ -0,0 +1,29 @@
+/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */
+<util_command>    ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
+
+/* Manipulación de listas y JSON */
+<json_list_cmd>   ::= "variableToList(" <expression> "," <identifier> ")"
+                    | "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
+                    | "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
+                    | "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
+
+/* Criptografía */
+<crypto_cmd>      ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")"
+                    | "encodeMD5(" <identifier_or_string> "," <identifier> ")"
+
+/* Expresiones regulares */
+<regex_cmd>       ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")"
+
+<datetime_cmd>    ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
+/*  Argumentos: formato_salida, epoch_origen, zona_horaria, destino */
+
+<stamp_cmd>       ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
+/*  Argumentos: epoch_origen, formato, timedelta, destino */
+                    | "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
+/*  Argumentos: fecha_string, formato_entrada, timedelta, destino */
+
+<string_cmd>      ::= "randomString(" <expression> "," <identifier> ")"
+/*  Argumentos: longitud, destino */
+
+<replace_cmd>     ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
+/*  Argumentos: origen, patron_busqueda, reemplazo, destino */
--- a/ingestion/code/BNF/n07_BNF.txt
+++ b/ingestion/code/BNF/n07_BNF.txt
@ -0,0 +1,9 @@
+/* Nota: las funciones utilizan llaves {} como delimitadores de bloque por decisión
+   arquitectónica explícita, diferenciándose de las estructuras de control (if, loop, try)
+   que usan palabras clave de cierre (end(), endLoop()). Ambos patrones coexisten
+   en la gramática y el parser los distingue por el token de apertura. */
+<function_decl>   ::= "function" <identifier> "(" [<param_list>] ")" "{" <EOL>
+                        <block>
+                      "}" <EOL>
+<param_list>      ::= <identifier> ("," <identifier>)*
+<return_stmt>     ::= "return(" [<expression>] ")"
--- a/ingestion/code/BNF/n08_BNF.txt
+++ b/ingestion/code/BNF/n08_BNF.txt
@ -0,0 +1,3 @@
+<modularity_cmd>  ::= <include_stmt> | <import_stmt>
+<include_stmt>    ::= "include" " " <stringliteral>
+<import_stmt>     ::= "import" " " ( "<" <identifier> ">" | <stringliteral> )
--- a/ingestion/code/BNF/n09_BNF.txt
+++ b/ingestion/code/BNF/n09_BNF.txt
@ -0,0 +1,62 @@
+/* Jerarquía de Expresiones (Precedencia de menor a mayor) */
+<expression>       ::= <logical_or>
+<logical_or>       ::= <logical_and> ( "or" <logical_and> )*
+<logical_and>      ::= <logical_not> ( "and" <logical_not> )*
+<logical_not>      ::= "not" <logical_not> | <comparison>
+
+<comparison>       ::= <arithmetic> ( <comp_op> <arithmetic> )*
+<comp_op>          ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is"
+
+<arithmetic>       ::= <term> ( ( "+" | "-" ) <term> )*
+<term>             ::= <factor> ( ( "*" | "/" | "%" ) <factor> )*
+<factor>           ::= ( "+" | "-" ) <factor> | <power>
+<power>            ::= <primary> [ "**" <factor> ]
+
+/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)
+   La regla <primary> cubre también el acceso a métodos de objetos conector
+   (conector.metodo(...)) y el acceso por clave a sus resultados (resultado["key"]) */
+<primary>          ::= <atom>
+                     | <primary> "." <identifier>
+                     | <primary> "[" <expression> "]"
+                     | <primary> "[" [<expression>] ":" [<expression>] [":" [<expression>]] "]"
+                     | <primary> "(" [<argument_list>] ")"
+
+<atom>             ::= <identifier>
+                     | "$" <identifier>
+                     | <literal>
+                     | "(" <expression> ")"
+                     | <list_display>
+                     | <dict_display>
+
+/* Estructuras de Datos, Comprensiones y Argumentos */
+<list_display>     ::= "[" [<argument_list>] "]"
+                     | "[" <expression> "for" <identifier> "in" <expression> [<if_clause>] "]"
+<if_clause>        ::= "if" <expression>
+<dict_display>     ::= "{" [<key_datum_list>] "}"
+<key_datum_list>   ::= <key_datum> ( "," <key_datum> )*
+<key_datum>        ::= <expression> ":" <expression>
+<argument_list>    ::= <expression> ( "," <expression> )*
+
+/* Tipo numérico unificado */
+<number>           ::= <floatnumber> | <integer>
+
+/* Literales (Tipos de Datos Primitivos Soportados) */
+<literal>          ::= <stringliteral> | <number> | <boolean> | "None"
+<boolean>          ::= "True" | "False"
+<integer>          ::= [0-9]+
+<floatnumber>      ::= [0-9]+ "." [0-9]* | "." [0-9]+
+
+/* Cadenas de Texto con soporte de secuencias de escape */
+<stringliteral>    ::= "\"" <text_double> "\"" | "'" <text_single> "'"
+<escape_sequence>  ::= "\\" ( "\"" | "'" | "\\" | "n" | "t" | "r" | "0" )
+<text_double>      ::= ( [^"\\] | <escape_sequence> )*
+<text_single>      ::= ( [^'\\] | <escape_sequence> )*
+<identifier_or_string> ::= <identifier> | <stringliteral>
+
+/* Reglas de Comentarios para el Lexer
+   El lexer aplica longest-match: /// debe evaluarse ANTES que // */
+<doc_comment>      ::= "///" <any_text>
+<line_comment>     ::= "//" <any_text>
+<block_comment>    ::= "/*" <any_content> "*/"
+<any_text>         ::= [^\r\n]*
+<any_content>      ::= /* Cualquier secuencia de caracteres que no contenga la subcadena "*/" */
--- a/ingestion/code/n01_BNF.txt
+++ b/ingestion/code/n01_BNF.txt
@ -0,0 +1,42 @@
+<program>          ::= ( <line> | <block_comment> )*
+<line>             ::= [ <statement> ] [ <line_comment> | <doc_comment> ] <EOL>
+                     | ( <line_comment> | <doc_comment> ) <EOL>
+<EOL>              ::= /* Retorno de carro / Salto de línea (\n o \r\n) */
+
+<statement>        ::= <assignment>
+                     | <method_call_stmt>
+                     | <function_call_stmt>
+                     | <function_decl>
+                     | <return_stmt>
+                     | <system_command>
+                     | <io_command>
+                     | <control_flow>
+                     | <async_command>
+                     | <connector_cmd>
+                     | <db_command>
+                     | <http_command>
+                     | <util_command>
+                     | <modularity_cmd>
+
+<assignment>       ::= <identifier> "=" <expression>
+
+/* Llamada a función global (sin receptor de objeto) */
+<function_call_stmt> ::= <identifier> "(" [<argument_list>] ")"
+
+/* Llamada a método sobre un objeto conector (con receptor) */
+<method_call_stmt> ::= <identifier> "=" <identifier> "." <identifier> "(" [<argument_list>] ")"
+
+<system_command>   ::= <register_cmd> | <addvar_cmd>
+<register_cmd>     ::= "registerEndpoint(" <stringliteral> "," <stringliteral> "," <list_display> "," <stringliteral> "," <identifier> "," <identifier> ")"
+/* addVar asigna un valor a una variable. Acepta (valor, variable) o (variable, valor).
+   Si ambos argumentos son identificadores, el valor del segundo se asigna al primero.
+   No está permitido pasar dos literales como argumentos. */
+<addvar_cmd>       ::= "addVar(" <addvar_arg> "," <addvar_arg> ")"
+<addvar_arg>       ::= <identifier> | <literal> | "$" <identifier>
+/* Restricción semántica: al menos uno de los dos <addvar_arg> debe ser <identifier> */
+
+<identifier>       ::= [a-zA-Z_] [a-zA-Z0-9_]*
+
+/* Variables de sistema reservadas — accesibles y asignables desde cualquier scope:
+   _status  — código HTTP de respuesta (ej. addVar(_status, 401) o _status = 404) */
+<system_variable>  ::= "_status"
--- a/ingestion/code/n02_BNF.txt
+++ b/ingestion/code/n02_BNF.txt
@ -0,0 +1,5 @@
+<io_command>       ::= <addparam_cmd> | <getlistlen_cmd> | <addresult_cmd> | <getparamlist_cmd>
+<addparam_cmd>     ::= "addParam(" <stringliteral> "," <identifier> ")"
+<getlistlen_cmd>   ::= "getListLen(" <identifier> "," <identifier> ")"
+<getparamlist_cmd> ::= "getQueryParamList(" <stringliteral> "," <identifier> ")"
+<addresult_cmd>    ::= "addResult(" <identifier> ")"
--- a/ingestion/code/n03_BNF.txt
+++ b/ingestion/code/n03_BNF.txt
@ -0,0 +1,28 @@
+<control_flow>    ::= <if_stmt> | <loop_stmt> | <try_stmt>
+
+<if_stmt>         ::= "if(" <if_condition> ")" <EOL>
+                        <block>
+                      [ "else()" <EOL> <block> ]
+                      "end()" <EOL>
+
+/* if() soporta dos modos:
+   Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
+             identificadores simples o literales, nunca expresiones de acceso.
+             Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']),
+             debe asignarse previamente a una variable.
+   Modo 2 — expresión libre: None, None, expresión compleja como string */
+<if_condition>    ::= <if_atom> "," <if_atom> "," <stringliteral>
+                    | "None" "," "None" "," <stringliteral>
+<if_atom>         ::= <identifier> | <literal>
+
+<loop_stmt>       ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
+                        <block>
+                      "endLoop()" <EOL>
+
+<try_stmt>        ::= "try()" <EOL>
+                        <block>
+                      "exception(" <identifier> ")" <EOL>
+                        <block>
+                      "end()" <EOL>
+
+<block>           ::= <line>*
--- a/ingestion/code/n04_BNF.txt
+++ b/ingestion/code/n04_BNF.txt
@ -0,0 +1,3 @@
+<async_command>   ::= <go_stmt> | <gather_stmt>
+<go_stmt>         ::= <identifier> "=" "go" <identifier> "(" [<argument_list>] ")"
+<gather_stmt>     ::= <identifier> "=" "gather(" <identifier> ["," <expression>] ")"
--- a/ingestion/code/n05_BNF.txt
+++ b/ingestion/code/n05_BNF.txt
@ -0,0 +1,25 @@
+/* Instanciación de conector de terceros y llamada a sus métodos dinámicos */
+<connector_cmd>        ::= <connector_instantiation> | <connector_method_call>
+<connector_instantiation> ::= <identifier> "=" "avapConnector(" <stringliteral> ")"
+<connector_method_call>   ::= [ <identifier> "=" ] <identifier> "." <identifier> "(" [<argument_list>] ")"
+
+/* Cliente HTTP con Timeout Obligatorio */
+<http_command>    ::= <req_post_cmd> | <req_get_cmd>
+<req_post_cmd>    ::= "RequestPost(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
+<req_get_cmd>     ::= "RequestGet(" <expression> "," <expression> "," <expression> "," <identifier> "," <expression> ")"
+
+/* ORM y Persistencia (Estandarizado con tableName) */
+<db_command>      ::= <orm_direct> | <orm_check> | <orm_create> | <orm_select> | <orm_insert> | <orm_update>
+<orm_direct>      ::= "ormDirect(" <expression> "," <identifier> ")"
+<orm_check>       ::= "ormCheckTable(" <expression> "," <identifier> ")"
+<orm_create>      ::= "ormCreateTable(" <expression> "," <expression> "," <expression> "," <identifier> ")"
+
+/* ormAccessSelect(fields, tableName, selector, varTarget) */
+<orm_select>      ::= "ormAccessSelect(" <orm_fields> "," <expression> "," [<expression>] "," <identifier> ")"
+<orm_fields>      ::= "*" | <expression>
+
+/* ormAccessInsert(fieldsValues, tableName, varTarget) */
+<orm_insert>      ::= "ormAccessInsert(" <expression> "," <expression> "," <identifier> ")"
+
+/* ormAccessUpdate(fields, fieldsValues, tableName, selector, varTarget) */
+<orm_update>      ::= "ormAccessUpdate(" <expression> "," <expression> "," <expression> "," <expression> "," <identifier> ")"
--- a/ingestion/code/n06_BNF.txt
+++ b/ingestion/code/n06_BNF.txt
@ -0,0 +1,29 @@
+/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */
+<util_command>    ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
+
+/* Manipulación de listas y JSON */
+<json_list_cmd>   ::= "variableToList(" <expression> "," <identifier> ")"
+                    | "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
+                    | "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
+                    | "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
+
+/* Criptografía */
+<crypto_cmd>      ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")"
+                    | "encodeMD5(" <identifier_or_string> "," <identifier> ")"
+
+/* Expresiones regulares */
+<regex_cmd>       ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")"
+
+<datetime_cmd>    ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
+/*  Argumentos: formato_salida, epoch_origen, zona_horaria, destino */
+
+<stamp_cmd>       ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
+/*  Argumentos: epoch_origen, formato, timedelta, destino */
+                    | "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
+/*  Argumentos: fecha_string, formato_entrada, timedelta, destino */
+
+<string_cmd>      ::= "randomString(" <expression> "," <identifier> ")"
+/*  Argumentos: longitud, destino */
+
+<replace_cmd>     ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
+/*  Argumentos: origen, patron_busqueda, reemplazo, destino */
--- a/ingestion/code/n07_BNF.txt
+++ b/ingestion/code/n07_BNF.txt
@ -0,0 +1,9 @@
+/* Nota: las funciones utilizan llaves {} como delimitadores de bloque por decisión
+   arquitectónica explícita, diferenciándose de las estructuras de control (if, loop, try)
+   que usan palabras clave de cierre (end(), endLoop()). Ambos patrones coexisten
+   en la gramática y el parser los distingue por el token de apertura. */
+<function_decl>   ::= "function" <identifier> "(" [<param_list>] ")" "{" <EOL>
+                        <block>
+                      "}" <EOL>
+<param_list>      ::= <identifier> ("," <identifier>)*
+<return_stmt>     ::= "return(" [<expression>] ")"
--- a/ingestion/code/n08_BNF.txt
+++ b/ingestion/code/n08_BNF.txt
@ -0,0 +1,3 @@
+<modularity_cmd>  ::= <include_stmt> | <import_stmt>
+<include_stmt>    ::= "include" " " <stringliteral>
+<import_stmt>     ::= "import" " " ( "<" <identifier> ">" | <stringliteral> )
--- a/ingestion/code/n09_BNF.txt
+++ b/ingestion/code/n09_BNF.txt
@ -0,0 +1,62 @@
+/* Jerarquía de Expresiones (Precedencia de menor a mayor) */
+<expression>       ::= <logical_or>
+<logical_or>       ::= <logical_and> ( "or" <logical_and> )*
+<logical_and>      ::= <logical_not> ( "and" <logical_not> )*
+<logical_not>      ::= "not" <logical_not> | <comparison>
+
+<comparison>       ::= <arithmetic> ( <comp_op> <arithmetic> )*
+<comp_op>          ::= "==" | "!=" | "<" | ">" | "<=" | ">=" | "in" | "is"
+
+<arithmetic>       ::= <term> ( ( "+" | "-" ) <term> )*
+<term>             ::= <factor> ( ( "*" | "/" | "%" ) <factor> )*
+<factor>           ::= ( "+" | "-" ) <factor> | <power>
+<power>            ::= <primary> [ "**" <factor> ]
+
+/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)
+   La regla <primary> cubre también el acceso a métodos de objetos conector
+   (conector.metodo(...)) y el acceso por clave a sus resultados (resultado["key"]) */
+<primary>          ::= <atom>
+                     | <primary> "." <identifier>
+                     | <primary> "[" <expression> "]"
+                     | <primary> "[" [<expression>] ":" [<expression>] [":" [<expression>]] "]"
+                     | <primary> "(" [<argument_list>] ")"
+
+<atom>             ::= <identifier>
+                     | "$" <identifier>
+                     | <literal>
+                     | "(" <expression> ")"
+                     | <list_display>
+                     | <dict_display>
+
+/* Estructuras de Datos, Comprensiones y Argumentos */
+<list_display>     ::= "[" [<argument_list>] "]"
+                     | "[" <expression> "for" <identifier> "in" <expression> [<if_clause>] "]"
+<if_clause>        ::= "if" <expression>
+<dict_display>     ::= "{" [<key_datum_list>] "}"
+<key_datum_list>   ::= <key_datum> ( "," <key_datum> )*
+<key_datum>        ::= <expression> ":" <expression>
+<argument_list>    ::= <expression> ( "," <expression> )*
+
+/* Tipo numérico unificado */
+<number>           ::= <floatnumber> | <integer>
+
+/* Literales (Tipos de Datos Primitivos Soportados) */
+<literal>          ::= <stringliteral> | <number> | <boolean> | "None"
+<boolean>          ::= "True" | "False"
+<integer>          ::= [0-9]+
+<floatnumber>      ::= [0-9]+ "." [0-9]* | "." [0-9]+
+
+/* Cadenas de Texto con soporte de secuencias de escape */
+<stringliteral>    ::= "\"" <text_double> "\"" | "'" <text_single> "'"
+<escape_sequence>  ::= "\\" ( "\"" | "'" | "\\" | "n" | "t" | "r" | "0" )
+<text_double>      ::= ( [^"\\] | <escape_sequence> )*
+<text_single>      ::= ( [^'\\] | <escape_sequence> )*
+<identifier_or_string> ::= <identifier> | <stringliteral>
+
+/* Reglas de Comentarios para el Lexer
+   El lexer aplica longest-match: /// debe evaluarse ANTES que // */
+<doc_comment>      ::= "///" <any_text>
+<line_comment>     ::= "//" <any_text>
+<block_comment>    ::= "/*" <any_content> "*/"
+<any_text>         ::= [^\r\n]*
+<any_content>      ::= /* Cualquier secuencia de caracteres que no contenga la subcadena "*/" */
--- a/scratches/pseco/ingestion/Code
+++ b/scratches/pseco/ingestion/Code
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "0a8abbfa",
   "metadata": {},
   "outputs": [
@ -24,7 +24,6 @@
    "from dataclasses import dataclass\n",
    "from pathlib import Path\n",
    "from typing import Any, Dict, List, Optional, Tuple\n",
-    "# from bnf import grammar\n",
    "import nltk\n",
    "from elasticsearch import Elasticsearch\n",
    "from langchain_core.documents import Document\n",
@ -185,7 +184,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
   "id": "26ad9c81",
   "metadata": {},
   "outputs": [
@ -209,7 +208,7 @@
    }
   ],
   "source": [
-    "grammar_ = (DATA_DIR / \"raw\" / \"code\" / \"BNF_v1.txt\").read_text(\n",
+    "grammar_ = (settings.data_path / \"raw\" / \"code\" / \"BNF_v1.txt\").read_text(\n",
    "    encoding=\"utf-8\"\n",
    ")\n",
    "grammar(grammar_)"
--- a/scratches/pseco/ingestion/Code
+++ b/scratches/pseco/ingestion/Code
@ -2,24 +2,55 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": 1,
+   "id": "5b646fb1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
+      "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "! uv pip install bnf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "274d6d68",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[2mUsing Python 3.12.11 environment at: /home/pseco/VsCodeProjects/assistance-engine/.venv\u001b[0m\n",
+      "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 2ms\u001b[0m\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "! uv pip install ebnf"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
   "id": "0a8abbfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
-    "\n",
    "from dataclasses import dataclass\n",
-    "\n",
    "from typing import Any, Dict, List, Optional, Tuple\n",
-    "\n",
    "from lark import Tree, Lark\n",
-    "\n",
-    "\n",
    "from bnf import grammar as bnf_grammar, parse as bnf_parse\n",
-    "from ebnf import grammar as ebnf_grammar, parse as ebnf_parse\n",
-    "\n",
-    "from src.config import DATA_DIR"
+    "from src.config import settings"
   ]
  },
  {
--- a/scripts/pipelines/flows/bnf_files_generator.py
+++ b/scripts/pipelines/flows/bnf_files_generator.py
@ -0,0 +1,244 @@
+"""
+Generator for BNF specification files from AVAP documentation.
+
+This script extracts BNF specifications from the AVAP Language Reference Manual (LRM)
+and generates individual text files for each BNF section.
+
+Output format: nNN_BNF.txt (where NN is the zero-padded, two-digit section number, e.g. n01_BNF.txt)
+Default output directory: ingestion/code/BNF/
+Default markdown source: docs/LRM/avap.md
+
+USAGE EXAMPLES:
+
+Use default configuration:
+    python scripts/pipelines/flows/bnf_files_generator.py
+
+Customize input and output paths:
+    python scripts/pipelines/flows/bnf_files_generator.py --markdown docs/LRM/avap.md --output ingestion/code
+    python scripts/pipelines/flows/bnf_files_generator.py -m docs/LRM/avap.md -o ingestion/code
+
+OPTIONS:
+    --markdown, -m: Path to the AVAP markdown file (relative to project root)
+    --output, -o:   Output directory for BNF files (relative to project root)
+"""
+
+import re
+import typer
+from pathlib import Path
+from typing import List, Tuple, Optional
+
+# Typer application object; `main` is registered on it through `@app.command()`
+# and it is invoked from the `__main__` guard at the bottom of the file.
+app = typer.Typer()
+
+
+class BNFExtractor:
+    """Extract BNF specifications from AVAP markdown documentation.
+
+    Typical use is ``BNFExtractor(markdown_file, output_dir).generate()``,
+    which reads the markdown, collects every BNF section and writes one text
+    file per section into ``output_dir``.
+    """
+
+    # Section header, e.g. "### Especificación BNF (Sección IV)".
+    # Group 1 captures the Roman numeral.
+    _HEADER_RE = re.compile(r"### Especificación BNF \(Sección ([IVXLCDM]+)\)")
+
+    # Fenced ```bnf block; group 1 captures the grammar text.
+    # The opening fence may be followed by LF or CRLF.
+    _CODE_BLOCK_RE = re.compile(r"```bnf\r?\n(.*?)```", re.DOTALL)
+
+    _ROMAN_VALUES = {
+        'I': 1, 'V': 5, 'X': 10, 'L': 50,
+        'C': 100, 'D': 500, 'M': 1000,
+    }
+
+    def __init__(self, markdown_file: Path, output_dir: Path):
+        """
+        Initialize BNF extractor.
+
+        Args:
+            markdown_file: Path to the AVAP markdown file
+            output_dir: Directory where BNF files will be saved
+        """
+        self.markdown_file = markdown_file
+        self.output_dir = output_dir
+        # Filled by extract_bnf_sections(); consumed by save_bnf_files().
+        self.bnf_sections: List[Tuple[int, str, str]] = []
+
+    @staticmethod
+    def _roman_to_int(roman: str) -> int:
+        """
+        Convert Roman numerals to integers.
+
+        Characters that are not Roman digits contribute 0 to the result.
+
+        Args:
+            roman: Roman numeral string (e.g., 'I', 'IV', 'IX', 'XII')
+
+        Returns:
+            Integer value of the Roman numeral
+        """
+        total = 0
+        prev_value = 0
+
+        # Scan right to left: a digit smaller than the one to its right is
+        # subtractive (the I in IV), otherwise it is additive.
+        for char in reversed(roman):
+            value = BNFExtractor._ROMAN_VALUES.get(char, 0)
+            if value < prev_value:
+                total -= value
+            else:
+                total += value
+            prev_value = value
+
+        return total
+
+    @staticmethod
+    def _output_filename(section_number: int) -> str:
+        """Return the file name for a section, e.g. ``n01_BNF.txt`` for 1."""
+        return f"n{section_number:02d}_BNF.txt"
+
+    def read_markdown_file(self) -> str:
+        """Read the markdown file content (decoded as UTF-8)."""
+        with open(self.markdown_file, "r", encoding="utf-8") as f:
+            return f.read()
+
+    def extract_bnf_sections(self, content: str) -> List[Tuple[int, str, str]]:
+        """
+        Extract all BNF specifications from markdown content.
+
+        Pattern: ### Especificación BNF (Sección I)
+                 ```bnf
+                 ... BNF content ...
+                 ```
+
+        The fenced block is searched only between a header and the next BNF
+        header (or the end of the document). A header without its own block
+        is skipped instead of capturing the following section's grammar.
+
+        The result is also stored in ``self.bnf_sections``.
+
+        Args:
+            content: Markdown file content
+
+        Returns:
+            List of tuples: (section_number, section_title, bnf_content)
+        """
+        headers = list(self._HEADER_RE.finditer(content))
+        bnf_sections: List[Tuple[int, str, str]] = []
+
+        for index, header in enumerate(headers):
+            # Limit the search window to this section only.
+            if index + 1 < len(headers):
+                section_end = headers[index + 1].start()
+            else:
+                section_end = len(content)
+
+            code_match = self._CODE_BLOCK_RE.search(
+                content, header.end(), section_end
+            )
+            if code_match is None:
+                continue
+
+            roman_numeral = header.group(1)
+            section_number = self._roman_to_int(roman_numeral)
+            section_title = f"Especificación BNF - Sección {roman_numeral}"
+            bnf_content = code_match.group(1).strip()
+            bnf_sections.append((section_number, section_title, bnf_content))
+
+        self.bnf_sections = bnf_sections
+        return bnf_sections
+
+    def format_bnf_file_content(self, section_number: int, title: str, bnf_content: str) -> str:
+        """
+        Format BNF content for file output.
+
+        Currently a pass-through: ``section_number`` and ``title`` are
+        accepted but not used.
+
+        Args:
+            section_number: Section number (1-9, etc.)
+            title: Section title
+            bnf_content: Raw BNF grammar content
+
+        Returns:
+            BNF content without additional formatting
+        """
+        return bnf_content
+
+    def save_bnf_files(self) -> int:
+        """
+        Save extracted BNF sections to individual files.
+
+        File naming convention: zero-padded two-digit section number,
+        e.g. n01_BNF.txt, n02_BNF.txt. Sections that share a number are
+        written to the same file, so the later one overwrites the earlier.
+
+        Returns:
+            Number of files written
+        """
+        # Ensure output directory exists
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        files_created = 0
+
+        for section_number, title, bnf_content in self.bnf_sections:
+            filepath = self.output_dir / self._output_filename(section_number)
+
+            formatted_content = self.format_bnf_file_content(
+                section_number, title, bnf_content
+            )
+
+            with open(filepath, "w", encoding="utf-8") as f:
+                f.write(formatted_content)
+
+            print(f"Created: {filepath}")
+            files_created += 1
+
+        return files_created
+
+    def generate(self) -> Tuple[int, List[str]]:
+        """
+        Execute the complete BNF extraction and file generation process.
+
+        Progress is reported on standard output.
+
+        Returns:
+            Tuple of (number_of_files_created, list_of_file_paths)
+        """
+        print(f"Reading markdown file: {self.markdown_file}")
+        content = self.read_markdown_file()
+
+        print("Extracting BNF specifications...")
+        bnf_sections = self.extract_bnf_sections(content)
+
+        print(f"Found {len(bnf_sections)} BNF sections:")
+        for _, title, _ in bnf_sections:
+            print(f"  - {title}")
+
+        print(f"\nSaving BNF files to: {self.output_dir}")
+        files_created = self.save_bnf_files()
+
+        # One path per extracted section, in extraction order.
+        file_paths = [
+            str(self.output_dir / self._output_filename(section_number))
+            for section_number, _, _ in bnf_sections
+        ]
+
+        return files_created, file_paths
+
+
+@app.command()
+def main(
+    markdown_file: str = typer.Option(
+        "docs/LRM/avap.md",
+        "--markdown",
+        "-m",
+        help="Path to AVAP markdown file (relative to project root)"
+    ),
+    output_dir: str = typer.Option(
+        "ingestion/code/BNF/",
+        "--output",
+        "-o",
+        help="Output directory for BNF files (relative to project root)"
+    )
+):
+    """Extract BNF specifications from AVAP documentation.
+
+    Default behavior:
+    - Reads from: docs/LRM/avap.md
+    - Writes to: ingestion/code/BNF/
+    """
+    # Project root is three levels above this script's directory
+    # (scripts/pipelines/flows -> project root). Resolve first so a relative
+    # __file__ cannot collapse the chain of parents to ".".
+    script_dir = Path(__file__).resolve().parent
+    project_root = script_dir.parent.parent.parent
+
+    # Both options are interpreted relative to the project root.
+    markdown_path = project_root / markdown_file
+    output_path = project_root / output_dir
+
+    # is_file() rather than exists(): a directory at this path would pass
+    # exists() and then fail inside open() with an unhandled error.
+    if not markdown_path.is_file():
+        typer.echo(f"Error: Markdown file not found: {markdown_path}", err=True)
+        raise typer.Exit(code=1)
+
+    # Create extractor and generate files; only the count is reported here.
+    extractor = BNFExtractor(markdown_path, output_path)
+    files_created, _ = extractor.generate()
+
+    print(f"\n{'='*80}")
+    print("BNF extraction complete!")
+    print(f"Total files created: {files_created}")
+    print(f"Output directory: {output_path}")
+    print(f"{'='*80}")
+
+
+# Run the Typer CLI only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    app()