Merge branch 'mrh-online-dev' of github.com:BRUNIX-AI/assistance-engine into mrh-online-dev

This commit is contained in:
pseco 2026-03-12 15:56:27 +01:00
commit ba4a1f1efc
19 changed files with 93326 additions and 6043 deletions

View File

@ -35,7 +35,7 @@ class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
index_name=os.getenv("ELASTICSEARCH_INDEX"), index_name=os.getenv("ELASTICSEARCH_INDEX"),
embedding=self.embeddings, embedding=self.embeddings,
query_field="text", query_field="text",
vector_query_field="vector", vector_query_field="embedding",
) )
self.graph = build_graph( self.graph = build_graph(
llm=self.llm, llm=self.llm,

View File

@ -45,13 +45,15 @@ graph TD
├── README.md # System documentation & Dev guide ├── README.md # System documentation & Dev guide
├── changelog # Version tracking and release history ├── changelog # Version tracking and release history
├── pyproject.toml # Python project configuration ├── pyproject.toml
├── ingestion/ # Data ingested in Elasticsearch
├── docs/ ├── docs/
| ├── AVAP Language: ... # AVAP DSL Documentation ├── AVAP Language: ... # AVAP DSL Documentation
| | └── AVAP.md │ │ └── AVAP.md
│ ├── developer.avapfr... # Documents on developer web page │ ├── developer.avapfr... # Documents on developer web page
| └── LRM/ # AVAP LRM documentation │ ├── LRM/ # AVAP LRM documentation
| └── avap.md │ │ └── avap.md
│ └── samples/ # AVAP code samples
├── Docker/ ├── Docker/
│ ├── protos/ │ ├── protos/
│ │ └── brunix.proto # Protocol Buffers: The source of truth for the API │ │ └── brunix.proto # Protocol Buffers: The source of truth for the API
@ -64,30 +66,16 @@ graph TD
│ ├── Dockerfile # Container definition for the Engine │ ├── Dockerfile # Container definition for the Engine
│ ├── docker-compose.yaml # Local orchestration for dev environment │ ├── docker-compose.yaml # Local orchestration for dev environment
│ ├── requirements.txt # Python dependencies for Docker │ ├── requirements.txt # Python dependencies for Docker
│ ├── protos/ │ └── .dockerignore # Docker ignore files
│ │ └── brunix.proto # Protocol Buffers: The source of truth for the API
│ └── src/
│ ├── graph.py # Workflow graph orchestration
│ ├── prompts.py # Centralized prompt definitions
│ ├── server.py # gRPC Server & RAG Orchestration
│ ├── state.py # Shared state management
│ └── utils/ # Utility modules
├── ingestion/
│ └── docs/ # AVAP documentation chunks
├── kubernetes/
│ └── kubeconfig.yaml # Kubernetes cluster configuration
├── scripts/ ├── scripts/
│ └── pipelines/ │ └── pipelines/
| ├── samples_generator/ # AVAP Sample generator │ ├── flows/ # Processing pipelines
| | └─ generate_mbap.py │ └── tasks/ # Modules used by the flows
│ └── flows/ # Data processing flows
| └─ elasticsearch_ingestion.py
└── src/ └── src/
├── __init__.py ├── config.py # Environment variables configuration file
└── utils/ └── utils/
├── emb_factory.py # Embedding model factory ├── emb_factory.py # Embedding model factory
├── llm_factory.py # LLM model factory └── llm_factory.py # LLM model factory
└── __init__.py
``` ```
--- ---
@ -146,6 +134,7 @@ The engine utilizes Langfuse for end-to-end tracing and performance monitoring.
Create a `.env` file in the project root with the following variables: Create a `.env` file in the project root with the following variables:
```env ```env
PYTHONPATH=${PYTHONPATH}:/home/...
ELASTICSEARCH_URL=http://host.docker.internal:9200 ELASTICSEARCH_URL=http://host.docker.internal:9200
ELASTICSEARCH_LOCAL_URL=http://localhost:9200 ELASTICSEARCH_LOCAL_URL=http://localhost:9200
ELASTICSEARCH_INDEX=avap-docs-test ELASTICSEARCH_INDEX=avap-docs-test
@ -157,11 +146,13 @@ OLLAMA_URL=http://host.docker.internal:11434
OLLAMA_LOCAL_URL=http://localhost:11434 OLLAMA_LOCAL_URL=http://localhost:11434
OLLAMA_MODEL_NAME=qwen2.5:1.5b OLLAMA_MODEL_NAME=qwen2.5:1.5b
OLLAMA_EMB_MODEL_NAME=qwen3-0.6B-emb:latest OLLAMA_EMB_MODEL_NAME=qwen3-0.6B-emb:latest
HF_TOKEN=hf_...
HF_EMB_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B HF_EMB_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B
``` ```
| Variable | Required | Description | Example | | Variable | Required | Description | Example |
|---|---|---|---| |---|---|---|---|
| `PYTHONPATH` | No | Path that aims to the root of the project | `${PYTHONPATH}:/home/...` |
| `ELASTICSEARCH_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in Docker | `http://host.docker.internal:9200` | | `ELASTICSEARCH_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in Docker | `http://host.docker.internal:9200` |
| `ELASTICSEARCH_LOCAL_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in local | `http://localhost:9200` | | `ELASTICSEARCH_LOCAL_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in local | `http://localhost:9200` |
| `ELASTICSEARCH_INDEX` | Yes | Elasticsearch index name used by the engine | `avap-docs-test` | | `ELASTICSEARCH_INDEX` | Yes | Elasticsearch index name used by the engine | `avap-docs-test` |
@ -183,13 +174,13 @@ Open a terminal and establish the connection to the Devaron Cluster:
```bash ```bash
# 1. AI Model Tunnel (Ollama) # 1. AI Model Tunnel (Ollama)
kubectl port-forward --address 0.0.0.0 svc/ollama-light-service 11434:11434 -n brunix --kubeconfig ./kubernetes/ivar.yaml & kubectl port-forward --address 0.0.0.0 svc/ollama-light-service 11434:11434 -n brunix --kubeconfig ./kubernetes/kubeconfig.yaml &
# 2. Knowledge Base Tunnel (Elasticsearch) # 2. Knowledge Base Tunnel (Elasticsearch)
kubectl port-forward --address 0.0.0.0 svc/brunix-vector-db 9200:9200 -n brunix --kubeconfig ./kubernetes/ivar.yaml & kubectl port-forward --address 0.0.0.0 svc/brunix-vector-db 9200:9200 -n brunix --kubeconfig ./kubernetes/kubeconfig.yaml &
# 3. Observability DB Tunnel (PostgreSQL) # 3. Observability DB Tunnel (PostgreSQL)
kubectl port-forward --address 0.0.0.0 svc/brunix-postgres 5432:5432 -n brunix --kubeconfig ./kubernetes/ivar.yaml & kubectl port-forward --address 0.0.0.0 svc/brunix-postgres 5432:5432 -n brunix --kubeconfig ./kubernetes/kubeconfig.yaml &
``` ```
### 5. Launch the Engine ### 5. Launch the Engine

View File

@ -4,24 +4,23 @@ All notable changes to the **Brunix Assistance Engine** will be documented in th
--- ---
## [1.5.0] - 2026-03-11 ## [1.5.0] - 2026-03-12
### Added ### Added
- IMPLEMENTED: - IMPLEMENTED:
- `scripts/pipelines/flows/translate_mbpp.py`: pipeline to generate synthetic dataset from mbpp dataset. - `scripts/pipelines/flows/translate_mbpp.py`: pipeline to generate synthetic dataset from mbpp dataset.
- `scripts/input/prompts.py`: module containing prompts for pipelines. - `scripts/tasks/prompts.py`: module containing prompts for pipelines.
- `scripts/tasks/chunk.py`: module containing functions related to chunk management. - `scripts/tasks/chunk.py`: module containing functions related to chunk management.
- `synthethic_datasets`: folder containing generated synthetic datasets. - `synthethic_datasets`: folder containing generated synthetic datasets.
- `src/config.py`: environment variables configuration file. - `src/config.py`: environment variables configuration file.
### Changed ### Changed
- REFACTORED: `scripts/pipelines/flows/elasticsearch_ingestion.py` now uses `docs` documents instead of pre chunked files. - REFACTORED: `scripts/pipelines/flows/elasticsearch_ingestion.py` now uses `docs/LRM` or `docs/samples` documents instead of pre chunked files.
- RENAMED `docs/AVAP Language: Core Commands & Functional Specification` to `docs/avap_language_github_docs`. - RENAMED `docs/AVAP Language: Core Commands & Functional Specification` to `docs/avap_language_github_docs`.
- REMOVED: `Makefile` file. - REMOVED: `Makefile` file.
- REMOVED: `scripts/start-tunnels.sh` script. - REMOVED: `scripts/start-tunnels.sh` script.
- REMOVED `ingestion` folder.
- DEPENDENCIES: `requirements.txt` updated with new libraries required by the new modules. - DEPENDENCIES: `requirements.txt` updated with new libraries required by the new modules.
- MOVED `scripts/generate_mbpp_avap.py` into `scripts/flows/generate_mbpp_avap.py` - MOVED `scripts/generate_mbap.py` into `scripts/flows/generate_mbap.py`.
## [1.4.0] - 2026-03-10 ## [1.4.0] - 2026-03-10

File diff suppressed because it is too large Load Diff

View File

@ -115,7 +115,41 @@ AVAP utiliza una gramática estructural mixta. Combina la fluidez de las palabra
La estructura `if()` evalúa una expresión lógica o de comparación. Todo bloque condicional requiere un cierre explícito utilizando el comando `end()`. La estructura `if()` evalúa una expresión lógica o de comparación. Todo bloque condicional requiere un cierre explícito utilizando el comando `end()`.
El comando `if()` soporta dos modos de invocación: El comando `if()` soporta dos modos de invocación:
* **Modo 1 (comparación estructurada):** `if(variable, valor, comparador)` — evalúa la comparación entre variable y valor usando el operador indicado como string (ej. `"=="`, `">"`, `"!="`). Los dos primeros argumentos deben ser identificadores simples o literales, nunca expresiones de acceso como `dict['clave']`. Si se necesita comparar un valor extraído de una estructura, debe asignarse primero a una variable.* **Modo 2 (expresión libre):** `if(None, None, "expresion_compleja")` — evalúa directamente una expresión booleana compleja proporcionada como string. * **Modo 1 (comparación estructurada):** `if(variable, valor, comparador)` — evalúa la comparación entre variable y valor usando el operador indicado como string (ej. `"=="`, `">"`, `"!="`). Los dos primeros argumentos deben ser identificadores simples o literales, nunca expresiones de acceso como `dict['clave']`. Si se necesita comparar un valor extraído de una estructura, debe asignarse primero a una variable.* **Modo 2 (expresión libre):** `if(None, None, expresion_compleja)` — evalúa directamente una expresión booleana compleja proporcionada como string encapsulado entre `.
## SECCIÓN III: Lógica de Control y Estructuras de Decisión
AVAP utiliza una gramática estructural mixta. Combina la fluidez de las palabras clave para abrir bloques funcionales con la seguridad matemática de cierres estrictos.
### 3.1 El Bloque Condicional (if() / else() / end())
El comando `if()` gestiona la lógica condicional mediante dos modos de invocación estrictamente diferenciados. Es imperativo respetar los delimitadores y la posición de los argumentos.
#### Modo 1: Comparación Estructurada (Atómica)
Se utiliza para comparaciones directas entre dos valores simples.
* **Sintaxis:** `if(átomo_1, átomo_2, "operador")`
* **Argumentos 1 y 2:** Deben ser identificadores simples (variables) o literales (strings/números). **No se permite el uso de `None` en este modo.**
* **Argumento 3:** El operador de comparación debe ir obligatoriamente entre **comillas dobles** (`"=="`, `"!="`, `">"`, `"<"`, `">="`, `"<="`).
* **Restricción:** No se permiten expresiones de acceso (ej. `data.user` o `list[0]`). Estos valores deben asignarse previamente a una variable.
* **Ejemplo correcto:** `if(reintentos, 5, "<")`
#### Modo 2: Expresión Libre (Evaluación Compleja)
Se utiliza para evaluar expresiones lógicas que no encajan en la estructura atómica.
* **Sintaxis:** `if(None, None, `expresión_compleja`)`
* **Argumentos 1 y 2:** Deben ser literalmente la palabra `None` (sin comillas).
* **Argumento 3:** La expresión completa **debe** estar encapsulada entre **acentos graves (backticks)**. Esto permite incluir lógica interna, operadores `and/or` y accesos a estructuras de datos.
* **Ejemplo correcto:** `if(None, None, `user.id > 10 and email.contains("@")`)`
---
### Tabla de Validación para el Modelo
| Entrada | Estado | Razón |
| :--- | :--- | :--- |
| `if(count, 10, "==")` | ✅ VÁLIDO | Modo 1: Átomos válidos y operador entre comillas. |
| `if(None, None, `val > 0`)` | ✅ VÁLIDO | Modo 2: Uso correcto de `None` y backticks. |
| `if(username, None, "==")` | ❌ ERROR | El Modo 1 prohíbe el uso de `None`. Debe usarse el Modo 2. |
| `if(None, None, "val > 0")` | ❌ ERROR | El Modo 2 requiere backticks (`` ` ``), no comillas. |
| `if(user.id, 10, "==")` | ❌ ERROR | El Modo 1 no permite expresiones de acceso (`.`). |
### 3.2 Iteraciones Estrictas y Deterministas (startLoop / endLoop) ### 3.2 Iteraciones Estrictas y Deterministas (startLoop / endLoop)
Para garantizar el determinismo y evitar el colapso de memoria: Para garantizar el determinismo y evitar el colapso de memoria:
@ -137,15 +171,17 @@ Diseñada para proteger la estabilidad del servidor ante fallos de I/O.
[ "else()" <EOL> <block> ] [ "else()" <EOL> <block> ]
"end()" <EOL> "end()" <EOL>
/* if() soporta dos modos: <if_condition> ::= <if_structured> | <if_free_expression>
Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
identificadores simples o literales, nunca expresiones de acceso. <if_structured> ::= "if" "(" <strict_atom> "," <strict_atom> "," <backtick_string> ")"
Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']), <if_free_expression> ::= "if" "(" "None" "," "None" "," <backtick_string> ")"
debe asignarse previamente a una variable.
Modo 2 — expresión libre: None, None, expresión compleja como string */ <strict_atom> ::= <identifier> | <non_null_literal>
<if_condition> ::= <if_atom> "," <if_atom> "," <stringliteral> <backtick_string> ::= "`" <text_content> "`"
| "None" "," "None" "," <stringliteral>
<if_atom> ::= <identifier> | <literal> <identifier> ::= [a-zA-Z_][a-zA-Z0-9_]*
<non_null_literal>::= <number> | <string_literal_double_quotes>
/* Nota: <non_null_literal> NO incluye la palabra "None" */
<loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL> <loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
<block> <block>
@ -261,59 +297,116 @@ AVAP utiliza `avapConnector("TOKEN")` para la hidratación segura de credenciale
--- ---
## SECCIÓN VI: Utilidades, Criptografía y Manipulación de Datos # SECCIÓN VI: Utilidades, Criptografía y Manipulación de Datos
AVAP incluye un set de comandos integrados de alto nivel para manipular tipos complejos (JSON y Listas), tiempos, textos y generar hashes. AVAP incluye un set de comandos integrados de alto nivel para manipular tipos complejos (JSON y Listas), tiempos, textos y generar hashes.
### 6.1 Manipulación Nativa de Listas y Objetos JSON ---
Para extraer y mutar estructuras complejas, AVAP provee comandos nativos específicos:
* **`variableToList(elemento, destino)`**: Fuerza a que una variable escalar se convierta en una estructura iterable de lista.
* **`itemFromList(lista_origen, indice, destino)`**: Extrae de forma segura el elemento contenido en la posición `indice` de una lista.
* **`variableFromJSON(json_origen, clave, destino)`**: Parsea un objeto JSON en memoria y extrae el valor correspondiente a la `clave`.
* **`AddVariableToJSON(clave, valor, json_destino)`**: Inyecta dinámicamente una nueva propiedad dentro de un objeto JSON existente.
### 6.2 Criptografía y Expresiones Regulares ## 6.1 Manipulación Nativa de Listas y Objetos JSON
* **`encodeSHA256` y `encodeMD5(origen, destino)`**: Funciones criptográficas que encriptan de forma irreversible un texto. Vitales para el almacenamiento seguro de contraseñas.
* **`getRegex(origen, patron, destino)`**: Aplica una Expresión Regular (`patron`) sobre la variable de origen, extrayendo las coincidencias exactas.
### 6.3 Transformación de Tiempo y Cadenas Para extraer y mutar estructuras complejas, AVAP provee comandos nativos específicos. En AVAP, las listas **no se instancian con literales de array**, sino que se construyen y recorren a través de un conjunto cerrado de comandos especializados:
* **Fechas:** `getTimeStamp` (convierte un string a Epoch), `getDateTime` (Epoch a string legible), y `stampToDatetime` (Epoch a objeto datetime estructurado). Soportan formatos de calendario y cálculos con TimeDeltas.
* **Cadenas:** `replace` (saneamiento y sustitución de texto) y `randomString` (generación determinista de claves/tokens aleatorios).
### Especificación BNF (Sección VI) * **`variableToList(elemento, destino)`**: Fuerza a que una variable escalar se convierta en una estructura iterable de lista de un único elemento. Es el punto de entrada canónico para construir una lista desde cero a partir de un valor existente.
* **`itemFromList(lista_origen, indice, destino)`**: Extrae de forma segura el elemento contenido en la posición `indice` (base 0) de una lista. Equivale a un acceso por índice controlado.
* **`getListLen(lista, destino)`**: Calcula el número total de elementos contenidos en `lista` y almacena el resultado entero en `destino`. Imprescindible para construir bucles de recorrido seguro y para validar listas antes de acceder a sus índices. Se recomienda llamar siempre a `getListLen` antes de `itemFromList` para evitar accesos fuera de rango.
* **`variableFromJSON(json_origen, clave, destino)`**: Parsea un objeto JSON en memoria y extrae el valor correspondiente a la `clave`, almacenándolo en `destino`. El acceso es directo por nombre de propiedad.
* **`AddVariableToJSON(clave, valor, json_destino)`**: Inyecta dinámicamente una nueva propiedad dentro de un objeto JSON existente. Si la clave ya existe, su valor es sobreescrito.
**Patrón de recorrido típico en AVAP:**
```avap
// 1. Obtener longitud de la lista
getListLen(myList, len)
// 2. Iterar con índice controlado
i = 0
while (i < len) {
itemFromList(myList, i, currentItem)
// ... procesar currentItem ...
i = i + 1
}
```
---
## 6.2 Criptografía y Expresiones Regulares
* **`encodeSHA256(origen, destino)`** y **`encodeMD5(origen, destino)`**: Funciones criptográficas que encriptan de forma irreversible un texto. Vitales para el almacenamiento seguro de contraseñas y la verificación de integridad de datos. SHA-256 produce un digest de 64 caracteres hexadecimales y ofrece mayor resistencia criptográfica que MD5 (32 caracteres); se recomienda SHA-256 para nuevos desarrollos.
* **`getRegex(origen, patron, destino)`**: Aplica una Expresión Regular (`patron`) sobre la variable de origen, extrayendo la primera coincidencia exacta encontrada. El patrón sigue la sintaxis estándar compatible con Python `re`.
---
## 6.3 Transformación de Tiempo y Cadenas
### Fechas y Timestamps
AVAP provee tres comandos complementarios para cubrir todas las conversiones posibles entre representaciones de tiempo. Los tres soportan formatos de calendario en notación `strftime` de Python y cálculos con `TimeDelta` expresados en segundos (positivo para sumar, negativo para restar):
| Comando | Entrada | Salida |
|---|---|---|
| `getTimeStamp(fecha_string, formato, timedelta, destino)` | String de fecha | Epoch (entero) |
| `stampToDatetime(epoch, formato, timedelta, destino)` | Epoch (entero) | String de fecha |
| `getDateTime(formato, timedelta, zona_horaria, destino)` | — (ahora mismo) | String de fecha |
* **`getTimeStamp(fecha_string, formato, timedelta, destino)`**: Convierte un string de fecha legible a su valor Epoch (entero Unix). Útil para almacenar fechas y realizar cálculos aritméticos sobre ellas.
* **`stampToDatetime(epoch, formato, timedelta, destino)`**: Convierte un valor Epoch a un string de fecha con el formato especificado. Útil para presentar timestamps almacenados de forma legible.
* **`getDateTime(formato, timedelta, zona_horaria, destino)`**: Captura la fecha y hora actuales del sistema, aplica el ajuste `timedelta` y las convierte a la `zona_horaria` indicada antes de almacenar el resultado. Acepta cualquier zona horaria reconocida por la librería `pytz` de Python.
### Cadenas de Texto
* **`randomString(patron, longitud, destino)`**: Genera una cadena aleatoria de `longitud` caracteres cuyos símbolos están restringidos al conjunto definido por `patron` (expresión regular de caracteres). Útil para generar tokens de sesión, contraseñas temporales o identificadores únicos.
* **`replace(origen, patron_busqueda, reemplazo, destino)`**: Localiza todas las ocurrencias de `patron_busqueda` dentro de `origen` y las sustituye por `reemplazo`, almacenando el resultado en `destino`. Facilita el saneamiento y normalización de datos de entrada antes de su procesamiento o almacenamiento.
---
## BNF — Gramática Formal de los Comandos de Utilidad
```bnf ```bnf
/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */ <util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd>
<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
/* Manipulación de listas y JSON */ /* Manipulación de listas y JSON */
<json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")" <json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")"
| "itemFromList(" <identifier> "," <expression> "," <identifier> ")" | "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
| "getListLen(" <identifier> "," <identifier> ")"
| "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")" | "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
| "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")" | "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
/* Criptografía */ /* Criptografía */
<crypto_cmd> ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")" <crypto_cmd> ::= "encodeSHA256(" <expression> "," <identifier> ")"
| "encodeMD5(" <identifier_or_string> "," <identifier> ")" | "encodeMD5(" <expression> "," <identifier> ")"
/* Expresiones regulares */ /* Expresiones regulares */
<regex_cmd> ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")" <regex_cmd> ::= "getRegex(" <identifier> "," <expression> "," <identifier> ")"
/* Fecha/hora actual → string */
<datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")" <datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */ /* Argumentos: formato_salida, timedelta, zona_horaria, destino */
/* Conversiones epoch ↔ string */
<stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")" <stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
/* Argumentos: epoch_origen, formato, timedelta, destino */ /* Argumentos: epoch_origen, formato, timedelta, destino */
| "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")" | "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
/* Argumentos: fecha_string, formato_entrada, timedelta, destino */ /* Argumentos: fecha_string, formato_entrada, timedelta, destino */
<string_cmd> ::= "randomString(" <expression> "," <identifier> ")" /* Cadenas */
/* Argumentos: longitud, destino */ <string_cmd> ::= "randomString(" <expression> "," <expression> "," <identifier> ")"
/* Argumentos: patron, longitud, destino */
<replace_cmd> ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")" <replace_cmd> ::= "replace(" <identifier> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
/* Argumentos: origen, patron_busqueda, reemplazo, destino */ /* Argumentos: origen, patron_busqueda, reemplazo, destino */
``` ```
--- ---
## SECCIÓN VII: Arquitectura de Funciones y Ámbitos (Scopes) ## SECCIÓN VII: Arquitectura de Funciones y Ámbitos (Scopes)

File diff suppressed because it is too large Load Diff

92295
ingestion/chunks.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -5,7 +5,6 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
"chonkie[semantic]>=1.5.6",
"grpcio>=1.78.0", "grpcio>=1.78.0",
"grpcio-reflection>=1.78.0", "grpcio-reflection>=1.78.0",
"grpcio-tools>=1.78.0", "grpcio-tools>=1.78.0",
@ -28,7 +27,9 @@ dependencies = [
dev = [ dev = [
"beir>=2.2.0", "beir>=2.2.0",
"boto3>=1.42.58", "boto3>=1.42.58",
"chonkie[elastic,semantic]>=1.6.0",
"evidently>=0.7.20", "evidently>=0.7.20",
"flatbuffers>=25.12.19",
"jupyter>=1.1.1", "jupyter>=1.1.1",
"langfuse<3", "langfuse<3",
"litellm>=1.82.0", "litellm>=1.82.0",

View File

@ -1,30 +1,29 @@
import re
import hashlib
from typing import Any
from enum import Enum from enum import Enum
import typer import typer
import logging import logging
import os import os
from pathlib import Path
from loguru import logger from loguru import logger
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from langchain_core.documents import Document
from langchain_elasticsearch import ElasticsearchStore from langchain_elasticsearch import ElasticsearchStore
from langchain_community.embeddings import HuggingFaceEmbeddings from chonkie import SemanticChunker, MarkdownChef
from langchain_experimental.text_splitter import SemanticChunker from transformers import AutoTokenizer
from src.utils.emb_factory import create_embedding_model from src.utils.emb_factory import create_embedding_model
from scripts.pipelines.tasks.chunk import scrape_avap_docs from scripts.pipelines.tasks.chunk import (
read_files,
get_chunk_docs,
convert_chunks_to_document
)
app = typer.Typer() app = typer.Typer()
ELASTICSEARCH_LOCAL_URL = os.getenv("ELASTICSEARCH_LOCAL_URL") ELASTICSEARCH_LOCAL_URL = os.getenv("ELASTICSEARCH_LOCAL_URL")
OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL") OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")
OLLAMA_URL = os.getenv("OLLAMA_URL") OLLAMA_URL = os.getenv("OLLAMA_URL")
OLLAMA_EMB_MODEL_NAME = os.getenv("OLLAMA_EMB_MODEL_NAME") OLLAMA_EMB_MODEL_NAME = os.getenv("OLLAMA_EMB_MODEL_NAME")
AVAP_WEB_DOCS_URL = os.getenv("AVAP_WEB_DOCS_URL") AVAP_WEB_DOCS_URL = os.getenv("AVAP_WEB_DOCS_URL")
HF_EMB_MODEL_NAME = os.getenv("HF_EMB_MODEL_NAME")
class DistanceStrategy(str, Enum): class DistanceStrategy(str, Enum):
euclidean = "EUCLIDEAN_DISTANCE" euclidean = "EUCLIDEAN_DISTANCE"
@ -33,55 +32,45 @@ class DistanceStrategy(str, Enum):
jaccard = "JACCARD" jaccard = "JACCARD"
cosine = "COSINE" cosine = "COSINE"
def clean_text(text: str) -> str:
text = text.replace("\u00a0", " ")
text = re.sub(r"\s+", " ", text).strip()
return text
def build_documents_from_folder(
folder_path: str,
) -> list[Document]:
folder = Path(folder_path)
if not folder.exists() or not folder.is_dir():
raise ValueError(f"Invalid folder path: {folder_path}")
all_documents: list[Document] = []
for file_path in folder.glob("*.txt"):
doc_text = file_path.read_text(encoding="utf-8")
if not doc_text.strip():
continue
metadata: dict[str, Any] = {
"source": file_path.name,
}
doc_text = clean_text(doc_text)
document = Document(
id=hashlib.md5(file_path.name.encode()).hexdigest(),
page_content=doc_text,
metadata={**metadata}
)
all_documents.append(document)
return all_documents
@app.command() @app.command()
def elasticsearch_ingestion( def elasticsearch_ingestion(
docs_folder_path: str = "ingestion/docs", docs_folder_path: str = "docs",
es_index: str = "avap-docs-test-v2",
es_request_timeout: int = 120, es_request_timeout: int = 120,
es_max_retries: int = 5, es_max_retries: int = 5,
es_retry_on_timeout: bool = True, es_retry_on_timeout: bool = True,
distance_strategy: DistanceStrategy = DistanceStrategy.cosine, distance_strategy: DistanceStrategy = DistanceStrategy.cosine,
chunk_size: int = 2048,
chunk_threshold: float = 0.5,
chunk_similarity_window: int = 3,
chunk_skip_window: int = 1,
): ):
logger.info("Starting Elasticsearch ingestion pipeline...") logger.info("Starting Elasticsearch ingestion pipeline...")
logger.info(f"Using docs folder path: {docs_folder_path}") logger.info(f"Reading files from folder: {docs_folder_path}/LRM and {docs_folder_path}/samples...")
documents = build_documents_from_folder(folder_path=docs_folder_path) avap_code_docs = read_files(f"{docs_folder_path}/samples")
avap_language_docs = read_files(f"{docs_folder_path}/LRM")
logger.info("Instantiating semantic chunker and chef...")
custom_tokenizer = AutoTokenizer.from_pretrained(HF_EMB_MODEL_NAME)
chef = MarkdownChef(tokenizer=custom_tokenizer)
chunker = SemanticChunker(
embedding_model=HF_EMB_MODEL_NAME,
chunk_size=chunk_size,
threshold=chunk_threshold,
similarity_window=chunk_similarity_window,
skip_window=chunk_skip_window
)
logger.info("Processing Markdown docs with chef...")
doc = chef.process(f"{docs_folder_path}/LRM/avap.md")
logger.info("Chunking AVAP Language docs...")
avap_language_docs_chunks = get_chunk_docs(avap_language_docs, chunker)
logger.info("Creating Langchain Document to index...")
avap_language_langchain_docs = convert_chunks_to_document(avap_language_docs_chunks)
avap_code_langchain_docs = convert_chunks_to_document(avap_code_docs)
avap_documents = avap_language_langchain_docs + avap_code_langchain_docs
logger.info("Connecting to Elasticsearch...") logger.info("Connecting to Elasticsearch...")
try: try:
@ -106,15 +95,19 @@ def elasticsearch_ingestion(
logger.exception("Failed to instantiate embeddings model.") logger.exception("Failed to instantiate embeddings model.")
raise raise
logger.info(f"Uploading documents to index {ELASTICSEARCH_INDEX}...") logger.info(f"Checking if index {es_index} exists and deleting if it does...")
if es.indices.exists(index=es_index):
es.indices.delete(index=es_index)
logger.info(f"Uploading documents to index {es_index}...")
ElasticsearchStore.from_documents( ElasticsearchStore.from_documents(
documents, avap_documents,
embeddings, embeddings,
client=es, client=es,
index_name=ELASTICSEARCH_INDEX, index_name=es_index,
distance_strategy=distance_strategy.value, distance_strategy=distance_strategy.value,
) )
logger.info(f"Finished uploading documents to index {ELASTICSEARCH_INDEX}.") logger.info(f"Finished uploading documents to index {es_index}.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -0,0 +1,122 @@
import typer
import logging
from loguru import logger
from elasticsearch import Elasticsearch
from chonkie import MarkdownChef, FileFetcher, ElasticHandshake
from transformers import AutoTokenizer
from src.config import settings
from scripts.pipelines.tasks.embeddings import OllamaEmbeddings
from scripts.pipelines.tasks.chunk import merge_markdown_document
app = typer.Typer()
def get_processing_and_chunking_config(docs_extension: str, chunk_size: int,
chunk_threshold: float | None,
chunk_similarity_window: int| None,
chunk_skip_window: int | None) -> tuple[str, dict, str, dict]:
"""
Check the file extension and return the appropriate processing and chunking strategies and their kwargs.
Args:
docs_extension (str): The file extension of the documents to be ingested.
chunk_size (int): The size of the chunks to be created.
chunk_threshold (float, optional): The threshold for semantic chunking. Required if docs_extension is .md.
chunk_similarity_window (int, optional): The similarity window for semantic chunking
chunk_skip_window (int, optional): The skip window for semantic chunking.
Returns:
tuple[str, dict, str, dict]: A tuple containing the processing strategy, its kwargs, the chunking strategy, and its kwargs.
"""
if docs_extension == ".md":
process_type = "markdown"
custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
process_kwargs = {"tokenizer": custom_tokenizer}
# process_type = "text"
# process_kwargs = {}
chunk_strat = "semantic"
chunk_kwargs = {"embedding_model": settings.hf_emb_model_name, "threshold": chunk_threshold, "chunk_size": chunk_size,
"similarity_window": chunk_similarity_window, "skip_window": chunk_skip_window}
elif docs_extension == ".avap":
process_type = "text"
process_kwargs = {}
chunk_strat = "recursive" # Once we have the BNF and uploaded to tree-sitter, we can use code (?)
chunk_kwargs = {"chunk_size": chunk_size}
return process_type, process_kwargs, chunk_strat, chunk_kwargs
@app.command()
def elasticsearch_ingestion(
    docs_folder_path: str = "docs/LRM",
    docs_extension: str = ".md",
    es_index: str = "avap-docs-test-v3",
    es_request_timeout: int = 120,
    es_max_retries: int = 5,
    es_retry_on_timeout: bool = True,
    delete_es_index: bool = True,
    chunk_size: int = 2048,
    chunk_threshold: float | None = 0.5,
    chunk_similarity_window: int | None = 3,
    chunk_skip_window: int | None = 1
) -> None:
    """Ingest documents from a local folder into an Elasticsearch index.

    Pipeline: fetch files from ``<proj_root>/<docs_folder_path>`` -> process
    each with a chonkie MarkdownChef -> merge per-document chunks with
    ``merge_markdown_document`` -> embed and write via ``ElasticHandshake``.

    Args:
        docs_folder_path: Folder (relative to the project root) to ingest.
        docs_extension: Extension of the documents; drives the strategy config.
        es_index: Target Elasticsearch index name.
        es_request_timeout: Per-request timeout (seconds) for the ES client.
        es_max_retries: Max retries for the ES client.
        es_retry_on_timeout: Whether the ES client retries on timeout.
        delete_es_index: If True, drop the index first for a clean re-ingest.
        chunk_size: Chunk size passed to the chunking strategy.
        chunk_threshold: Semantic-chunking threshold (markdown only).
        chunk_similarity_window: Semantic-chunking similarity window.
        chunk_skip_window: Semantic-chunking skip window.
    """
    # NOTE(review): this tokenizer is also re-created inside
    # get_processing_and_chunking_config for ".md" — consider sharing one.
    custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
    processed_docs = []
    fused_docs = []
    logger.info(f"Instantiating Elasticsearch client with URL: {settings.elasticsearch_local_url}...")
    es = Elasticsearch(
        hosts=settings.elasticsearch_local_url,
        request_timeout=es_request_timeout,
        max_retries=es_max_retries,
        retry_on_timeout=es_retry_on_timeout,
    )
    # Optional clean re-ingest: drop the index only if it already exists.
    if delete_es_index and es.indices.exists(index=es_index):
        logger.info(f"Deleting existing Elasticsearch index: {es_index}...")
        es.indices.delete(index=es_index)
    logger.info("Starting Elasticsearch ingestion pipeline...")
    # NOTE(review): process_kwargs and chunk_kwargs are computed here but never
    # used below (the chef is hardcoded to MarkdownChef and chunking comes from
    # merge_markdown_document); chunk_strat is only logged — confirm whether
    # non-.md ingestion is actually supported by this command.
    (process_type,
     process_kwargs,
     chunk_strat,
     chunk_kwargs) = get_processing_and_chunking_config(docs_extension, chunk_size, chunk_threshold, chunk_similarity_window, chunk_skip_window)
    logger.info(f"Fetching files from {docs_folder_path}...")
    fetcher = FileFetcher()
    docs = fetcher.fetch(dir=f"{settings.proj_root}/{docs_folder_path}")
    logger.info(f"Processing documents with process_type: {process_type}...")
    chef = MarkdownChef(tokenizer=custom_tokenizer)
    for doc in docs:
        processed_doc = chef.process(doc)
        processed_docs.append(processed_doc)
    logger.info(f"Chunking documents with chunk_strat: {chunk_strat}...")
    for processed_doc in processed_docs:
        fused_doc = merge_markdown_document(processed_doc)
        fused_docs.append(fused_doc)
    logger.info(f"Ingesting chunks in Elasticsearch index: {es_index}...")
    # ElasticHandshake embeds each chunk with the Ollama model and writes the
    # result to the target index.
    handshake = ElasticHandshake(
        client=es,
        index_name=es_index,
        embedding_model=OllamaEmbeddings(model=settings.ollama_emb_model_name)
    )
    for fused_doc in fused_docs:
        handshake.write(fused_doc.chunks)
    logger.info(f"Finished ingesting in {es_index}.")
if __name__ == "__main__":
    # Root logging config so library log records are visible alongside loguru.
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        level=logging.INFO,
    )
    try:
        app()
    except Exception as err:
        # Record the full traceback via loguru before propagating so the
        # process still exits with a non-zero status.
        logger.exception(err)
        raise

View File

@ -32,12 +32,7 @@
"\n", "\n",
"from src.utils.llm_factory import create_chat_model\n", "from src.utils.llm_factory import create_chat_model\n",
"from src.utils.emb_factory import create_embedding_model\n", "from src.utils.emb_factory import create_embedding_model\n",
"from src.config import (\n", "from src.config import settings"
" ELASTICSEARCH_LOCAL_URL,\n",
" ELASTICSEARCH_INDEX,\n",
" OLLAMA_MODEL_NAME,\n",
" OLLAMA_EMB_MODEL_NAME\n",
")"
] ]
}, },
{ {
@ -51,20 +46,20 @@
"\n", "\n",
"llm = create_chat_model(\n", "llm = create_chat_model(\n",
" provider=\"ollama\",\n", " provider=\"ollama\",\n",
" model=OLLAMA_MODEL_NAME,\n", " model=settings.ollama_model_name,\n",
" temperature=0.5,\n", " temperature=0.5,\n",
" validate_model_on_init=True,\n", " validate_model_on_init=True,\n",
")\n", ")\n",
"embeddings = create_embedding_model(\n", "embeddings = create_embedding_model(\n",
" provider=\"ollama\",\n", " provider=\"ollama\",\n",
" model=OLLAMA_EMB_MODEL_NAME,\n", " model=settings.ollama_emb_model_name,\n",
")\n", ")\n",
"vector_store = ElasticsearchStore(\n", "vector_store = ElasticsearchStore(\n",
" es_url=ELASTICSEARCH_LOCAL_URL,\n", " es_url=settings.elasticsearch_local_url,\n",
" index_name=ELASTICSEARCH_INDEX,\n", " index_name=\"avap-docs-test-v3\",\n",
" embedding=embeddings,\n", " embedding=embeddings,\n",
" query_field=\"text\",\n", " query_field=\"text\",\n",
" vector_query_field=\"vector\",\n", " vector_query_field=\"embedding\",\n",
" # strategy=ElasticsearchStore.ApproxRetrievalStrategy(\n", " # strategy=ElasticsearchStore.ApproxRetrievalStrategy(\n",
" # hybrid=True,\n", " # hybrid=True,\n",
" # rrf={\"rank_constant\": 60, \"window_size\": 100}\n", " # rrf={\"rank_constant\": 60, \"window_size\": 100}\n",
@ -464,44 +459,185 @@
"text": [ "text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n", "================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n", "\n",
"What types of includes does AVAP have?\n" "What types of includes does AVAP have?\n",
] "[reformulate] 'What types of includes does AVAP have?' → '\"avap includes type\"'\n",
}, "================================\u001b[1m Human Message \u001b[0m=================================\n",
{ "\n",
"ename": "ResponseError", "What types of includes does AVAP have?\n",
"evalue": "failed to parse JSON: unexpected end of JSON input (status code: -1)", "[retrieve] 3 docs fetched\n",
"output_type": "error", "[1] id=chunk-1 source=Untitled\n",
"traceback": [ "\n",
"\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\n",
"\u001b[31mResponseError\u001b[39m Traceback (most recent call last)", "Token:\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[18]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m a = \u001b[43mstream_graph_updates\u001b[49m\u001b[43m(\u001b[49m\u001b[43muser_input\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43magentic_graph\u001b[49m\u001b[43m)\u001b[49m\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/decorators/langfuse_decorator.py:256\u001b[39m, in \u001b[36mLangfuseDecorator._sync_observe.<locals>.sync_wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 254\u001b[39m result = func(*args, **kwargs)\n\u001b[32m 255\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m256\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_handle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobservation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 257\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 258\u001b[39m result = \u001b[38;5;28mself\u001b[39m._finalize_call(\n\u001b[32m 259\u001b[39m observation, result, capture_output, transform_to_string\n\u001b[32m 260\u001b[39m )\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/decorators/langfuse_decorator.py:520\u001b[39m, in \u001b[36mLangfuseDecorator._handle_exception\u001b[39m\u001b[34m(self, observation, e)\u001b[39m\n\u001b[32m 516\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m observation:\n\u001b[32m 517\u001b[39m _observation_params_context.get()[observation.id].update(\n\u001b[32m 518\u001b[39m level=\u001b[33m\"\u001b[39m\u001b[33mERROR\u001b[39m\u001b[33m\"\u001b[39m, status_message=\u001b[38;5;28mstr\u001b[39m(e)\n\u001b[32m 519\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m520\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/decorators/langfuse_decorator.py:254\u001b[39m, in \u001b[36mLangfuseDecorator._sync_observe.<locals>.sync_wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 251\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 253\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m254\u001b[39m result = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 255\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 256\u001b[39m \u001b[38;5;28mself\u001b[39m._handle_exception(observation, e)\n", "ASSIGN\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[15]\u001b[39m\u001b[32m, line 9\u001b[39m, in \u001b[36mstream_graph_updates\u001b[39m\u001b[34m(user_input, graph)\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;129m@observe\u001b[39m(name=\u001b[33m\"\u001b[39m\u001b[33mgraph_run\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mstream_graph_updates\u001b[39m(user_input: \u001b[38;5;28mstr\u001b[39m, graph: StateGraph):\n\u001b[32m 3\u001b[39m langfuse_context.update_current_trace(\n\u001b[32m 4\u001b[39m user_id=\u001b[33m\"\u001b[39m\u001b[33malberto\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 5\u001b[39m tags=[\u001b[33m\"\u001b[39m\u001b[33mavap\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mrag\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mlanggraph\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 6\u001b[39m metadata={\u001b[33m\"\u001b[39m\u001b[33mfeature\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33magentic-rag\u001b[39m\u001b[33m\"\u001b[39m},\n\u001b[32m 7\u001b[39m )\n\u001b[32m----> \u001b[39m\u001b[32m9\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mevent\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_input\u001b[49m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mvalues\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[43m \u001b[49m\u001b[43mevent\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43m-\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpretty_print\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 15\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m event[\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m][-\u001b[32m1\u001b[39m]\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/pregel/main.py:2646\u001b[39m, in \u001b[36mPregel.stream\u001b[39m\u001b[34m(self, input, config, context, stream_mode, print_mode, output_keys, interrupt_before, interrupt_after, durability, subgraphs, debug, **kwargs)\u001b[39m\n\u001b[32m 2644\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m loop.match_cached_writes():\n\u001b[32m 2645\u001b[39m loop.output_writes(task.id, task.writes, cached=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m-> \u001b[39m\u001b[32m2646\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtick\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2647\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtasks\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrites\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2648\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstep_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2649\u001b[39m \u001b[43m \u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m=\u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2650\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43maccept_push\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2651\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 2652\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# emit output\u001b[39;49;00m\n\u001b[32m 2653\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01myield from\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_output\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2654\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprint_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubgraphs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mqueue\u001b[49m\u001b[43m.\u001b[49m\u001b[43mEmpty\u001b[49m\n\u001b[32m 2655\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2656\u001b[39m loop.after_tick()\n", "[2] id=chunk-2 source=Untitled\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/pregel/_runner.py:167\u001b[39m, in \u001b[36mPregelRunner.tick\u001b[39m\u001b[34m(self, tasks, reraise, timeout, retry_policy, get_waiter, schedule_task)\u001b[39m\n\u001b[32m 165\u001b[39m t = tasks[\u001b[32m0\u001b[39m]\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m167\u001b[39m \u001b[43mrun_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 168\u001b[39m \u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 169\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfigurable\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mCONFIG_KEY_CALL\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43m_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m=\u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m \u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m 
\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 180\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 181\u001b[39m \u001b[38;5;28mself\u001b[39m.commit(t, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/pregel/_retry.py:42\u001b[39m, in \u001b[36mrun_with_retry\u001b[39m\u001b[34m(task, retry_policy, configurable)\u001b[39m\n\u001b[32m 40\u001b[39m task.writes.clear()\n\u001b[32m 41\u001b[39m \u001b[38;5;66;03m# run the task\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43mproc\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43minput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ParentCommand \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 44\u001b[39m ns: \u001b[38;5;28mstr\u001b[39m = config[CONF][CONFIG_KEY_CHECKPOINT_NS]\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/_internal/_runnable.py:656\u001b[39m, in \u001b[36mRunnableSeq.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 654\u001b[39m \u001b[38;5;66;03m# run in context\u001b[39;00m\n\u001b[32m 655\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m set_config_context(config, run) \u001b[38;5;28;01mas\u001b[39;00m context:\n\u001b[32m--> \u001b[39m\u001b[32m656\u001b[39m \u001b[38;5;28minput\u001b[39m = \u001b[43mcontext\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 657\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 658\u001b[39m \u001b[38;5;28minput\u001b[39m = step.invoke(\u001b[38;5;28minput\u001b[39m, config)\n", "> **Nota de implementación:** `<connector_instantiation>` se distingue de `<orm_connector_init>` (ORM) únicamente por contexto semántico: el UUID pasado como argumento determina si el adaptador resuelto es un ORM de base de datos o un proxy de terceros. La gramática los trata de forma idéntica; el motor de ejecución selecciona el adaptador apropiado en runtime.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/_internal/_runnable.py:400\u001b[39m, in \u001b[36mRunnableCallable.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 398\u001b[39m run_manager.on_chain_end(ret)\n\u001b[32m 399\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m400\u001b[39m ret = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 401\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.recurse \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ret, Runnable):\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m ret.invoke(\u001b[38;5;28minput\u001b[39m, config)\n", "\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 5\u001b[39m, in \u001b[36magent\u001b[39m\u001b[34m(state)\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34magent\u001b[39m(state: AgenticAgentState) -> AgenticAgentState:\n\u001b[32m 4\u001b[39m llm_with_tools = llm.bind_tools(tools)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m: [\u001b[43mllm_with_tools\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mSystemMessage\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m=\u001b[49m\u001b[43mAGENTIC_PROMPT\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m]}\n", "---\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/runnables/base.py:5695\u001b[39m, in \u001b[36mRunnableBindingBase.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 5688\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m 5689\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34minvoke\u001b[39m(\n\u001b[32m 5690\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 5693\u001b[39m **kwargs: Any | \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 5694\u001b[39m ) -> Output:\n\u001b[32m-> \u001b[39m\u001b[32m5695\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbound\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5696\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 5697\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_merge_configs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5698\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43m{\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5699\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:402\u001b[39m, in \u001b[36mBaseChatModel.invoke\u001b[39m\u001b[34m(self, input, config, stop, **kwargs)\u001b[39m\n\u001b[32m 388\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m 389\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34minvoke\u001b[39m(\n\u001b[32m 390\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 395\u001b[39m **kwargs: Any,\n\u001b[32m 396\u001b[39m ) -> AIMessage:\n\u001b[32m 397\u001b[39m config = ensure_config(config)\n\u001b[32m 398\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[32m 399\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAIMessage\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 400\u001b[39m cast(\n\u001b[32m 401\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mChatGeneration\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m--> \u001b[39m\u001b[32m402\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 403\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_convert_input\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 404\u001b[39m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 405\u001b[39m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcallbacks\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 406\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mtags\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtags\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 407\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmetadata\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 408\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrun_name\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 409\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpop\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrun_id\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 410\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 411\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m.generations[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m],\n\u001b[32m 412\u001b[39m ).message,\n\u001b[32m 413\u001b[39m )\n", "## SECCIÓN VI: Utilidades, Criptografía y Manipulación de Datos\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:1123\u001b[39m, in \u001b[36mBaseChatModel.generate_prompt\u001b[39m\u001b[34m(self, prompts, stop, callbacks, **kwargs)\u001b[39m\n\u001b[32m 1114\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m 1115\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_prompt\u001b[39m(\n\u001b[32m 1116\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 1120\u001b[39m **kwargs: Any,\n\u001b[32m 1121\u001b[39m ) -> LLMResult:\n\u001b[32m 1122\u001b[39m prompt_messages = [p.to_messages() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m prompts]\n\u001b[32m-> \u001b[39m\u001b[32m1123\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt_messages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:933\u001b[39m, in \u001b[36mBaseChatModel.generate\u001b[39m\u001b[34m(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)\u001b[39m\n\u001b[32m 930\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i, m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(input_messages):\n\u001b[32m 931\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 932\u001b[39m results.append(\n\u001b[32m--> \u001b[39m\u001b[32m933\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate_with_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 934\u001b[39m \u001b[43m \u001b[49m\u001b[43mm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 935\u001b[39m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 936\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 937\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 938\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 939\u001b[39m )\n\u001b[32m 940\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 941\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m run_managers:\n", "AVAP incluye un set de comandos integrados de alto nivel para manipular tipos complejos (JSON y Listas), tiempos, textos y 
generar hashes.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:1235\u001b[39m, in \u001b[36mBaseChatModel._generate_with_cache\u001b[39m\u001b[34m(self, messages, stop, run_manager, **kwargs)\u001b[39m\n\u001b[32m 1233\u001b[39m result = generate_from_stream(\u001b[38;5;28miter\u001b[39m(chunks))\n\u001b[32m 1234\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m inspect.signature(\u001b[38;5;28mself\u001b[39m._generate).parameters.get(\u001b[33m\"\u001b[39m\u001b[33mrun_manager\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m-> \u001b[39m\u001b[32m1235\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1236\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 1237\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1238\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1239\u001b[39m result = \u001b[38;5;28mself\u001b[39m._generate(messages, stop=stop, **kwargs)\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:1030\u001b[39m, in \u001b[36mChatOllama._generate\u001b[39m\u001b[34m(self, messages, stop, run_manager, **kwargs)\u001b[39m\n\u001b[32m 1023\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_generate\u001b[39m(\n\u001b[32m 1024\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 1025\u001b[39m messages: \u001b[38;5;28mlist\u001b[39m[BaseMessage],\n\u001b[32m (...)\u001b[39m\u001b[32m 1028\u001b[39m **kwargs: Any,\n\u001b[32m 1029\u001b[39m ) -> ChatResult:\n\u001b[32m-> \u001b[39m\u001b[32m1030\u001b[39m final_chunk = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_chat_stream_with_aggregation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1031\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 1032\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1033\u001b[39m generation_info = final_chunk.generation_info\n\u001b[32m 1034\u001b[39m chat_generation = ChatGeneration(\n\u001b[32m 1035\u001b[39m message=AIMessage(\n\u001b[32m 1036\u001b[39m content=final_chunk.text,\n\u001b[32m (...)\u001b[39m\u001b[32m 1043\u001b[39m generation_info=generation_info,\n\u001b[32m 1044\u001b[39m )\n", "### 6.1 Manipulación Nativa de Listas y Objetos JSON\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:965\u001b[39m, in \u001b[36mChatOllama._chat_stream_with_aggregation\u001b[39m\u001b[34m(self, messages, stop, run_manager, verbose, **kwargs)\u001b[39m\n\u001b[32m 956\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_chat_stream_with_aggregation\u001b[39m(\n\u001b[32m 957\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 958\u001b[39m messages: \u001b[38;5;28mlist\u001b[39m[BaseMessage],\n\u001b[32m (...)\u001b[39m\u001b[32m 962\u001b[39m **kwargs: Any,\n\u001b[32m 963\u001b[39m ) -> ChatGenerationChunk:\n\u001b[32m 964\u001b[39m final_chunk = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m965\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_iterate_over_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 966\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfinal_chunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m:\u001b[49m\n\u001b[32m 967\u001b[39m \u001b[43m \u001b[49m\u001b[43mfinal_chunk\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\n", "Para extraer y mutar estructuras complejas, AVAP provee comandos nativos específicos:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:1054\u001b[39m, in \u001b[36mChatOllama._iterate_over_stream\u001b[39m\u001b[34m(self, messages, stop, **kwargs)\u001b[39m\n\u001b[32m 1047\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_iterate_over_stream\u001b[39m(\n\u001b[32m 1048\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 1049\u001b[39m messages: \u001b[38;5;28mlist\u001b[39m[BaseMessage],\n\u001b[32m 1050\u001b[39m stop: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m] | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 1051\u001b[39m **kwargs: Any,\n\u001b[32m 1052\u001b[39m ) -> Iterator[ChatGenerationChunk]:\n\u001b[32m 1053\u001b[39m reasoning = kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mreasoning\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m.reasoning)\n\u001b[32m-> \u001b[39m\u001b[32m1054\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_create_chat_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 1055\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 1056\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1057\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessage\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 1058\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessage\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessage\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 1059\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 1060\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "* **`variableToList(elemento, destino)`**: Fuerza a que una variable escalar se convierta en una estructura iterable de lista.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:952\u001b[39m, in \u001b[36mChatOllama._create_chat_stream\u001b[39m\u001b[34m(self, messages, stop, **kwargs)\u001b[39m\n\u001b[32m 950\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chat_params[\u001b[33m\"\u001b[39m\u001b[33mstream\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client.chat(**chat_params)\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client:\n\u001b[32m 954\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client.chat(**chat_params)\n", "* **`itemFromList(lista_origen, indice, destino)`**: Extrae de forma segura el elemento contenido en la posición `indice` de una lista.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/ollama/_client.py:184\u001b[39m, in \u001b[36mClient._request.<locals>.inner\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 182\u001b[39m part = json.loads(line)\n\u001b[32m 183\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m err := part.get(\u001b[33m'\u001b[39m\u001b[33merror\u001b[39m\u001b[33m'\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m184\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResponseError(err)\n\u001b[32m 185\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(**part)\n", "* **`variableFromJSON(json_origen, clave, destino)`**: Parsea un objeto JSON en memoria y extrae el valor correspondiente a la `clave`.\n",
"\u001b[31mResponseError\u001b[39m: failed to parse JSON: unexpected end of JSON input (status code: -1)", "* **`AddVariableToJSON(clave, valor, json_destino)`**: Inyecta dinámicamente una nueva propiedad dentro de un objeto JSON existente.\n",
"During task with name 'agent' and id '9110cf29-5205-b67b-0456-234df433158a'" "\n",
"### 6.2 Criptografía y Expresiones Regulares\n",
"* **`encodeSHA256` y `encodeMD5(origen, destino)`**: Funciones criptográficas que encriptan de forma irreversible un texto. Vitales para el almacenamiento seguro de contraseñas.\n",
"* **`getRegex(origen, patron, destino)`**: Aplica una Expresión Regular (`patron`) sobre la variable de origen, extrayendo las coincidencias exactas.\n",
"\n",
"### 6.3 Transformación de Tiempo y Cadenas\n",
"* **Fechas:** `getTimeStamp` (convierte un string a Epoch), `getDateTime` (Epoch a string legible), y `stampToDatetime` (Epoch a objeto datetime estructurado). Soportan formatos de calendario y cálculos con TimeDeltas.\n",
"* **Cadenas:** `replace` (saneamiento y sustitución de texto) y `randomString` (generación determinista de claves/tokens aleatorios).\n",
"\n",
"### Especificación BNF (Sección VI)\n",
"\n",
"\n",
"\n",
"/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */\n",
"<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>\n",
"\n",
"/* Manipulación de listas y JSON */\n",
"<json_list_cmd> ::= \"variableToList(\" <expression> \",\" <identifier> \")\"\n",
" | \"itemFromList(\" <identifier> \",\" <expression> \",\" <identifier> \")\"\n",
" | \"variableFromJSON(\" <identifier> \",\" <expression> \",\" <identifier> \")\"\n",
" | \"AddVariableToJSON(\" <expression> \",\" <expression> \",\" <identifier> \")\"\n",
"\n",
"/* Criptografía */\n",
"<crypto_cmd> ::= \"encodeSHA256(\" <identifier_or_string> \",\" <identifier> \")\"\n",
" | \"encodeMD5(\" <identifier_or_string> \",\" <identifier> \")\"\n",
"\n",
"/* Expresiones regulares */\n",
"<regex_cmd> ::= \"getRegex(\" <identifier> \",\" <stringliteral> \",\" <identifier> \")\"\n",
"\n",
"<datetime_cmd> ::= \"getDateTime(\" <stringliteral> \",\" <expression> \",\" <stringliteral> \",\" <identifier> \")\"\n",
"/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */\n",
"\n",
"<stamp_cmd> ::= \"stampToDatetime(\" <expression> \",\" <stringliteral> \",\" <expression> \",\" <identifier> \")\"\n",
"/* Argumentos: epoch_origen, formato, timedelta, destino */\n",
" | \"getTimeStamp(\" <stringliteral> \",\" <stringliteral> \",\" <expression> \",\" <identifier> \")\"\n",
"/* Argumentos: fecha_string, formato_entrada, timedelta, destino */\n",
"\n",
"<string_cmd> ::= \"randomString(\" <expression> \",\" <identifier> \")\"\n",
"/* Argumentos: longitud, destino */\n",
"\n",
"<replace_cmd> ::= \"replace(\" <identifier_or_string> \",\" <stringliteral> \",\" <stringliteral> \",\" <identifier> \")\"\n",
"/* Argumentos: origen, patron_busqueda, reemplazo, destino */\n",
"\n",
"[3] id=chunk-3 source=Untitled\n",
"\n",
"\n",
"---\n",
"\n",
"## SECCIÓN IX: Expresiones y Gramática Léxica Estricta\n",
"\n",
"Esta sección es el corazón matemático evaluador de AVAP. Define la jerarquía exacta (Precedencia) y provee soporte nativo para características avanzadas similares a Python.\n",
"\n",
"### 9.1 Cast de Tipos Explícito\n",
"AVAP permite conversiones de tipos (Type Casting) en cualquier evaluación utilizando funciones constructoras estándar. Puedes transformar variables dinámicamente usando `int(var)`, `float(var)` o `str(var)`.\n",
"\n",
"### 9.2 Slicing y Comprensiones (Comprehensions)\n",
"* **Slicing (Cortes):** Puedes extraer fragmentos de listas o strings utilizando la notación de dos puntos. Ejemplo: `mi_lista[1:4]` (extrae desde el índice 1 hasta el 3).\n",
"* **Comprehensions:** AVAP soporta la construcción rápida de listas mediante iteradores en una sola línea, permitiendo filtrar y mapear colecciones enteras (ej. `[x * 2 for x in valores if x > 0]`).\n",
"\n",
"### 9.3 Análisis Léxico (Lexer) y Documentación\n",
"AVAP cuenta con tres niveles de descarte de texto para anotaciones humanas:\n",
"1. **Comentarios de Línea (`//`):** Ignora el texto hasta el salto de línea.\n",
"2. **Comentarios de Bloque (`/* ... */`):** Para aislar bloques enteros multilínea.\n",
"3. **Comentarios de Documentación (`///`):** Utilizados por analizadores de código o IDEs para generar documentación técnica automática (Docstrings) a partir del código fuente.\n",
"\n",
"### Especificación BNF (Sección IX)\n",
"\n",
"\n",
"\n",
"/* Jerarquía de Expresiones (Precedencia de menor a mayor) */\n",
"<expression> ::= <logical_or>\n",
"<logical_or> ::= <logical_and> ( \"or\" <logical_and> )*\n",
"<logical_and> ::= <logical_not> ( \"and\" <logical_not> )*\n",
"<logical_not> ::= \"not\" <logical_not> | <comparison>\n",
"\n",
"<comparison> ::= <arithmetic> ( <comp_op> <arithmetic> )*\n",
"<comp_op> ::= \"==\" | \"!=\" | \"<\" | \">\" | \"<=\" | \">=\" | \"in\" | \"is\"\n",
"\n",
"<arithmetic> ::= <term> ( ( \"+\" | \"-\" ) <term> )*\n",
"<term> ::= <factor> ( ( \"*\" | \"/\" | \"%\" ) <factor> )*\n",
"<factor> ::= ( \"+\" | \"-\" ) <factor> | <power>\n",
"<power> ::= <primary> [ \"**\" <factor> ]\n",
"\n",
"/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)\n",
" La regla <primary> cubre también el acceso a métodos de objetos conector\n",
" (conector.metodo(...)) y el acceso por clave a sus resultados (resultado[\"key\"]) */\n",
"<primary> ::= <atom>\n",
" | <primary> \".\" <identifier>\n",
" | <primary> \"[\" <expression> \"]\"\n",
" | <primary> \"[\" [<expression>] \":\" [<expression>] [\":\" [<expression>]] \"]\"\n",
" | <primary> \"(\" [<argument_list>] \")\"\n",
"\n",
"<atom> ::= <identifier>\n",
" | \"$\" <identifier>\n",
" | <literal>\n",
" | \"(\" <expression> \")\"\n",
" | <list_display>\n",
" | <dict_display>\n",
"\n",
"/* Estructuras de Datos, Comprensiones y Argumentos */\n",
"<list_display> ::= \"[\" [<argument_list>] \"]\"\n",
" | \"[\" <expression> \"for\" <identifier> \"in\" <expression> [<if_clause>] \"]\"\n",
"<if_clause> ::= \"if\" <expression>\n",
"<dict_display> ::= \"{\" [<key_datum_list>] \"}\"\n",
"<key_datum_list> ::= <key_datum> ( \",\" <key_datum> )*\n",
"<key_datum> ::= <expression> \":\" <expression>\n",
"<argument_list> ::= <expression> ( \",\" <expression> )*\n",
"\n",
"/* Tipo numérico unificado */\n",
"<number> ::= <floatnumber> | <integer>\n",
"\n",
"/* Literales (Tipos de Datos Primitivos Soportados) */\n",
"<literal> ::= <stringliteral> | <number> | <boolean> | \"None\"\n",
"<boolean> ::= \"True\" | \"False\"\n",
"<integer> ::= [0-9]+\n",
"<floatnumber> ::= [0-9]+ \".\" [0-9]* | \".\" [0-9]+\n",
"\n",
"/* Cadenas de Texto con soporte de secuencias de escape */\n",
"<stringliteral> ::= \"\\\"\" <text_double> \"\\\"\" | \"'\" <text_single> \"'\"\n",
"<escape_sequence> ::= \"\\\\\" ( \"\\\"\" | \"'\" | \"\\\\\" | \"n\" | \"t\" | \"r\" | \"0\" )\n",
"<text_double> ::= ( [^\"\\\\] | <escape_sequence> )*\n",
"<text_single> ::= ( [^'\\\\] | <escape_sequence> )*\n",
"<identifier_or_string> ::= <identifier> | <stringliteral>\n",
"\n",
"/* Reglas de Comentarios para el Lexer\n",
" El lexer aplica longest-match: /// debe evaluarse ANTES que // */\n",
"<doc_comment> ::= \"///\" <any_text>\n",
"<line_comment> ::= \"//\" <any_text>\n",
"<block_comment> ::= \"/*\" <any_content> \"*/\"\n",
"<any_text> ::= [^\\r\\n]*\n",
"<any_content> ::= /* Cualquier secuencia de caracteres que no contenga la subcadena \"*/\" */\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What types of includes does AVAP have?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"AVAP has two main types of include:\n",
"\n",
"1. **<connector_instantiation>:** This is used to instantiate a connector, which could be for a database connection or a third-party API.\n",
"2. **<orm_connector_init>:** This term seems to be related to initializing an Object-Relational Mapping (ORM) connector, indicating that the context suggests it's part of a specific ORM setup.\n",
"\n",
"Both types are treated similarly in terms of grammar but differ semantically by their purpose - one is for database connections or third-party APIs, while the other is specifically for connecting to ORMs. The engine selects the appropriate adapter based on runtime context.\n"
] ]
} }
], ],
"source": [ "source": [
"a = stream_graph_updates(user_input, agentic_graph)" "a = stream_graph_updates(user_input, guided_graph)"
] ]
}, },
{ {

View File

@ -1,134 +1,60 @@
from enum import Enum
import typer import typer
import logging import logging
import os
from pathlib import Path
from loguru import logger from loguru import logger
from elasticsearch import Elasticsearch
from langchain_elasticsearch import ElasticsearchStore
from chonkie import SemanticChunker
from src.utils.emb_factory import create_embedding_model
from scripts.pipelines.tasks.chunk import ( from scripts.pipelines.tasks.chunk import (
read_files, fetch_documents,
get_chunk_docs, process_documents,
convert_chunks_to_document export_documents,
ingest_documents
) )
app = typer.Typer() app = typer.Typer()
ELASTICSEARCH_LOCAL_URL = os.getenv("ELASTICSEARCH_LOCAL_URL")
OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")
OLLAMA_URL = os.getenv("OLLAMA_URL")
OLLAMA_EMB_MODEL_NAME = os.getenv("OLLAMA_EMB_MODEL_NAME")
AVAP_WEB_DOCS_URL = os.getenv("AVAP_WEB_DOCS_URL")
HF_EMB_MODEL_NAME = os.getenv("HF_EMB_MODEL_NAME")
class DistanceStrategy(str, Enum):
euclidean = "EUCLIDEAN_DISTANCE"
max_inner_product = "MAX_INNER_PRODUCT"
dot_product = "DOT_PRODUCT"
jaccard = "JACCARD"
cosine = "COSINE"
@app.command() @app.command()
def elasticsearch_ingestion( def elasticsearch_ingestion(
docs_folder_path: str = "docs", docs_folder_path: str = "docs/samples",
output_path: str = "ingestion/chunks.json",
docs_extension: list[str] = [".md", ".avap"],
es_index: str = "avap-docs-test-v3",
es_request_timeout: int = 120, es_request_timeout: int = 120,
es_max_retries: int = 5, es_max_retries: int = 5,
es_retry_on_timeout: bool = True, es_retry_on_timeout: bool = True,
distance_strategy: DistanceStrategy = DistanceStrategy.cosine, delete_es_index: bool = True
chunk_size: int = 2048, ) -> None:
chunk_threshold: float = 0.5, """
chunk_similarity_window: int = 3, Pipeline to ingest documents into an Elasticsearch index.
chunk_skip_window: int = 1, The pipeline includes fetching documents from a specified folder, processing them into chunks, and then ingesting those chunks into the specified Elasticsearch index.
):
Args:
docs_folder_path (str): Path to the folder containing documents to be ingested. Default is "docs/samples".
docs_extension (list[str]): List of file extensions to filter by (e.g., [".md", ".avap"]). Default is [".md", ".avap"].
es_index (str): Name of the Elasticsearch index to ingest documents into. Default is "avap-docs-test-v3".
es_request_timeout (int): Timeout in seconds for Elasticsearch requests. Default is 120 seconds.
es_max_retries (int): Maximum number of retries for Elasticsearch requests in case of failure. Default is 5 retries.
es_retry_on_timeout (bool): Whether to retry Elasticsearch requests on timeout. Default is True.
delete_es_index (bool): Whether to delete the existing Elasticsearch index before ingestion. Default is True.
Returns:
None
"""
logger.info("Starting Elasticsearch ingestion pipeline...") logger.info("Starting Elasticsearch ingestion pipeline...")
logger.info(f"Reading and concatenating files from folder: {docs_folder_path}/developer.avapframework.com") logger.info(f"Fetching files from {docs_folder_path}...")
avap_github_docs = read_files(f"{docs_folder_path}/avap_language_github_docs", concatenate=False) docs_path = fetch_documents(docs_folder_path, docs_extension)
avap_web_docs_intro = read_files(f"{docs_folder_path}/developer.avapframework.com", "intro", concatenate=True)
# Check chapters in developer.avapframework.com folder and read and concatenate files for each chapter logger.info("Processing docs...")
chapters = sorted({ chunked_docs = process_documents(docs_path)
p.name.split("_")[0]
for p in Path(f"{docs_folder_path}/developer.avapframework.com").glob("chapter*.md")
})
avap_web_docs_chapters = [ logger.info(f"Ingesting chunks in Elasticsearch index: {es_index}...")
item elasticsearch_docs = ingest_documents(chunked_docs, es_index, es_request_timeout, es_max_retries,
for chapter in chapters es_retry_on_timeout, delete_es_index)
for item in read_files(
f"{docs_folder_path}/developer.avapframework.com",
f"{chapter}_",
concatenate=True
)
]
avap_web_docs_appendices = read_files(f"{docs_folder_path}/developer.avapframework.com", "appendices_", concatenate=False) logger.info(f"Exporting processed documents to {output_path}...")
avap_samples_docs = read_files(f"{docs_folder_path}/samples", concatenate=False) export_documents(elasticsearch_docs, output_path)
logger.info("Instantiating semantic chunker...") logger.info(f"Finished ingesting in {es_index}.")
chunker = SemanticChunker(
embedding_model=HF_EMB_MODEL_NAME,
chunk_size=chunk_size,
threshold=chunk_threshold,
similarity_window=chunk_similarity_window,
skip_window=chunk_skip_window
)
logger.info("Chunking AVAP GitHub docs...")
avap_github_docs_chunks = get_chunk_docs(avap_github_docs, chunker)
logger.info("Chunking AVAP web docs chapters...")
avap_web_docs_chapters_chunks = get_chunk_docs(avap_web_docs_chapters, chunker)
logger.info("Creating Langchain Document to index...")
avap_github_langchain_docs = convert_chunks_to_document(avap_github_docs_chunks)
avap_web_chapters_langchain_docs = convert_chunks_to_document(avap_web_docs_chapters_chunks)
avap_web_intro_langchain_docs = convert_chunks_to_document(avap_web_docs_intro)
avap_web_appendices_langchain_docs = convert_chunks_to_document(avap_web_docs_appendices)
avap_samples_langchain_docs = convert_chunks_to_document(avap_samples_docs)
avap_documents = avap_github_langchain_docs + avap_web_chapters_langchain_docs + avap_web_intro_langchain_docs + avap_web_appendices_langchain_docs + avap_samples_langchain_docs
logger.info("Connecting to Elasticsearch...")
try:
es = Elasticsearch(
ELASTICSEARCH_LOCAL_URL,
request_timeout=es_request_timeout,
max_retries=es_max_retries,
retry_on_timeout=es_retry_on_timeout,
)
except:
logger.exception("Failed to connect to Elasticsearch.")
raise
logger.info("Instantiating embeddings model...")
try:
embeddings = create_embedding_model(
provider="ollama",
model=OLLAMA_EMB_MODEL_NAME,
base_url=OLLAMA_LOCAL_URL,
)
except:
logger.exception("Failed to instantiate embeddings model.")
raise
logger.info(f"Checking if index {ELASTICSEARCH_INDEX} exists and deleting if it does...")
if es.indices.exists(index=ELASTICSEARCH_INDEX):
es.indices.delete(index=ELASTICSEARCH_INDEX)
logger.info(f"Uploading documents to index {ELASTICSEARCH_INDEX}...")
ElasticsearchStore.from_documents(
avap_documents,
embeddings,
client=es,
index_name=ELASTICSEARCH_INDEX,
distance_strategy=distance_strategy.value,
)
logger.info(f"Finished uploading documents to index {ELASTICSEARCH_INDEX}.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -1,9 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Use: Use:
python generate_mbpp_avap.py python generate_mbap.py
python generate_mbpp_avap.py --lrm path/to/avap.md python generate_mbap.py --lrm path/to/avap.md
python generate_mbpp_avap.py --lrm avap.md --output output/mbpp_avap.json --problems 300 python generate_mbap.py --lrm avap.md --output output/mbpp_avap.json --problems 300
Requirements: Requirements:
pip install anthropic pip install anthropic
@ -53,7 +53,7 @@ REGLAS ESTRICTAS para el código AVAP generado:
5. if() Modo 1: if(var_o_literal, var_o_literal, "operador") 5. if() Modo 1: if(var_o_literal, var_o_literal, "operador")
los argumentos NO pueden ser expresiones de acceso como dict['key']; los argumentos NO pueden ser expresiones de acceso como dict['key'];
hay que extraer el valor a una variable propia primero. hay que extraer el valor a una variable propia primero.
6. if() Modo 2: if(None, None, "expresion_completa_como_string") 6. if() Modo 2: if(None, None, `expresion_completa_como_string`)
7. _status se asigna con: addVar(_status, 404) 7. _status se asigna con: addVar(_status, 404)
8. ormAccessSelect firma: ormAccessSelect(campos, "tabla", selector, varTarget) 8. ormAccessSelect firma: ormAccessSelect(campos, "tabla", selector, varTarget)
selector puede ser cadena vacía. selector puede ser cadena vacía.
@ -62,7 +62,7 @@ REGLAS ESTRICTAS para el código AVAP generado:
MODO DE EJECUCIÓN MUY IMPORTANTE: MODO DE EJECUCIÓN MUY IMPORTANTE:
- El código se ejecuta DIRECTAMENTE, línea a línea, sin servidor ni registro de endpoints. - El código se ejecuta DIRECTAMENTE, línea a línea, sin servidor ni registro de endpoints.
- NUNCA uses registerEndpoint(), NUNCA uses mainHandler(), NUNCA envuelvas el código en funciones solo para ejecutarlo salvo que queramos probar la funcionalidad de funciones. - NUNCA uses registerEndpoint(), NUNCA uses mainHandler(), NUNCA envuelvas el código en funciones solo para ejecutarlo.
- El código correcto es simplemente las instrucciones en línea, por ejemplo: - El código correcto es simplemente las instrucciones en línea, por ejemplo:
result = "Hello World" result = "Hello World"
addResult(result) addResult(result)
@ -82,29 +82,48 @@ Estructura exacta de cada elemento:
"task_id": <número entero>, "task_id": <número entero>,
"text": "<enunciado del problema en español>", "text": "<enunciado del problema en español>",
"code": "<código AVAP con saltos de línea como \\n>", "code": "<código AVAP con saltos de línea como \\n>",
"test_inputs": { "<param1>": <valor1>, "<param2>": <valor2> },
"test_list": ["<expr_python_1>", "<expr_python_2>"] "test_list": ["<expr_python_1>", "<expr_python_2>"]
} }
FORMATO DE test_inputs MUY IMPORTANTE:
- Es un objeto JSON con un valor fijo para cada variable que el código recibe via addParam().
- Los nombres de las claves deben coincidir EXACTAMENTE con el nombre de variable usado en addParam().
- Los valores deben ser concretos y representativos del problema (no genéricos como "test" o 123).
- Si el código no tiene ningún addParam(), el campo test_inputs debe ser un objeto vacío: {}
- Estos valores son los que el evaluador inyectará en el stack antes de ejecutar el código,
de modo que las aserciones de test_list puedan validar las variables de salida resultantes.
Ejemplo con addParam:
código: addParam("password", password)\\nencodeSHA256(password, hashed)\\naddResult(hashed)
test_inputs: { "password": "secret123" }
test_list: ["re.match(r'^[a-f0-9]{64}$', hashed)"]
Ejemplo sin addParam:
código: randomString(16, token)\\naddResult(token)
test_inputs: {}
test_list: ["re.match(r'^[a-zA-Z0-9]{16}$', token)"]
FORMATO DE test_list MUY IMPORTANTE: FORMATO DE test_list MUY IMPORTANTE:
Cada aserción debe ser una expresión Python con re.match() o re.search() Cada aserción debe ser una expresión Python con re.match()
evaluable directamente sobre las variables del stack AVAP (disponibles como evaluable directamente sobre las variables del stack AVAP (disponibles como
variables Python locales). El módulo 're' está siempre disponible. variables Python locales). El módulo 're' está siempre disponible.
La expresión debe devolver un match object (truthy) si el test pasa. La expresión debe devolver un match object (truthy) si el test pasa.
Reglas estrictas: Reglas estrictas:
- USA ÚNICAMENTE re.match(r'<patrón>', <variable>) o re.search(r'<patrón>', str(<variable>)) - USA ÚNICAMENTE re.match(r'<patrón>', <variable>)
- NO combines expresiones re.match en una aserción, cada asercion tiene que ser un unico re.match(r'<patrón>', <variable>)
- Convierte a string si es necesario: re.match(r'^\\d+$', str(result)) - Convierte a string si es necesario: re.match(r'^\\d+$', str(result))
- Puedes encadenar con 'and': re.match(r'^[a-zA-Z0-9]{32}$', token) and re.match(r'.{32}', token) - Puedes encadenar con 'and': re.match(r'^[a-zA-Z0-9]{32}$', token) and re.match(r'.{32}', token)
- Las variables referenciadas deben existir en el stack tras ejecutar el código. - Las variables referenciadas deben existir en el stack tras ejecutar el código.
- NUNCA uses comparaciones directas (==, !=, >, <). - NUNCA uses comparaciones directas (==, !=, >, <).
- NUNCA uses isinstance(), len(), assert, ni texto descriptivo. - NUNCA uses isinstance(), len(), assert, ni texto descriptivo.
- NUNCA uses nada que no sea re.match() o re.search(). - NUNCA uses nada que no sea re.match().
Ejemplos correctos de test_list: Ejemplos correctos de test_list:
"re.match(r'^[a-f0-9]{64}$', hashed)" "re.match(r'^[a-f0-9]{64}$', hashed)"
"re.match(r'^[a-zA-Z0-9]{32}$', token)" "re.match(r'^[a-zA-Z0-9]{32}$', token)"
"re.match(r'^\\d{4}-\\d{2}-\\d{2}$', date_str)" "re.match(r'^\\d{4}-\\d{2}-\\d{2}$', date_str)"
"re.search(r'Hello', result)"
"re.match(r'^-?\\d+(\\.\\d+)?$', str(result))" "re.match(r'^-?\\d+(\\.\\d+)?$', str(result))"
"re.match(r'^(par|impar)$', result)" "re.match(r'^(par|impar)$', result)"
"re.match(r'^40[134]$', str(_status))" "re.match(r'^40[134]$', str(_status))"
@ -138,22 +157,26 @@ Responde ÚNICAMENTE con el array JSON. Sin texto antes ni después.
def parse_response(raw: str): def parse_response(raw: str):
text = raw.strip() text = raw.strip()
if text.startswith("```"): if text.startswith("```"):
lines = text.splitlines() lines = text.splitlines()
inner = lines[1:] inner = lines[1:]
if inner and inner[-1].strip() == "```": if inner and inner[-1].strip() == "```":
inner = inner[:-1] inner = inner[:-1]
text = "\n".join(inner).strip() text = "\n".join(inner).strip()
problems = json.loads(text) problems = json.loads(text)
if not isinstance(problems, list): if not isinstance(problems, list):
raise ValueError("answer is not a JSON.") raise ValueError("response is not an JSON array")
for p in problems: for p in problems:
for field in ("task_id", "text", "code", "test_list"): for field in ("task_id", "text", "code", "test_list"):
if field not in p: if field not in p:
raise ValueError(f"field '{field}' not found in a problem.") raise ValueError(f"Field missing '{field}' in task_id={p.get('task_id','?')}.")
if "test_inputs" not in p:
p["test_inputs"] = {}
if not isinstance(p["test_inputs"], dict):
raise ValueError(f"'test_inputs' must by a JSON Object (task_id={p.get('task_id','?')}).")
return problems return problems

View File

@ -8,8 +8,7 @@ from botocore.config import Config
from pathlib import Path from pathlib import Path
from langchain_core.messages import SystemMessage, HumanMessage from langchain_core.messages import SystemMessage, HumanMessage
from src.utils.llm_factory import create_chat_model from src.utils.llm_factory import create_chat_model
from src.config import RAW_DIR, INTERIM_DIR from scripts.pipelines.tasks.prompts import get_prompt_mbpp
from scripts.pipelines.input.prompts import get_prompt_mbpp
app = typer.Typer() app = typer.Typer()

View File

@ -1,136 +1,277 @@
import os import json
import re from copy import deepcopy
import uuid from dataclasses import replace
from pathlib import Path
from typing import Any, Union
from chonkie import (
Chunk,
ElasticHandshake,
FileFetcher,
MarkdownChef,
TextChef,
TokenChunker,
MarkdownDocument
)
from elasticsearch import Elasticsearch
from loguru import logger from loguru import logger
from chonkie import Chunk, SemanticChunker from transformers import AutoTokenizer
from langchain_core.documents import Document
from scripts.pipelines.tasks.embeddings import OllamaEmbeddings
from src.config import settings
def replace_javascript_with_avap(text: str) -> str: def _get_text(element) -> str:
""" for attr in ("text", "content", "markdown"):
Replace mentions of javascript language with avap in the text. value = getattr(element, attr, None)
Handles code blocks, language identifiers, and references. if isinstance(value, str):
return value
Args: raise AttributeError(
text: The text to process. f"Could not extract text from element of type {type(element).__name__}"
Returns:
The text with javascript references replaced with avap.
"""
# Replace ```javascript with ```avap
text = text.replace("```javascript", "```avap")
# Replace ```js with ```avap
text = text.replace("```js", "```avap")
# Replace common phrases (case-insensitive)
text = re.sub(r"\bjavascript\s+code\b", "avap code", text, flags=re.IGNORECASE)
text = re.sub(
r"\bjavascript\s+example\b", "avap example", text, flags=re.IGNORECASE
) )
text = re.sub(r"\bjavascript\b(?!\s+file)", "avap", text, flags=re.IGNORECASE)
return text
def read_files( def _merge_markdown_document(processed_doc: MarkdownDocument) -> MarkdownDocument:
folder_path: str, file_prefix: str | None = None, concatenate: bool = True elements = []
) -> list[dict]:
for chunk in processed_doc.chunks:
elements.append(("chunk", chunk.start_index, chunk.end_index, chunk))
for code in processed_doc.code:
elements.append(("code", code.start_index, code.end_index, code))
for table in processed_doc.tables:
elements.append(("table", table.start_index, table.end_index, table))
elements.sort(key=lambda item: (item[1], item[2]))
merged_chunks = []
current_chunk = None
current_parts = []
current_end_index = None
current_token_count = None
def flush():
nonlocal current_chunk, current_parts, current_end_index, current_token_count
if current_chunk is None:
return
merged_text = "\n\n".join(part for part in current_parts if part)
merged_chunks.append(
replace(
current_chunk,
text=merged_text,
end_index=current_end_index,
token_count=current_token_count,
)
)
current_chunk = None
current_parts = []
current_end_index = None
current_token_count = None
for kind, _, _, element in elements:
if kind == "chunk":
flush()
current_chunk = element
current_parts = [_get_text(element)]
current_end_index = element.end_index
current_token_count = element.token_count
continue
if current_chunk is None:
continue
current_parts.append(_get_text(element))
current_end_index = max(current_end_index, element.end_index)
current_token_count += getattr(element, "token_count", 0)
flush()
fused_processed_doc = deepcopy(processed_doc)
fused_processed_doc.chunks = merged_chunks
fused_processed_doc.code = processed_doc.code
fused_processed_doc.tables = processed_doc.tables
return fused_processed_doc
class ElasticHandshakeWithMetadata(ElasticHandshake):
"""Extended ElasticHandshake that preserves chunk metadata in Elasticsearch."""
def _create_bulk_actions(self, chunks: list[dict]) -> list[dict[str, Any]]:
"""Generate bulk actions including metadata."""
actions = []
embeddings = self.embedding_model.embed_batch([chunk["chunk"].text for chunk in chunks])
for i, chunk in enumerate(chunks):
source = {
"text": chunk["chunk"].text,
"embedding": embeddings[i],
"start_index": chunk["chunk"].start_index,
"end_index": chunk["chunk"].end_index,
"token_count": chunk["chunk"].token_count,
}
# Include metadata if it exists
if chunk.get("extra_metadata"):
source.update(chunk["extra_metadata"])
actions.append({
"_index": self.index_name,
"_id": self._generate_id(i, chunk["chunk"]),
"_source": source,
})
return actions
def write(self, chunks: Union[Chunk, list[Chunk]]) -> list[dict[str, Any]]:
"""Write the chunks to the Elasticsearch index using the bulk API."""
if isinstance(chunks, Chunk):
chunks = [chunks]
actions = self._create_bulk_actions(chunks)
# Use the bulk helper to efficiently write the documents
from elasticsearch.helpers import bulk
success, errors = bulk(self.client, actions, raise_on_error=False)
if errors:
logger.warning(f"Encountered {len(errors)} errors during bulk indexing.") # type: ignore
# Optionally log the first few errors for debugging
for i, error in enumerate(errors[:5]): # type: ignore
logger.error(f"Error {i + 1}: {error}")
logger.info(f"Chonkie wrote {success} chunks to Elasticsearch index: {self.index_name}")
return actions
def fetch_documents(docs_folder_path: str, docs_extension: list[str]) -> list[Path]:
    """
    Fetch files from a folder that match the specified extensions.

    Args:
        docs_folder_path (str): Path to the folder containing documents
        docs_extension (list[str]): List of file extensions to filter by (e.g., [".md", ".avap"])

    Returns:
        List of Paths to the fetched documents
    """
    fetcher = FileFetcher()
    # NOTE(review): assumes FileFetcher.fetch takes dir/ext keywords and
    # returns a list of Paths — confirm against the fetcher implementation.
    docs_path = fetcher.fetch(dir=f"{settings.proj_root}/{docs_folder_path}", ext=docs_extension)
    return docs_path
def process_documents(docs_path: list[Path]) -> list[dict[str, Chunk | dict[str, Any]]]:
    """
    Process documents by applying appropriate chefs and chunking strategies based on file type.

    Markdown files are processed with MarkdownChef and merged via
    _merge_markdown_document; .avap files are processed with TextChef and
    chunked with a token chunker. Other extensions are skipped.

    Args:
        docs_path (list[Path]): List of Paths to the documents to be processed

    Returns:
        List of dicts with "chunk" (Chunk object) and "extra_metadata" (dict with file info)
    """
    processed_docs = []
    custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
    chef_md = MarkdownChef(tokenizer=custom_tokenizer)
    chef_txt = TextChef()
    chunker = TokenChunker(tokenizer=custom_tokenizer)
    for doc_path in docs_path:
        doc_extension = doc_path.suffix.lower()
        filename = doc_path.name
        if doc_extension == ".md":
            processed_doc = chef_md.process(doc_path)
            fused_doc = _merge_markdown_document(processed_doc)
            chunked_doc = fused_doc.chunks
        elif doc_extension == ".avap":
            processed_doc = chef_txt.process(doc_path)
            chunked_doc = chunker.chunk(processed_doc.content)
        else:
            # Unsupported extension: skip silently.
            continue
        for chunk in chunked_doc:
            processed_docs.append({
                "chunk": chunk,
                "extra_metadata": {"file": filename}
            })
    return processed_docs
def ingest_documents(
    chunked_docs: list[dict[str, Chunk | dict[str, Any]]],
    es_index: str,
    es_request_timeout: int,
    es_max_retries: int,
    es_retry_on_timeout: bool,
    delete_es_index: bool,
) -> list[dict[str, Any]]:
    """
    Ingest processed documents into an Elasticsearch index.

    Args:
        chunked_docs (list[dict[str, Any]]): List of dicts with "chunk" and "extra_metadata" keys
        es_index (str): Name of the Elasticsearch index to ingest into
        es_request_timeout (int): Timeout for Elasticsearch requests in seconds
        es_max_retries (int): Maximum number of retries for Elasticsearch requests
        es_retry_on_timeout (bool): Whether to retry on Elasticsearch request timeouts
        delete_es_index (bool): Whether to delete the existing Elasticsearch index before ingestion

    Returns:
        List of dicts with Elasticsearch response for each chunk
    """
    logger.info(
        f"Instantiating Elasticsearch client with URL: {settings.elasticsearch_local_url}..."
    )
    es = Elasticsearch(
        hosts=settings.elasticsearch_local_url,
        request_timeout=es_request_timeout,
        max_retries=es_max_retries,
        retry_on_timeout=es_retry_on_timeout,
    )
    # Optionally drop the index first so re-ingestion starts from a clean slate.
    if delete_es_index and es.indices.exists(index=es_index):
        logger.info(f"Deleting existing Elasticsearch index: {es_index}...")
        es.indices.delete(index=es_index)
    handshake = ElasticHandshakeWithMetadata(
        client=es,
        index_name=es_index,
        embedding_model=OllamaEmbeddings(model=settings.ollama_emb_model_name),
    )
    logger.info(
        f"Ingesting {len(chunked_docs)} chunks into Elasticsearch index: {es_index}..."
    )
    elasticsearch_chunks = handshake.write(chunked_docs)
    return elasticsearch_chunks
def export_documents(elasticsearch_chunks: list[dict[str, Any]], output_path: str) -> None:
    """
    Export processed documents to JSON files in the specified output folder.

    Args:
        elasticsearch_chunks (list[dict[str, Any]]): List of dicts with Elasticsearch response for each chunk
        output_path (str): Path to the file where the JSON will be saved

    Returns:
        None
    """
    output_path = settings.proj_root / output_path
    for chunk in elasticsearch_chunks:
        # Embeddings are numpy arrays; convert them for JSON serialization.
        chunk["_source"]["embedding"] = chunk["_source"]["embedding"].tolist()

    with output_path.open("w", encoding="utf-8") as f:
        json.dump(elasticsearch_chunks, f, ensure_ascii=False, indent=4)

    logger.info(f"Exported processed documents to {output_path}")

View File

@ -0,0 +1,125 @@
import requests
from typing import Any, Callable
import numpy as np
from chonkie.embeddings import BaseEmbeddings
from src.config import settings
class OllamaEmbeddings(BaseEmbeddings):
    """Chonkie embeddings adapter for a local Ollama embedding model."""

    def __init__(
        self,
        model: str,
        base_url: str = settings.ollama_local_url,
        timeout: float = 60.0,
        truncate: bool = True,
        keep_alive: str = "5m",
    ) -> None:
        self.model = model
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.truncate = truncate
        self.keep_alive = keep_alive
        # The vector dimension is discovered lazily from the first response.
        self._dimension: int | None = None

    @property
    def dimension(self) -> int:
        """Dimensionality of the embedding vectors, probed on first use."""
        if self._dimension is None:
            # Issue a minimal real embedding request to learn the vector size.
            probe = self.embed(" ")
            self._dimension = int(probe.shape[0])
        return self._dimension

    def embed(self, text: str) -> np.ndarray:
        """Embed a single text and return its vector as float32."""
        rows = self._embed_api(text)
        result = np.asarray(rows[0], dtype=np.float32)
        if self._dimension is None:
            self._dimension = int(result.shape[0])
        return result

    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed several texts with one API call; empty input yields []."""
        if not texts:
            return []
        rows = self._embed_api(texts)
        batch = [np.asarray(row, dtype=np.float32) for row in rows]
        if batch and self._dimension is None:
            self._dimension = int(batch[0].shape[0])
        return batch

    def count_tokens(self, text: str) -> int:
        """Count tokens via the prompt_eval_count field of an embed call."""
        response = self._post_embed(self._build_payload(text))
        return int(response["prompt_eval_count"])

    def count_tokens_batch(self, texts: list[str]) -> list[int]:
        # Ollama reports a single aggregate prompt_eval_count per request,
        # so each text must be counted with its own call.
        return [self.count_tokens(item) for item in texts]

    def get_tokenizer(self) -> Callable[[str], int]:
        # Chonkie mainly needs something usable for token counting.
        return self.count_tokens

    @classmethod
    def is_available(cls) -> bool:
        """Return True when the local Ollama server answers /api/tags."""
        try:
            probe = requests.get(
                f"{settings.ollama_local_url}/api/tags",
                timeout=5.0,
            )
            probe.raise_for_status()
        except requests.RequestException:
            return False
        return True

    def __repr__(self) -> str:
        return (
            f"OllamaEmbeddings(model={self.model!r}, "
            f"base_url={self.base_url!r}, dimension={self._dimension!r})"
        )

    def _build_payload(self, text_or_texts: str | list[str]) -> dict[str, Any]:
        """Assemble the JSON body for the /api/embed endpoint."""
        payload: dict[str, Any] = {
            "model": self.model,
            "input": text_or_texts,
            "truncate": self.truncate,
            "keep_alive": self.keep_alive,
        }
        return payload

    def _post_embed(self, payload: dict[str, Any]) -> dict[str, Any]:
        """POST the payload to /api/embed and validate the JSON response."""
        try:
            reply = requests.post(
                f"{self.base_url}/api/embed",
                json=payload,
                timeout=self.timeout,
            )
            reply.raise_for_status()
            data = reply.json()
        except requests.RequestException as exc:
            raise RuntimeError(
                f"Failed to call Ollama embeddings endpoint at {self.base_url}/api/embed"
            ) from exc
        if "embeddings" not in data:
            raise RuntimeError(
                f"Ollama response did not include 'embeddings'. Response keys: {list(data.keys())}"
            )
        return data

    def _embed_api(self, text_or_texts: str | list[str]) -> list[list[float]]:
        """Call the embed endpoint and return the raw embedding rows."""
        return self._post_embed(self._build_payload(text_or_texts))["embeddings"]

View File

@ -1,39 +1,29 @@
from pathlib import Path from pathlib import Path
from typing import Optional
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field
from dotenv import load_dotenv
from datetime import timedelta
import warnings
load_dotenv()
class Settings(BaseSettings): class Settings(BaseSettings):
raw_path_: str data_path_: Optional[str] = None
data_path_: str raw_path_: Optional[str] = None
processed_path_: str processed_path_: Optional[str] = None
models_path_: str models_path_: Optional[str] = None
external_path_: str external_path_: Optional[str] = None
kubeconfig_path: str interim_path_: Optional[str] = None
interim_path_: str kubeconfig_path_: Optional[str] = None
database_url: str postgres_url: str
openai_api_key: str
elasticsearch_index: str
elasticsearch_docs_index: str
elasticsearch_code_index: str
llm_base_url: str
ollama_url: str
ollama_local_url: str
langfuse_host: str
elasticsearch_url: str elasticsearch_url: str
elasticsearch_local_url: str elasticsearch_local_url: str
ollama_url: str
ollama_local_url: str
ollama_model_name: str ollama_model_name: str
ollama_emb_model_name: str ollama_emb_model_name: str
model_name: str langfuse_host: str
hf_emb_model_name: str
langfuse_public_key: str langfuse_public_key: str
langfuse_secret_key: str langfuse_secret_key: str
hf_token: str hf_token: str
hf_emb_model_name: str
model_config = SettingsConfigDict( model_config = SettingsConfigDict(
env_file=".env", env_file=".env",
@ -43,108 +33,40 @@ class Settings(BaseSettings):
) )
@property @property
def data_path(self) -> Path: def project_root(self) -> Path:
return Path(self.data_path_)
@property
def models_path(self) -> Path:
return Path(self.models_path_)
@property
def processed_path(self) -> Path:
return Path(self.processed_path_)
@property
def raw_path(self) -> Path:
return Path(self.raw_path_)
@property
def interim_path(self) -> Path:
return Path(self.interim_path_)
@property
def external_path(self) -> Path:
return Path(self.external_path_)
@property
def proj_root(self) -> Path:
return Path(__file__).resolve().parents[1] return Path(__file__).resolve().parents[1]
@property def _resolve_path(self, path: Optional[str]) -> Optional[Path]:
def database_url(self) -> str: if path is None:
return self.database_url return None
return self.project_root / path
@property @property
def openai_api_key(self) -> str: def data_path(self) -> Optional[Path]:
return self.openai_api_key return self._resolve_path(self.data_path_)
@property @property
def elasticsearch_index(self) -> str: def raw_path(self) -> Optional[Path]:
return self.elasticsearch_index return self._resolve_path(self.raw_path_)
@property @property
def elasticsearch_docs_index(self) -> str: def processed_path(self) -> Optional[Path]:
return self.elasticsearch_docs_index return self._resolve_path(self.processed_path_)
@property @property
def elasticsearch_code_index(self) -> str: def models_path(self) -> Optional[Path]:
return self.elasticsearch_code_index return self._resolve_path(self.models_path_)
@property @property
def llm_base_url(self) -> str: def external_path(self) -> Optional[Path]:
return self.llm_base_url return self._resolve_path(self.external_path_)
@property @property
def ollama_url(self) -> str: def interim_path(self) -> Optional[Path]:
return self.ollama_url return self._resolve_path(self.interim_path_)
@property @property
def ollama_local_url(self) -> str: def kubeconfig_path(self) -> Optional[Path]:
return self.ollama_local_url return self._resolve_path(self.kubeconfig_path_)
@property
def langfuse_host(self) -> str:
return self.langfuse_host
@property
def elasticsearch_url(self) -> str:
return self.elasticsearch_url
@property
def elasticsearch_local_url(self) -> str:
return self.elasticsearch_local_url
@property
def ollama_model_name(self) -> str:
return self.ollama_model_name
@property
def ollama_emb_model_name(self) -> str:
return self.ollama_emb_model_name
@property
def model_name(self) -> str:
return self.model_name
@property
def hf_emb_model_name(self) -> str:
return self.hf_emb_model_name
@property
def langfuse_public_key(self) -> str:
return self.langfuse_public_key
@property
def langfuse_secret_key(self) -> str:
return self.langfuse_secret_key
@property
def hf_token(self) -> str:
return self.hf_token
@property
def kubeconfig_path(self) -> Path:
return Path(self.kubeconfig_path)
settings = Settings() settings = Settings()

29
uv.lock
View File

@ -250,7 +250,6 @@ name = "assistance-engine"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "chonkie", extra = ["semantic"] },
{ name = "grpcio" }, { name = "grpcio" },
{ name = "grpcio-reflection" }, { name = "grpcio-reflection" },
{ name = "grpcio-tools" }, { name = "grpcio-tools" },
@ -273,7 +272,9 @@ dependencies = [
dev = [ dev = [
{ name = "beir" }, { name = "beir" },
{ name = "boto3" }, { name = "boto3" },
{ name = "chonkie", extra = ["elastic", "semantic"] },
{ name = "evidently" }, { name = "evidently" },
{ name = "flatbuffers" },
{ name = "jupyter" }, { name = "jupyter" },
{ name = "langfuse" }, { name = "langfuse" },
{ name = "litellm" }, { name = "litellm" },
@ -288,7 +289,6 @@ dev = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "chonkie", extras = ["semantic"], specifier = ">=1.5.6" },
{ name = "grpcio", specifier = ">=1.78.0" }, { name = "grpcio", specifier = ">=1.78.0" },
{ name = "grpcio-reflection", specifier = ">=1.78.0" }, { name = "grpcio-reflection", specifier = ">=1.78.0" },
{ name = "grpcio-tools", specifier = ">=1.78.0" }, { name = "grpcio-tools", specifier = ">=1.78.0" },
@ -311,7 +311,9 @@ requires-dist = [
dev = [ dev = [
{ name = "beir", specifier = ">=2.2.0" }, { name = "beir", specifier = ">=2.2.0" },
{ name = "boto3", specifier = ">=1.42.58" }, { name = "boto3", specifier = ">=1.42.58" },
{ name = "chonkie", extras = ["elastic", "semantic"], specifier = ">=1.6.0" },
{ name = "evidently", specifier = ">=0.7.20" }, { name = "evidently", specifier = ">=0.7.20" },
{ name = "flatbuffers", specifier = ">=25.12.19" },
{ name = "jupyter", specifier = ">=1.1.1" }, { name = "jupyter", specifier = ">=1.1.1" },
{ name = "langfuse", specifier = "<3" }, { name = "langfuse", specifier = "<3" },
{ name = "litellm", specifier = ">=1.82.0" }, { name = "litellm", specifier = ">=1.82.0" },
@ -595,7 +597,7 @@ wheels = [
[[package]] [[package]]
name = "chonkie" name = "chonkie"
version = "1.5.6" version = "1.6.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "chonkie-core" }, { name = "chonkie-core" },
@ -603,12 +605,15 @@ dependencies = [
{ name = "tenacity" }, { name = "tenacity" },
{ name = "tqdm" }, { name = "tqdm" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/a4/16/e51295955f5a627ebb7867dc2e7fa48d4c6dc2a5f3cde3690de84812e929/chonkie-1.5.6.tar.gz", hash = "sha256:282a24c20b88c4c28d8cae893ac78bcbee531a87d28ec86b419897a9eea2ecf3", size = 172066, upload-time = "2026-02-16T21:44:01.336Z" } sdist = { url = "https://files.pythonhosted.org/packages/e5/72/fdf8f89ff439f4ec357af0866c819512391936e4e61b6f15635a48434b8a/chonkie-1.6.0.tar.gz", hash = "sha256:14120d80610c1f549027fc7aa9a5ff604a729b545836f6cadd65d5ae83596279", size = 187056, upload-time = "2026-03-11T04:55:07.657Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/18/3a/24cf4cb377f4d44126231d55a19b48a645a0f78f891288a8d4300c95160d/chonkie-1.5.6-py3-none-any.whl", hash = "sha256:4c3be39a0f97315eb3c5efe6dc5d7933d3d27a1918b55c39ab211b403bb03df7", size = 210065, upload-time = "2026-02-16T21:43:59.926Z" }, { url = "https://files.pythonhosted.org/packages/ae/c2/7ea7d3409df220dd0e048b1113b44f47eccab9d517b00b037ab0e34c3c7a/chonkie-1.6.0-py3-none-any.whl", hash = "sha256:aa357e02f5cdacac6f8280c5e8651207c866b4137bcf20904db8670ee0808877", size = 232997, upload-time = "2026-03-11T04:55:05.252Z" },
] ]
[package.optional-dependencies] [package.optional-dependencies]
elastic = [
{ name = "elasticsearch" },
]
semantic = [ semantic = [
{ name = "model2vec" }, { name = "model2vec" },
{ name = "tokenizers" }, { name = "tokenizers" },
@ -1061,6 +1066,14 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" }, { url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" },
] ]
[[package]]
name = "flatbuffers"
version = "25.12.19"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" },
]
[[package]] [[package]]
name = "fqdn" name = "fqdn"
version = "1.5.1" version = "1.5.1"
@ -3112,14 +3125,14 @@ wheels = [
[[package]] [[package]]
name = "opentelemetry-proto" name = "opentelemetry-proto"
version = "1.39.1" version = "1.40.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "protobuf" }, { name = "protobuf" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } sdist = { url = "https://files.pythonhosted.org/packages/4c/77/dd38991db037fdfce45849491cb61de5ab000f49824a00230afb112a4392/opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd", size = 45667, upload-time = "2026-03-04T14:17:31.194Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, { url = "https://files.pythonhosted.org/packages/b9/b2/189b2577dde745b15625b3214302605b1353436219d42b7912e77fa8dc24/opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f", size = 72073, upload-time = "2026-03-04T14:17:16.673Z" },
] ]
[[package]] [[package]]