Merge branch 'mrh-online-dev' of github.com:BRUNIX-AI/assistance-engine into mrh-online-dev

This commit is contained in:
pseco 2026-03-12 15:56:27 +01:00
commit ba4a1f1efc
19 changed files with 93326 additions and 6043 deletions

View File

@ -35,7 +35,7 @@ class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
index_name=os.getenv("ELASTICSEARCH_INDEX"), index_name=os.getenv("ELASTICSEARCH_INDEX"),
embedding=self.embeddings, embedding=self.embeddings,
query_field="text", query_field="text",
vector_query_field="vector", vector_query_field="embedding",
) )
self.graph = build_graph( self.graph = build_graph(
llm=self.llm, llm=self.llm,

View File

@ -45,13 +45,15 @@ graph TD
├── README.md # System documentation & Dev guide ├── README.md # System documentation & Dev guide
├── changelog # Version tracking and release history ├── changelog # Version tracking and release history
├── pyproject.toml # Python project configuration ├── pyproject.toml
├── ingestion/ # Data ingested in Elasticsearch
├── docs/ ├── docs/
| ├── AVAP Language: ... # AVAP DSL Documentation ├── AVAP Language: ... # AVAP DSL Documentation
| | └── AVAP.md │ │ └── AVAP.md
│ ├── developer.avapfr... # Documents on developer web page │ ├── developer.avapfr... # Documents on developer web page
| └── LRM/ # AVAP LRM documentation │ ├── LRM/ # AVAP LRM documentation
| └── avap.md │ │ └── avap.md
│ └── samples/ # AVAP code samples
├── Docker/ ├── Docker/
│ ├── protos/ │ ├── protos/
│ │ └── brunix.proto # Protocol Buffers: The source of truth for the API │ │ └── brunix.proto # Protocol Buffers: The source of truth for the API
@ -64,30 +66,16 @@ graph TD
│ ├── Dockerfile # Container definition for the Engine │ ├── Dockerfile # Container definition for the Engine
│ ├── docker-compose.yaml # Local orchestration for dev environment │ ├── docker-compose.yaml # Local orchestration for dev environment
│ ├── requirements.txt # Python dependencies for Docker │ ├── requirements.txt # Python dependencies for Docker
│ ├── protos/ │ └── .dockerignore # Docker ignore files
│ │ └── brunix.proto # Protocol Buffers: The source of truth for the API
│ └── src/
│ ├── graph.py # Workflow graph orchestration
│ ├── prompts.py # Centralized prompt definitions
│ ├── server.py # gRPC Server & RAG Orchestration
│ ├── state.py # Shared state management
│ └── utils/ # Utility modules
├── ingestion/
│ └── docs/ # AVAP documentation chunks
├── kubernetes/
│ └── kubeconfig.yaml # Kubernetes cluster configuration
├── scripts/ ├── scripts/
│ └── pipelines/ │ └── pipelines/
| ├── samples_generator/ # AVAP Sample generator │ ├── flows/ # Processing pipelines
| | └─ generate_mbap.py │ └── tasks/ # Modules used by the flows
│ └── flows/ # Data processing flows
| └─ elasticsearch_ingestion.py
└── src/ └── src/
├── __init__.py ├── config.py # Environment variables configuration file
└── utils/ └── utils/
├── emb_factory.py # Embedding model factory ├── emb_factory.py # Embedding model factory
├── llm_factory.py # LLM model factory └── llm_factory.py # LLM model factory
└── __init__.py
``` ```
--- ---
@ -146,6 +134,7 @@ The engine utilizes Langfuse for end-to-end tracing and performance monitoring.
Create a `.env` file in the project root with the following variables: Create a `.env` file in the project root with the following variables:
```env ```env
PYTHONPATH=${PYTHONPATH}:/home/...
ELASTICSEARCH_URL=http://host.docker.internal:9200 ELASTICSEARCH_URL=http://host.docker.internal:9200
ELASTICSEARCH_LOCAL_URL=http://localhost:9200 ELASTICSEARCH_LOCAL_URL=http://localhost:9200
ELASTICSEARCH_INDEX=avap-docs-test ELASTICSEARCH_INDEX=avap-docs-test
@ -157,11 +146,13 @@ OLLAMA_URL=http://host.docker.internal:11434
OLLAMA_LOCAL_URL=http://localhost:11434 OLLAMA_LOCAL_URL=http://localhost:11434
OLLAMA_MODEL_NAME=qwen2.5:1.5b OLLAMA_MODEL_NAME=qwen2.5:1.5b
OLLAMA_EMB_MODEL_NAME=qwen3-0.6B-emb:latest OLLAMA_EMB_MODEL_NAME=qwen3-0.6B-emb:latest
HF_TOKEN=hf_...
HF_EMB_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B HF_EMB_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B
``` ```
| Variable | Required | Description | Example | | Variable | Required | Description | Example |
|---|---|---|---| |---|---|---|---|
| `PYTHONPATH` | No | Path that aims to the root of the project | `${PYTHONPATH}:/home/...` |
| `ELASTICSEARCH_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in Docker | `http://host.docker.internal:9200` | | `ELASTICSEARCH_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in Docker | `http://host.docker.internal:9200` |
| `ELASTICSEARCH_LOCAL_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in local | `http://localhost:9200` | | `ELASTICSEARCH_LOCAL_URL` | Yes | Elasticsearch endpoint used for vector/context retrieval in local | `http://localhost:9200` |
| `ELASTICSEARCH_INDEX` | Yes | Elasticsearch index name used by the engine | `avap-docs-test` | | `ELASTICSEARCH_INDEX` | Yes | Elasticsearch index name used by the engine | `avap-docs-test` |
@ -183,13 +174,13 @@ Open a terminal and establish the connection to the Devaron Cluster:
```bash ```bash
# 1. AI Model Tunnel (Ollama) # 1. AI Model Tunnel (Ollama)
kubectl port-forward --address 0.0.0.0 svc/ollama-light-service 11434:11434 -n brunix --kubeconfig ./kubernetes/ivar.yaml & kubectl port-forward --address 0.0.0.0 svc/ollama-light-service 11434:11434 -n brunix --kubeconfig ./kubernetes/kubeconfig.yaml &
# 2. Knowledge Base Tunnel (Elasticsearch) # 2. Knowledge Base Tunnel (Elasticsearch)
kubectl port-forward --address 0.0.0.0 svc/brunix-vector-db 9200:9200 -n brunix --kubeconfig ./kubernetes/ivar.yaml & kubectl port-forward --address 0.0.0.0 svc/brunix-vector-db 9200:9200 -n brunix --kubeconfig ./kubernetes/kubeconfig.yaml &
# 3. Observability DB Tunnel (PostgreSQL) # 3. Observability DB Tunnel (PostgreSQL)
kubectl port-forward --address 0.0.0.0 svc/brunix-postgres 5432:5432 -n brunix --kubeconfig ./kubernetes/ivar.yaml & kubectl port-forward --address 0.0.0.0 svc/brunix-postgres 5432:5432 -n brunix --kubeconfig ./kubernetes/kubeconfig.yaml &
``` ```
### 5. Launch the Engine ### 5. Launch the Engine

View File

@ -4,24 +4,23 @@ All notable changes to the **Brunix Assistance Engine** will be documented in th
--- ---
## [1.5.0] - 2026-03-11 ## [1.5.0] - 2026-03-12
### Added ### Added
- IMPLEMENTED: - IMPLEMENTED:
- `scripts/pipelines/flows/translate_mbpp.py`: pipeline to generate synthetic dataset from mbpp dataset. - `scripts/pipelines/flows/translate_mbpp.py`: pipeline to generate synthetic dataset from mbpp dataset.
- `scripts/input/prompts.py`: module containing prompts for pipelines. - `scripts/tasks/prompts.py`: module containing prompts for pipelines.
- `scripts/tasks/chunk.py`: module containing functions related to chunk management. - `scripts/tasks/chunk.py`: module containing functions related to chunk management.
- `synthethic_datasets`: folder containing generated synthetic datasets. - `synthethic_datasets`: folder containing generated synthetic datasets.
- `src/config.py`: environment variables configuration file. - `src/config.py`: environment variables configuration file.
### Changed ### Changed
- REFACTORED: `scripts/pipelines/flows/elasticsearch_ingestion.py` now uses `docs` documents instead of pre chunked files. - REFACTORED: `scripts/pipelines/flows/elasticsearch_ingestion.py` now uses `docs/LRM` or `docs/samples` documents instead of pre chunked files.
- RENAMED `docs/AVAP Language: Core Commands & Functional Specification` to `docs/avap_language_github_docs`. - RENAMED `docs/AVAP Language: Core Commands & Functional Specification` to `docs/avap_language_github_docs`.
- REMOVED: `Makefile` file. - REMOVED: `Makefile` file.
- REMOVED: `scripts/start-tunnels.sh` script. - REMOVED: `scripts/start-tunnels.sh` script.
- REMOVED `ingestion` folder.
- DEPENDENCIES: `requirements.txt` updated with new libraries required by the new modules. - DEPENDENCIES: `requirements.txt` updated with new libraries required by the new modules.
- MOVED `scripts/generate_mbpp_avap.py` into `scripts/flows/generate_mbpp_avap.py` - MOVED `scripts/generate_mbap.py` into `scripts/flows/generate_mbap.py`.
## [1.4.0] - 2026-03-10 ## [1.4.0] - 2026-03-10

File diff suppressed because it is too large Load Diff

View File

@ -115,7 +115,41 @@ AVAP utiliza una gramática estructural mixta. Combina la fluidez de las palabra
La estructura `if()` evalúa una expresión lógica o de comparación. Todo bloque condicional requiere un cierre explícito utilizando el comando `end()`. La estructura `if()` evalúa una expresión lógica o de comparación. Todo bloque condicional requiere un cierre explícito utilizando el comando `end()`.
El comando `if()` soporta dos modos de invocación: El comando `if()` soporta dos modos de invocación:
* **Modo 1 (comparación estructurada):** `if(variable, valor, comparador)` — evalúa la comparación entre variable y valor usando el operador indicado como string (ej. `"=="`, `">"`, `"!="`). Los dos primeros argumentos deben ser identificadores simples o literales, nunca expresiones de acceso como `dict['clave']`. Si se necesita comparar un valor extraído de una estructura, debe asignarse primero a una variable.* **Modo 2 (expresión libre):** `if(None, None, "expresion_compleja")` — evalúa directamente una expresión booleana compleja proporcionada como string. * **Modo 1 (comparación estructurada):** `if(variable, valor, comparador)` — evalúa la comparación entre variable y valor usando el operador indicado como string (ej. `"=="`, `">"`, `"!="`). Los dos primeros argumentos deben ser identificadores simples o literales, nunca expresiones de acceso como `dict['clave']`. Si se necesita comparar un valor extraído de una estructura, debe asignarse primero a una variable.* **Modo 2 (expresión libre):** `if(None, None, expresion_compleja)` — evalúa directamente una expresión booleana compleja proporcionada como string encapsulado entre `.
## SECCIÓN III: Lógica de Control y Estructuras de Decisión
AVAP utiliza una gramática estructural mixta. Combina la fluidez de las palabras clave para abrir bloques funcionales con la seguridad matemática de cierres estrictos.
### 3.1 El Bloque Condicional (if() / else() / end())
El comando `if()` gestiona la lógica condicional mediante dos modos de invocación estrictamente diferenciados. Es imperativo respetar los delimitadores y la posición de los argumentos.
#### Modo 1: Comparación Estructurada (Atómica)
Se utiliza para comparaciones directas entre dos valores simples.
* **Sintaxis:** `if(átomo_1, átomo_2, "operador")`
* **Argumentos 1 y 2:** Deben ser identificadores simples (variables) o literales (strings/números). **No se permite el uso de `None` en este modo.**
* **Argumento 3:** El operador de comparación debe ir obligatoriamente entre **comillas dobles** (`"=="`, `"!="`, `">"`, `"<"`, `">="`, `"<="`).
* **Restricción:** No se permiten expresiones de acceso (ej. `data.user` o `list[0]`). Estos valores deben asignarse previamente a una variable.
* **Ejemplo correcto:** `if(reintentos, 5, "<")`
#### Modo 2: Expresión Libre (Evaluación Compleja)
Se utiliza para evaluar expresiones lógicas que no encajan en la estructura atómica.
* **Sintaxis:** `if(None, None, `expresión_compleja`)`
* **Argumentos 1 y 2:** Deben ser literalmente la palabra `None` (sin comillas).
* **Argumento 3:** La expresión completa **debe** estar encapsulada entre **acentos graves (backticks)**. Esto permite incluir lógica interna, operadores `and/or` y accesos a estructuras de datos.
* **Ejemplo correcto:** `if(None, None, `user.id > 10 and email.contains("@")`)`
---
### Tabla de Validación para el Modelo
| Entrada | Estado | Razón |
| :--- | :--- | :--- |
| `if(count, 10, "==")` | ✅ VÁLIDO | Modo 1: Átomos válidos y operador entre comillas. |
| `if(None, None, `val > 0`)` | ✅ VÁLIDO | Modo 2: Uso correcto de `None` y backticks. |
| `if(username, None, "==")` | ❌ ERROR | El Modo 1 prohíbe el uso de `None`. Debe usarse el Modo 2. |
| `if(None, None, "val > 0")` | ❌ ERROR | El Modo 2 requiere backticks (`` ` ``), no comillas. |
| `if(user.id, 10, "==")` | ❌ ERROR | El Modo 1 no permite expresiones de acceso (`.`). |
### 3.2 Iteraciones Estrictas y Deterministas (startLoop / endLoop) ### 3.2 Iteraciones Estrictas y Deterministas (startLoop / endLoop)
Para garantizar el determinismo y evitar el colapso de memoria: Para garantizar el determinismo y evitar el colapso de memoria:
@ -137,15 +171,17 @@ Diseñada para proteger la estabilidad del servidor ante fallos de I/O.
[ "else()" <EOL> <block> ] [ "else()" <EOL> <block> ]
"end()" <EOL> "end()" <EOL>
/* if() soporta dos modos: <if_condition> ::= <if_structured> | <if_free_expression>
Modo 1 — comparación estructurada: los dos primeros argumentos deben ser
identificadores simples o literales, nunca expresiones de acceso. <if_structured> ::= "if" "(" <strict_atom> "," <strict_atom> "," <backtick_string> ")"
Si se necesita comparar un valor extraído de una estructura (ej. dict['clave']), <if_free_expression> ::= "if" "(" "None" "," "None" "," <backtick_string> ")"
debe asignarse previamente a una variable.
Modo 2 — expresión libre: None, None, expresión compleja como string */ <strict_atom> ::= <identifier> | <non_null_literal>
<if_condition> ::= <if_atom> "," <if_atom> "," <stringliteral> <backtick_string> ::= "`" <text_content> "`"
| "None" "," "None" "," <stringliteral>
<if_atom> ::= <identifier> | <literal> <identifier> ::= [a-zA-Z_][a-zA-Z0-9_]*
<non_null_literal>::= <number> | <string_literal_double_quotes>
/* Nota: <non_null_literal> NO incluye la palabra "None" */
<loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL> <loop_stmt> ::= "startLoop(" <identifier> "," <expression> "," <expression> ")" <EOL>
<block> <block>
@ -261,59 +297,116 @@ AVAP utiliza `avapConnector("TOKEN")` para la hidratación segura de credenciale
--- ---
## SECCIÓN VI: Utilidades, Criptografía y Manipulación de Datos # SECCIÓN VI: Utilidades, Criptografía y Manipulación de Datos
AVAP incluye un set de comandos integrados de alto nivel para manipular tipos complejos (JSON y Listas), tiempos, textos y generar hashes. AVAP incluye un set de comandos integrados de alto nivel para manipular tipos complejos (JSON y Listas), tiempos, textos y generar hashes.
### 6.1 Manipulación Nativa de Listas y Objetos JSON ---
Para extraer y mutar estructuras complejas, AVAP provee comandos nativos específicos:
* **`variableToList(elemento, destino)`**: Fuerza a que una variable escalar se convierta en una estructura iterable de lista.
* **`itemFromList(lista_origen, indice, destino)`**: Extrae de forma segura el elemento contenido en la posición `indice` de una lista.
* **`variableFromJSON(json_origen, clave, destino)`**: Parsea un objeto JSON en memoria y extrae el valor correspondiente a la `clave`.
* **`AddVariableToJSON(clave, valor, json_destino)`**: Inyecta dinámicamente una nueva propiedad dentro de un objeto JSON existente.
### 6.2 Criptografía y Expresiones Regulares ## 6.1 Manipulación Nativa de Listas y Objetos JSON
* **`encodeSHA256` y `encodeMD5(origen, destino)`**: Funciones criptográficas que encriptan de forma irreversible un texto. Vitales para el almacenamiento seguro de contraseñas.
* **`getRegex(origen, patron, destino)`**: Aplica una Expresión Regular (`patron`) sobre la variable de origen, extrayendo las coincidencias exactas.
### 6.3 Transformación de Tiempo y Cadenas Para extraer y mutar estructuras complejas, AVAP provee comandos nativos específicos. En AVAP, las listas **no se instancian con literales de array**, sino que se construyen y recorren a través de un conjunto cerrado de comandos especializados:
* **Fechas:** `getTimeStamp` (convierte un string a Epoch), `getDateTime` (Epoch a string legible), y `stampToDatetime` (Epoch a objeto datetime estructurado). Soportan formatos de calendario y cálculos con TimeDeltas.
* **Cadenas:** `replace` (saneamiento y sustitución de texto) y `randomString` (generación determinista de claves/tokens aleatorios).
### Especificación BNF (Sección VI) * **`variableToList(elemento, destino)`**: Fuerza a que una variable escalar se convierta en una estructura iterable de lista de un único elemento. Es el punto de entrada canónico para construir una lista desde cero a partir de un valor existente.
* **`itemFromList(lista_origen, indice, destino)`**: Extrae de forma segura el elemento contenido en la posición `indice` (base 0) de una lista. Equivale a un acceso por índice controlado.
* **`getListLen(lista, destino)`**: Calcula el número total de elementos contenidos en `lista` y almacena el resultado entero en `destino`. Imprescindible para construir bucles de recorrido seguro y para validar listas antes de acceder a sus índices. Se recomienda llamar siempre a `getListLen` antes de `itemFromList` para evitar accesos fuera de rango.
* **`variableFromJSON(json_origen, clave, destino)`**: Parsea un objeto JSON en memoria y extrae el valor correspondiente a la `clave`, almacenándolo en `destino`. El acceso es directo por nombre de propiedad.
* **`AddVariableToJSON(clave, valor, json_destino)`**: Inyecta dinámicamente una nueva propiedad dentro de un objeto JSON existente. Si la clave ya existe, su valor es sobreescrito.
**Patrón de recorrido típico en AVAP:**
```avap
// 1. Obtener longitud de la lista
getListLen(myList, len)
// 2. Iterar con índice controlado
i = 0
while (i < len) {
itemFromList(myList, i, currentItem)
// ... procesar currentItem ...
i = i + 1
}
```
---
## 6.2 Criptografía y Expresiones Regulares
* **`encodeSHA256(origen, destino)`** y **`encodeMD5(origen, destino)`**: Funciones criptográficas que encriptan de forma irreversible un texto. Vitales para el almacenamiento seguro de contraseñas y la verificación de integridad de datos. SHA-256 produce un digest de 64 caracteres hexadecimales y ofrece mayor resistencia criptográfica que MD5 (32 caracteres); se recomienda SHA-256 para nuevos desarrollos.
* **`getRegex(origen, patron, destino)`**: Aplica una Expresión Regular (`patron`) sobre la variable de origen, extrayendo la primera coincidencia exacta encontrada. El patrón sigue la sintaxis estándar compatible con Python `re`.
---
## 6.3 Transformación de Tiempo y Cadenas
### Fechas y Timestamps
AVAP provee tres comandos complementarios para cubrir todas las conversiones posibles entre representaciones de tiempo. Los tres soportan formatos de calendario en notación `strftime` de Python y cálculos con `TimeDelta` expresados en segundos (positivo para sumar, negativo para restar):
| Comando | Entrada | Salida |
|---|---|---|
| `getTimeStamp(fecha_string, formato, timedelta, destino)` | String de fecha | Epoch (entero) |
| `stampToDatetime(epoch, formato, timedelta, destino)` | Epoch (entero) | String de fecha |
| `getDateTime(formato, timedelta, zona_horaria, destino)` | — (ahora mismo) | String de fecha |
* **`getTimeStamp(fecha_string, formato, timedelta, destino)`**: Convierte un string de fecha legible a su valor Epoch (entero Unix). Útil para almacenar fechas y realizar cálculos aritméticos sobre ellas.
* **`stampToDatetime(epoch, formato, timedelta, destino)`**: Convierte un valor Epoch a un string de fecha con el formato especificado. Útil para presentar timestamps almacenados de forma legible.
* **`getDateTime(formato, timedelta, zona_horaria, destino)`**: Captura la fecha y hora actuales del sistema, aplica el ajuste `timedelta` y las convierte a la `zona_horaria` indicada antes de almacenar el resultado. Acepta cualquier zona horaria reconocida por la librería `pytz` de Python.
### Cadenas de Texto
* **`randomString(patron, longitud, destino)`**: Genera una cadena aleatoria de `longitud` caracteres cuyos símbolos están restringidos al conjunto definido por `patron` (expresión regular de caracteres). Útil para generar tokens de sesión, contraseñas temporales o identificadores únicos.
* **`replace(origen, patron_busqueda, reemplazo, destino)`**: Localiza todas las ocurrencias de `patron_busqueda` dentro de `origen` y las sustituye por `reemplazo`, almacenando el resultado en `destino`. Facilita el saneamiento y normalización de datos de entrada antes de su procesamiento o almacenamiento.
---
## BNF — Gramática Formal de los Comandos de Utilidad
```bnf ```bnf
/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */ <util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd>
<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>
/* Manipulación de listas y JSON */ /* Manipulación de listas y JSON */
<json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")" <json_list_cmd> ::= "variableToList(" <expression> "," <identifier> ")"
| "itemFromList(" <identifier> "," <expression> "," <identifier> ")" | "itemFromList(" <identifier> "," <expression> "," <identifier> ")"
| "getListLen(" <identifier> "," <identifier> ")"
| "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")" | "variableFromJSON(" <identifier> "," <expression> "," <identifier> ")"
| "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")" | "AddVariableToJSON(" <expression> "," <expression> "," <identifier> ")"
/* Criptografía */ /* Criptografía */
<crypto_cmd> ::= "encodeSHA256(" <identifier_or_string> "," <identifier> ")" <crypto_cmd> ::= "encodeSHA256(" <expression> "," <identifier> ")"
| "encodeMD5(" <identifier_or_string> "," <identifier> ")" | "encodeMD5(" <expression> "," <identifier> ")"
/* Expresiones regulares */ /* Expresiones regulares */
<regex_cmd> ::= "getRegex(" <identifier> "," <stringliteral> "," <identifier> ")" <regex_cmd> ::= "getRegex(" <identifier> "," <expression> "," <identifier> ")"
/* Fecha/hora actual → string */
<datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")" <datetime_cmd> ::= "getDateTime(" <stringliteral> "," <expression> "," <stringliteral> "," <identifier> ")"
/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */ /* Argumentos: formato_salida, timedelta, zona_horaria, destino */
/* Conversiones epoch ↔ string */
<stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")" <stamp_cmd> ::= "stampToDatetime(" <expression> "," <stringliteral> "," <expression> "," <identifier> ")"
/* Argumentos: epoch_origen, formato, timedelta, destino */ /* Argumentos: epoch_origen, formato, timedelta, destino */
| "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")" | "getTimeStamp(" <stringliteral> "," <stringliteral> "," <expression> "," <identifier> ")"
/* Argumentos: fecha_string, formato_entrada, timedelta, destino */ /* Argumentos: fecha_string, formato_entrada, timedelta, destino */
<string_cmd> ::= "randomString(" <expression> "," <identifier> ")" /* Cadenas */
/* Argumentos: longitud, destino */ <string_cmd> ::= "randomString(" <expression> "," <expression> "," <identifier> ")"
/* Argumentos: patron, longitud, destino */
<replace_cmd> ::= "replace(" <identifier_or_string> "," <stringliteral> "," <stringliteral> "," <identifier> ")" <replace_cmd> ::= "replace(" <identifier> "," <stringliteral> "," <stringliteral> "," <identifier> ")"
/* Argumentos: origen, patron_busqueda, reemplazo, destino */ /* Argumentos: origen, patron_busqueda, reemplazo, destino */
``` ```
--- ---
## SECCIÓN VII: Arquitectura de Funciones y Ámbitos (Scopes) ## SECCIÓN VII: Arquitectura de Funciones y Ámbitos (Scopes)

File diff suppressed because it is too large Load Diff

92295
ingestion/chunks.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -5,7 +5,6 @@ description = "Add your description here"
readme = "README.md" readme = "README.md"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
"chonkie[semantic]>=1.5.6",
"grpcio>=1.78.0", "grpcio>=1.78.0",
"grpcio-reflection>=1.78.0", "grpcio-reflection>=1.78.0",
"grpcio-tools>=1.78.0", "grpcio-tools>=1.78.0",
@ -28,7 +27,9 @@ dependencies = [
dev = [ dev = [
"beir>=2.2.0", "beir>=2.2.0",
"boto3>=1.42.58", "boto3>=1.42.58",
"chonkie[elastic,semantic]>=1.6.0",
"evidently>=0.7.20", "evidently>=0.7.20",
"flatbuffers>=25.12.19",
"jupyter>=1.1.1", "jupyter>=1.1.1",
"langfuse<3", "langfuse<3",
"litellm>=1.82.0", "litellm>=1.82.0",

View File

@ -1,30 +1,29 @@
import re
import hashlib
from typing import Any
from enum import Enum from enum import Enum
import typer import typer
import logging import logging
import os import os
from pathlib import Path
from loguru import logger from loguru import logger
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
from langchain_core.documents import Document
from langchain_elasticsearch import ElasticsearchStore from langchain_elasticsearch import ElasticsearchStore
from langchain_community.embeddings import HuggingFaceEmbeddings from chonkie import SemanticChunker, MarkdownChef
from langchain_experimental.text_splitter import SemanticChunker from transformers import AutoTokenizer
from src.utils.emb_factory import create_embedding_model from src.utils.emb_factory import create_embedding_model
from scripts.pipelines.tasks.chunk import scrape_avap_docs from scripts.pipelines.tasks.chunk import (
read_files,
get_chunk_docs,
convert_chunks_to_document
)
app = typer.Typer() app = typer.Typer()
ELASTICSEARCH_LOCAL_URL = os.getenv("ELASTICSEARCH_LOCAL_URL") ELASTICSEARCH_LOCAL_URL = os.getenv("ELASTICSEARCH_LOCAL_URL")
OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL") OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")
OLLAMA_URL = os.getenv("OLLAMA_URL") OLLAMA_URL = os.getenv("OLLAMA_URL")
OLLAMA_EMB_MODEL_NAME = os.getenv("OLLAMA_EMB_MODEL_NAME") OLLAMA_EMB_MODEL_NAME = os.getenv("OLLAMA_EMB_MODEL_NAME")
AVAP_WEB_DOCS_URL = os.getenv("AVAP_WEB_DOCS_URL") AVAP_WEB_DOCS_URL = os.getenv("AVAP_WEB_DOCS_URL")
HF_EMB_MODEL_NAME = os.getenv("HF_EMB_MODEL_NAME")
class DistanceStrategy(str, Enum): class DistanceStrategy(str, Enum):
euclidean = "EUCLIDEAN_DISTANCE" euclidean = "EUCLIDEAN_DISTANCE"
@ -33,55 +32,45 @@ class DistanceStrategy(str, Enum):
jaccard = "JACCARD" jaccard = "JACCARD"
cosine = "COSINE" cosine = "COSINE"
def clean_text(text: str) -> str:
text = text.replace("\u00a0", " ")
text = re.sub(r"\s+", " ", text).strip()
return text
def build_documents_from_folder(
folder_path: str,
) -> list[Document]:
folder = Path(folder_path)
if not folder.exists() or not folder.is_dir():
raise ValueError(f"Invalid folder path: {folder_path}")
all_documents: list[Document] = []
for file_path in folder.glob("*.txt"):
doc_text = file_path.read_text(encoding="utf-8")
if not doc_text.strip():
continue
metadata: dict[str, Any] = {
"source": file_path.name,
}
doc_text = clean_text(doc_text)
document = Document(
id=hashlib.md5(file_path.name.encode()).hexdigest(),
page_content=doc_text,
metadata={**metadata}
)
all_documents.append(document)
return all_documents
@app.command() @app.command()
def elasticsearch_ingestion( def elasticsearch_ingestion(
docs_folder_path: str = "ingestion/docs", docs_folder_path: str = "docs",
es_index: str = "avap-docs-test-v2",
es_request_timeout: int = 120, es_request_timeout: int = 120,
es_max_retries: int = 5, es_max_retries: int = 5,
es_retry_on_timeout: bool = True, es_retry_on_timeout: bool = True,
distance_strategy: DistanceStrategy = DistanceStrategy.cosine, distance_strategy: DistanceStrategy = DistanceStrategy.cosine,
chunk_size: int = 2048,
chunk_threshold: float = 0.5,
chunk_similarity_window: int = 3,
chunk_skip_window: int = 1,
): ):
logger.info("Starting Elasticsearch ingestion pipeline...") logger.info("Starting Elasticsearch ingestion pipeline...")
logger.info(f"Using docs folder path: {docs_folder_path}") logger.info(f"Reading files from folder: {docs_folder_path}/LRM and {docs_folder_path}/samples...")
documents = build_documents_from_folder(folder_path=docs_folder_path) avap_code_docs = read_files(f"{docs_folder_path}/samples")
avap_language_docs = read_files(f"{docs_folder_path}/LRM")
logger.info("Instantiating semantic chunker and chef...")
custom_tokenizer = AutoTokenizer.from_pretrained(HF_EMB_MODEL_NAME)
chef = MarkdownChef(tokenizer=custom_tokenizer)
chunker = SemanticChunker(
embedding_model=HF_EMB_MODEL_NAME,
chunk_size=chunk_size,
threshold=chunk_threshold,
similarity_window=chunk_similarity_window,
skip_window=chunk_skip_window
)
logger.info("Processing Markdown docs with chef...")
doc = chef.process(f"{docs_folder_path}/LRM/avap.md")
logger.info("Chunking AVAP Language docs...")
avap_language_docs_chunks = get_chunk_docs(avap_language_docs, chunker)
logger.info("Creating Langchain Document to index...")
avap_language_langchain_docs = convert_chunks_to_document(avap_language_docs_chunks)
avap_code_langchain_docs = convert_chunks_to_document(avap_code_docs)
avap_documents = avap_language_langchain_docs + avap_code_langchain_docs
logger.info("Connecting to Elasticsearch...") logger.info("Connecting to Elasticsearch...")
try: try:
@ -106,15 +95,19 @@ def elasticsearch_ingestion(
logger.exception("Failed to instantiate embeddings model.") logger.exception("Failed to instantiate embeddings model.")
raise raise
logger.info(f"Uploading documents to index {ELASTICSEARCH_INDEX}...") logger.info(f"Checking if index {es_index} exists and deleting if it does...")
if es.indices.exists(index=es_index):
es.indices.delete(index=es_index)
logger.info(f"Uploading documents to index {es_index}...")
ElasticsearchStore.from_documents( ElasticsearchStore.from_documents(
documents, avap_documents,
embeddings, embeddings,
client=es, client=es,
index_name=ELASTICSEARCH_INDEX, index_name=es_index,
distance_strategy=distance_strategy.value, distance_strategy=distance_strategy.value,
) )
logger.info(f"Finished uploading documents to index {ELASTICSEARCH_INDEX}.") logger.info(f"Finished uploading documents to index {es_index}.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -0,0 +1,122 @@
import typer
import logging
from loguru import logger
from elasticsearch import Elasticsearch
from chonkie import MarkdownChef, FileFetcher, ElasticHandshake
from transformers import AutoTokenizer
from src.config import settings
from scripts.pipelines.tasks.embeddings import OllamaEmbeddings
from scripts.pipelines.tasks.chunk import merge_markdown_document
app = typer.Typer()
def get_processing_and_chunking_config(docs_extension: str, chunk_size: int,
chunk_threshold: float | None,
chunk_similarity_window: int| None,
chunk_skip_window: int | None) -> tuple[str, dict, str, dict]:
"""
Check the file extension and return the appropriate processing and chunking strategies and their kwargs.
Args:
docs_extension (str): The file extension of the documents to be ingested.
chunk_size (int): The size of the chunks to be created.
chunk_threshold (float, optional): The threshold for semantic chunking. Required if docs_extension is .md.
chunk_similarity_window (int, optional): The similarity window for semantic chunking
chunk_skip_window (int, optional): The skip window for semantic chunking.
Returns:
tuple[str, dict, str, dict]: A tuple containing the processing strategy, its kwargs, the chunking strategy, and its kwargs.
"""
if docs_extension == ".md":
process_type = "markdown"
custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
process_kwargs = {"tokenizer": custom_tokenizer}
# process_type = "text"
# process_kwargs = {}
chunk_strat = "semantic"
chunk_kwargs = {"embedding_model": settings.hf_emb_model_name, "threshold": chunk_threshold, "chunk_size": chunk_size,
"similarity_window": chunk_similarity_window, "skip_window": chunk_skip_window}
elif docs_extension == ".avap":
process_type = "text"
process_kwargs = {}
chunk_strat = "recursive" # Once we have the BNF and uploaded to tree-sitter, we can use code (?)
chunk_kwargs = {"chunk_size": chunk_size}
return process_type, process_kwargs, chunk_strat, chunk_kwargs
@app.command()
def elasticsearch_ingestion(
    docs_folder_path: str = "docs/LRM",
    docs_extension: str = ".md",
    es_index: str = "avap-docs-test-v3",
    es_request_timeout: int = 120,
    es_max_retries: int = 5,
    es_retry_on_timeout: bool = True,
    delete_es_index: bool = True,
    chunk_size: int = 2048,
    chunk_threshold: float | None = 0.5,
    chunk_similarity_window: int | None = 3,
    chunk_skip_window: int | None = 1
) -> None:
    """Ingest documents from a local folder into an Elasticsearch index.

    Pipeline: fetch files from ``<proj_root>/<docs_folder_path>`` -> process
    each with a chonkie MarkdownChef -> merge per-document chunks with
    ``merge_markdown_document`` -> embed and write via ``ElasticHandshake``.

    Args:
        docs_folder_path: Folder (relative to the project root) to ingest.
        docs_extension: Extension of the documents; drives the strategy config.
        es_index: Target Elasticsearch index name.
        es_request_timeout: Per-request timeout (seconds) for the ES client.
        es_max_retries: Max retries for the ES client.
        es_retry_on_timeout: Whether the ES client retries on timeout.
        delete_es_index: If True, drop the index first for a clean re-ingest.
        chunk_size: Chunk size passed to the chunking strategy.
        chunk_threshold: Semantic-chunking threshold (markdown only).
        chunk_similarity_window: Semantic-chunking similarity window.
        chunk_skip_window: Semantic-chunking skip window.
    """
    # NOTE(review): this tokenizer is also re-created inside
    # get_processing_and_chunking_config for ".md" — consider sharing one.
    custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
    processed_docs = []
    fused_docs = []
    logger.info(f"Instantiating Elasticsearch client with URL: {settings.elasticsearch_local_url}...")
    es = Elasticsearch(
        hosts=settings.elasticsearch_local_url,
        request_timeout=es_request_timeout,
        max_retries=es_max_retries,
        retry_on_timeout=es_retry_on_timeout,
    )
    # Optional clean re-ingest: drop the index only if it already exists.
    if delete_es_index and es.indices.exists(index=es_index):
        logger.info(f"Deleting existing Elasticsearch index: {es_index}...")
        es.indices.delete(index=es_index)
    logger.info("Starting Elasticsearch ingestion pipeline...")
    # NOTE(review): process_kwargs and chunk_kwargs are computed here but never
    # used below (the chef is hardcoded to MarkdownChef and chunking comes from
    # merge_markdown_document); chunk_strat is only logged — confirm whether
    # non-.md ingestion is actually supported by this command.
    (process_type,
     process_kwargs,
     chunk_strat,
     chunk_kwargs) = get_processing_and_chunking_config(docs_extension, chunk_size, chunk_threshold, chunk_similarity_window, chunk_skip_window)
    logger.info(f"Fetching files from {docs_folder_path}...")
    fetcher = FileFetcher()
    docs = fetcher.fetch(dir=f"{settings.proj_root}/{docs_folder_path}")
    logger.info(f"Processing documents with process_type: {process_type}...")
    chef = MarkdownChef(tokenizer=custom_tokenizer)
    for doc in docs:
        processed_doc = chef.process(doc)
        processed_docs.append(processed_doc)
    logger.info(f"Chunking documents with chunk_strat: {chunk_strat}...")
    for processed_doc in processed_docs:
        fused_doc = merge_markdown_document(processed_doc)
        fused_docs.append(fused_doc)
    logger.info(f"Ingesting chunks in Elasticsearch index: {es_index}...")
    # ElasticHandshake embeds each chunk with the Ollama model and writes the
    # result to the target index.
    handshake = ElasticHandshake(
        client=es,
        index_name=es_index,
        embedding_model=OllamaEmbeddings(model=settings.ollama_emb_model_name)
    )
    for fused_doc in fused_docs:
        handshake.write(fused_doc.chunks)
    logger.info(f"Finished ingesting in {es_index}.")
if __name__ == "__main__":
    # Root logging config so library log records are visible alongside loguru.
    logging.basicConfig(
        format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
        level=logging.INFO,
    )
    try:
        app()
    except Exception as err:
        # Record the full traceback via loguru before propagating so the
        # process still exits with a non-zero status.
        logger.exception(err)
        raise

View File

@ -32,12 +32,7 @@
"\n", "\n",
"from src.utils.llm_factory import create_chat_model\n", "from src.utils.llm_factory import create_chat_model\n",
"from src.utils.emb_factory import create_embedding_model\n", "from src.utils.emb_factory import create_embedding_model\n",
"from src.config import (\n", "from src.config import settings"
" ELASTICSEARCH_LOCAL_URL,\n",
" ELASTICSEARCH_INDEX,\n",
" OLLAMA_MODEL_NAME,\n",
" OLLAMA_EMB_MODEL_NAME\n",
")"
] ]
}, },
{ {
@ -51,20 +46,20 @@
"\n", "\n",
"llm = create_chat_model(\n", "llm = create_chat_model(\n",
" provider=\"ollama\",\n", " provider=\"ollama\",\n",
" model=OLLAMA_MODEL_NAME,\n", " model=settings.ollama_model_name,\n",
" temperature=0.5,\n", " temperature=0.5,\n",
" validate_model_on_init=True,\n", " validate_model_on_init=True,\n",
")\n", ")\n",
"embeddings = create_embedding_model(\n", "embeddings = create_embedding_model(\n",
" provider=\"ollama\",\n", " provider=\"ollama\",\n",
" model=OLLAMA_EMB_MODEL_NAME,\n", " model=settings.ollama_emb_model_name,\n",
")\n", ")\n",
"vector_store = ElasticsearchStore(\n", "vector_store = ElasticsearchStore(\n",
" es_url=ELASTICSEARCH_LOCAL_URL,\n", " es_url=settings.elasticsearch_local_url,\n",
" index_name=ELASTICSEARCH_INDEX,\n", " index_name=\"avap-docs-test-v3\",\n",
" embedding=embeddings,\n", " embedding=embeddings,\n",
" query_field=\"text\",\n", " query_field=\"text\",\n",
" vector_query_field=\"vector\",\n", " vector_query_field=\"embedding\",\n",
" # strategy=ElasticsearchStore.ApproxRetrievalStrategy(\n", " # strategy=ElasticsearchStore.ApproxRetrievalStrategy(\n",
" # hybrid=True,\n", " # hybrid=True,\n",
" # rrf={\"rank_constant\": 60, \"window_size\": 100}\n", " # rrf={\"rank_constant\": 60, \"window_size\": 100}\n",
@ -464,44 +459,185 @@
"text": [ "text": [
"================================\u001b[1m Human Message \u001b[0m=================================\n", "================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n", "\n",
"What types of includes does AVAP have?\n" "What types of includes does AVAP have?\n",
] "[reformulate] 'What types of includes does AVAP have?' → '\"avap includes type\"'\n",
}, "================================\u001b[1m Human Message \u001b[0m=================================\n",
{ "\n",
"ename": "ResponseError", "What types of includes does AVAP have?\n",
"evalue": "failed to parse JSON: unexpected end of JSON input (status code: -1)", "[retrieve] 3 docs fetched\n",
"output_type": "error", "[1] id=chunk-1 source=Untitled\n",
"traceback": [ "\n",
"\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\n",
"\u001b[31mResponseError\u001b[39m Traceback (most recent call last)", "Token:\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[18]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m a = \u001b[43mstream_graph_updates\u001b[49m\u001b[43m(\u001b[49m\u001b[43muser_input\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43magentic_graph\u001b[49m\u001b[43m)\u001b[49m\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/decorators/langfuse_decorator.py:256\u001b[39m, in \u001b[36mLangfuseDecorator._sync_observe.<locals>.sync_wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 254\u001b[39m result = func(*args, **kwargs)\n\u001b[32m 255\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m256\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_handle_exception\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobservation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 257\u001b[39m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[32m 258\u001b[39m result = \u001b[38;5;28mself\u001b[39m._finalize_call(\n\u001b[32m 259\u001b[39m observation, result, capture_output, transform_to_string\n\u001b[32m 260\u001b[39m )\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/decorators/langfuse_decorator.py:520\u001b[39m, in \u001b[36mLangfuseDecorator._handle_exception\u001b[39m\u001b[34m(self, observation, e)\u001b[39m\n\u001b[32m 516\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m observation:\n\u001b[32m 517\u001b[39m _observation_params_context.get()[observation.id].update(\n\u001b[32m 518\u001b[39m level=\u001b[33m\"\u001b[39m\u001b[33mERROR\u001b[39m\u001b[33m\"\u001b[39m, status_message=\u001b[38;5;28mstr\u001b[39m(e)\n\u001b[32m 519\u001b[39m )\n\u001b[32m--> \u001b[39m\u001b[32m520\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/decorators/langfuse_decorator.py:254\u001b[39m, in \u001b[36mLangfuseDecorator._sync_observe.<locals>.sync_wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 251\u001b[39m result = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 253\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m254\u001b[39m result = \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 255\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 256\u001b[39m \u001b[38;5;28mself\u001b[39m._handle_exception(observation, e)\n", "ASSIGN\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[15]\u001b[39m\u001b[32m, line 9\u001b[39m, in \u001b[36mstream_graph_updates\u001b[39m\u001b[34m(user_input, graph)\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;129m@observe\u001b[39m(name=\u001b[33m\"\u001b[39m\u001b[33mgraph_run\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mstream_graph_updates\u001b[39m(user_input: \u001b[38;5;28mstr\u001b[39m, graph: StateGraph):\n\u001b[32m 3\u001b[39m langfuse_context.update_current_trace(\n\u001b[32m 4\u001b[39m user_id=\u001b[33m\"\u001b[39m\u001b[33malberto\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 5\u001b[39m tags=[\u001b[33m\"\u001b[39m\u001b[33mavap\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mrag\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mlanggraph\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 6\u001b[39m metadata={\u001b[33m\"\u001b[39m\u001b[33mfeature\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33magentic-rag\u001b[39m\u001b[33m\"\u001b[39m},\n\u001b[32m 7\u001b[39m )\n\u001b[32m----> \u001b[39m\u001b[32m9\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mevent\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mgraph\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 10\u001b[39m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_input\u001b[49m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 11\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mvalues\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 12\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 13\u001b[39m \u001b[43m \u001b[49m\u001b[43mevent\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43m-\u001b[49m\u001b[32;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpretty_print\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 15\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m event[\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m][-\u001b[32m1\u001b[39m]\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/pregel/main.py:2646\u001b[39m, in \u001b[36mPregel.stream\u001b[39m\u001b[34m(self, input, config, context, stream_mode, print_mode, output_keys, interrupt_before, interrupt_after, durability, subgraphs, debug, **kwargs)\u001b[39m\n\u001b[32m 2644\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m task \u001b[38;5;129;01min\u001b[39;00m loop.match_cached_writes():\n\u001b[32m 2645\u001b[39m loop.output_writes(task.id, task.writes, cached=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m-> \u001b[39m\u001b[32m2646\u001b[39m \u001b[43m\u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrunner\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtick\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2647\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtasks\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m.\u001b[49m\u001b[43mwrites\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2648\u001b[39m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstep_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2649\u001b[39m \u001b[43m \u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m=\u001b[49m\u001b[43mget_waiter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2650\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mloop\u001b[49m\u001b[43m.\u001b[49m\u001b[43maccept_push\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 2651\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 2652\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# emit output\u001b[39;49;00m\n\u001b[32m 2653\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01myield from\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m_output\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 2654\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprint_mode\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubgraphs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mqueue\u001b[49m\u001b[43m.\u001b[49m\u001b[43mEmpty\u001b[49m\n\u001b[32m 2655\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2656\u001b[39m loop.after_tick()\n", "[2] id=chunk-2 source=Untitled\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/pregel/_runner.py:167\u001b[39m, in \u001b[36mPregelRunner.tick\u001b[39m\u001b[34m(self, tasks, reraise, timeout, retry_policy, get_waiter, schedule_task)\u001b[39m\n\u001b[32m 165\u001b[39m t = tasks[\u001b[32m0\u001b[39m]\n\u001b[32m 166\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m167\u001b[39m \u001b[43mrun_with_retry\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 168\u001b[39m \u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 169\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 170\u001b[39m \u001b[43m \u001b[49m\u001b[43mconfigurable\u001b[49m\u001b[43m=\u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m 171\u001b[39m \u001b[43m \u001b[49m\u001b[43mCONFIG_KEY_CALL\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mpartial\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 172\u001b[39m \u001b[43m \u001b[49m\u001b[43m_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 173\u001b[39m \u001b[43m \u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mt\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 174\u001b[39m \u001b[43m \u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m=\u001b[49m\u001b[43mretry_policy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 175\u001b[39m \u001b[43m \u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m=\u001b[49m\u001b[43mweakref\u001b[49m\u001b[43m.\u001b[49m\u001b[43mref\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfutures\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 176\u001b[39m \u001b[43m \u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m=\u001b[49m\u001b[43mschedule_task\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 177\u001b[39m \u001b[43m 
\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43msubmit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 178\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 179\u001b[39m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 180\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 181\u001b[39m \u001b[38;5;28mself\u001b[39m.commit(t, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m 182\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m exc:\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/pregel/_retry.py:42\u001b[39m, in \u001b[36mrun_with_retry\u001b[39m\u001b[34m(task, retry_policy, configurable)\u001b[39m\n\u001b[32m 40\u001b[39m task.writes.clear()\n\u001b[32m 41\u001b[39m \u001b[38;5;66;03m# run the task\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m42\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43mproc\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtask\u001b[49m\u001b[43m.\u001b[49m\u001b[43minput\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m ParentCommand \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[32m 44\u001b[39m ns: \u001b[38;5;28mstr\u001b[39m = config[CONF][CONFIG_KEY_CHECKPOINT_NS]\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/_internal/_runnable.py:656\u001b[39m, in \u001b[36mRunnableSeq.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 654\u001b[39m \u001b[38;5;66;03m# run in context\u001b[39;00m\n\u001b[32m 655\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m set_config_context(config, run) \u001b[38;5;28;01mas\u001b[39;00m context:\n\u001b[32m--> \u001b[39m\u001b[32m656\u001b[39m \u001b[38;5;28minput\u001b[39m = \u001b[43mcontext\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 657\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 658\u001b[39m \u001b[38;5;28minput\u001b[39m = step.invoke(\u001b[38;5;28minput\u001b[39m, config)\n", "> **Nota de implementación:** `<connector_instantiation>` se distingue de `<orm_connector_init>` (ORM) únicamente por contexto semántico: el UUID pasado como argumento determina si el adaptador resuelto es un ORM de base de datos o un proxy de terceros. La gramática los trata de forma idéntica; el motor de ejecución selecciona el adaptador apropiado en runtime.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langgraph/_internal/_runnable.py:400\u001b[39m, in \u001b[36mRunnableCallable.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 398\u001b[39m run_manager.on_chain_end(ret)\n\u001b[32m 399\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m400\u001b[39m ret = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 401\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.recurse \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ret, Runnable):\n\u001b[32m 402\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m ret.invoke(\u001b[38;5;28minput\u001b[39m, config)\n", "\n",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[10]\u001b[39m\u001b[32m, line 5\u001b[39m, in \u001b[36magent\u001b[39m\u001b[34m(state)\u001b[39m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34magent\u001b[39m(state: AgenticAgentState) -> AgenticAgentState:\n\u001b[32m 4\u001b[39m llm_with_tools = llm.bind_tools(tools)\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m {\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m: [\u001b[43mllm_with_tools\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mSystemMessage\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m=\u001b[49m\u001b[43mAGENTIC_PROMPT\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[43m+\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m]}\n", "---\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/runnables/base.py:5695\u001b[39m, in \u001b[36mRunnableBindingBase.invoke\u001b[39m\u001b[34m(self, input, config, **kwargs)\u001b[39m\n\u001b[32m 5688\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m 5689\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34minvoke\u001b[39m(\n\u001b[32m 5690\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 5693\u001b[39m **kwargs: Any | \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 5694\u001b[39m ) -> Output:\n\u001b[32m-> \u001b[39m\u001b[32m5695\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mbound\u001b[49m\u001b[43m.\u001b[49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 5696\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m 5697\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_merge_configs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5698\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43m{\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 5699\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:402\u001b[39m, in \u001b[36mBaseChatModel.invoke\u001b[39m\u001b[34m(self, input, config, stop, **kwargs)\u001b[39m\n\u001b[32m 388\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m 389\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34minvoke\u001b[39m(\n\u001b[32m 390\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 395\u001b[39m **kwargs: Any,\n\u001b[32m 396\u001b[39m ) -> AIMessage:\n\u001b[32m 397\u001b[39m config = ensure_config(config)\n\u001b[32m 398\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[32m 399\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mAIMessage\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 400\u001b[39m cast(\n\u001b[32m 401\u001b[39m \u001b[33m\"\u001b[39m\u001b[33mChatGeneration\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m--> \u001b[39m\u001b[32m402\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 403\u001b[39m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_convert_input\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 404\u001b[39m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 405\u001b[39m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcallbacks\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 406\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mtags\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtags\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 407\u001b[39m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmetadata\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 408\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun_name\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrun_name\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 409\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpop\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrun_id\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 410\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 411\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m.generations[\u001b[32m0\u001b[39m][\u001b[32m0\u001b[39m],\n\u001b[32m 412\u001b[39m ).message,\n\u001b[32m 413\u001b[39m )\n", "## SECCIÓN VI: Utilidades, Criptografía y Manipulación de Datos\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:1123\u001b[39m, in \u001b[36mBaseChatModel.generate_prompt\u001b[39m\u001b[34m(self, prompts, stop, callbacks, **kwargs)\u001b[39m\n\u001b[32m 1114\u001b[39m \u001b[38;5;129m@override\u001b[39m\n\u001b[32m 1115\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_prompt\u001b[39m(\n\u001b[32m 1116\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 1120\u001b[39m **kwargs: Any,\n\u001b[32m 1121\u001b[39m ) -> LLMResult:\n\u001b[32m 1122\u001b[39m prompt_messages = [p.to_messages() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m prompts]\n\u001b[32m-> \u001b[39m\u001b[32m1123\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt_messages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:933\u001b[39m, in \u001b[36mBaseChatModel.generate\u001b[39m\u001b[34m(self, messages, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)\u001b[39m\n\u001b[32m 930\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i, m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(input_messages):\n\u001b[32m 931\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m 932\u001b[39m results.append(\n\u001b[32m--> \u001b[39m\u001b[32m933\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate_with_cache\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 934\u001b[39m \u001b[43m \u001b[49m\u001b[43mm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 935\u001b[39m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 936\u001b[39m \u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m 937\u001b[39m \u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 938\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 939\u001b[39m )\n\u001b[32m 940\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 941\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m run_managers:\n", "AVAP incluye un set de comandos integrados de alto nivel para manipular tipos complejos (JSON y Listas), tiempos, textos y 
generar hashes.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_core/language_models/chat_models.py:1235\u001b[39m, in \u001b[36mBaseChatModel._generate_with_cache\u001b[39m\u001b[34m(self, messages, stop, run_manager, **kwargs)\u001b[39m\n\u001b[32m 1233\u001b[39m result = generate_from_stream(\u001b[38;5;28miter\u001b[39m(chunks))\n\u001b[32m 1234\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m inspect.signature(\u001b[38;5;28mself\u001b[39m._generate).parameters.get(\u001b[33m\"\u001b[39m\u001b[33mrun_manager\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m-> \u001b[39m\u001b[32m1235\u001b[39m result = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_generate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1236\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m=\u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 1237\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1238\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1239\u001b[39m result = \u001b[38;5;28mself\u001b[39m._generate(messages, stop=stop, **kwargs)\n", "\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:1030\u001b[39m, in \u001b[36mChatOllama._generate\u001b[39m\u001b[34m(self, messages, stop, run_manager, **kwargs)\u001b[39m\n\u001b[32m 1023\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_generate\u001b[39m(\n\u001b[32m 1024\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 1025\u001b[39m messages: \u001b[38;5;28mlist\u001b[39m[BaseMessage],\n\u001b[32m (...)\u001b[39m\u001b[32m 1028\u001b[39m **kwargs: Any,\n\u001b[32m 1029\u001b[39m ) -> ChatResult:\n\u001b[32m-> \u001b[39m\u001b[32m1030\u001b[39m final_chunk = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_chat_stream_with_aggregation\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1031\u001b[39m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[32m 1032\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 1033\u001b[39m generation_info = final_chunk.generation_info\n\u001b[32m 1034\u001b[39m chat_generation = ChatGeneration(\n\u001b[32m 1035\u001b[39m message=AIMessage(\n\u001b[32m 1036\u001b[39m content=final_chunk.text,\n\u001b[32m (...)\u001b[39m\u001b[32m 1043\u001b[39m generation_info=generation_info,\n\u001b[32m 1044\u001b[39m )\n", "### 6.1 Manipulación Nativa de Listas y Objetos JSON\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:965\u001b[39m, in \u001b[36mChatOllama._chat_stream_with_aggregation\u001b[39m\u001b[34m(self, messages, stop, run_manager, verbose, **kwargs)\u001b[39m\n\u001b[32m 956\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_chat_stream_with_aggregation\u001b[39m(\n\u001b[32m 957\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 958\u001b[39m messages: \u001b[38;5;28mlist\u001b[39m[BaseMessage],\n\u001b[32m (...)\u001b[39m\u001b[32m 962\u001b[39m **kwargs: Any,\n\u001b[32m 963\u001b[39m ) -> ChatGenerationChunk:\n\u001b[32m 964\u001b[39m final_chunk = \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m965\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_iterate_over_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 966\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mfinal_chunk\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m:\u001b[49m\n\u001b[32m 967\u001b[39m \u001b[43m \u001b[49m\u001b[43mfinal_chunk\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43mchunk\u001b[49m\n", "Para extraer y mutar estructuras complejas, AVAP provee comandos nativos específicos:\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:1054\u001b[39m, in \u001b[36mChatOllama._iterate_over_stream\u001b[39m\u001b[34m(self, messages, stop, **kwargs)\u001b[39m\n\u001b[32m 1047\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_iterate_over_stream\u001b[39m(\n\u001b[32m 1048\u001b[39m \u001b[38;5;28mself\u001b[39m,\n\u001b[32m 1049\u001b[39m messages: \u001b[38;5;28mlist\u001b[39m[BaseMessage],\n\u001b[32m 1050\u001b[39m stop: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mstr\u001b[39m] | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 1051\u001b[39m **kwargs: Any,\n\u001b[32m 1052\u001b[39m ) -> Iterator[ChatGenerationChunk]:\n\u001b[32m 1053\u001b[39m reasoning = kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mreasoning\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m.reasoning)\n\u001b[32m-> \u001b[39m\u001b[32m1054\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_create_chat_stream\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 1055\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[32m 1056\u001b[39m \u001b[43m 
\u001b[49m\u001b[43mcontent\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 1057\u001b[39m \u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessage\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 1058\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessage\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream_resp\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessage\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[32m 1059\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m 1060\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "* **`variableToList(elemento, destino)`**: Fuerza a que una variable escalar se convierta en una estructura iterable de lista.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langchain_ollama/chat_models.py:952\u001b[39m, in \u001b[36mChatOllama._create_chat_stream\u001b[39m\u001b[34m(self, messages, stop, **kwargs)\u001b[39m\n\u001b[32m 950\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m chat_params[\u001b[33m\"\u001b[39m\u001b[33mstream\u001b[39m\u001b[33m\"\u001b[39m]:\n\u001b[32m 951\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client:\n\u001b[32m--> \u001b[39m\u001b[32m952\u001b[39m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client.chat(**chat_params)\n\u001b[32m 953\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client:\n\u001b[32m 954\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mself\u001b[39m._client.chat(**chat_params)\n", "* **`itemFromList(lista_origen, indice, destino)`**: Extrae de forma segura el elemento contenido en la posición `indice` de una lista.\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/ollama/_client.py:184\u001b[39m, in \u001b[36mClient._request.<locals>.inner\u001b[39m\u001b[34m()\u001b[39m\n\u001b[32m 182\u001b[39m part = json.loads(line)\n\u001b[32m 183\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m err := part.get(\u001b[33m'\u001b[39m\u001b[33merror\u001b[39m\u001b[33m'\u001b[39m):\n\u001b[32m--> \u001b[39m\u001b[32m184\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m ResponseError(err)\n\u001b[32m 185\u001b[39m \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(**part)\n", "* **`variableFromJSON(json_origen, clave, destino)`**: Parsea un objeto JSON en memoria y extrae el valor correspondiente a la `clave`.\n",
"\u001b[31mResponseError\u001b[39m: failed to parse JSON: unexpected end of JSON input (status code: -1)", "* **`AddVariableToJSON(clave, valor, json_destino)`**: Inyecta dinámicamente una nueva propiedad dentro de un objeto JSON existente.\n",
"During task with name 'agent' and id '9110cf29-5205-b67b-0456-234df433158a'" "\n",
"### 6.2 Criptografía y Expresiones Regulares\n",
"* **`encodeSHA256` y `encodeMD5(origen, destino)`**: Funciones criptográficas que encriptan de forma irreversible un texto. Vitales para el almacenamiento seguro de contraseñas.\n",
"* **`getRegex(origen, patron, destino)`**: Aplica una Expresión Regular (`patron`) sobre la variable de origen, extrayendo las coincidencias exactas.\n",
"\n",
"### 6.3 Transformación de Tiempo y Cadenas\n",
"* **Fechas:** `getTimeStamp` (convierte un string a Epoch), `getDateTime` (Epoch a string legible), y `stampToDatetime` (Epoch a objeto datetime estructurado). Soportan formatos de calendario y cálculos con TimeDeltas.\n",
"* **Cadenas:** `replace` (saneamiento y sustitución de texto) y `randomString` (generación determinista de claves/tokens aleatorios).\n",
"\n",
"### Especificación BNF (Sección VI)\n",
"\n",
"\n",
"\n",
"/* [CORRECCIÓN] Todas las subreglas de <util_command> están ahora completamente expandidas. */\n",
"<util_command> ::= <json_list_cmd> | <crypto_cmd> | <regex_cmd> | <datetime_cmd> | <stamp_cmd> | <string_cmd> | <replace_cmd>\n",
"\n",
"/* Manipulación de listas y JSON */\n",
"<json_list_cmd> ::= \"variableToList(\" <expression> \",\" <identifier> \")\"\n",
" | \"itemFromList(\" <identifier> \",\" <expression> \",\" <identifier> \")\"\n",
" | \"variableFromJSON(\" <identifier> \",\" <expression> \",\" <identifier> \")\"\n",
" | \"AddVariableToJSON(\" <expression> \",\" <expression> \",\" <identifier> \")\"\n",
"\n",
"/* Criptografía */\n",
"<crypto_cmd> ::= \"encodeSHA256(\" <identifier_or_string> \",\" <identifier> \")\"\n",
" | \"encodeMD5(\" <identifier_or_string> \",\" <identifier> \")\"\n",
"\n",
"/* Expresiones regulares */\n",
"<regex_cmd> ::= \"getRegex(\" <identifier> \",\" <stringliteral> \",\" <identifier> \")\"\n",
"\n",
"<datetime_cmd> ::= \"getDateTime(\" <stringliteral> \",\" <expression> \",\" <stringliteral> \",\" <identifier> \")\"\n",
"/* Argumentos: formato_salida, epoch_origen, zona_horaria, destino */\n",
"\n",
"<stamp_cmd> ::= \"stampToDatetime(\" <expression> \",\" <stringliteral> \",\" <expression> \",\" <identifier> \")\"\n",
"/* Argumentos: epoch_origen, formato, timedelta, destino */\n",
" | \"getTimeStamp(\" <stringliteral> \",\" <stringliteral> \",\" <expression> \",\" <identifier> \")\"\n",
"/* Argumentos: fecha_string, formato_entrada, timedelta, destino */\n",
"\n",
"<string_cmd> ::= \"randomString(\" <expression> \",\" <identifier> \")\"\n",
"/* Argumentos: longitud, destino */\n",
"\n",
"<replace_cmd> ::= \"replace(\" <identifier_or_string> \",\" <stringliteral> \",\" <stringliteral> \",\" <identifier> \")\"\n",
"/* Argumentos: origen, patron_busqueda, reemplazo, destino */\n",
"\n",
"[3] id=chunk-3 source=Untitled\n",
"\n",
"\n",
"---\n",
"\n",
"## SECCIÓN IX: Expresiones y Gramática Léxica Estricta\n",
"\n",
"Esta sección es el corazón matemático evaluador de AVAP. Define la jerarquía exacta (Precedencia) y provee soporte nativo para características avanzadas similares a Python.\n",
"\n",
"### 9.1 Cast de Tipos Explícito\n",
"AVAP permite conversiones de tipos (Type Casting) en cualquier evaluación utilizando funciones constructoras estándar. Puedes transformar variables dinámicamente usando `int(var)`, `float(var)` o `str(var)`.\n",
"\n",
"### 9.2 Slicing y Comprensiones (Comprehensions)\n",
"* **Slicing (Cortes):** Puedes extraer fragmentos de listas o strings utilizando la notación de dos puntos. Ejemplo: `mi_lista[1:4]` (extrae desde el índice 1 hasta el 3).\n",
"* **Comprehensions:** AVAP soporta la construcción rápida de listas mediante iteradores en una sola línea, permitiendo filtrar y mapear colecciones enteras (ej. `[x * 2 for x in valores if x > 0]`).\n",
"\n",
"### 9.3 Análisis Léxico (Lexer) y Documentación\n",
"AVAP cuenta con tres niveles de descarte de texto para anotaciones humanas:\n",
"1. **Comentarios de Línea (`//`):** Ignora el texto hasta el salto de línea.\n",
"2. **Comentarios de Bloque (`/* ... */`):** Para aislar bloques enteros multilínea.\n",
"3. **Comentarios de Documentación (`///`):** Utilizados por analizadores de código o IDEs para generar documentación técnica automática (Docstrings) a partir del código fuente.\n",
"\n",
"### Especificación BNF (Sección IX)\n",
"\n",
"\n",
"\n",
"/* Jerarquía de Expresiones (Precedencia de menor a mayor) */\n",
"<expression> ::= <logical_or>\n",
"<logical_or> ::= <logical_and> ( \"or\" <logical_and> )*\n",
"<logical_and> ::= <logical_not> ( \"and\" <logical_not> )*\n",
"<logical_not> ::= \"not\" <logical_not> | <comparison>\n",
"\n",
"<comparison> ::= <arithmetic> ( <comp_op> <arithmetic> )*\n",
"<comp_op> ::= \"==\" | \"!=\" | \"<\" | \">\" | \"<=\" | \">=\" | \"in\" | \"is\"\n",
"\n",
"<arithmetic> ::= <term> ( ( \"+\" | \"-\" ) <term> )*\n",
"<term> ::= <factor> ( ( \"*\" | \"/\" | \"%\" ) <factor> )*\n",
"<factor> ::= ( \"+\" | \"-\" ) <factor> | <power>\n",
"<power> ::= <primary> [ \"**\" <factor> ]\n",
"\n",
"/* Primarios y Átomos (Accesos, Castings, Slicing, Métodos y Funciones)\n",
" La regla <primary> cubre también el acceso a métodos de objetos conector\n",
" (conector.metodo(...)) y el acceso por clave a sus resultados (resultado[\"key\"]) */\n",
"<primary> ::= <atom>\n",
" | <primary> \".\" <identifier>\n",
" | <primary> \"[\" <expression> \"]\"\n",
" | <primary> \"[\" [<expression>] \":\" [<expression>] [\":\" [<expression>]] \"]\"\n",
" | <primary> \"(\" [<argument_list>] \")\"\n",
"\n",
"<atom> ::= <identifier>\n",
" | \"$\" <identifier>\n",
" | <literal>\n",
" | \"(\" <expression> \")\"\n",
" | <list_display>\n",
" | <dict_display>\n",
"\n",
"/* Estructuras de Datos, Comprensiones y Argumentos */\n",
"<list_display> ::= \"[\" [<argument_list>] \"]\"\n",
" | \"[\" <expression> \"for\" <identifier> \"in\" <expression> [<if_clause>] \"]\"\n",
"<if_clause> ::= \"if\" <expression>\n",
"<dict_display> ::= \"{\" [<key_datum_list>] \"}\"\n",
"<key_datum_list> ::= <key_datum> ( \",\" <key_datum> )*\n",
"<key_datum> ::= <expression> \":\" <expression>\n",
"<argument_list> ::= <expression> ( \",\" <expression> )*\n",
"\n",
"/* Tipo numérico unificado */\n",
"<number> ::= <floatnumber> | <integer>\n",
"\n",
"/* Literales (Tipos de Datos Primitivos Soportados) */\n",
"<literal> ::= <stringliteral> | <number> | <boolean> | \"None\"\n",
"<boolean> ::= \"True\" | \"False\"\n",
"<integer> ::= [0-9]+\n",
"<floatnumber> ::= [0-9]+ \".\" [0-9]* | \".\" [0-9]+\n",
"\n",
"/* Cadenas de Texto con soporte de secuencias de escape */\n",
"<stringliteral> ::= \"\\\"\" <text_double> \"\\\"\" | \"'\" <text_single> \"'\"\n",
"<escape_sequence> ::= \"\\\\\" ( \"\\\"\" | \"'\" | \"\\\\\" | \"n\" | \"t\" | \"r\" | \"0\" )\n",
"<text_double> ::= ( [^\"\\\\] | <escape_sequence> )*\n",
"<text_single> ::= ( [^'\\\\] | <escape_sequence> )*\n",
"<identifier_or_string> ::= <identifier> | <stringliteral>\n",
"\n",
"/* Reglas de Comentarios para el Lexer\n",
" El lexer aplica longest-match: /// debe evaluarse ANTES que // */\n",
"<doc_comment> ::= \"///\" <any_text>\n",
"<line_comment> ::= \"//\" <any_text>\n",
"<block_comment> ::= \"/*\" <any_content> \"*/\"\n",
"<any_text> ::= [^\\r\\n]*\n",
"<any_content> ::= /* Cualquier secuencia de caracteres que no contenga la subcadena \"*/\" */\n",
"================================\u001b[1m Human Message \u001b[0m=================================\n",
"\n",
"What types of includes does AVAP have?\n",
"==================================\u001b[1m Ai Message \u001b[0m==================================\n",
"\n",
"AVAP has two main types of include:\n",
"\n",
"1. **<connector_instantiation>:** This is used to instantiate a connector, which could be for a database connection or a third-party API.\n",
"2. **<orm_connector_init>:** This term seems to be related to initializing an Object-Relational Mapping (ORM) connector, indicating that the context suggests it's part of a specific ORM setup.\n",
"\n",
"Both types are treated similarly in terms of grammar but differ semantically by their purpose - one is for database connections or third-party APIs, while the other is specifically for connecting to ORMs. The engine selects the appropriate adapter based on runtime context.\n"
] ]
} }
], ],
"source": [ "source": [
"a = stream_graph_updates(user_input, agentic_graph)" "a = stream_graph_updates(user_input, guided_graph)"
] ]
}, },
{ {

View File

@ -1,134 +1,60 @@
from enum import Enum
import typer import typer
import logging import logging
import os
from pathlib import Path
from loguru import logger from loguru import logger
from elasticsearch import Elasticsearch
from langchain_elasticsearch import ElasticsearchStore
from chonkie import SemanticChunker
from src.utils.emb_factory import create_embedding_model
from scripts.pipelines.tasks.chunk import ( from scripts.pipelines.tasks.chunk import (
read_files, fetch_documents,
get_chunk_docs, process_documents,
convert_chunks_to_document export_documents,
ingest_documents
) )
app = typer.Typer() app = typer.Typer()
ELASTICSEARCH_LOCAL_URL = os.getenv("ELASTICSEARCH_LOCAL_URL")
OLLAMA_LOCAL_URL = os.getenv("OLLAMA_LOCAL_URL")
ELASTICSEARCH_INDEX = os.getenv("ELASTICSEARCH_INDEX")
OLLAMA_URL = os.getenv("OLLAMA_URL")
OLLAMA_EMB_MODEL_NAME = os.getenv("OLLAMA_EMB_MODEL_NAME")
AVAP_WEB_DOCS_URL = os.getenv("AVAP_WEB_DOCS_URL")
HF_EMB_MODEL_NAME = os.getenv("HF_EMB_MODEL_NAME")
class DistanceStrategy(str, Enum):
euclidean = "EUCLIDEAN_DISTANCE"
max_inner_product = "MAX_INNER_PRODUCT"
dot_product = "DOT_PRODUCT"
jaccard = "JACCARD"
cosine = "COSINE"
@app.command() @app.command()
def elasticsearch_ingestion( def elasticsearch_ingestion(
docs_folder_path: str = "docs", docs_folder_path: str = "docs/samples",
output_path: str = "ingestion/chunks.json",
docs_extension: list[str] = [".md", ".avap"],
es_index: str = "avap-docs-test-v3",
es_request_timeout: int = 120, es_request_timeout: int = 120,
es_max_retries: int = 5, es_max_retries: int = 5,
es_retry_on_timeout: bool = True, es_retry_on_timeout: bool = True,
distance_strategy: DistanceStrategy = DistanceStrategy.cosine, delete_es_index: bool = True
chunk_size: int = 2048, ) -> None:
chunk_threshold: float = 0.5, """
chunk_similarity_window: int = 3, Pipeline to ingest documents into an Elasticsearch index.
chunk_skip_window: int = 1, The pipeline includes fetching documents from a specified folder, processing them into chunks, and then ingesting those chunks into the specified Elasticsearch index.
):
Args:
docs_folder_path (str): Path to the folder containing documents to be ingested. Default is "docs/samples".
docs_extension (list[str]): List of file extensions to filter by (e.g., [".md", ".avap"]). Default is [".md", ".avap"].
es_index (str): Name of the Elasticsearch index to ingest documents into. Default is "avap-docs-test-v3".
es_request_timeout (int): Timeout in seconds for Elasticsearch requests. Default is 120 seconds.
es_max_retries (int): Maximum number of retries for Elasticsearch requests in case of failure. Default is 5 retries.
es_retry_on_timeout (bool): Whether to retry Elasticsearch requests on timeout. Default is True.
delete_es_index (bool): Whether to delete the existing Elasticsearch index before ingestion. Default is True.
Returns:
None
"""
logger.info("Starting Elasticsearch ingestion pipeline...") logger.info("Starting Elasticsearch ingestion pipeline...")
logger.info(f"Reading and concatenating files from folder: {docs_folder_path}/developer.avapframework.com") logger.info(f"Fetching files from {docs_folder_path}...")
avap_github_docs = read_files(f"{docs_folder_path}/avap_language_github_docs", concatenate=False) docs_path = fetch_documents(docs_folder_path, docs_extension)
avap_web_docs_intro = read_files(f"{docs_folder_path}/developer.avapframework.com", "intro", concatenate=True)
# Check chapters in developer.avapframework.com folder and read and concatenate files for each chapter logger.info("Processing docs...")
chapters = sorted({ chunked_docs = process_documents(docs_path)
p.name.split("_")[0]
for p in Path(f"{docs_folder_path}/developer.avapframework.com").glob("chapter*.md")
})
avap_web_docs_chapters = [ logger.info(f"Ingesting chunks in Elasticsearch index: {es_index}...")
item elasticsearch_docs = ingest_documents(chunked_docs, es_index, es_request_timeout, es_max_retries,
for chapter in chapters es_retry_on_timeout, delete_es_index)
for item in read_files(
f"{docs_folder_path}/developer.avapframework.com",
f"{chapter}_",
concatenate=True
)
]
avap_web_docs_appendices = read_files(f"{docs_folder_path}/developer.avapframework.com", "appendices_", concatenate=False) logger.info(f"Exporting processed documents to {output_path}...")
avap_samples_docs = read_files(f"{docs_folder_path}/samples", concatenate=False) export_documents(elasticsearch_docs, output_path)
logger.info("Instantiating semantic chunker...") logger.info(f"Finished ingesting in {es_index}.")
chunker = SemanticChunker(
embedding_model=HF_EMB_MODEL_NAME,
chunk_size=chunk_size,
threshold=chunk_threshold,
similarity_window=chunk_similarity_window,
skip_window=chunk_skip_window
)
logger.info("Chunking AVAP GitHub docs...")
avap_github_docs_chunks = get_chunk_docs(avap_github_docs, chunker)
logger.info("Chunking AVAP web docs chapters...")
avap_web_docs_chapters_chunks = get_chunk_docs(avap_web_docs_chapters, chunker)
logger.info("Creating Langchain Document to index...")
avap_github_langchain_docs = convert_chunks_to_document(avap_github_docs_chunks)
avap_web_chapters_langchain_docs = convert_chunks_to_document(avap_web_docs_chapters_chunks)
avap_web_intro_langchain_docs = convert_chunks_to_document(avap_web_docs_intro)
avap_web_appendices_langchain_docs = convert_chunks_to_document(avap_web_docs_appendices)
avap_samples_langchain_docs = convert_chunks_to_document(avap_samples_docs)
avap_documents = avap_github_langchain_docs + avap_web_chapters_langchain_docs + avap_web_intro_langchain_docs + avap_web_appendices_langchain_docs + avap_samples_langchain_docs
logger.info("Connecting to Elasticsearch...")
try:
es = Elasticsearch(
ELASTICSEARCH_LOCAL_URL,
request_timeout=es_request_timeout,
max_retries=es_max_retries,
retry_on_timeout=es_retry_on_timeout,
)
except:
logger.exception("Failed to connect to Elasticsearch.")
raise
logger.info("Instantiating embeddings model...")
try:
embeddings = create_embedding_model(
provider="ollama",
model=OLLAMA_EMB_MODEL_NAME,
base_url=OLLAMA_LOCAL_URL,
)
except:
logger.exception("Failed to instantiate embeddings model.")
raise
logger.info(f"Checking if index {ELASTICSEARCH_INDEX} exists and deleting if it does...")
if es.indices.exists(index=ELASTICSEARCH_INDEX):
es.indices.delete(index=ELASTICSEARCH_INDEX)
logger.info(f"Uploading documents to index {ELASTICSEARCH_INDEX}...")
ElasticsearchStore.from_documents(
avap_documents,
embeddings,
client=es,
index_name=ELASTICSEARCH_INDEX,
distance_strategy=distance_strategy.value,
)
logger.info(f"Finished uploading documents to index {ELASTICSEARCH_INDEX}.")
if __name__ == "__main__": if __name__ == "__main__":

View File

@ -1,9 +1,9 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Use: Use:
python generate_mbpp_avap.py python generate_mbap.py
python generate_mbpp_avap.py --lrm path/to/avap.md python generate_mbap.py --lrm path/to/avap.md
python generate_mbpp_avap.py --lrm avap.md --output output/mbpp_avap.json --problems 300 python generate_mbap.py --lrm avap.md --output output/mbpp_avap.json --problems 300
Requirements: Requirements:
pip install anthropic pip install anthropic
@ -53,7 +53,7 @@ REGLAS ESTRICTAS para el código AVAP generado:
5. if() Modo 1: if(var_o_literal, var_o_literal, "operador") 5. if() Modo 1: if(var_o_literal, var_o_literal, "operador")
los argumentos NO pueden ser expresiones de acceso como dict['key']; los argumentos NO pueden ser expresiones de acceso como dict['key'];
hay que extraer el valor a una variable propia primero. hay que extraer el valor a una variable propia primero.
6. if() Modo 2: if(None, None, "expresion_completa_como_string") 6. if() Modo 2: if(None, None, `expresion_completa_como_string`)
7. _status se asigna con: addVar(_status, 404) 7. _status se asigna con: addVar(_status, 404)
8. ormAccessSelect firma: ormAccessSelect(campos, "tabla", selector, varTarget) 8. ormAccessSelect firma: ormAccessSelect(campos, "tabla", selector, varTarget)
selector puede ser cadena vacía. selector puede ser cadena vacía.
@ -62,7 +62,7 @@ REGLAS ESTRICTAS para el código AVAP generado:
MODO DE EJECUCIÓN MUY IMPORTANTE: MODO DE EJECUCIÓN MUY IMPORTANTE:
- El código se ejecuta DIRECTAMENTE, línea a línea, sin servidor ni registro de endpoints. - El código se ejecuta DIRECTAMENTE, línea a línea, sin servidor ni registro de endpoints.
- NUNCA uses registerEndpoint(), NUNCA uses mainHandler(), NUNCA envuelvas el código en funciones solo para ejecutarlo salvo que queramos probar la funcionalidad de funciones. - NUNCA uses registerEndpoint(), NUNCA uses mainHandler(), NUNCA envuelvas el código en funciones solo para ejecutarlo.
- El código correcto es simplemente las instrucciones en línea, por ejemplo: - El código correcto es simplemente las instrucciones en línea, por ejemplo:
result = "Hello World" result = "Hello World"
addResult(result) addResult(result)
@ -82,29 +82,48 @@ Estructura exacta de cada elemento:
"task_id": <número entero>, "task_id": <número entero>,
"text": "<enunciado del problema en español>", "text": "<enunciado del problema en español>",
"code": "<código AVAP con saltos de línea como \\n>", "code": "<código AVAP con saltos de línea como \\n>",
"test_inputs": { "<param1>": <valor1>, "<param2>": <valor2> },
"test_list": ["<expr_python_1>", "<expr_python_2>"] "test_list": ["<expr_python_1>", "<expr_python_2>"]
} }
FORMATO DE test_inputs MUY IMPORTANTE:
- Es un objeto JSON con un valor fijo para cada variable que el código recibe via addParam().
- Los nombres de las claves deben coincidir EXACTAMENTE con el nombre de variable usado en addParam().
- Los valores deben ser concretos y representativos del problema (no genéricos como "test" o 123).
- Si el código no tiene ningún addParam(), el campo test_inputs debe ser un objeto vacío: {}
- Estos valores son los que el evaluador inyectará en el stack antes de ejecutar el código,
de modo que las aserciones de test_list puedan validar las variables de salida resultantes.
Ejemplo con addParam:
código: addParam("password", password)\\nencodeSHA256(password, hashed)\\naddResult(hashed)
test_inputs: { "password": "secret123" }
test_list: ["re.match(r'^[a-f0-9]{64}$', hashed)"]
Ejemplo sin addParam:
código: randomString(16, token)\\naddResult(token)
test_inputs: {}
test_list: ["re.match(r'^[a-zA-Z0-9]{16}$', token)"]
FORMATO DE test_list MUY IMPORTANTE: FORMATO DE test_list MUY IMPORTANTE:
Cada aserción debe ser una expresión Python con re.match() o re.search() Cada aserción debe ser una expresión Python con re.match()
evaluable directamente sobre las variables del stack AVAP (disponibles como evaluable directamente sobre las variables del stack AVAP (disponibles como
variables Python locales). El módulo 're' está siempre disponible. variables Python locales). El módulo 're' está siempre disponible.
La expresión debe devolver un match object (truthy) si el test pasa. La expresión debe devolver un match object (truthy) si el test pasa.
Reglas estrictas: Reglas estrictas:
- USA ÚNICAMENTE re.match(r'<patrón>', <variable>) o re.search(r'<patrón>', str(<variable>)) - USA ÚNICAMENTE re.match(r'<patrón>', <variable>)
- NO combines expresiones re.match en una aserción, cada asercion tiene que ser un unico re.match(r'<patrón>', <variable>)
- Convierte a string si es necesario: re.match(r'^\\d+$', str(result)) - Convierte a string si es necesario: re.match(r'^\\d+$', str(result))
- Puedes encadenar con 'and': re.match(r'^[a-zA-Z0-9]{32}$', token) and re.match(r'.{32}', token) - Puedes encadenar con 'and': re.match(r'^[a-zA-Z0-9]{32}$', token) and re.match(r'.{32}', token)
- Las variables referenciadas deben existir en el stack tras ejecutar el código. - Las variables referenciadas deben existir en el stack tras ejecutar el código.
- NUNCA uses comparaciones directas (==, !=, >, <). - NUNCA uses comparaciones directas (==, !=, >, <).
- NUNCA uses isinstance(), len(), assert, ni texto descriptivo. - NUNCA uses isinstance(), len(), assert, ni texto descriptivo.
- NUNCA uses nada que no sea re.match() o re.search(). - NUNCA uses nada que no sea re.match().
Ejemplos correctos de test_list: Ejemplos correctos de test_list:
"re.match(r'^[a-f0-9]{64}$', hashed)" "re.match(r'^[a-f0-9]{64}$', hashed)"
"re.match(r'^[a-zA-Z0-9]{32}$', token)" "re.match(r'^[a-zA-Z0-9]{32}$', token)"
"re.match(r'^\\d{4}-\\d{2}-\\d{2}$', date_str)" "re.match(r'^\\d{4}-\\d{2}-\\d{2}$', date_str)"
"re.search(r'Hello', result)"
"re.match(r'^-?\\d+(\\.\\d+)?$', str(result))" "re.match(r'^-?\\d+(\\.\\d+)?$', str(result))"
"re.match(r'^(par|impar)$', result)" "re.match(r'^(par|impar)$', result)"
"re.match(r'^40[134]$', str(_status))" "re.match(r'^40[134]$', str(_status))"
@ -138,22 +157,26 @@ Responde ÚNICAMENTE con el array JSON. Sin texto antes ni después.
def parse_response(raw: str): def parse_response(raw: str):
text = raw.strip() text = raw.strip()
if text.startswith("```"): if text.startswith("```"):
lines = text.splitlines() lines = text.splitlines()
inner = lines[1:] inner = lines[1:]
if inner and inner[-1].strip() == "```": if inner and inner[-1].strip() == "```":
inner = inner[:-1] inner = inner[:-1]
text = "\n".join(inner).strip() text = "\n".join(inner).strip()
problems = json.loads(text) problems = json.loads(text)
if not isinstance(problems, list): if not isinstance(problems, list):
raise ValueError("answer is not a JSON.") raise ValueError("response is not an JSON array")
for p in problems: for p in problems:
for field in ("task_id", "text", "code", "test_list"): for field in ("task_id", "text", "code", "test_list"):
if field not in p: if field not in p:
raise ValueError(f"field '{field}' not found in a problem.") raise ValueError(f"Field missing '{field}' in task_id={p.get('task_id','?')}.")
if "test_inputs" not in p:
p["test_inputs"] = {}
if not isinstance(p["test_inputs"], dict):
raise ValueError(f"'test_inputs' must by a JSON Object (task_id={p.get('task_id','?')}).")
return problems return problems

View File

@ -8,8 +8,7 @@ from botocore.config import Config
from pathlib import Path from pathlib import Path
from langchain_core.messages import SystemMessage, HumanMessage from langchain_core.messages import SystemMessage, HumanMessage
from src.utils.llm_factory import create_chat_model from src.utils.llm_factory import create_chat_model
from src.config import RAW_DIR, INTERIM_DIR from scripts.pipelines.tasks.prompts import get_prompt_mbpp
from scripts.pipelines.input.prompts import get_prompt_mbpp
app = typer.Typer() app = typer.Typer()

View File

@ -1,136 +1,277 @@
import os import json
import re from copy import deepcopy
import uuid from dataclasses import replace
from pathlib import Path
from typing import Any, Union
from chonkie import (
Chunk,
ElasticHandshake,
FileFetcher,
MarkdownChef,
TextChef,
TokenChunker,
MarkdownDocument
)
from elasticsearch import Elasticsearch
from loguru import logger from loguru import logger
from chonkie import Chunk, SemanticChunker from transformers import AutoTokenizer
from langchain_core.documents import Document
from scripts.pipelines.tasks.embeddings import OllamaEmbeddings
from src.config import settings
def replace_javascript_with_avap(text: str) -> str: def _get_text(element) -> str:
""" for attr in ("text", "content", "markdown"):
Replace mentions of javascript language with avap in the text. value = getattr(element, attr, None)
Handles code blocks, language identifiers, and references. if isinstance(value, str):
return value
Args: raise AttributeError(
text: The text to process. f"Could not extract text from element of type {type(element).__name__}"
Returns:
The text with javascript references replaced with avap.
"""
# Replace ```javascript with ```avap
text = text.replace("```javascript", "```avap")
# Replace ```js with ```avap
text = text.replace("```js", "```avap")
# Replace common phrases (case-insensitive)
text = re.sub(r"\bjavascript\s+code\b", "avap code", text, flags=re.IGNORECASE)
text = re.sub(
r"\bjavascript\s+example\b", "avap example", text, flags=re.IGNORECASE
) )
text = re.sub(r"\bjavascript\b(?!\s+file)", "avap", text, flags=re.IGNORECASE)
return text
def read_files( def _merge_markdown_document(processed_doc: MarkdownDocument) -> MarkdownDocument:
folder_path: str, file_prefix: str | None = None, concatenate: bool = True elements = []
) -> list[dict]:
for chunk in processed_doc.chunks:
elements.append(("chunk", chunk.start_index, chunk.end_index, chunk))
for code in processed_doc.code:
elements.append(("code", code.start_index, code.end_index, code))
for table in processed_doc.tables:
elements.append(("table", table.start_index, table.end_index, table))
elements.sort(key=lambda item: (item[1], item[2]))
merged_chunks = []
current_chunk = None
current_parts = []
current_end_index = None
current_token_count = None
def flush():
nonlocal current_chunk, current_parts, current_end_index, current_token_count
if current_chunk is None:
return
merged_text = "\n\n".join(part for part in current_parts if part)
merged_chunks.append(
replace(
current_chunk,
text=merged_text,
end_index=current_end_index,
token_count=current_token_count,
)
)
current_chunk = None
current_parts = []
current_end_index = None
current_token_count = None
for kind, _, _, element in elements:
if kind == "chunk":
flush()
current_chunk = element
current_parts = [_get_text(element)]
current_end_index = element.end_index
current_token_count = element.token_count
continue
if current_chunk is None:
continue
current_parts.append(_get_text(element))
current_end_index = max(current_end_index, element.end_index)
current_token_count += getattr(element, "token_count", 0)
flush()
fused_processed_doc = deepcopy(processed_doc)
fused_processed_doc.chunks = merged_chunks
fused_processed_doc.code = processed_doc.code
fused_processed_doc.tables = processed_doc.tables
return fused_processed_doc
class ElasticHandshakeWithMetadata(ElasticHandshake):
"""Extended ElasticHandshake that preserves chunk metadata in Elasticsearch."""
def _create_bulk_actions(self, chunks: list[dict]) -> list[dict[str, Any]]:
"""Generate bulk actions including metadata."""
actions = []
embeddings = self.embedding_model.embed_batch([chunk["chunk"].text for chunk in chunks])
for i, chunk in enumerate(chunks):
source = {
"text": chunk["chunk"].text,
"embedding": embeddings[i],
"start_index": chunk["chunk"].start_index,
"end_index": chunk["chunk"].end_index,
"token_count": chunk["chunk"].token_count,
}
# Include metadata if it exists
if chunk.get("extra_metadata"):
source.update(chunk["extra_metadata"])
actions.append({
"_index": self.index_name,
"_id": self._generate_id(i, chunk["chunk"]),
"_source": source,
})
return actions
def write(self, chunks: Union[Chunk, list[Chunk]]) -> list[dict[str, Any]]:
"""Write the chunks to the Elasticsearch index using the bulk API."""
if isinstance(chunks, Chunk):
chunks = [chunks]
actions = self._create_bulk_actions(chunks)
# Use the bulk helper to efficiently write the documents
from elasticsearch.helpers import bulk
success, errors = bulk(self.client, actions, raise_on_error=False)
if errors:
logger.warning(f"Encountered {len(errors)} errors during bulk indexing.") # type: ignore
# Optionally log the first few errors for debugging
for i, error in enumerate(errors[:5]): # type: ignore
logger.error(f"Error {i + 1}: {error}")
logger.info(f"Chonkie wrote {success} chunks to Elasticsearch index: {self.index_name}")
return actions
def fetch_documents(docs_folder_path: str, docs_extension: list[str]) -> list[Path]:
    """
    Fetch files from a folder that match the specified extensions.

    Args:
        docs_folder_path (str): Path to the folder containing documents
        docs_extension (list[str]): List of file extensions to filter by (e.g., [".md", ".avap"])

    Returns:
        List of Paths to the fetched documents
    """
    fetcher = FileFetcher()
    # NOTE(review): assumes FileFetcher.fetch takes dir/ext keywords and
    # returns a list of Paths — confirm against the fetcher implementation.
    docs_path = fetcher.fetch(dir=f"{settings.proj_root}/{docs_folder_path}", ext=docs_extension)
    return docs_path
def process_documents(docs_path: list[Path]) -> list[dict[str, Chunk | dict[str, Any]]]:
    """
    Process documents by applying appropriate chefs and chunking strategies based on file type.

    Markdown files are processed with MarkdownChef and merged via
    _merge_markdown_document; .avap files are processed with TextChef and
    chunked with a token chunker. Other extensions are skipped.

    Args:
        docs_path (list[Path]): List of Paths to the documents to be processed

    Returns:
        List of dicts with "chunk" (Chunk object) and "extra_metadata" (dict with file info)
    """
    processed_docs = []
    custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
    chef_md = MarkdownChef(tokenizer=custom_tokenizer)
    chef_txt = TextChef()
    chunker = TokenChunker(tokenizer=custom_tokenizer)
    for doc_path in docs_path:
        doc_extension = doc_path.suffix.lower()
        filename = doc_path.name
        if doc_extension == ".md":
            processed_doc = chef_md.process(doc_path)
            fused_doc = _merge_markdown_document(processed_doc)
            chunked_doc = fused_doc.chunks
        elif doc_extension == ".avap":
            processed_doc = chef_txt.process(doc_path)
            chunked_doc = chunker.chunk(processed_doc.content)
        else:
            # Unsupported extension: skip silently.
            continue
        for chunk in chunked_doc:
            processed_docs.append({
                "chunk": chunk,
                "extra_metadata": {"file": filename}
            })
    return processed_docs
def ingest_documents(
    chunked_docs: list[dict[str, Chunk | dict[str, Any]]],
    es_index: str,
    es_request_timeout: int,
    es_max_retries: int,
    es_retry_on_timeout: bool,
    delete_es_index: bool,
) -> list[dict[str, Any]]:
    """
    Ingest processed documents into an Elasticsearch index.

    Args:
        chunked_docs (list[dict[str, Any]]): List of dicts with "chunk" and "extra_metadata" keys
        es_index (str): Name of the Elasticsearch index to ingest into
        es_request_timeout (int): Timeout for Elasticsearch requests in seconds
        es_max_retries (int): Maximum number of retries for Elasticsearch requests
        es_retry_on_timeout (bool): Whether to retry on Elasticsearch request timeouts
        delete_es_index (bool): Whether to delete the existing Elasticsearch index before ingestion

    Returns:
        List of dicts with Elasticsearch response for each chunk
    """
    logger.info(
        f"Instantiating Elasticsearch client with URL: {settings.elasticsearch_local_url}..."
    )
    es = Elasticsearch(
        hosts=settings.elasticsearch_local_url,
        request_timeout=es_request_timeout,
        max_retries=es_max_retries,
        retry_on_timeout=es_retry_on_timeout,
    )
    # Optionally drop the index first so re-ingestion starts from a clean slate.
    if delete_es_index and es.indices.exists(index=es_index):
        logger.info(f"Deleting existing Elasticsearch index: {es_index}...")
        es.indices.delete(index=es_index)
    handshake = ElasticHandshakeWithMetadata(
        client=es,
        index_name=es_index,
        embedding_model=OllamaEmbeddings(model=settings.ollama_emb_model_name),
    )
    logger.info(
        f"Ingesting {len(chunked_docs)} chunks into Elasticsearch index: {es_index}..."
    )
    elasticsearch_chunks = handshake.write(chunked_docs)
    return elasticsearch_chunks
def export_documents(elasticsearch_chunks: list[dict[str, Any]], output_path: str) -> None:
    """
    Export processed documents to JSON files in the specified output folder.

    Args:
        elasticsearch_chunks (list[dict[str, Any]]): List of dicts with Elasticsearch response for each chunk
        output_path (str): Path to the file where the JSON will be saved

    Returns:
        None
    """
    output_path = settings.proj_root / output_path
    for chunk in elasticsearch_chunks:
        # Embeddings are numpy arrays; convert them for JSON serialization.
        chunk["_source"]["embedding"] = chunk["_source"]["embedding"].tolist()

    with output_path.open("w", encoding="utf-8") as f:
        json.dump(elasticsearch_chunks, f, ensure_ascii=False, indent=4)

    logger.info(f"Exported processed documents to {output_path}")

View File

@ -0,0 +1,125 @@
import requests
from typing import Any, Callable
import numpy as np
from chonkie.embeddings import BaseEmbeddings
from src.config import settings
class OllamaEmbeddings(BaseEmbeddings):
    """Chonkie embeddings adapter for a local Ollama embedding model."""

    def __init__(
        self,
        model: str,
        base_url: str = settings.ollama_local_url,
        timeout: float = 60.0,
        truncate: bool = True,
        keep_alive: str = "5m",
    ) -> None:
        self.model = model
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.truncate = truncate
        self.keep_alive = keep_alive
        # The vector dimension is discovered lazily from the first response.
        self._dimension: int | None = None

    @property
    def dimension(self) -> int:
        """Dimensionality of the embedding vectors, probed on first use."""
        if self._dimension is None:
            # Issue a minimal real embedding request to learn the vector size.
            probe = self.embed(" ")
            self._dimension = int(probe.shape[0])
        return self._dimension

    def embed(self, text: str) -> np.ndarray:
        """Embed a single text and return its vector as float32."""
        rows = self._embed_api(text)
        result = np.asarray(rows[0], dtype=np.float32)
        if self._dimension is None:
            self._dimension = int(result.shape[0])
        return result

    def embed_batch(self, texts: list[str]) -> list[np.ndarray]:
        """Embed several texts with one API call; empty input yields []."""
        if not texts:
            return []
        rows = self._embed_api(texts)
        batch = [np.asarray(row, dtype=np.float32) for row in rows]
        if batch and self._dimension is None:
            self._dimension = int(batch[0].shape[0])
        return batch

    def count_tokens(self, text: str) -> int:
        """Count tokens via the prompt_eval_count field of an embed call."""
        response = self._post_embed(self._build_payload(text))
        return int(response["prompt_eval_count"])

    def count_tokens_batch(self, texts: list[str]) -> list[int]:
        # Ollama reports a single aggregate prompt_eval_count per request,
        # so each text must be counted with its own call.
        return [self.count_tokens(item) for item in texts]

    def get_tokenizer(self) -> Callable[[str], int]:
        # Chonkie mainly needs something usable for token counting.
        return self.count_tokens

    @classmethod
    def is_available(cls) -> bool:
        """Return True when the local Ollama server answers /api/tags."""
        try:
            probe = requests.get(
                f"{settings.ollama_local_url}/api/tags",
                timeout=5.0,
            )
            probe.raise_for_status()
        except requests.RequestException:
            return False
        return True

    def __repr__(self) -> str:
        return (
            f"OllamaEmbeddings(model={self.model!r}, "
            f"base_url={self.base_url!r}, dimension={self._dimension!r})"
        )

    def _build_payload(self, text_or_texts: str | list[str]) -> dict[str, Any]:
        """Assemble the JSON body for the /api/embed endpoint."""
        payload: dict[str, Any] = {
            "model": self.model,
            "input": text_or_texts,
            "truncate": self.truncate,
            "keep_alive": self.keep_alive,
        }
        return payload

    def _post_embed(self, payload: dict[str, Any]) -> dict[str, Any]:
        """POST the payload to /api/embed and validate the JSON response."""
        try:
            reply = requests.post(
                f"{self.base_url}/api/embed",
                json=payload,
                timeout=self.timeout,
            )
            reply.raise_for_status()
            data = reply.json()
        except requests.RequestException as exc:
            raise RuntimeError(
                f"Failed to call Ollama embeddings endpoint at {self.base_url}/api/embed"
            ) from exc
        if "embeddings" not in data:
            raise RuntimeError(
                f"Ollama response did not include 'embeddings'. Response keys: {list(data.keys())}"
            )
        return data

    def _embed_api(self, text_or_texts: str | list[str]) -> list[list[float]]:
        """Call the embed endpoint and return the raw embedding rows."""
        return self._post_embed(self._build_payload(text_or_texts))["embeddings"]

View File

@ -1,39 +1,29 @@
from pathlib import Path from pathlib import Path
from typing import Optional
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field
from dotenv import load_dotenv
from datetime import timedelta
import warnings
load_dotenv()
class Settings(BaseSettings): class Settings(BaseSettings):
raw_path_: str data_path_: Optional[str] = None
data_path_: str raw_path_: Optional[str] = None
processed_path_: str processed_path_: Optional[str] = None
models_path_: str models_path_: Optional[str] = None
external_path_: str external_path_: Optional[str] = None
kubeconfig_path: str interim_path_: Optional[str] = None
interim_path_: str kubeconfig_path_: Optional[str] = None
database_url: str postgres_url: str
openai_api_key: str
elasticsearch_index: str
elasticsearch_docs_index: str
elasticsearch_code_index: str
llm_base_url: str
ollama_url: str
ollama_local_url: str
langfuse_host: str
elasticsearch_url: str elasticsearch_url: str
elasticsearch_local_url: str elasticsearch_local_url: str
ollama_url: str
ollama_local_url: str
ollama_model_name: str ollama_model_name: str
ollama_emb_model_name: str ollama_emb_model_name: str
model_name: str langfuse_host: str
hf_emb_model_name: str
langfuse_public_key: str langfuse_public_key: str
langfuse_secret_key: str langfuse_secret_key: str
hf_token: str hf_token: str
hf_emb_model_name: str
model_config = SettingsConfigDict( model_config = SettingsConfigDict(
env_file=".env", env_file=".env",
@ -43,108 +33,40 @@ class Settings(BaseSettings):
) )
@property @property
def data_path(self) -> Path: def project_root(self) -> Path:
return Path(self.data_path_)
@property
def models_path(self) -> Path:
return Path(self.models_path_)
@property
def processed_path(self) -> Path:
return Path(self.processed_path_)
@property
def raw_path(self) -> Path:
return Path(self.raw_path_)
@property
def interim_path(self) -> Path:
return Path(self.interim_path_)
@property
def external_path(self) -> Path:
return Path(self.external_path_)
@property
def proj_root(self) -> Path:
return Path(__file__).resolve().parents[1] return Path(__file__).resolve().parents[1]
@property def _resolve_path(self, path: Optional[str]) -> Optional[Path]:
def database_url(self) -> str: if path is None:
return self.database_url return None
return self.project_root / path
@property @property
def openai_api_key(self) -> str: def data_path(self) -> Optional[Path]:
return self.openai_api_key return self._resolve_path(self.data_path_)
@property @property
def elasticsearch_index(self) -> str: def raw_path(self) -> Optional[Path]:
return self.elasticsearch_index return self._resolve_path(self.raw_path_)
@property @property
def elasticsearch_docs_index(self) -> str: def processed_path(self) -> Optional[Path]:
return self.elasticsearch_docs_index return self._resolve_path(self.processed_path_)
@property @property
def elasticsearch_code_index(self) -> str: def models_path(self) -> Optional[Path]:
return self.elasticsearch_code_index return self._resolve_path(self.models_path_)
@property @property
def llm_base_url(self) -> str: def external_path(self) -> Optional[Path]:
return self.llm_base_url return self._resolve_path(self.external_path_)
@property @property
def ollama_url(self) -> str: def interim_path(self) -> Optional[Path]:
return self.ollama_url return self._resolve_path(self.interim_path_)
@property @property
def ollama_local_url(self) -> str: def kubeconfig_path(self) -> Optional[Path]:
return self.ollama_local_url return self._resolve_path(self.kubeconfig_path_)
@property
def langfuse_host(self) -> str:
return self.langfuse_host
@property
def elasticsearch_url(self) -> str:
return self.elasticsearch_url
@property
def elasticsearch_local_url(self) -> str:
return self.elasticsearch_local_url
@property
def ollama_model_name(self) -> str:
return self.ollama_model_name
@property
def ollama_emb_model_name(self) -> str:
return self.ollama_emb_model_name
@property
def model_name(self) -> str:
return self.model_name
@property
def hf_emb_model_name(self) -> str:
return self.hf_emb_model_name
@property
def langfuse_public_key(self) -> str:
return self.langfuse_public_key
@property
def langfuse_secret_key(self) -> str:
return self.langfuse_secret_key
@property
def hf_token(self) -> str:
return self.hf_token
@property
def kubeconfig_path(self) -> Path:
return Path(self.kubeconfig_path)
settings = Settings() settings = Settings()

29
uv.lock
View File

@ -250,7 +250,6 @@ name = "assistance-engine"
version = "0.1.0" version = "0.1.0"
source = { virtual = "." } source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "chonkie", extra = ["semantic"] },
{ name = "grpcio" }, { name = "grpcio" },
{ name = "grpcio-reflection" }, { name = "grpcio-reflection" },
{ name = "grpcio-tools" }, { name = "grpcio-tools" },
@ -273,7 +272,9 @@ dependencies = [
dev = [ dev = [
{ name = "beir" }, { name = "beir" },
{ name = "boto3" }, { name = "boto3" },
{ name = "chonkie", extra = ["elastic", "semantic"] },
{ name = "evidently" }, { name = "evidently" },
{ name = "flatbuffers" },
{ name = "jupyter" }, { name = "jupyter" },
{ name = "langfuse" }, { name = "langfuse" },
{ name = "litellm" }, { name = "litellm" },
@ -288,7 +289,6 @@ dev = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "chonkie", extras = ["semantic"], specifier = ">=1.5.6" },
{ name = "grpcio", specifier = ">=1.78.0" }, { name = "grpcio", specifier = ">=1.78.0" },
{ name = "grpcio-reflection", specifier = ">=1.78.0" }, { name = "grpcio-reflection", specifier = ">=1.78.0" },
{ name = "grpcio-tools", specifier = ">=1.78.0" }, { name = "grpcio-tools", specifier = ">=1.78.0" },
@ -311,7 +311,9 @@ requires-dist = [
dev = [ dev = [
{ name = "beir", specifier = ">=2.2.0" }, { name = "beir", specifier = ">=2.2.0" },
{ name = "boto3", specifier = ">=1.42.58" }, { name = "boto3", specifier = ">=1.42.58" },
{ name = "chonkie", extras = ["elastic", "semantic"], specifier = ">=1.6.0" },
{ name = "evidently", specifier = ">=0.7.20" }, { name = "evidently", specifier = ">=0.7.20" },
{ name = "flatbuffers", specifier = ">=25.12.19" },
{ name = "jupyter", specifier = ">=1.1.1" }, { name = "jupyter", specifier = ">=1.1.1" },
{ name = "langfuse", specifier = "<3" }, { name = "langfuse", specifier = "<3" },
{ name = "litellm", specifier = ">=1.82.0" }, { name = "litellm", specifier = ">=1.82.0" },
@ -595,7 +597,7 @@ wheels = [
[[package]] [[package]]
name = "chonkie" name = "chonkie"
version = "1.5.6" version = "1.6.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "chonkie-core" }, { name = "chonkie-core" },
@ -603,12 +605,15 @@ dependencies = [
{ name = "tenacity" }, { name = "tenacity" },
{ name = "tqdm" }, { name = "tqdm" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/a4/16/e51295955f5a627ebb7867dc2e7fa48d4c6dc2a5f3cde3690de84812e929/chonkie-1.5.6.tar.gz", hash = "sha256:282a24c20b88c4c28d8cae893ac78bcbee531a87d28ec86b419897a9eea2ecf3", size = 172066, upload-time = "2026-02-16T21:44:01.336Z" } sdist = { url = "https://files.pythonhosted.org/packages/e5/72/fdf8f89ff439f4ec357af0866c819512391936e4e61b6f15635a48434b8a/chonkie-1.6.0.tar.gz", hash = "sha256:14120d80610c1f549027fc7aa9a5ff604a729b545836f6cadd65d5ae83596279", size = 187056, upload-time = "2026-03-11T04:55:07.657Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/18/3a/24cf4cb377f4d44126231d55a19b48a645a0f78f891288a8d4300c95160d/chonkie-1.5.6-py3-none-any.whl", hash = "sha256:4c3be39a0f97315eb3c5efe6dc5d7933d3d27a1918b55c39ab211b403bb03df7", size = 210065, upload-time = "2026-02-16T21:43:59.926Z" }, { url = "https://files.pythonhosted.org/packages/ae/c2/7ea7d3409df220dd0e048b1113b44f47eccab9d517b00b037ab0e34c3c7a/chonkie-1.6.0-py3-none-any.whl", hash = "sha256:aa357e02f5cdacac6f8280c5e8651207c866b4137bcf20904db8670ee0808877", size = 232997, upload-time = "2026-03-11T04:55:05.252Z" },
] ]
[package.optional-dependencies] [package.optional-dependencies]
elastic = [
{ name = "elasticsearch" },
]
semantic = [ semantic = [
{ name = "model2vec" }, { name = "model2vec" },
{ name = "tokenizers" }, { name = "tokenizers" },
@ -1061,6 +1066,14 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" }, { url = "https://files.pythonhosted.org/packages/9c/0f/5d0c71a1aefeb08efff26272149e07ab922b64f46c63363756224bd6872e/filelock-3.24.3-py3-none-any.whl", hash = "sha256:426e9a4660391f7f8a810d71b0555bce9008b0a1cc342ab1f6947d37639e002d", size = 24331, upload-time = "2026-02-19T00:48:18.465Z" },
] ]
[[package]]
name = "flatbuffers"
version = "25.12.19"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl", hash = "sha256:7634f50c427838bb021c2d66a3d1168e9d199b0607e6329399f04846d42e20b4", size = 26661, upload-time = "2025-12-19T23:16:13.622Z" },
]
[[package]] [[package]]
name = "fqdn" name = "fqdn"
version = "1.5.1" version = "1.5.1"
@ -3112,14 +3125,14 @@ wheels = [
[[package]] [[package]]
name = "opentelemetry-proto" name = "opentelemetry-proto"
version = "1.39.1" version = "1.40.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "protobuf" }, { name = "protobuf" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/49/1d/f25d76d8260c156c40c97c9ed4511ec0f9ce353f8108ca6e7561f82a06b2/opentelemetry_proto-1.39.1.tar.gz", hash = "sha256:6c8e05144fc0d3ed4d22c2289c6b126e03bcd0e6a7da0f16cedd2e1c2772e2c8", size = 46152, upload-time = "2025-12-11T13:32:48.681Z" } sdist = { url = "https://files.pythonhosted.org/packages/4c/77/dd38991db037fdfce45849491cb61de5ab000f49824a00230afb112a4392/opentelemetry_proto-1.40.0.tar.gz", hash = "sha256:03f639ca129ba513f5819810f5b1f42bcb371391405d99c168fe6937c62febcd", size = 45667, upload-time = "2026-03-04T14:17:31.194Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/51/95/b40c96a7b5203005a0b03d8ce8cd212ff23f1793d5ba289c87a097571b18/opentelemetry_proto-1.39.1-py3-none-any.whl", hash = "sha256:22cdc78efd3b3765d09e68bfbd010d4fc254c9818afd0b6b423387d9dee46007", size = 72535, upload-time = "2025-12-11T13:32:33.866Z" }, { url = "https://files.pythonhosted.org/packages/b9/b2/189b2577dde745b15625b3214302605b1353436219d42b7912e77fa8dc24/opentelemetry_proto-1.40.0-py3-none-any.whl", hash = "sha256:266c4385d88923a23d63e353e9761af0f47a6ed0d486979777fe4de59dc9b25f", size = 72073, upload-time = "2026-03-04T14:17:16.673Z" },
] ]
[[package]] [[package]]