diff --git a/README.md b/README.md
index 9e9626f..5daf4c1 100644
--- a/README.md
+++ b/README.md
@@ -45,15 +45,7 @@ graph TD
 
 ├── README.md                     # System documentation & Dev guide
 ├── changelog                     # Version tracking and release history
-├── pyproject.toml  
-├── ingestion/                    # Data ingested in Elasticsearch
-├── docs/
-│   ├── AVAP Language: ...        # AVAP DSL Documentation
-│   │   └── AVAP.md
-│   ├── developer.avapfr...       # Documents on developer web page
-│   ├── LRM/                      # AVAP LRM documentation
-│   │   └── avap.md 
-│   └── samples/                  # AVAP code samples
+├── pyproject.toml                # Python project configuration
 ├── Docker/
 │   ├── protos/
 │   │    └── brunix.proto         # Protocol Buffers: The source of truth for the API
@@ -65,12 +57,22 @@ graph TD
 │   │    └── utils/               # Utility modules
 │   ├── Dockerfile                # Container definition for the Engine
 │   ├── docker-compose.yaml       # Local orchestration for dev environment
-│   ├── requirements.txt          # Python dependencies for Docker
-│   └── .dockerignore             # Docker ignore files
+│   ├── requirements.txt          # Python dependencies for Docker
+│   ├── protos/
+│   │   └── brunix.proto          # Protocol Buffers: The source of truth for the API
+│   └── src/
+│       ├── graph.py              # Workflow graph orchestration
+│       ├── prompts.py            # Centralized prompt definitions
+│       ├── server.py             # gRPC Server & RAG Orchestration
+│       ├── state.py              # Shared state management
+│       └── utils/                # Utility modules
+├── ingestion/
+│   └── docs/                     # AVAP documentation chunks
+├── kubernetes/
+│   └── kubeconfig.yaml           # Kubernetes cluster configuration
 ├── scripts/
 │   └── pipelines/
-│       ├── flows/                # Processing pipelines
-│       └── tasks/                # Modules used by the flows
+│       └── flows/                # Data processing flows
 └── src/
     ├── config.py                 # Environment variables configuration file
     └── utils/
diff --git a/scripts/pipelines/samples_generator/generate_mbap.py b/scripts/pipelines/samples_generator/generate_mbap.py
new file mode 100644
index 0000000..a567d4d
--- /dev/null
+++ b/scripts/pipelines/samples_generator/generate_mbap.py
@@ -0,0 +1,352 @@
+#!/usr/bin/env python3
+"""
+Generate an MBPP-style benchmark of AVAP problems from the AVAP LRM using the Anthropic API.
+
+Usage:
+    python generate_mbap.py
+    python generate_mbap.py --lrm path/to/avap.md
+    python generate_mbap.py --lrm avap.md --output output/mbpp_avap.json --problems 300
+
+Requirements:
+    pip install anthropic
+    export ANTHROPIC_API_KEY=sk-ant-...
+"""
+
+import argparse
+import json
+import os
+import sys
+import time
+from pathlib import Path
+
+import anthropic
+
+CATEGORIES = [
+    ("HTTP params / addParam / addResult / _status",10),
+    ("Variables y strings / addVar / replace / randomString",10),
+    ("Condicionales / if() Modo 1 y Modo 2 / else() / end()",10),
+    ("Bucles y listas / startLoop / itemFromList / getListLen",10),
+    ("JSON / variableFromJSON / AddVariableToJSON",10),
+    ("ORM / ormAccessSelect / ormAccessInsert / ormAccessUpdate",10),
+    ("Criptografía / encodeSHA256 / encodeMD5",10),
+    ("Fechas / getTimeStamp / getDateTime / stampToDatetime",10),
+    ("Conectores externos / avapConnector + métodos dinámicos",10),
+    ("Concurrencia / go + gather",10),
+    ("Funciones y scope / function / return()",10),
+    ("Manejo de errores / try() / exception()",10),
+    ("HTTP externo / RequestGet / RequestPost",10),
+    ("Modularidad / import / include + casos de uso complejos",10),
+]
+
+TOTAL_PROBLEMS    = sum(n for _, n in CATEGORIES)
+PROBLEMS_PER_CALL = 10
+
+
+SYSTEM_PROMPT = """Eres un experto en el lenguaje AVAP.
+Se te proporciona el Language Reference Manual (LRM) completo de AVAP.
+Tu tarea es generar problemas de benchmark estilo MBPP para evaluar
+modelos de lenguaje en su capacidad de generar código AVAP correcto.
+
+REGLAS ESTRICTAS para el código AVAP generado:
+1. Una instrucción por línea. EOL es el terminador absoluto.
+2. Sin indentación significativa (es solo decorativa).
+3. Bloques de control: if()...else()...end(), startLoop()...endLoop(), try()...exception()...end()
+4. Funciones: function name(args) { ... return(val) }
+5. if() Modo 1: if(var_o_literal, var_o_literal, "operador")
+   — los argumentos NO pueden ser expresiones de acceso como dict['key'];
+     hay que extraer el valor a una variable propia primero.
+6. if() Modo 2: if(None, None, `expresion_completa_como_string`)
+7. _status se asigna con: addVar(_status, 404)
+8. ormAccessSelect firma: ormAccessSelect(campos, "tabla", selector, varTarget)
+   — selector puede ser cadena vacía.
+9. Acceso a campos de dict: val = dict['campo']  (línea propia, luego se usa val).
+10. Genera ÚNICAMENTE código AVAP válido según el LRM. Sin Python, sin pseudocódigo.
+
+MODO DE EJECUCIÓN — MUY IMPORTANTE:
+- El código se ejecuta DIRECTAMENTE, línea a línea, sin servidor ni registro de endpoints.
+- NUNCA uses registerEndpoint(), NUNCA uses mainHandler(), NUNCA envuelvas el código en funciones solo para ejecutarlo.
+- El código correcto es simplemente las instrucciones en línea, por ejemplo:
+    result = "Hello World"
+    addResult(result)
+- Si el problema requiere una función auxiliar reutilizable, defínela con function...{} y llámala directamente después:
+    function double(n) {
+        return(n * 2)
+    }
+    addParam("n", n)
+    result = double(n)
+    addResult(result)
+- NUNCA termines el código con registerEndpoint ni con ninguna llamada de registro.
+
+FORMATO DE SALIDA: responde ÚNICAMENTE con un array JSON válido.
+Sin texto adicional, sin bloques de código markdown, sin explicaciones.
+Estructura exacta de cada elemento:
+{
+  "task_id": <número entero>,
+  "text": "<enunciado del problema en español>",
+  "code": "<código AVAP con saltos de línea como \\n>",
+  "test_inputs": { "<param1>": <valor1>, "<param2>": <valor2> },
+  "test_list": ["<expr_python_1>", "<expr_python_2>"]
+}
+
+FORMATO DE test_inputs — MUY IMPORTANTE:
+- Es un objeto JSON con un valor fijo para cada variable que el código recibe via addParam().
+- Los nombres de las claves deben coincidir EXACTAMENTE con el nombre de variable usado en addParam().
+- Los valores deben ser concretos y representativos del problema (no genéricos como "test" o 123).
+- Si el código no tiene ningún addParam(), el campo test_inputs debe ser un objeto vacío: {}
+- Estos valores son los que el evaluador inyectará en el stack antes de ejecutar el código,
+  de modo que las aserciones de test_list puedan validar las variables de salida resultantes.
+
+Ejemplo con addParam:
+  código:       addParam("password", password)\\nencodeSHA256(password, hashed)\\naddResult(hashed)
+  test_inputs:  { "password": "secret123" }
+  test_list:    ["re.match(r'^[a-f0-9]{64}$', hashed)"]
+
+Ejemplo sin addParam:
+  código:       randomString(16, token)\\naddResult(token)
+  test_inputs:  {}
+  test_list:    ["re.match(r'^[a-zA-Z0-9]{16}$', token)"]
+
+FORMATO DE test_list — MUY IMPORTANTE:
+Cada aserción debe ser una expresión Python con re.match()
+evaluable directamente sobre las variables del stack AVAP (disponibles como
+variables Python locales). El módulo 're' está siempre disponible.
+La expresión debe devolver un match object (truthy) si el test pasa.
+
+Reglas estrictas:
+- USA ÚNICAMENTE re.match(r'<patrón>', <variable>)
+- NO combines expresiones re.match en una aserción, cada asercion tiene que ser un unico re.match(r'<patrón>', <variable>)
+- Convierte a string si es necesario: re.match(r'^\\d+$', str(result))
+- Puedes encadenar con 'and': re.match(r'^[a-zA-Z0-9]{32}$', token) and re.match(r'.{32}', token)
+- Las variables referenciadas deben existir en el stack tras ejecutar el código.
+- NUNCA uses comparaciones directas (==, !=, >, <).
+- NUNCA uses isinstance(), len(), assert, ni texto descriptivo.
+- NUNCA uses nada que no sea re.match().
+
+Ejemplos correctos de test_list:
+  "re.match(r'^[a-f0-9]{64}$', hashed)"
+  "re.match(r'^[a-zA-Z0-9]{32}$', token)"
+  "re.match(r'^\\d{4}-\\d{2}-\\d{2}$', date_str)"
+  "re.match(r'^-?\\d+(\\.\\d+)?$', str(result))"
+  "re.match(r'^(par|impar)$', result)"
+  "re.match(r'^40[134]$', str(_status))"
+  "re.match(r'^\\d+$', str(length))"
+"""
+
+
+def build_user_prompt(lrm: str, category: str, count: int, start_id: int):
+    return f"""# LRM AVAP — Language Reference Manual
+
+{lrm}
+
+---
+
+# TAREA
+
+Genera exactamente {count} problemas de benchmark MBPP-AVAP para la categoría:
+
+**{category}**
+
+Requisitos:
+- Los task_id deben comenzar en {start_id} y ser consecutivos.
+- Cada problema debe cubrir un aspecto distinto de la categoría.
+- Dificultad variada: algunos simples, algunos intermedios, alguno avanzado.
+- El código debe ser realista como endpoint de microservicio HTTP en AVAP.
+- Incluye 2-3 aserciones descriptivas en test_list por problema.
+
+Responde ÚNICAMENTE con el array JSON. Sin texto antes ni después.
+"""
+
+
+def parse_response(raw: str):
+    text = raw.strip()
+    if text.startswith("```"):
+        lines = text.splitlines()
+        inner = lines[1:]
+        if inner and inner[-1].strip() == "```":
+            inner = inner[:-1]
+        text = "\n".join(inner).strip()
+
+    problems = json.loads(text)
+
+    if not isinstance(problems, list):
+        raise ValueError("response is not an JSON array")
+
+    for p in problems:
+        for field in ("task_id", "text", "code", "test_list"):
+            if field not in p:
+                raise ValueError(f"Field missing '{field}' in task_id={p.get('task_id','?')}.")
+        if "test_inputs" not in p:
+            p["test_inputs"] = {}
+        if not isinstance(p["test_inputs"], dict):
+            raise ValueError(f"'test_inputs' must by a JSON Object (task_id={p.get('task_id','?')}).")
+
+    return problems
+
+
+def call_api( client: anthropic.Anthropic, lrm: str, category: str, count: int, start_id: int, retries: int = 3,):
+    
+    for attempt in range(1, retries + 1):
+        try:
+            message = client.messages.create(
+                model="claude-sonnet-4-20250514",
+                max_tokens=8000,
+                system=SYSTEM_PROMPT,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": build_user_prompt(lrm, category, count, start_id),
+                    }
+                ],
+            )
+            raw = message.content[0].text
+            problems = parse_response(raw)
+
+            for i, problem in enumerate(problems):
+                problem["task_id"] = start_id + i
+
+            return problems
+
+        except (json.JSONDecodeError, ValueError, KeyError) as e:
+            print(f"\n   Attempt {attempt}/{retries} — parser error: {e}")
+            if attempt < retries:
+                time.sleep(2 ** attempt)
+
+        except anthropic.RateLimitError:
+            wait = 30 * attempt
+            print(f"\n   Rate limit. waiting {wait}s...")
+            time.sleep(wait)
+
+        except anthropic.APIError as e:
+            print(f"\n   API error at attempt {attempt}: {e}")
+            if attempt < retries:
+                time.sleep(5)
+
+    raise RuntimeError(
+        f"Cant generate problems '{category}' since {retries} trys."
+    )
+
+
+def scale_categories(target: int):
+    base = TOTAL_PROBLEMS
+    scaled = [
+        (cat, max(1, round(n * target / base)))
+        for cat, n in CATEGORIES
+    ]
+
+    diff = target - sum(n for _, n in scaled)
+    if diff != 0:
+        last_cat, last_n = scaled[-1]
+        scaled[-1] = (last_cat, max(1, last_n + diff))
+    return scaled
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Create a bunch of samples of code from an LRM."
+    )
+    parser.add_argument(
+        "--lrm",
+        default="avap.md",
+        help="Path to AVAP LRM (default: avap.md)",
+    )
+    parser.add_argument(
+        "--output",
+        default="output/mbpp_avap.json",
+        help="Output JSON file (default: output/mbpp_avap.json)",
+    )
+    parser.add_argument(
+        "--problems",
+        type=int,
+        default=300,
+        help="Total problems number to generate (default: 300)",
+    )
+    parser.add_argument(
+        "--api-key",
+        default=None,
+        help="Anthropic API key",
+    )
+    args = parser.parse_args()
+
+    api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY")
+    if not api_key:
+        sys.exit(
+            "ERROR: API key not found.\n"
+            "  Export variable:  export ANTHROPIC_API_KEY=sk-ant-...\n"
+            "  Or indicate with:         --api-key sk-ant-..."
+        )
+
+    lrm_path = Path(args.lrm)
+    if not lrm_path.exists():
+        sys.exit(
+            f"ERROR: file '{lrm_path}' not found.\n"
+            f"  Put avap.md in actual directory or use --lrm <path>."
+        )
+    lrm = lrm_path.read_text(encoding="utf-8")
+    print(f" Source LRM: {lrm_path} ")
+
+    output_path = Path(args.output)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    categories = scale_categories(args.problems)
+    total_calls = sum((n + PROBLEMS_PER_CALL - 1) // PROBLEMS_PER_CALL for _, n in categories)
+
+    print(f" Problems    : {args.problems}")
+    print(f" Output file : {output_path}\n")
+    print("────────────────────────────────────────────────────────────")
+
+    client       = anthropic.Anthropic(api_key=api_key)
+    all_problems: list[dict] = []
+    task_id      = 1
+    call_count   = 0
+
+    for cat_idx, (category, total_cat) in enumerate(categories, 1):
+        print(f"\n[{cat_idx:02d}/{len(categories)}] {category}  ({total_cat} problems)")
+
+        remaining = total_cat
+        batch_num = 0
+
+        while remaining > 0:
+            batch_size = min(PROBLEMS_PER_CALL, remaining)
+            batch_num += 1
+            call_count += 1
+
+            print(
+                f"  Batch {batch_num}  |  task_id {task_id}–{task_id + batch_size - 1}  "
+                f"|  API Call {call_count}/{total_calls} ... ",
+                end="",
+                flush=True,
+            )
+
+            try:
+                batch = call_api(client, lrm, category, batch_size, task_id)
+            except RuntimeError as e:
+                print(f"\n {e}")
+                if all_problems:
+                    _save(all_problems, output_path, partial=True)
+                sys.exit(1)
+
+            all_problems.extend(batch)
+            task_id   += len(batch)
+            remaining -= len(batch)
+            print(f"  {len(batch)} generated (total: {len(all_problems)})")
+
+            if remaining > 0:
+                time.sleep(1.5)
+
+        _save(all_problems, output_path, partial=False)
+        print(f"  '- Save actual results.")
+
+    print("\n" + "────────────────────────────────────────────────────────────")
+    print(f" Process completed")
+    print(f" Problems generated : {len(all_problems)}")
+    print(f" task_id range      : {all_problems[0]['task_id']} – {all_problems[-1]['task_id']}")
+    print(f" Output file        : {output_path}")
+
+
+def _save(problems: list[dict], path: Path, partial: bool = False):
+    suffix = ".partial" if partial else ""
+    target = path.with_suffix(suffix + path.suffix) if partial else path
+    with open(target, "w", encoding="utf-8") as f:
+        json.dump(problems, f, ensure_ascii=False, indent=2)
+
+
+# Run the generator only when this file is executed as a script.
+if __name__ == "__main__":
+    main()