Refactor langgraph_agent_simple notebook execution counts and handle Langfuse client errors

- Set execution counts to null for initial cells in langgraph_agent_simple.ipynb
- Update execution counts for subsequent cells to maintain order
- Change output stream name from stdout to stderr for error handling
- Capture and log detailed error messages for failed Langfuse client authentication

Update uv.lock to manage accelerate dependency

- Remove accelerate from main dependencies
- Add accelerate to dev dependencies with version specification
- Adjust requires-dist section to reflect changes in dependency management
2026-03-02 14:07:29 +01:00 · 2026-03-02 14:07:29 +01:00 · 5a666079a4
parent 5b424f8409
commit 5a666079a4
16 changed files with 659 additions and 723 deletions
--- a/Docker/docker-compose.yaml
+++ b/Docker/docker-compose.yaml
@ -6,15 +6,17 @@ services:
    container_name: brunix-assistance-engine
    ports:
      - "50052:50051"
+    network_mode: "host"
    environment:
      ELASTICSEARCH_URL: ${ELASTICSEARCH_URL}
-      DATABASE_URL: ${DATABASE_URL}
-      OLLAMA_URL: ${OLLAMA_URL}
+      ELASTICSEARCH_INDEX: ${ELASTICSEARCH_INDEX}
+      POSTGRES_URL: ${POSTGRES_URL}
      LANGFUSE_HOST: ${LANGFUSE_HOST}
      LANGFUSE_PUBLIC_KEY: ${LANGFUSE_PUBLIC_KEY}
      LANGFUSE_SECRET_KEY: ${LANGFUSE_SECRET_KEY}
-      ELASTICSEARCH_INDEX: ${ELASTICSEARCH_INDEX}
+      OLLAMA_URL: ${OLLAMA_LOCAL_URL}
      OLLAMA_MODEL_NAME: ${OLLAMA_MODEL_NAME}
+      OLLAMA_EMB_MODEL_NAME: ${OLLAMA_EMB_MODEL_NAME}
    
    extra_hosts:
      - "host.docker.internal:host-gateway"
--- a/Docker/requirements.txt
+++ b/Docker/requirements.txt
@ -1,7 +1,5 @@
 # This file was autogenerated by uv via the following command:
 #    uv export --format requirements-txt --no-hashes --no-dev -o Docker/requirements.txt
-accelerate==1.12.0
-    # via assistance-engine
 aiohappyeyeballs==2.6.1
    # via aiohttp
 aiohttp==3.13.3
@ -35,10 +33,6 @@ colorama==0.4.6 ; sys_platform == 'win32'
    #   click
    #   loguru
    #   tqdm
-cuda-bindings==12.9.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-cuda-pathfinder==1.3.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via cuda-bindings
 dataclasses-json==0.6.7
    # via langchain-community
 elastic-transport==8.17.1
@ -46,17 +40,13 @@ elastic-transport==8.17.1
 elasticsearch==8.19.3
    # via langchain-elasticsearch
 filelock==3.24.3
-    # via
-    #   huggingface-hub
-    #   torch
+    # via huggingface-hub
 frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
 fsspec==2025.10.0
-    # via
-    #   huggingface-hub
-    #   torch
+    # via huggingface-hub
 greenlet==3.3.2 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
    # via sqlalchemy
 grpcio==1.78.1
@ -83,7 +73,6 @@ httpx-sse==0.4.3
    # via langchain-community
 huggingface-hub==0.36.2
    # via
-    #   accelerate
    #   langchain-huggingface
    #   tokenizers
 idna==3.11
@ -92,8 +81,6 @@ idna==3.11
    #   httpx
    #   requests
    #   yarl
-jinja2==3.1.6
-    # via torch
 jmespath==1.1.0
    # via
    #   boto3
@ -150,69 +137,23 @@ langsmith==0.7.6
    #   langchain-core
 loguru==0.7.3
    # via assistance-engine
-markupsafe==3.0.3
-    # via jinja2
 marshmallow==3.26.2
    # via dataclasses-json
-mpmath==1.3.0
-    # via sympy
 multidict==6.7.1
    # via
    #   aiohttp
    #   yarl
 mypy-extensions==1.1.0
    # via typing-inspect
-networkx==3.6.1
-    # via torch
 nltk==3.9.3
    # via assistance-engine
 numpy==2.4.2
    # via
-    #   accelerate
    #   assistance-engine
    #   elasticsearch
    #   langchain-aws
    #   langchain-community
    #   pandas
-nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via
-    #   nvidia-cudnn-cu12
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via
-    #   nvidia-cusolver-cu12
-    #   torch
-nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via
-    #   nvidia-cufft-cu12
-    #   nvidia-cusolver-cu12
-    #   nvidia-cusparse-cu12
-    #   torch
-nvidia-nvshmem-cu12==3.4.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
-nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
 ollama==0.6.1
    # via langchain-ollama
 orjson==3.11.7
@ -223,7 +164,6 @@ ormsgpack==1.12.2
    # via langgraph-checkpoint
 packaging==24.2
    # via
-    #   accelerate
    #   huggingface-hub
    #   langchain-core
    #   langsmith
@ -238,8 +178,6 @@ protobuf==6.33.5
    # via
    #   grpcio-reflection
    #   grpcio-tools
-psutil==7.2.2
-    # via accelerate
 pydantic==2.12.5
    # via
    #   langchain
@ -265,7 +203,6 @@ python-dotenv==1.2.1
    #   pydantic-settings
 pyyaml==6.0.3
    # via
-    #   accelerate
    #   huggingface-hub
    #   langchain-classic
    #   langchain-community
@ -285,12 +222,8 @@ requests-toolbelt==1.0.0
    # via langsmith
 s3transfer==0.16.0
    # via boto3
-safetensors==0.7.0
-    # via accelerate
 setuptools==82.0.0
-    # via
-    #   grpcio-tools
-    #   torch
+    # via grpcio-tools
 simsimd==6.5.13
    # via elasticsearch
 six==1.17.0
@ -299,23 +232,17 @@ sqlalchemy==2.0.46
    # via
    #   langchain-classic
    #   langchain-community
-sympy==1.14.0
-    # via torch
 tenacity==9.1.4
    # via
    #   langchain-community
    #   langchain-core
 tokenizers==0.22.2
    # via langchain-huggingface
-torch==2.10.0
-    # via accelerate
 tqdm==4.67.3
    # via
    #   assistance-engine
    #   huggingface-hub
    #   nltk
-triton==3.6.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
-    # via torch
 typing-extensions==4.15.0
    # via
    #   aiosignal
@ -327,7 +254,6 @@ typing-extensions==4.15.0
    #   pydantic
    #   pydantic-core
    #   sqlalchemy
-    #   torch
    #   typing-inspect
    #   typing-inspection
 typing-inspect==0.9.0
--- a/Docker/src/graph.py
+++ b/Docker/src/graph.py
@ -0,0 +1,57 @@
+# graph.py
+from langchain_core.documents import Document
+from langchain_core.messages import SystemMessage
+from langgraph.graph import StateGraph, END
+from langgraph.graph.state import CompiledStateGraph
+
+from prompts import REFORMULATE_PROMPT, GENERATE_PROMPT
+from state import AgentState
+
+
+def format_context(docs: list[Document]) -> str:
+    chunks = []
+    for i, doc in enumerate(docs, 1):
+        source = (doc.metadata or {}).get("source", "Untitled")
+        source_id = (doc.metadata or {}).get("id", f"chunk-{i}")
+        text = doc.page_content or ""
+        chunks.append(f"[{i}] id={source_id} source={source}\n{text}")
+    return "\n\n".join(chunks)
+
+
+def build_graph(llm, vector_store) -> CompiledStateGraph:
+    def reformulate(state: AgentState) -> AgentState:
+        user_msg = state["messages"][-1]
+        resp = llm.invoke([REFORMULATE_PROMPT, user_msg])
+        reformulated = resp.content.strip()
+        print(f"[reformulate] '{user_msg.content}' → '{reformulated}'")
+        return {"reformulated_query": reformulated}
+
+    def retrieve(state: AgentState) -> AgentState:
+        query = state["reformulated_query"]
+        docs = vector_store.as_retriever(
+            search_type="similarity",
+            search_kwargs={"k": 3},
+        ).invoke(query)
+        context = format_context(docs)
+        print(f"[retrieve] {len(docs)} docs fetched")
+        print(context)
+        return {"context": context}
+
+    def generate(state: AgentState) -> AgentState:
+        prompt = SystemMessage(
+            content=GENERATE_PROMPT.content.format(context=state["context"])
+        )
+        resp = llm.invoke([prompt] + state["messages"])
+        return {"messages": [resp]}
+
+    graph_builder = StateGraph(AgentState)
+    graph_builder.add_node("reformulate", reformulate)
+    graph_builder.add_node("retrieve", retrieve)
+    graph_builder.add_node("generate", generate)
+
+    graph_builder.set_entry_point("reformulate")
+    graph_builder.add_edge("reformulate", "retrieve")
+    graph_builder.add_edge("retrieve", "generate")
+    graph_builder.add_edge("generate", END)
+
+    return graph_builder.compile()
--- a/Docker/src/instances.py
+++ b/Docker/src/instances.py
@ -0,0 +1,24 @@
+import os
+
+from langchain_elasticsearch import ElasticsearchStore
+
+from utils.llm_factory import create_chat_model
+from utils.emb_factory import create_embedding_model
+
+llm = create_chat_model(
+    provider="ollama",
+    model=os.getenv("OLLAMA_MODEL_NAME"),
+    temperature=0,
+    validate_model_on_init=True,
+)
+embeddings = create_embedding_model(
+    provider="ollama",
+    model=os.getenv("OLLAMA_EMB_MODEL_NAME"),
+)
+vector_store = ElasticsearchStore(
+    es_url=os.getenv("ELASTICSEARCH_URL"),
+    index_name=os.getenv("ELASTICSEARCH_INDEX"),
+    embedding=embeddings,
+    query_field="text",
+    vector_query_field="vector",
+)
--- a/Docker/src/prompts.py
+++ b/Docker/src/prompts.py
@ -0,0 +1,89 @@
+from langchain_core.messages import SystemMessage
+
+REFORMULATE_PROMPT = SystemMessage(
+    content=(
+        "You are a deterministic lexical query rewriter used for vector retrieval.\n"
+        "Your task is to rewrite user questions into optimized keyword search queries.\n\n"
+
+        "CRITICAL RULES (ABSOLUTE):\n"
+        "1. NEVER answer the question.\n"
+        "2. NEVER expand acronyms.\n"
+        "3. NEVER introduce new terms not present in the original query.\n"
+        "4. NEVER infer missing information.\n"
+        "5. NEVER add explanations, definitions, or interpretations.\n"
+        "6. Preserve all technical tokens exactly as written.\n"
+        "7. Only remove filler words (e.g., what, does, is, explain, tell me, please).\n"
+        "8. You may reorder terms for better retrieval.\n"
+        "9. Output must be a single-line plain keyword query.\n"
+        "10. If the query is already optimal, return it unchanged.\n\n"
+        "11. If you receive something that looks like code, do NOT attempt to rewrite it. Return it verbatim.\n\n"
+
+        "ALLOWED OPERATIONS:\n"
+        "- Remove interrogative phrasing.\n"
+        "- Remove stopwords.\n"
+        "- Reorder words.\n"
+        "- Convert to noun phrase form.\n\n"
+
+        "FORBIDDEN OPERATIONS:\n"
+        "- Expanding abbreviations.\n"
+        "- Paraphrasing into unseen vocabulary.\n"
+        "- Adding definitions.\n"
+        "- Answering implicitly.\n\n"
+
+        "Examples:\n"
+        "Input: What does AVAP stand for?\n"
+        "Output: AVAP stand for\n"
+
+        "Input: Hey, I'm trying to understand how AVAP handels a ZeroDivisionError when doing divison or modulus operatoins. Can you explane what situatoins cause a ZeroDivisionError to be raised and how I can catch it in my AVAP scripts?\n"
+        "Output: AVAP ZeroDivisionError division / modulus % catch try except\n"
+        
+        "Input: What does AVAP stand for?\n"
+        "Output: AVAP stand for\n"
+
+        "Input: Please explain how the import statement works in AVAP scripts.\n"
+        "Output: AVAP import statement syntax behavior\n\n"
+
+        "Return only the rewritten query."
+    )
+)
+
+GENERATE_PROMPT = SystemMessage(
+    content=(
+        """You are a retrieval-grounded assistant.
+
+        CRITICAL SCOPE RULES
+        - AVAP (Advanced Virtual API Programming) is a new/unknown programming language.
+        - If the user message is AVAP-related (explicitly mentions "AVAP" case-insensitive OR the provided context is AVAP code/docs), you MUST answer using ONLY <CONTEXT>.
+        - You MUST NOT use general programming knowledge to interpret AVAP.
+        - You MUST NOT assume AVAP behaves like any other language even if syntax looks similar.
+        - You MUST NOT infer missing details. Only state what is explicitly present in <CONTEXT>.
+
+        GROUNDING REQUIREMENTS (AVAP-RELATED)
+        1) Every non-trivial factual claim MUST be directly supported by an EXACT QUOTE from <CONTEXT>.
+        2) If a claim is not supported by a quote, DO NOT include it.
+        3) If <CONTEXT> does not contain enough information to answer, reply with EXACTLY:
+        "I don't have enough information in the provided context to answer that."
+
+        WORKFLOW (AVAP-RELATED) — FOLLOW IN ORDER
+        A) Identify the specific question(s) being asked.
+        B) Extract the minimum necessary quotes from <CONTEXT> that answer those question(s).
+        C) Write the answer using ONLY those quotes (paraphrase is allowed, but every statement must be backed by at least one quote).
+        D) Verify: for EACH sentence in your answer, confirm there is a supporting quote. If any sentence lacks a quote, delete it or refuse.
+
+        OUTPUT FORMAT (AVAP-RELATED ONLY)
+        Answer:
+        <short, direct answer; no extra speculation; no unrelated tips>
+
+        Evidence:
+        - "<exact quote 1>"
+        - "<exact quote 2>"
+        (Include only quotes you actually used. Prefer the smallest quotes that fully support the statements.)
+
+        NON-AVAP QUESTIONS
+        - If the question is clearly not AVAP-related, answer normally using general knowledge.
+
+        <CONTEXT>
+        {context}
+        </CONTEXT>"""
+    )
+)
--- a/Docker/src/server
+++ b/Docker/src/server
@ -1,140 +0,0 @@
-import logging
-import os
-import sys
-from concurrent import futures
-from pathlib import Path
-from typing import Any
-
-import brunix_pb2
-import brunix_pb2_grpc
-import grpc
-from grpc_reflection.v1alpha import reflection
-from langchain_core.prompts import ChatPromptTemplate
-from langchain_elasticsearch import ElasticsearchStore
-
-PROJECT_ROOT = Path(__file__).resolve().parents[2]
-if str(PROJECT_ROOT) not in sys.path:
-    sys.path.insert(0, str(PROJECT_ROOT))
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("brunix-engine")
-
-
-def _provider_kwargs(provider: str, base_url: str) -> dict[str, Any]:
-    if provider == "ollama":
-        return {"base_url": base_url}
-    return {}
-
-
-class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
-    def __init__(self):
-        from src.emb_factory import create_embedding_model
-        from src.llm_factory import create_chat_model
-
-        self.base_url = os.getenv("OLLAMA_LOCAL_URL", "http://ollama-light-service:11434")
-        self.chat_provider = os.getenv("CHAT_PROVIDER", "ollama")
-        self.embedding_provider = os.getenv("EMBEDDING_PROVIDER", self.chat_provider)
-        self.chat_model_name = os.getenv("OLLAMA_MODEL_NAME")
-        self.embedding_model_name = os.getenv(
-            "OLLAMA_EMB_MODEL_NAME", self.chat_model_name
-        )
-
-        if not self.chat_model_name:
-            raise ValueError("OLLAMA_MODEL_NAME is required")
-
-        logger.info("Starting server")
-
-        self.llm = create_chat_model(
-            provider=self.chat_provider,
-            model=self.chat_model_name,
-            **_provider_kwargs(self.chat_provider, self.base_url),
-        )
-
-        self.embeddings = create_embedding_model(
-            provider=self.embedding_provider,
-            model=self.embedding_model_name,
-            **_provider_kwargs(self.embedding_provider, self.base_url),
-        )
-
-        es_url = os.getenv("ELASTICSEARCH_URL", "http://elasticsearch:9200")
-        logger.info("ElasticSearch on: %s", es_url)
-
-        self.vector_store = ElasticsearchStore(
-            es_url=es_url,
-            index_name=os.getenv("ELASTICSEARCH_INDEX"),
-            embedding=self.embeddings,
-            query_field="text",
-            vector_query_field="embedding",
-        )
-
-    def format_context(self, docs) -> str:
-        parts = []
-        for i, d in enumerate(docs, start=1):
-            meta = d.metadata or {}
-            source = meta.get("source", "unknown")
-            doc_id = meta.get("doc_id", "unknown")
-            chunk_id = meta.get("chunk_id", "unknown")
-
-            parts.append(
-                f"[{i}] source={source} doc_id={doc_id} chunk_id={chunk_id}\n{d.page_content}"
-            )
-        return "\n\n---\n\n".join(parts)
-
-    def AskAgent(self, request, context):
-        logger.info(f"request {request.session_id}): {request.query[:50]}.")
-
-        docs_and_scores = self.vector_store.similarity_search_with_score(
-            request.query, k=4
-        )
-
-        try:
-            context_text = self.format_context([doc for doc, _ in docs_and_scores])
-            # 4. Prompt Engineering
-            prompt = ChatPromptTemplate.from_template("""
-            You are a helpful assistant. Use the following retrieved documents to answer the question. 
-                                                      If you don't know the answer, say you don't know.
-            
-            CONTEXT:
-            {context}
-
-            QUESTION:
-            {question}
-            """)
-
-            chain = prompt | self.llm
-
-            result = chain.invoke({"context": context_text, "question": request.query})
-            result_text = getattr(result, "content", str(result))
-            yield brunix_pb2.AgentResponse(
-                text=str(result_text), avap_code="AVAP-2026", is_final=True
-            )
-
-            yield brunix_pb2.AgentResponse(text="", avap_code="", is_final=True)
-
-        except Exception as e:
-            logger.error(f"Error in AskAgent: {str(e)}")
-            yield brunix_pb2.AgentResponse(
-                text=f"[Error Motor]: {str(e)}", is_final=True
-            )
-
-
-def serve():
-
-    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
-
-    brunix_pb2_grpc.add_AssistanceEngineServicer_to_server(BrunixEngine(), server)
-
-    SERVICE_NAMES = (
-        brunix_pb2.DESCRIPTOR.services_by_name["AssistanceEngine"].full_name,
-        reflection.SERVICE_NAME,
-    )
-    reflection.enable_server_reflection(SERVICE_NAMES, server)
-
-    server.add_insecure_port("[::]:50051")
-    logger.info("Brunix Engine on port 50051")
-    server.start()
-    server.wait_for_termination()
-
-
-if __name__ == "__main__":
-    serve()
--- a/Docker/src/server.py
+++ b/Docker/src/server.py
@ -12,6 +12,10 @@ from grpc_reflection.v1alpha import reflection
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_elasticsearch import ElasticsearchStore

+from utils.llm_factory import create_chat_model
+from utils.emb_factory import create_embedding_model
+from graph import build_graph
+
 # PROJECT_ROOT = Path(__file__).resolve().parents[2]
 # if str(PROJECT_ROOT) not in sys.path:
 #     sys.path.insert(0, str(PROJECT_ROOT))
@ -28,93 +32,54 @@ def _provider_kwargs(provider: str, base_url: str) -> dict[str, Any]:

 class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
    def __init__(self):
-        from emb_factory import create_embedding_model
-        from llm_factory import create_chat_model
-
-        self.base_url = os.getenv("OLLAMA_LOCAL_URL", "http://ollama-light-service:11434")
-        self.chat_provider = os.getenv("CHAT_PROVIDER", "ollama")
-        self.embedding_provider = os.getenv("EMBEDDING_PROVIDER", self.chat_provider)
-        self.chat_model_name = os.getenv("OLLAMA_MODEL_NAME")
-        self.embedding_model_name = os.getenv(
-            "OLLAMA_EMB_MODEL_NAME", self.chat_model_name
-        )
-
-        if not self.chat_model_name:
-            raise ValueError("OLLAMA_MODEL_NAME is required")
-
-        logger.info("Starting server")
-
        self.llm = create_chat_model(
-            provider=self.chat_provider,
-            model=self.chat_model_name,
-            **_provider_kwargs(self.chat_provider, self.base_url),
+            provider="ollama",
+            model=os.getenv("OLLAMA_MODEL_NAME"),
+            temperature=0,
+            validate_model_on_init=True,
        )
-
        self.embeddings = create_embedding_model(
-            provider=self.embedding_provider,
-            model=self.embedding_model_name,
-            **_provider_kwargs(self.embedding_provider, self.base_url),
+            provider="ollama",
+            model=os.getenv("OLLAMA_EMB_MODEL_NAME"),
        )
-
-        es_url = os.getenv("ELASTICSEARCH_URL", "http://elasticsearch:9200")
-        logger.info("ElasticSearch on: %s", es_url)
-
        self.vector_store = ElasticsearchStore(
-            es_url=es_url,
+            es_url=os.getenv("ELASTICSEARCH_URL"),
            index_name=os.getenv("ELASTICSEARCH_INDEX"),
            embedding=self.embeddings,
            query_field="text",
-            vector_query_field="embedding",
+            vector_query_field="vector",
        )
+        self.graph = build_graph(
+            llm=self.llm,
+            vector_store=self.vector_store
+        )
+        logger.info("Brunix Engine initializing.")

-    def format_context(self, docs) -> str:
-        parts = []
-        for i, d in enumerate(docs, start=1):
-            meta = d.metadata or {}
-            source = meta.get("source", "unknown")
-            doc_id = meta.get("doc_id", "unknown")
-            chunk_id = meta.get("chunk_id", "unknown")
-
-            parts.append(
-                f"[{i}] source={source} doc_id={doc_id} chunk_id={chunk_id}\n{d.page_content}"
-            )
-        return "\n\n---\n\n".join(parts)

    def AskAgent(self, request, context):
        logger.info(f"request {request.session_id}): {request.query[:50]}.")

-        docs_and_scores = self.vector_store.similarity_search_with_score(
-            request.query, k=4
-        )
-
        try:
-            context_text = self.format_context([doc for doc, _ in docs_and_scores])
-            # 4. Prompt Engineering
-            prompt = ChatPromptTemplate.from_template("""
-            You are a helpful assistant. Use the following retrieved documents to answer the question. 
-                                                      If you don't know the answer, say you don't know.
+            final_state = self.graph.invoke({"messages": [{"role": "user", 
+                                                           "content": request.query}]})

-            CONTEXT:
-            {context}
+            messages = final_state.get("messages", [])
+            last_msg = messages[-1] if messages else None
+            result_text = getattr(last_msg, "content", str(last_msg)) if last_msg else ""

-            QUESTION:
-            {question}
-            """)
-
-            chain = prompt | self.llm
-
-            result = chain.invoke({"context": context_text, "question": request.query})
-            result_text = getattr(result, "content", str(result))
            yield brunix_pb2.AgentResponse(
-                text=str(result_text), avap_code="AVAP-2026", is_final=True
+                text=result_text,
+                avap_code="AVAP-2026",
+                is_final=True,
            )

            yield brunix_pb2.AgentResponse(text="", avap_code="", is_final=True)

        except Exception as e:
-            logger.error(f"Error in AskAgent: {str(e)}")
+            logger.error(f"Error in AskAgent: {str(e)}", exc_info=True)
            yield brunix_pb2.AgentResponse(
-                text=f"[Error Motor]: {str(e)}", is_final=True
+                text=f"[Error Motor]: {str(e)}",
+                is_final=True,
            )


--- a/Docker/src/state.py
+++ b/Docker/src/state.py
@ -0,0 +1,9 @@
+from typing import TypedDict, Annotated
+
+from langgraph.graph.message import add_messages
+
+
+class AgentState(TypedDict):
+    messages: Annotated[list, add_messages]
+    reformulated_query: str
+    context: str
--- a/Docker/src/utils/emb_factory.py
+++ b/Docker/src/utils/emb_factory.py
--- a/Docker/src/utils/llm_factory.py
+++ b/Docker/src/utils/llm_factory.py
--- a/notebooks/langgraph_agent_simple.ipynb
+++ b/notebooks/langgraph_agent_simple.ipynb
@ -360,7 +360,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 18,
   "id": "53b89690",
   "metadata": {},
   "outputs": [
--- a/pyproject.toml
+++ b/pyproject.toml
@ -5,7 +5,6 @@ description = "Add your description here"
 readme = "README.md"
 requires-python = ">=3.11"
 dependencies = [
-    "accelerate>=1.12.0",
    "grpcio>=1.78.0",
    "grpcio-reflection>=1.78.0",
    "grpcio-tools>=1.78.0",
@ -26,6 +25,7 @@ dependencies = [

 [dependency-groups]
 dev = [
+    "accelerate>=1.12.0",
    "beir>=2.2.0",
    "evidently>=0.7.20",
    "jupyter>=1.1.1",
--- a/scratches/acano/es_ingestion.ipynb
+++ b/scratches/acano/es_ingestion.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 1,
   "id": "0a8abbfa",
   "metadata": {},
   "outputs": [],
@ -23,7 +23,6 @@
    "from langchain_ollama import OllamaEmbeddings\n",
    "from transformers import AutoTokenizer, AutoModel, AutoConfig\n",
    "from elasticsearch import Elasticsearch\n",
-    "from langchain_elasticsearch import ElasticsearchStore\n",
    "import nltk\n",
    "from nltk.tokenize import sent_tokenize\n",
    "nltk.download(\"punkt\", quiet=True)\n",
@ -693,10 +692,106 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
   "id": "5ab78d28",
   "metadata": {},
   "outputs": [],
+   "source": [
+    "from chonkie import CodeChunker\n",
+    "\n",
+    "tokenizer = AutoTokenizer.from_pretrained(HF_EMB_MODEL_NAME, use_fast=True)\n",
+    "chunker = CodeChunker(\n",
+    "    language=\"python\",\n",
+    "    tokenizer=tokenizer,\n",
+    "    chunk_size=128\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "37676348",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Chunk text: \n",
+      "registerEndpoint(\"/v1/user\", \"POST\", [], \"Crear Usuario\", main, final_res)\n",
+      "\n",
+      "function main(){\n",
+      "    addParam(\"user\", u)\n",
+      "    addParam(\"pass\", p)\n",
+      "\n",
+      "    if\n",
+      "Token count: 38\n",
+      "Chunk text: (u, None, \"==\")\n",
+      "        addVar(_status, 400)\n",
+      "        return(\"Usuario requerido\")\n",
+      "    end()\n",
+      "\n",
+      "    encodeSHA256(p, pass_hash)\n",
+      "\n",
+      "    go_async(\"audit\")\n",
+      "        ormDirect(\"INSERT INTO audit (event) VALUES ('User creation attempt')\", r)\n",
+      "    end()\n",
+      "\n",
+      "    db = avapConnector(\"TOKEN_DB\")\n",
+      "    res_db = db.query(\"INSERT INTO users (name, pass) VALUES ('%s', '%s')\" % (u, pass_hash))\n",
+      "\n",
+      "    addVar(_status, 201)\n",
+      "    addResult(res_db)\n",
+      "    return(res_db)\n",
+      "\n",
+      "Token count: 128\n",
+      "Chunk text: }\n",
+      "\n",
+      "Token count: 1\n"
+     ]
+    }
+   ],
+   "source": [
+    "code = \"\"\"\n",
+    "registerEndpoint(\"/v1/user\", \"POST\", [], \"Crear Usuario\", main, final_res)\n",
+    "\n",
+    "function main(){\n",
+    "    addParam(\"user\", u)\n",
+    "    addParam(\"pass\", p)\n",
+    "    \n",
+    "    if(u, None, \"==\")\n",
+    "        addVar(_status, 400)\n",
+    "        return(\"Usuario requerido\")\n",
+    "    end()\n",
+    "    \n",
+    "    encodeSHA256(p, pass_hash)\n",
+    "    \n",
+    "    go_async(\"audit\")\n",
+    "        ormDirect(\"INSERT INTO audit (event) VALUES ('User creation attempt')\", r)\n",
+    "    end()\n",
+    "    \n",
+    "    db = avapConnector(\"TOKEN_DB\")\n",
+    "    res_db = db.query(\"INSERT INTO users (name, pass) VALUES ('%s', '%s')\" % (u, pass_hash))\n",
+    "    \n",
+    "    addVar(_status, 201)\n",
+    "    addResult(res_db)\n",
+    "    return(res_db)\n",
+    "}\n",
+    "\"\"\"\n",
+    "\n",
+    "chunks = chunker.chunk(code)\n",
+    "\n",
+    "for chunk in chunks:\n",
+    "    print(f\"Chunk text: {chunk.text}\")\n",
+    "    print(f\"Token count: {chunk.token_count}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "66cfd3c7",
+   "metadata": {},
+   "outputs": [],
   "source": []
  }
 ],
--- a/scratches/acano/evaluate_retrieve.ipynb
+++ b/scratches/acano/evaluate_retrieve.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 6,
   "id": "8fed4518",
   "metadata": {},
   "outputs": [
@ -10,23 +10,23 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing faithfulness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import faithfulness\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing faithfulness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import faithfulness\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_relevancy from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_relevancy\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing answer_relevancy from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_relevancy\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_recall\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing context_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_recall\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_precision from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_precision\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing context_precision from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_precision\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_entity_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_entity_recall\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing context_entity_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_entity_recall\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_similarity from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_similarity\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing answer_similarity from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_similarity\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_correctness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_correctness\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing answer_correctness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_correctness\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextRecall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextRecall\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing NonLLMContextRecall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextRecall\n",
      "  from ragas.metrics import (\n",
-      "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextPrecisionWithReference from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextPrecisionWithReference\n",
+      "/tmp/ipykernel_782131/4243561678.py:18: DeprecationWarning: Importing NonLLMContextPrecisionWithReference from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextPrecisionWithReference\n",
      "  from ragas.metrics import (\n"
     ]
    }
@ -68,13 +68,14 @@
    "    ELASTICSEARCH_INDEX,\n",
    "    OLLAMA_MODEL_NAME,\n",
    "    OLLAMA_EMB_MODEL_NAME,\n",
-    "    RAW_DIR\n",
+    "    RAW_DIR,\n",
+    "    INTERIM_DIR\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "id": "4426d6c0",
   "metadata": {},
   "outputs": [],
@ -105,7 +106,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
   "id": "fe524d14",
   "metadata": {},
   "outputs": [
@ -128,410 +129,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
-   "id": "06103178",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>user_input</th>\n",
-       "      <th>reference_contexts</th>\n",
-       "      <th>reference</th>\n",
-       "      <th>persona_name</th>\n",
-       "      <th>query_style</th>\n",
-       "      <th>query_length</th>\n",
-       "      <th>synthesizer_name</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>How does AVAP handel a ZeroDivisionError when ...</td>\n",
-       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
-       "      <td>In AVAP, when a division by zero occurs—whethe...</td>\n",
-       "      <td>Carlos Menendez</td>\n",
-       "      <td>MISSPELLED</td>\n",
-       "      <td>LONG</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>As a backend developer who is learning AVAP an...</td>\n",
-       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
-       "      <td>In AVAP, control flow structures include condi...</td>\n",
-       "      <td>Carlos Menendez</td>\n",
-       "      <td>PERFECT_GRAMMAR</td>\n",
-       "      <td>LONG</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>hey so in AVAP when i do division by zero what...</td>\n",
-       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
-       "      <td>In AVAP, when you perform a division by zero, ...</td>\n",
-       "      <td>Carlos Medina</td>\n",
-       "      <td>POOR_GRAMMAR</td>\n",
-       "      <td>MEDIUM</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>what happen if file not found when i do import...</td>\n",
-       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
-       "      <td>When an import statement is executed in AVAP, ...</td>\n",
-       "      <td>Carlos Medina</td>\n",
-       "      <td>POOR_GRAMMAR</td>\n",
-       "      <td>SHORT</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>In AVAP, under what circumstances is a TypeErr...</td>\n",
-       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
-       "      <td>In AVAP, a TypeError exception is raised in tw...</td>\n",
-       "      <td>Carlos Menendez</td>\n",
-       "      <td>PERFECT_GRAMMAR</td>\n",
-       "      <td>MEDIUM</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>...</th>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "      <td>...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>95</th>\n",
-       "      <td>How does the data model in AVAP™ compare to Py...</td>\n",
-       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
-       "      <td>The data model in AVAP™ is very similar to Pyt...</td>\n",
-       "      <td>Carlos Menendez</td>\n",
-       "      <td>PERFECT_GRAMMAR</td>\n",
-       "      <td>MEDIUM</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>96</th>\n",
-       "      <td>What data types are available in AVAP™?</td>\n",
-       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
-       "      <td>In AVAP™, the most common data types include i...</td>\n",
-       "      <td>Carlos Medina</td>\n",
-       "      <td>PERFECT_GRAMMAR</td>\n",
-       "      <td>SHORT</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>97</th>\n",
-       "      <td>AVAP strings Unicode</td>\n",
-       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
-       "      <td>In AVAP™, strings (str) represent sequences of...</td>\n",
-       "      <td>Carlos Medina</td>\n",
-       "      <td>WEB_SEARCH_LIKE</td>\n",
-       "      <td>SHORT</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>98</th>\n",
-       "      <td>AVAP data model comparison with Python data ty...</td>\n",
-       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
-       "      <td>The data model in AVAP is similar to Python in...</td>\n",
-       "      <td>Carlos Mendieta</td>\n",
-       "      <td>WEB_SEARCH_LIKE</td>\n",
-       "      <td>MEDIUM</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>99</th>\n",
-       "      <td>AVAP™ data types and data structures overview</td>\n",
-       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
-       "      <td>AVAP™ uses a flexible and dynamic data model s...</td>\n",
-       "      <td>Carlos Mendieta</td>\n",
-       "      <td>WEB_SEARCH_LIKE</td>\n",
-       "      <td>SHORT</td>\n",
-       "      <td>single_hop_specific_query_synthesizer</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>100 rows × 7 columns</p>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                           user_input  \\\n",
-       "0   How does AVAP handel a ZeroDivisionError when ...   \n",
-       "1   As a backend developer who is learning AVAP an...   \n",
-       "2   hey so in AVAP when i do division by zero what...   \n",
-       "3   what happen if file not found when i do import...   \n",
-       "4   In AVAP, under what circumstances is a TypeErr...   \n",
-       "..                                                ...   \n",
-       "95  How does the data model in AVAP™ compare to Py...   \n",
-       "96            What data types are available in AVAP™?   \n",
-       "97                               AVAP strings Unicode   \n",
-       "98  AVAP data model comparison with Python data ty...   \n",
-       "99      AVAP™ data types and data structures overview   \n",
-       "\n",
-       "                                   reference_contexts  \\\n",
-       "0   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
-       "1   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
-       "2   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
-       "3   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
-       "4   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
-       "..                                                ...   \n",
-       "95  [Introduction\\nThe data model in AVAP™ defines...   \n",
-       "96  [Introduction\\nThe data model in AVAP™ defines...   \n",
-       "97  [Introduction\\nThe data model in AVAP™ defines...   \n",
-       "98  [Introduction\\nThe data model in AVAP™ defines...   \n",
-       "99  [Introduction\\nThe data model in AVAP™ defines...   \n",
-       "\n",
-       "                                            reference     persona_name  \\\n",
-       "0   In AVAP, when a division by zero occurs—whethe...  Carlos Menendez   \n",
-       "1   In AVAP, control flow structures include condi...  Carlos Menendez   \n",
-       "2   In AVAP, when you perform a division by zero, ...    Carlos Medina   \n",
-       "3   When an import statement is executed in AVAP, ...    Carlos Medina   \n",
-       "4   In AVAP, a TypeError exception is raised in tw...  Carlos Menendez   \n",
-       "..                                                ...              ...   \n",
-       "95  The data model in AVAP™ is very similar to Pyt...  Carlos Menendez   \n",
-       "96  In AVAP™, the most common data types include i...    Carlos Medina   \n",
-       "97  In AVAP™, strings (str) represent sequences of...    Carlos Medina   \n",
-       "98  The data model in AVAP is similar to Python in...  Carlos Mendieta   \n",
-       "99  AVAP™ uses a flexible and dynamic data model s...  Carlos Mendieta   \n",
-       "\n",
-       "        query_style query_length                       synthesizer_name  \n",
-       "0        MISSPELLED         LONG  single_hop_specific_query_synthesizer  \n",
-       "1   PERFECT_GRAMMAR         LONG  single_hop_specific_query_synthesizer  \n",
-       "2      POOR_GRAMMAR       MEDIUM  single_hop_specific_query_synthesizer  \n",
-       "3      POOR_GRAMMAR        SHORT  single_hop_specific_query_synthesizer  \n",
-       "4   PERFECT_GRAMMAR       MEDIUM  single_hop_specific_query_synthesizer  \n",
-       "..              ...          ...                                    ...  \n",
-       "95  PERFECT_GRAMMAR       MEDIUM  single_hop_specific_query_synthesizer  \n",
-       "96  PERFECT_GRAMMAR        SHORT  single_hop_specific_query_synthesizer  \n",
-       "97  WEB_SEARCH_LIKE        SHORT  single_hop_specific_query_synthesizer  \n",
-       "98  WEB_SEARCH_LIKE       MEDIUM  single_hop_specific_query_synthesizer  \n",
-       "99  WEB_SEARCH_LIKE        SHORT  single_hop_specific_query_synthesizer  \n",
-       "\n",
-       "[100 rows x 7 columns]"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "synthetic_dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "id": "ab1932b7",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_716860/244266171.py:1: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n",
-      "  synth = SingleHopSpecificQuerySynthesizer(llm=LangchainLLMWrapper(llm))\n",
-      "/tmp/ipykernel_716860/244266171.py:3: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n",
-      "  generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n",
-      "/tmp/ipykernel_716860/244266171.py:3: DeprecationWarning: LangchainEmbeddingsWrapper is deprecated and will be removed in a future version. Use the modern embedding providers instead: embedding_factory('openai', model='text-embedding-3-small', client=openai_client) or from ragas.embeddings import OpenAIEmbeddings, GoogleEmbeddings, HuggingFaceEmbeddings\n",
-      "  generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "8ec6ef79b1964c44b78a75ca539f816b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying SummaryExtractor:   0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f583879571cf4c818cbb7321b0839990",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying CustomNodeFilter:   0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Node 75603049-8ebb-49dc-9e7d-da37fa927eb9 does not have a summary. Skipping filtering.\n",
-      "Node c2d1e8b0-ca69-47af-9bcd-39cbf8560edb does not have a summary. Skipping filtering.\n",
-      "Node 24c16f65-02fd-4d80-84c6-d7d1a8a2638c does not have a summary. Skipping filtering.\n",
-      "Node a0975db8-14b3-44eb-8aa2-e83274fb55ab does not have a summary. Skipping filtering.\n",
-      "Node 6768ece8-9a13-42b3-9aec-08e828044420 does not have a summary. Skipping filtering.\n",
-      "Node 54719709-293a-49db-86f2-8f697015e16a does not have a summary. Skipping filtering.\n",
-      "Node a049eacb-5a3e-404f-83ea-249061fcae0a does not have a summary. Skipping filtering.\n",
-      "Node eb4ac1be-55ae-487e-936c-ee43513f25e9 does not have a summary. Skipping filtering.\n",
-      "Node baf6b749-0280-46f0-a47b-8fd82373da1b does not have a summary. Skipping filtering.\n",
-      "Node 9caa0b62-10ea-4f19-98b7-5f10b2cbc486 does not have a summary. Skipping filtering.\n",
-      "Node d28505f3-cdd7-44d1-9c45-9741e27e25c3 does not have a summary. Skipping filtering.\n",
-      "Node f9a234cb-1af1-4f06-8d9a-6921c19ffbf5 does not have a summary. Skipping filtering.\n",
-      "Node 4f0b355e-81ca-450c-99e3-8458ebd304c6 does not have a summary. Skipping filtering.\n",
-      "Node 66cf6447-7639-497c-9ae2-e26b0c7443b5 does not have a summary. Skipping filtering.\n",
-      "Node 722bfb38-b24e-483f-9787-253d71716c1e does not have a summary. Skipping filtering.\n",
-      "Node ce76bfcc-8cb3-4de2-87e4-74f10ad5c549 does not have a summary. Skipping filtering.\n",
-      "Node dada2116-28ae-4d7c-a4ad-f8ccc3952eb1 does not have a summary. Skipping filtering.\n",
-      "Node e6f7360d-4309-453a-aab8-d3015d53dd88 does not have a summary. Skipping filtering.\n",
-      "Node a73eb1ba-9609-4ad8-80bc-98d9c4993fcd does not have a summary. Skipping filtering.\n",
-      "Node 004b6ce2-48a7-4bff-9393-67e963ebe7fc does not have a summary. Skipping filtering.\n",
-      "Node 854676ec-e80f-45ef-a84c-08d527b96813 does not have a summary. Skipping filtering.\n",
-      "Node 241a936b-3470-41be-8449-7994f3ba5eee does not have a summary. Skipping filtering.\n",
-      "Node 28f76e87-5e68-4a63-83a8-e7c4addb855a does not have a summary. Skipping filtering.\n",
-      "Node f7e3d432-5073-4004-af6c-683cc7e7a600 does not have a summary. Skipping filtering.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a3cf82e356d7485fa6ffa54b131d6a18",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying EmbeddingExtractor:   0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b14be1e6a8e74d9592860377b5fa0044",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying ThemesExtractor:   0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "65089eea206341f290cda033732df991",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying NERExtractor:   0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3120af625643421eafc48c78fce57d8d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying CosineSimilarityBuilder:   0%|          | 0/1 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6a8e81dbde254d6e82d76f3752e211d2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Applying OverlapScoreBuilder:   0%|          | 0/1 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "59ca74c675f14067a4a665d56b4e29ba",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "fa5aee3db9674f6eb50fef7214cadd92",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating Scenarios:   0%|          | 0/1 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "63030d2b67984b838c055b29d0443639",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating Samples:   0%|          | 0/100 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
   "source": [
    "synth = SingleHopSpecificQuerySynthesizer(llm=LangchainLLMWrapper(llm))\n",
    "\n",
@ -544,6 +145,16 @@
    "synthetic_dataset = synthetic_dataset.to_pandas()"
   ]
  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "d15cea12",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "synthetic_dataset.to_csv(INTERIM_DIR / \"retrieve_eval_results/synthetic_dataset.csv\", index=False)"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 5,
@ -976,13 +587,297 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": null,
   "id": "350755fd",
   "metadata": {},
   "outputs": [],
   "source": [
-    "result_df.to_csv(\"/home/acano/PycharmProjects/assistance-engine/data/interim/embedding_eval_results/retrieve_eval_results/ragas_eval.csv\", index=False)"
+    "result_df.to_csv(INTERIM_DIR / \"retrieve_eval_results/ragas_eval.csv\", index=False)"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "1ff60103",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>user_input</th>\n",
+       "      <th>reference_contexts</th>\n",
+       "      <th>reference</th>\n",
+       "      <th>persona_name</th>\n",
+       "      <th>query_style</th>\n",
+       "      <th>query_length</th>\n",
+       "      <th>synthesizer_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>Hey, I'm trying to understand how AVAP handels...</td>\n",
+       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
+       "      <td>In AVAP, a ZeroDivisionError is raised in two ...</td>\n",
+       "      <td>Carlos Medina</td>\n",
+       "      <td>MISSPELLED</td>\n",
+       "      <td>LONG</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>How AVAP handle name resolution different from...</td>\n",
+       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
+       "      <td>In AVAP, when a name is used in a code block, ...</td>\n",
+       "      <td>Carlos Medina</td>\n",
+       "      <td>POOR_GRAMMAR</td>\n",
+       "      <td>MEDIUM</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>How does AVAP handle name resoltuion and scopi...</td>\n",
+       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
+       "      <td>In AVAP, name resolution works differently fro...</td>\n",
+       "      <td>Carlos Méndez</td>\n",
+       "      <td>MISSPELLED</td>\n",
+       "      <td>MEDIUM</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>AVAP how does import statement work and what a...</td>\n",
+       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
+       "      <td>In AVAP, the import statement is the only way ...</td>\n",
+       "      <td>Carlos Méndez</td>\n",
+       "      <td>WEB_SEARCH_LIKE</td>\n",
+       "      <td>MEDIUM</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>what happen with StopIteration when generator ...</td>\n",
+       "      <td>[Execution Model in AVAP\\n4.1. Structure of a ...</td>\n",
+       "      <td>In generator functions, the return statement i...</td>\n",
+       "      <td>Carlos Méndez</td>\n",
+       "      <td>POOR_GRAMMAR</td>\n",
+       "      <td>SHORT</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>95</th>\n",
+       "      <td>Hey so I been learning AVAP and I wanna know, ...</td>\n",
+       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
+       "      <td>In AVAP™, the data type that uses Unicode is t...</td>\n",
+       "      <td>Carlos Méndez</td>\n",
+       "      <td>POOR_GRAMMAR</td>\n",
+       "      <td>MEDIUM</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>96</th>\n",
+       "      <td>Hey so I been trying to learn AVAP™ and I want...</td>\n",
+       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
+       "      <td>In AVAP™, just like in Python, data types are ...</td>\n",
+       "      <td>Carlos Medina</td>\n",
+       "      <td>POOR_GRAMMAR</td>\n",
+       "      <td>LONG</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>97</th>\n",
+       "      <td>How are Unicde characters related to strings i...</td>\n",
+       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
+       "      <td>In AVAP™, strings (str) represent sequences of...</td>\n",
+       "      <td>Carlos Méndez</td>\n",
+       "      <td>MISSPELLED</td>\n",
+       "      <td>MEDIUM</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>98</th>\n",
+       "      <td>How does the data model in AVAP compare to Pyt...</td>\n",
+       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
+       "      <td>Similar to Python, AVAP uses a flexible and dy...</td>\n",
+       "      <td>Carlos Medina</td>\n",
+       "      <td>PERFECT_GRAMMAR</td>\n",
+       "      <td>SHORT</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>99</th>\n",
+       "      <td>hey so i been learning AVAP™ and i wanna know ...</td>\n",
+       "      <td>[Introduction\\nThe data model in AVAP™ defines...</td>\n",
+       "      <td>In AVAP™, the most common data types include: ...</td>\n",
+       "      <td>Carlos Méndez</td>\n",
+       "      <td>POOR_GRAMMAR</td>\n",
+       "      <td>MEDIUM</td>\n",
+       "      <td>single_hop_specific_query_synthesizer</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>100 rows × 7 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                           user_input  \\\n",
+       "0   Hey, I'm trying to understand how AVAP handels...   \n",
+       "1   How AVAP handle name resolution different from...   \n",
+       "2   How does AVAP handle name resoltuion and scopi...   \n",
+       "3   AVAP how does import statement work and what a...   \n",
+       "4   what happen with StopIteration when generator ...   \n",
+       "..                                                ...   \n",
+       "95  Hey so I been learning AVAP and I wanna know, ...   \n",
+       "96  Hey so I been trying to learn AVAP™ and I want...   \n",
+       "97  How are Unicde characters related to strings i...   \n",
+       "98  How does the data model in AVAP compare to Pyt...   \n",
+       "99  hey so i been learning AVAP™ and i wanna know ...   \n",
+       "\n",
+       "                                   reference_contexts  \\\n",
+       "0   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
+       "1   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
+       "2   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
+       "3   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
+       "4   [Execution Model in AVAP\\n4.1. Structure of a ...   \n",
+       "..                                                ...   \n",
+       "95  [Introduction\\nThe data model in AVAP™ defines...   \n",
+       "96  [Introduction\\nThe data model in AVAP™ defines...   \n",
+       "97  [Introduction\\nThe data model in AVAP™ defines...   \n",
+       "98  [Introduction\\nThe data model in AVAP™ defines...   \n",
+       "99  [Introduction\\nThe data model in AVAP™ defines...   \n",
+       "\n",
+       "                                            reference   persona_name  \\\n",
+       "0   In AVAP, a ZeroDivisionError is raised in two ...  Carlos Medina   \n",
+       "1   In AVAP, when a name is used in a code block, ...  Carlos Medina   \n",
+       "2   In AVAP, name resolution works differently fro...  Carlos Méndez   \n",
+       "3   In AVAP, the import statement is the only way ...  Carlos Méndez   \n",
+       "4   In generator functions, the return statement i...  Carlos Méndez   \n",
+       "..                                                ...            ...   \n",
+       "95  In AVAP™, the data type that uses Unicode is t...  Carlos Méndez   \n",
+       "96  In AVAP™, just like in Python, data types are ...  Carlos Medina   \n",
+       "97  In AVAP™, strings (str) represent sequences of...  Carlos Méndez   \n",
+       "98  Similar to Python, AVAP uses a flexible and dy...  Carlos Medina   \n",
+       "99  In AVAP™, the most common data types include: ...  Carlos Méndez   \n",
+       "\n",
+       "        query_style query_length                       synthesizer_name  \n",
+       "0        MISSPELLED         LONG  single_hop_specific_query_synthesizer  \n",
+       "1      POOR_GRAMMAR       MEDIUM  single_hop_specific_query_synthesizer  \n",
+       "2        MISSPELLED       MEDIUM  single_hop_specific_query_synthesizer  \n",
+       "3   WEB_SEARCH_LIKE       MEDIUM  single_hop_specific_query_synthesizer  \n",
+       "4      POOR_GRAMMAR        SHORT  single_hop_specific_query_synthesizer  \n",
+       "..              ...          ...                                    ...  \n",
+       "95     POOR_GRAMMAR       MEDIUM  single_hop_specific_query_synthesizer  \n",
+       "96     POOR_GRAMMAR         LONG  single_hop_specific_query_synthesizer  \n",
+       "97       MISSPELLED       MEDIUM  single_hop_specific_query_synthesizer  \n",
+       "98  PERFECT_GRAMMAR        SHORT  single_hop_specific_query_synthesizer  \n",
+       "99     POOR_GRAMMAR       MEDIUM  single_hop_specific_query_synthesizer  \n",
+       "\n",
+       "[100 rows x 7 columns]"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "synthetic_dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "71743384",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "from evidently import Dataset\n",
+    "from evidently import DataDefinition\n",
+    "from evidently.descriptors import *\n",
+    "\n",
+    "from evidently import Report\n",
+    "from evidently.presets import TextEvals\n",
+    "from evidently.metrics import *\n",
+    "from evidently.tests import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "e1ac1a41",
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "ValidationError",
+     "evalue": "1 validation error for OllamaOptions\napi_url\n  field required (type=value_error.missing)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mValidationError\u001b[39m                           Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[24]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m context_based_evals = \u001b[43mDataset\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m      2\u001b[39m \u001b[43m    \u001b[49m\u001b[43msynthetic_dataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m      3\u001b[39m \u001b[43m    \u001b[49m\u001b[43mdata_definition\u001b[49m\u001b[43m=\u001b[49m\u001b[43mDataDefinition\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtext_columns\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser_input\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreference_contexts\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreference\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m      4\u001b[39m \u001b[43m    \u001b[49m\u001b[43mdescriptors\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[43mContextQualityLLMEval\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreference_contexts\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquestion\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser_input\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprovider\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mollama\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mOLLAMA_MODEL_NAME\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m      5\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m      6\u001b[39m context_based_evals.as_dataframe()\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/core/datasets.py:1271\u001b[39m, in \u001b[36mDataset.from_pandas\u001b[39m\u001b[34m(cls, data, data_definition, descriptors, options, metadata, tags)\u001b[39m\n\u001b[32m   1269\u001b[39m dataset = PandasDataset(data, data_definition, metadata=metadata, tags=tags)\n\u001b[32m   1270\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m descriptors \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1271\u001b[39m     \u001b[43mdataset\u001b[49m\u001b[43m.\u001b[49m\u001b[43madd_descriptors\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdescriptors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1272\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m dataset\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/core/datasets.py:1382\u001b[39m, in \u001b[36mDataset.add_descriptors\u001b[39m\u001b[34m(self, descriptors, options)\u001b[39m\n\u001b[32m   1375\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Add multiple descriptors to the dataset.\u001b[39;00m\n\u001b[32m   1376\u001b[39m \n\u001b[32m   1377\u001b[39m \u001b[33;03mArgs:\u001b[39;00m\n\u001b[32m   1378\u001b[39m \u001b[33;03m* `descriptors`: List of `Descriptor` objects to compute\u001b[39;00m\n\u001b[32m   1379\u001b[39m \u001b[33;03m* `options`: Optional options for descriptor computation\u001b[39;00m\n\u001b[32m   1380\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m   1381\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m descriptor \u001b[38;5;129;01min\u001b[39;00m descriptors:\n\u001b[32m-> \u001b[39m\u001b[32m1382\u001b[39m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43madd_descriptor\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdescriptor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/core/datasets.py:1688\u001b[39m, in \u001b[36mPandasDataset.add_descriptor\u001b[39m\u001b[34m(self, descriptor, options)\u001b[39m\n\u001b[32m   1686\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34madd_descriptor\u001b[39m(\u001b[38;5;28mself\u001b[39m, descriptor: Descriptor, options: AnyOptions = \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m   1687\u001b[39m     descriptor.validate_input(\u001b[38;5;28mself\u001b[39m._data_definition)\n\u001b[32m-> \u001b[39m\u001b[32m1688\u001b[39m     new_columns = \u001b[43mdescriptor\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mOptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mfrom_any_options\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1689\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(new_columns, DatasetColumn):\n\u001b[32m   1690\u001b[39m         new_columns = {descriptor.alias: new_columns}\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/core/datasets.py:1099\u001b[39m, in \u001b[36mFeatureDescriptor.generate_data\u001b[39m\u001b[34m(self, dataset, options)\u001b[39m\n\u001b[32m   1096\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_data\u001b[39m(\n\u001b[32m   1097\u001b[39m     \u001b[38;5;28mself\u001b[39m, dataset: \u001b[33m\"\u001b[39m\u001b[33mDataset\u001b[39m\u001b[33m\"\u001b[39m, options: Options\n\u001b[32m   1098\u001b[39m ) -> Union[DatasetColumn, Dict[DisplayName, DatasetColumn]]:\n\u001b[32m-> \u001b[39m\u001b[32m1099\u001b[39m     feature = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mfeature\u001b[49m\u001b[43m.\u001b[49m\u001b[43mgenerate_features_renamed\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   1100\u001b[39m \u001b[43m        \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m.\u001b[49m\u001b[43mas_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1101\u001b[39m \u001b[43m        \u001b[49m\u001b[43mcreate_data_definition\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m.\u001b[49m\u001b[43mas_dataframe\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mColumnMapping\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1102\u001b[39m \u001b[43m        \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1103\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1104\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m {\n\u001b[32m   1105\u001b[39m         col.display_name: \u001b[38;5;28mself\u001b[39m.get_dataset_column(col.name, feature[col.name])\n\u001b[32m   1106\u001b[39m         
\u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.feature.list_columns()\n\u001b[32m   1107\u001b[39m     }\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/legacy/features/generated_features.py:56\u001b[39m, in \u001b[36mGeneratedFeatures.generate_features_renamed\u001b[39m\u001b[34m(self, data, data_definition, options)\u001b[39m\n\u001b[32m     53\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_features_renamed\u001b[39m(\n\u001b[32m     54\u001b[39m     \u001b[38;5;28mself\u001b[39m, data: pd.DataFrame, data_definition: DataDefinition, options: Options\n\u001b[32m     55\u001b[39m ) -> pd.DataFrame:\n\u001b[32m---> \u001b[39m\u001b[32m56\u001b[39m     features = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgenerate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata_definition\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     57\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m features.rename(columns={col: \u001b[38;5;28mself\u001b[39m._create_column_name(col) \u001b[38;5;28;01mfor\u001b[39;00m col \u001b[38;5;129;01min\u001b[39;00m features.columns}).set_index(\n\u001b[32m     58\u001b[39m         data.index\n\u001b[32m     59\u001b[39m     )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/legacy/features/llm_judge.py:54\u001b[39m, in \u001b[36mLLMJudge.generate_features\u001b[39m\u001b[34m(self, data, data_definition, options)\u001b[39m\n\u001b[32m     53\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mgenerate_features\u001b[39m(\u001b[38;5;28mself\u001b[39m, data: pd.DataFrame, data_definition: DataDefinition, options: Options) -> pd.DataFrame:\n\u001b[32m---> \u001b[39m\u001b[32m54\u001b[39m     result: Union[List, Dict] = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mget_llm_wrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m.run_batch_sync(\n\u001b[32m     55\u001b[39m         requests=\u001b[38;5;28mself\u001b[39m.template.iterate_messages(data, \u001b[38;5;28mself\u001b[39m.get_input_columns())\n\u001b[32m     56\u001b[39m     )\n\u001b[32m     57\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, \u001b[38;5;28mlist\u001b[39m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28many\u001b[39m(\u001b[38;5;28misinstance\u001b[39m(o, \u001b[38;5;28mdict\u001b[39m) \u001b[38;5;28;01mfor\u001b[39;00m o \u001b[38;5;129;01min\u001b[39;00m result):\n\u001b[32m     58\u001b[39m         result = {\u001b[38;5;28mself\u001b[39m.display_name \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m.template.get_main_output_column(): result}\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/legacy/features/llm_judge.py:43\u001b[39m, in \u001b[36mLLMJudge.get_llm_wrapper\u001b[39m\u001b[34m(self, options)\u001b[39m\n\u001b[32m     41\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mget_llm_wrapper\u001b[39m(\u001b[38;5;28mself\u001b[39m, options: Options) -> LLMWrapper:\n\u001b[32m     42\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m._llm_wrapper \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m43\u001b[39m         \u001b[38;5;28mself\u001b[39m._llm_wrapper = \u001b[43mget_llm_wrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mprovider\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     44\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m._llm_wrapper\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/llm/utils/wrapper.py:437\u001b[39m, in \u001b[36mget_llm_wrapper\u001b[39m\u001b[34m(provider, model, options)\u001b[39m\n\u001b[32m    435\u001b[39m key = (provider, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[32m    436\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m key \u001b[38;5;129;01min\u001b[39;00m _wrappers:\n\u001b[32m--> \u001b[39m\u001b[32m437\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_wrappers\u001b[49m\u001b[43m[\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m]\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    438\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m find_spec(\u001b[33m\"\u001b[39m\u001b[33mlitellm\u001b[39m\u001b[33m\"\u001b[39m) \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m    439\u001b[39m     litellm_wrapper = get_litellm_wrapper(provider, model, options)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/llm/utils/wrapper.py:583\u001b[39m, in \u001b[36mLiteLLMWrapper.__init__\u001b[39m\u001b[34m(self, model, options)\u001b[39m\n\u001b[32m    581\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, model: \u001b[38;5;28mstr\u001b[39m, options: Options):\n\u001b[32m    582\u001b[39m     \u001b[38;5;28mself\u001b[39m.model = model\n\u001b[32m--> \u001b[39m\u001b[32m583\u001b[39m     \u001b[38;5;28mself\u001b[39m.options: LLMOptions = \u001b[43moptions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__llm_options_type__\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/legacy/options/base.py:51\u001b[39m, in \u001b[36mOptions.get\u001b[39m\u001b[34m(self, option_type)\u001b[39m\n\u001b[32m     49\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(possible_subclass, option_type):\n\u001b[32m     50\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m.custom[possible_subclass]  \u001b[38;5;66;03m# type: ignore[return-value]\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m51\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43moption_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/llm/utils/wrapper.py:472\u001b[39m, in \u001b[36mLLMOptions.__init__\u001b[39m\u001b[34m(self, api_key, rpm_limit, **data)\u001b[39m\n\u001b[32m    465\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Initialize LLM options.\u001b[39;00m\n\u001b[32m    466\u001b[39m \n\u001b[32m    467\u001b[39m \u001b[33;03mArgs:\u001b[39;00m\n\u001b[32m    468\u001b[39m \u001b[33;03m* `api_key`: Optional API key for the provider.\u001b[39;00m\n\u001b[32m    469\u001b[39m \u001b[33;03m* `rpm_limit`: Optional requests per minute limit (backward compatibility).\u001b[39;00m\n\u001b[32m    470\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m    471\u001b[39m \u001b[38;5;28mself\u001b[39m.api_key = SecretStr(api_key) \u001b[38;5;28;01mif\u001b[39;00m api_key \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m472\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    473\u001b[39m \u001b[38;5;66;03m# backward comp\u001b[39;00m\n\u001b[32m    474\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m rpm_limit \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/evidently/pydantic_utils.py:89\u001b[39m, in \u001b[36mFrozenBaseModel.__init__\u001b[39m\u001b[34m(self, **data)\u001b[39m\n\u001b[32m     88\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, **data: Any):\n\u001b[32m---> \u001b[39m\u001b[32m89\u001b[39m     \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m__init_values__\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     90\u001b[39m     \u001b[38;5;28;01mfor\u001b[39;00m private_attr \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__private_attributes__:\n\u001b[32m     91\u001b[39m         \u001b[38;5;28;01mif\u001b[39;00m private_attr \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m.__init_values__:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/v1/main.py:347\u001b[39m, in \u001b[36mBaseModel.__init__\u001b[39m\u001b[34m(__pydantic_self__, **data)\u001b[39m\n\u001b[32m    345\u001b[39m values, fields_set, validation_error = validate_model(__pydantic_self__.\u001b[34m__class__\u001b[39m, data)\n\u001b[32m    346\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m validation_error:\n\u001b[32m--> \u001b[39m\u001b[32m347\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m validation_error\n\u001b[32m    348\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m    349\u001b[39m     object_setattr(__pydantic_self__, \u001b[33m'\u001b[39m\u001b[33m__dict__\u001b[39m\u001b[33m'\u001b[39m, values)\n",
+      "\u001b[31mValidationError\u001b[39m: 1 validation error for OllamaOptions\napi_url\n  field required (type=value_error.missing)"
+     ]
+    }
+   ],
+   "source": [
+    "context_based_evals = Dataset.from_pandas(\n",
+    "    synthetic_dataset,\n",
+    "    data_definition=DataDefinition(text_columns=[\"user_input\", \"reference_contexts\", \"reference\"]),\n",
+    "    descriptors=[ContextQualityLLMEval(\"reference_contexts\", question=\"user_input\", provider=\"ollama\", model=OLLAMA_MODEL_NAME)]\n",
+    ")\n",
+    "context_based_evals.as_dataframe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2d127ad",
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
--- a/scratches/acano/langgraph_agent_simple.ipynb
+++ b/scratches/acano/langgraph_agent_simple.ipynb
@ -50,7 +50,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "id": "30edcecc",
   "metadata": {},
   "outputs": [],
@ -67,7 +67,6 @@
    "    provider=\"ollama\",\n",
    "    model=OLLAMA_EMB_MODEL_NAME,\n",
    ")\n",
-    "\n",
    "vector_store = ElasticsearchStore(\n",
    "    es_url=ELASTICSEARCH_LOCAL_URL,\n",
    "    index_name=ELASTICSEARCH_INDEX,\n",
@ -83,15 +82,30 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
   "id": "ad98841b",
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
+     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "Langfuse client is authenticated and ready!\n"
+      "Failed to export span batch code: 404, reason: <!DOCTYPE html><html lang=\"en\"><head><meta charSet=\"utf-8\"/><meta name=\"viewport\" content=\"width=device-width\"/><meta name=\"next-head-count\" content=\"2\"/><link rel=\"preload\" href=\"/_next/static/css/1d1bf8ebf039f876.css\" as=\"style\"/><link rel=\"stylesheet\" href=\"/_next/static/css/1d1bf8ebf039f876.css\" data-n-g=\"\"/><noscript data-n-css=\"\"></noscript><script defer=\"\" nomodule=\"\" src=\"/_next/static/chunks/polyfills-42372ed130431b0a.js\"></script><script src=\"/_next/static/chunks/webpack-fafe7122277f5772.js\" defer=\"\"></script><script src=\"/_next/static/chunks/framework-d2dcb18632b956b2.js\" defer=\"\"></script><script src=\"/_next/static/chunks/main-19694d19b6f6c228.js\" defer=\"\"></script><script src=\"/_next/static/chunks/pages/_app-9e6c56ce79ebaf0a.js\" defer=\"\"></script><script src=\"/_next/static/chunks/pages/_error-881d11b9e6da73d1.js\" defer=\"\"></script><script src=\"/_next/static/Eu5QEyLpa3Avvo-CHkMDG/_buildManifest.js\" defer=\"\"></script><script src=\"/_next/static/Eu5QEyLpa3Avvo-CHkMDG/_ssgManifest.js\" defer=\"\"></script></head><body><div id=\"__next\"><script>!function(){try{var d=document.documentElement,c=d.classList;c.remove('light','dark');var e=localStorage.getItem('theme');if('system'===e||(!e&&true)){var t='(prefers-color-scheme: dark)',m=window.matchMedia(t);if(m.media!==t||m.matches){d.style.colorScheme = 'dark';c.add('dark')}else{d.style.colorScheme = 'light';c.add('light')}}else if(e){c.add(e|| '')}if(e==='light'||e==='dark')d.style.colorScheme=e}catch(e){}}()</script><div class=\"flex min-h-full flex-1 flex-col justify-center py-12 sm:px-6 lg:px-8\" data-sentry-component=\"Spinner\" data-sentry-source-file=\"spinner.tsx\"><div class=\"sm:mx-auto sm:w-full sm:max-w-md\"><img src=\"/icon.svg\" width=\"42\" height=\"42\" alt=\"Langfuse Icon\" class=\"mx-auto motion-safe:animate-spin\" data-sentry-component=\"LangfuseIcon\" 
data-sentry-source-file=\"LangfuseLogo.tsx\"/><h2 class=\"mt-5 text-center text-2xl font-bold leading-9 tracking-tight text-primary\">Loading<!-- --> ...</h2></div></div></div><script id=\"__NEXT_DATA__\" type=\"application/json\">{\"props\":{\"pageProps\":{\"statusCode\":404}},\"page\":\"/_error\",\"query\":{},\"buildId\":\"Eu5QEyLpa3Avvo-CHkMDG\",\"nextExport\":true,\"isFallback\":false,\"gip\":true,\"locale\":\"en\",\"locales\":[\"en\"],\"defaultLocale\":\"en\",\"scriptLoader\":[]}</script></body></html>\n"
+     ]
+    },
+    {
+     "ename": "ValidationError",
+     "evalue": "1 validation error for ParsingModel[Projects]\n__root__ -> data -> 0 -> metadata\n  field required (type=value_error.missing)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mValidationError\u001b[39m                           Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[43mlangfuse\u001b[49m\u001b[43m.\u001b[49m\u001b[43mauth_check\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[32m      2\u001b[39m     \u001b[38;5;28mprint\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33mLangfuse client is authenticated and ready!\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/_client/client.py:1702\u001b[39m, in \u001b[36mLangfuse.auth_check\u001b[39m\u001b[34m(self)\u001b[39m\n\u001b[32m   1693\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Check if the provided credentials (public and secret key) are valid.\u001b[39;00m\n\u001b[32m   1694\u001b[39m \n\u001b[32m   1695\u001b[39m \u001b[33;03mRaises:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m   1699\u001b[39m \u001b[33;03m    This method is blocking. It is discouraged to use it in production code.\u001b[39;00m\n\u001b[32m   1700\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m   1701\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m-> \u001b[39m\u001b[32m1702\u001b[39m     projects = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mapi\u001b[49m\u001b[43m.\u001b[49m\u001b[43mprojects\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1703\u001b[39m     langfuse_logger.debug(\n\u001b[32m   1704\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAuth check successful, found \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(projects.data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m projects\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m   1705\u001b[39m     )\n\u001b[32m   1706\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(projects.data) == \u001b[32m0\u001b[39m:\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/langfuse/api/resources/projects/client.py:65\u001b[39m, in \u001b[36mProjectsClient.get\u001b[39m\u001b[34m(self, request_options)\u001b[39m\n\u001b[32m     63\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m     64\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[32m200\u001b[39m <= _response.status_code < \u001b[32m300\u001b[39m:\n\u001b[32m---> \u001b[39m\u001b[32m65\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpydantic_v1\u001b[49m\u001b[43m.\u001b[49m\u001b[43mparse_obj_as\u001b[49m\u001b[43m(\u001b[49m\u001b[43mProjects\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_response\u001b[49m\u001b[43m.\u001b[49m\u001b[43mjson\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[32m     66\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m _response.status_code == \u001b[32m400\u001b[39m:\n\u001b[32m     67\u001b[39m         \u001b[38;5;28;01mraise\u001b[39;00m Error(pydantic_v1.parse_obj_as(typing.Any, _response.json()))  \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/v1/tools.py:38\u001b[39m, in \u001b[36mparse_obj_as\u001b[39m\u001b[34m(type_, obj, type_name)\u001b[39m\n\u001b[32m     36\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mparse_obj_as\u001b[39m(type_: Type[T], obj: Any, *, type_name: Optional[NameFactory] = \u001b[38;5;28;01mNone\u001b[39;00m) -> T:\n\u001b[32m     37\u001b[39m     model_type = _get_parsing_type(type_, type_name=type_name)  \u001b[38;5;66;03m# type: ignore[arg-type]\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m38\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43m__root__\u001b[49m\u001b[43m=\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m.__root__\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/v1/main.py:347\u001b[39m, in \u001b[36mBaseModel.__init__\u001b[39m\u001b[34m(__pydantic_self__, **data)\u001b[39m\n\u001b[32m    345\u001b[39m values, fields_set, validation_error = validate_model(__pydantic_self__.\u001b[34m__class__\u001b[39m, data)\n\u001b[32m    346\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m validation_error:\n\u001b[32m--> \u001b[39m\u001b[32m347\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m validation_error\n\u001b[32m    348\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m    349\u001b[39m     object_setattr(__pydantic_self__, \u001b[33m'\u001b[39m\u001b[33m__dict__\u001b[39m\u001b[33m'\u001b[39m, values)\n",
+      "\u001b[31mValidationError\u001b[39m: 1 validation error for ParsingModel[Projects]\n__root__ -> data -> 0 -> metadata\n  field required (type=value_error.missing)"
     ]
    }
   ],
@ -112,7 +126,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
   "id": "5f8c88cf",
   "metadata": {},
   "outputs": [],
@ -125,7 +139,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 6,
   "id": "fd8ed542",
   "metadata": {},
   "outputs": [],
@ -144,7 +158,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 7,
   "id": "f0a21230",
   "metadata": {},
   "outputs": [],
@ -154,7 +168,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
   "id": "f9359747",
   "metadata": {},
   "outputs": [],
@ -193,7 +207,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 9,
   "id": "66ae23f0",
   "metadata": {},
   "outputs": [],
@ -237,7 +251,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
   "id": "36d0f54e",
   "metadata": {},
   "outputs": [],
@ -275,7 +289,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 11,
   "id": "f073edc9",
   "metadata": {},
   "outputs": [],
@ -295,7 +309,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 12,
   "id": "fae46a58",
   "metadata": {},
   "outputs": [],
@ -318,7 +332,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 13,
   "id": "7f57b543",
   "metadata": {},
   "outputs": [
@ -342,7 +356,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 14,
   "id": "f7a0993f",
   "metadata": {},
   "outputs": [],
@ -368,7 +382,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 15,
   "id": "2fec3fdb",
   "metadata": {},
   "outputs": [
--- a/uv.lock
+++ b/uv.lock
@ -268,7 +268,6 @@ name = "assistance-engine"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
-    { name = "accelerate" },
    { name = "grpcio" },
    { name = "grpcio-reflection" },
    { name = "grpcio-tools" },
@ -289,6 +288,7 @@ dependencies = [

 [package.dev-dependencies]
 dev = [
+    { name = "accelerate" },
    { name = "beir" },
    { name = "evidently" },
    { name = "jupyter" },
@ -305,7 +305,6 @@ dev = [

 [package.metadata]
 requires-dist = [
-    { name = "accelerate", specifier = ">=1.12.0" },
    { name = "grpcio", specifier = ">=1.78.0" },
    { name = "grpcio-reflection", specifier = ">=1.78.0" },
    { name = "grpcio-tools", specifier = ">=1.78.0" },
@ -326,6 +325,7 @@ requires-dist = [

 [package.metadata.requires-dev]
 dev = [
+    { name = "accelerate", specifier = ">=1.12.0" },
    { name = "beir", specifier = ">=2.2.0" },
    { name = "evidently", specifier = ">=0.7.20" },
    { name = "jupyter", specifier = ">=1.1.1" },