{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8fed4518", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing faithfulness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import faithfulness\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_relevancy from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_relevancy\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_recall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_precision from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_precision\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_entity_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_entity_recall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_similarity from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. 
Example: from ragas.metrics.collections import answer_similarity\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_correctness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_correctness\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextRecall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextRecall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextPrecisionWithReference from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextPrecisionWithReference\n", " from ragas.metrics import (\n" ] } ], "source": [ "import sys\n", "from pathlib import Path\n", "\n", "# Put the project root on sys.path so that the `src` package can be imported.\n", "# When `__file__` is defined the root is resolved from this file's location;\n", "# otherwise it is derived from the current working directory.\n", "if \"__file__\" in dir():\n", "    _project_root = str(Path(__file__).resolve().parents[2])\n", "else:\n", "    _project_root = str(Path.cwd().parents[1])\n", "if _project_root not in sys.path:\n", "    sys.path.insert(0, _project_root)\n", "\n", "from langchain_core.documents import Document\n", "from langchain_classic.chains.retrieval_qa.base import RetrievalQA\n", "from langchain_elasticsearch import ElasticsearchStore\n", "from ragas import evaluate, SingleTurnSample\n", "from ragas.llms import LangchainLLMWrapper\n", "from ragas.embeddings import LangchainEmbeddingsWrapper\n", "from ragas.testset import TestsetGenerator\n", "from ragas.testset.persona import Persona\n", "from ragas.testset.synthesizers.single_hop.specific import SingleHopSpecificQuerySynthesizer\n", "from ragas.metrics import (\n", " faithfulness,\n", " 
answer_relevancy,\n", " context_recall,\n", " context_precision,\n", " context_entity_recall,\n", " answer_similarity,\n", " answer_correctness,\n", " NonLLMContextRecall,\n", " NonLLMContextPrecisionWithReference\n", ")\n", "\n", "from src.llm_factory import create_chat_model\n", "from src.emb_factory import create_embedding_model\n", "from src.config import (\n", " ELASTICSEARCH_LOCAL_URL,\n", " ELASTICSEARCH_INDEX,\n", " OLLAMA_MODEL_NAME,\n", " OLLAMA_EMB_MODEL_NAME,\n", " RAW_DIR\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "4426d6c0", "metadata": {}, "outputs": [], "source": [ "# Chat model obtained from the `bedrock` provider.\n", "# NOTE(review): presumably the LLM used for test-set generation and scoring\n", "# in later cells -- confirm against the cells that consume `llm`.\n", "llm = create_chat_model(\n", "    provider=\"bedrock\",\n", "    model=\"global.anthropic.claude-opus-4-6-v1\",\n", "    temperature=0,\n", ")\n", "# Embedding model from the `ollama` provider; also passed to the vector store below.\n", "embeddings = create_embedding_model(\n", "    provider=\"ollama\",\n", "    model=OLLAMA_EMB_MODEL_NAME,\n", ")\n", "# Chat model from the `ollama` provider.\n", "# NOTE(review): `validate_model_on_init` is forwarded to the factory; presumably\n", "# it makes a missing model fail here rather than on first use -- confirm in\n", "# src.llm_factory.\n", "agent_llm = create_chat_model(\n", "    provider=\"ollama\",\n", "    model=OLLAMA_MODEL_NAME,\n", "    temperature=0,\n", "    validate_model_on_init=True,\n", ")\n", "# Elasticsearch vector store bound to the configured URL and index.\n", "# `query_field` / `vector_query_field` name the index fields holding the raw\n", "# text and the embedding vectors.\n", "vector_store = ElasticsearchStore(\n", "    es_url=ELASTICSEARCH_LOCAL_URL,\n", "    index_name=ELASTICSEARCH_INDEX,\n", "    embedding=embeddings,\n", "    query_field=\"text\",\n", "    vector_query_field=\"vector\",\n", ")\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "fe524d14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded 24 documents from /home/acano/PycharmProjects/assistance-engine/data/raw\n" ] } ], "source": [ "# Read every *.txt file in RAW_DIR (sorted by path) into a Document whose\n", "# `source` metadata is the file name.\n", "docs: list[Document] = []\n", "for txt_file in sorted(RAW_DIR.glob(\"*.txt\")):\n", "    text = txt_file.read_text(encoding=\"utf-8\")\n", "    docs.append(Document(page_content=text, metadata={\"source\": txt_file.name}))\n", "\n", "# Fail fast on an empty corpus rather than continuing with no documents.\n", "assert docs, f\"No .txt files found in {RAW_DIR}\"\n", "\n", "print(f\"Loaded {len(docs)} documents from {RAW_DIR}\")" ] }, { "cell_type": "code", "execution_count": 15, "id": "06103178", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | user_input | \n", "reference_contexts | \n", "reference | \n", "persona_name | \n", "query_style | \n", "query_length | \n", "synthesizer_name | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "How does AVAP handel a ZeroDivisionError when ... | \n", "[Execution Model in AVAP\\n4.1. Structure of a ... | \n", "In AVAP, when a division by zero occurs—whethe... | \n", "Carlos Menendez | \n", "MISSPELLED | \n", "LONG | \n", "single_hop_specific_query_synthesizer | \n", "
| 1 | \n", "As a backend developer who is learning AVAP an... | \n", "[Execution Model in AVAP\\n4.1. Structure of a ... | \n", "In AVAP, control flow structures include condi... | \n", "Carlos Menendez | \n", "PERFECT_GRAMMAR | \n", "LONG | \n", "single_hop_specific_query_synthesizer | \n", "
| 2 | \n", "hey so in AVAP when i do division by zero what... | \n", "[Execution Model in AVAP\\n4.1. Structure of a ... | \n", "In AVAP, when you perform a division by zero, ... | \n", "Carlos Medina | \n", "POOR_GRAMMAR | \n", "MEDIUM | \n", "single_hop_specific_query_synthesizer | \n", "
| 3 | \n", "what happen if file not found when i do import... | \n", "[Execution Model in AVAP\\n4.1. Structure of a ... | \n", "When an import statement is executed in AVAP, ... | \n", "Carlos Medina | \n", "POOR_GRAMMAR | \n", "SHORT | \n", "single_hop_specific_query_synthesizer | \n", "
| 4 | \n", "In AVAP, under what circumstances is a TypeErr... | \n", "[Execution Model in AVAP\\n4.1. Structure of a ... | \n", "In AVAP, a TypeError exception is raised in tw... | \n", "Carlos Menendez | \n", "PERFECT_GRAMMAR | \n", "MEDIUM | \n", "single_hop_specific_query_synthesizer | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 95 | \n", "How does the data model in AVAP™ compare to Py... | \n", "[Introduction\\nThe data model in AVAP™ defines... | \n", "The data model in AVAP™ is very similar to Pyt... | \n", "Carlos Menendez | \n", "PERFECT_GRAMMAR | \n", "MEDIUM | \n", "single_hop_specific_query_synthesizer | \n", "
| 96 | \n", "What data types are available in AVAP™? | \n", "[Introduction\\nThe data model in AVAP™ defines... | \n", "In AVAP™, the most common data types include i... | \n", "Carlos Medina | \n", "PERFECT_GRAMMAR | \n", "SHORT | \n", "single_hop_specific_query_synthesizer | \n", "
| 97 | \n", "AVAP strings Unicode | \n", "[Introduction\\nThe data model in AVAP™ defines... | \n", "In AVAP™, strings (str) represent sequences of... | \n", "Carlos Medina | \n", "WEB_SEARCH_LIKE | \n", "SHORT | \n", "single_hop_specific_query_synthesizer | \n", "
| 98 | \n", "AVAP data model comparison with Python data ty... | \n", "[Introduction\\nThe data model in AVAP™ defines... | \n", "The data model in AVAP is similar to Python in... | \n", "Carlos Mendieta | \n", "WEB_SEARCH_LIKE | \n", "MEDIUM | \n", "single_hop_specific_query_synthesizer | \n", "
| 99 | \n", "AVAP™ data types and data structures overview | \n", "[Introduction\\nThe data model in AVAP™ defines... | \n", "AVAP™ uses a flexible and dynamic data model s... | \n", "Carlos Mendieta | \n", "WEB_SEARCH_LIKE | \n", "SHORT | \n", "single_hop_specific_query_synthesizer | \n", "
100 rows × 7 columns
\n", "| \n", " | user_input | \n", "retrieved_contexts | \n", "response | \n", "reference | \n", "faithfulness | \n", "answer_relevancy | \n", "context_precision | \n", "context_recall | \n", "context_entity_recall | \n", "answer_similarity | \n", "answer_correctness | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "How does AVAP handel a ZeroDivisionError when ... | \n", "[Execution Model in AVAP 4.1. Structure of a P... | \n", "AVAP (Advanced Virtual Application Platform) i... | \n", "In AVAP, when a division by zero occurs—whethe... | \n", "0.083333 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.300000 | \n", "0.833670 | \n", "0.363590 | \n", "
| 1 | \n", "As a backend developer who is learning AVAP an... | \n", "[SECTION III: Control Logic and Decision Struc... | \n", "I can provide information on the if statement ... | \n", "In AVAP, control flow structures include condi... | \n", "0.904762 | \n", "0.837564 | \n", "1.000000 | \n", "0.454545 | \n", "0.157895 | \n", "0.809311 | \n", "0.531596 | \n", "
| 2 | \n", "hey so in AVAP when i do division by zero what... | \n", "[Execution Model in AVAP 4.1. Structure of a P... | \n", "In AVAP, when a division operation is attempte... | \n", "In AVAP, when you perform a division by zero, ... | \n", "0.312500 | \n", "0.892255 | \n", "1.000000 | \n", "1.000000 | \n", "0.200000 | \n", "0.923880 | \n", "0.472905 | \n", "
| 3 | \n", "what happen if file not found when i do import... | \n", "[Execution Model in AVAP 4.1. Structure of a P... | \n", "In AVAP, the equivalent of a `FileNotFoundErro... | \n", "When an import statement is executed in AVAP, ... | \n", "0.142857 | \n", "0.912031 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.642044 | \n", "0.248746 | \n", "
| 4 | \n", "In AVAP, under what circumstances is a TypeErr... | \n", "[Execution Model in AVAP 4.1. Structure of a P... | \n", "I don't have information on the specific circu... | \n", "In AVAP, a TypeError exception is raised in tw... | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.806848 | \n", "0.201712 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 95 | \n", "How does the data model in AVAP™ compare to Py... | \n", "[Introduction The data model in AVAP™ defines ... | \n", "AVAP (Advanced Vector Analysis Platform) is a ... | \n", "The data model in AVAP™ is very similar to Pyt... | \n", "0.289474 | \n", "0.925546 | \n", "0.833333 | \n", "0.800000 | \n", "0.500000 | \n", "0.736816 | \n", "0.422299 | \n", "
| 96 | \n", "What data types are available in AVAP™? | \n", "[Chapter 5: Data Types In this chapter, we wil... | \n", "I don't know the specific details about the da... | \n", "In AVAP™, the most common data types include i... | \n", "0.818182 | \n", "0.000000 | \n", "0.500000 | \n", "1.000000 | \n", "0.500000 | \n", "0.856402 | \n", "0.658545 | \n", "
| 97 | \n", "AVAP strings Unicode | \n", "[Chapter 2: Notation in AVAP™ Introduction Not... | \n", "AVAP (African Vertebrate Anatomy Project) uses... | \n", "In AVAP™, strings (str) represent sequences of... | \n", "0.000000 | \n", "0.918740 | \n", "0.000000 | \n", "0.000000 | \n", "0.500000 | \n", "0.686281 | \n", "0.271570 | \n", "
| 98 | \n", "AVAP data model comparison with Python data ty... | \n", "[Introduction The data model in AVAP™ defines ... | \n", "Here's a comparison of the AVAP data model wit... | \n", "The data model in AVAP is similar to Python in... | \n", "0.343750 | \n", "0.954994 | \n", "0.833333 | \n", "1.000000 | \n", "0.555556 | \n", "0.824449 | \n", "0.587930 | \n", "
| 99 | \n", "AVAP™ data types and data structures overview | \n", "[Introduction The data model in AVAP™ defines ... | \n", "AVAP (Advanced Visual Analytics Platform) is a... | \n", "AVAP™ uses a flexible and dynamic data model s... | \n", "0.000000 | \n", "0.855719 | \n", "1.000000 | \n", "1.000000 | \n", "0.100000 | \n", "0.856107 | \n", "0.323783 | \n", "
100 rows × 11 columns
\n", "