{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8fed4518", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing faithfulness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import faithfulness\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_relevancy from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_relevancy\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_recall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_precision from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_precision\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_entity_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_entity_recall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_similarity from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. 
Example: from ragas.metrics.collections import answer_similarity\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_correctness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_correctness\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextRecall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextRecall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextPrecisionWithReference from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextPrecisionWithReference\n", " from ragas.metrics import (\n" ] } ], "source": [ "import sys\n", "from pathlib import Path\n", "\n", "# Ensure the project root is on the path so `src` is importable\n", "_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n", "if _project_root not in sys.path:\n", " sys.path.insert(0, _project_root)\n", "\n", "from langchain_core.documents import Document\n", "from langchain_classic.chains.retrieval_qa.base import RetrievalQA\n", "from langchain_elasticsearch import ElasticsearchStore\n", "from ragas import evaluate, SingleTurnSample\n", "from ragas.llms import LangchainLLMWrapper\n", "from ragas.embeddings import LangchainEmbeddingsWrapper\n", "from ragas.testset import TestsetGenerator\n", "from ragas.testset.persona import Persona\n", "from ragas.testset.synthesizers.single_hop.specific import SingleHopSpecificQuerySynthesizer\n", "from ragas.metrics import (\n", " faithfulness,\n", " 
answer_relevancy,\n", " context_recall,\n", " context_precision,\n", " context_entity_recall,\n", " answer_similarity,\n", " answer_correctness,\n", " NonLLMContextRecall,\n", " NonLLMContextPrecisionWithReference\n", ")\n", "\n", "from src.llm_factory import create_chat_model\n", "from src.emb_factory import create_embedding_model\n", "from src.config import (\n", " ELASTICSEARCH_LOCAL_URL,\n", " ELASTICSEARCH_INDEX,\n", " OLLAMA_MODEL_NAME,\n", " OLLAMA_EMB_MODEL_NAME,\n", " RAW_DIR\n", ")" ] },
{ "cell_type": "code", "execution_count": 2, "id": "4426d6c0", "metadata": {}, "outputs": [], "source": [
"# Chat model requested from the \"bedrock\" provider, with temperature 0.\n",
"llm = create_chat_model(provider=\"bedrock\", model=\"global.anthropic.claude-opus-4-6-v1\", temperature=0)\n",
"\n",
"# Embedding model requested from the \"ollama\" provider; reused by the vector store below.\n",
"embeddings = create_embedding_model(provider=\"ollama\", model=OLLAMA_EMB_MODEL_NAME)\n",
"\n",
"# Second chat model, from the \"ollama\" provider; validate_model_on_init is forwarded to the factory.\n",
"agent_llm = create_chat_model(\n",
"    provider=\"ollama\",\n",
"    model=OLLAMA_MODEL_NAME,\n",
"    temperature=0,\n",
"    validate_model_on_init=True,\n",
")\n",
"\n",
"# Elasticsearch-backed store on the configured index, using the embedding model above.\n",
"vector_store = ElasticsearchStore(\n",
"    es_url=ELASTICSEARCH_LOCAL_URL,\n",
"    index_name=ELASTICSEARCH_INDEX,\n",
"    embedding=embeddings,\n",
"    query_field=\"text\",\n",
"    vector_query_field=\"vector\",\n",
")\n"
] },
{ "cell_type": "code", "execution_count": 3, "id": "fe524d14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded 24 documents from /home/acano/PycharmProjects/assistance-engine/data/raw\n" ] } ], "source": [
"# Wrap every *.txt file directly under RAW_DIR in a Document, visiting files in sorted order.\n",
"docs: list[Document] = []\n",
"for txt_file in sorted(RAW_DIR.glob(\"*.txt\")):\n",
"    text = txt_file.read_text(encoding=\"utf-8\")\n",
"    # Keep the file name so each document can be traced back to its source file.\n",
"    source_metadata = {\"source\": txt_file.name}\n",
"    docs.append(Document(page_content=text, metadata=source_metadata))\n",
"\n",
"print(f\"Loaded {len(docs)} documents from {RAW_DIR}\")"
] },
{ "cell_type": "code", "execution_count": 15, "id": "06103178", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_inputreference_contextsreferencepersona_namequery_stylequery_lengthsynthesizer_name
0How does AVAP handel a ZeroDivisionError when ...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, when a division by zero occurs—whethe...Carlos MenendezMISSPELLEDLONGsingle_hop_specific_query_synthesizer
1As a backend developer who is learning AVAP an...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, control flow structures include condi...Carlos MenendezPERFECT_GRAMMARLONGsingle_hop_specific_query_synthesizer
2hey so in AVAP when i do division by zero what...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, when you perform a division by zero, ...Carlos MedinaPOOR_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
3what happen if file not found when i do import...[Execution Model in AVAP\\n4.1. Structure of a ...When an import statement is executed in AVAP, ...Carlos MedinaPOOR_GRAMMARSHORTsingle_hop_specific_query_synthesizer
4In AVAP, under what circumstances is a TypeErr...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, a TypeError exception is raised in tw...Carlos MenendezPERFECT_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
........................
95How does the data model in AVAP™ compare to Py...[Introduction\\nThe data model in AVAP™ defines...The data model in AVAP™ is very similar to Pyt...Carlos MenendezPERFECT_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
96What data types are available in AVAP™?[Introduction\\nThe data model in AVAP™ defines...In AVAP™, the most common data types include i...Carlos MedinaPERFECT_GRAMMARSHORTsingle_hop_specific_query_synthesizer
97AVAP strings Unicode[Introduction\\nThe data model in AVAP™ defines...In AVAP™, strings (str) represent sequences of...Carlos MedinaWEB_SEARCH_LIKESHORTsingle_hop_specific_query_synthesizer
98AVAP data model comparison with Python data ty...[Introduction\\nThe data model in AVAP™ defines...The data model in AVAP is similar to Python in...Carlos MendietaWEB_SEARCH_LIKEMEDIUMsingle_hop_specific_query_synthesizer
99AVAP™ data types and data structures overview[Introduction\\nThe data model in AVAP™ defines...AVAP™ uses a flexible and dynamic data model s...Carlos MendietaWEB_SEARCH_LIKESHORTsingle_hop_specific_query_synthesizer
\n", "

100 rows × 7 columns

\n", "
" ], "text/plain": [ " user_input \\\n", "0 How does AVAP handel a ZeroDivisionError when ... \n", "1 As a backend developer who is learning AVAP an... \n", "2 hey so in AVAP when i do division by zero what... \n", "3 what happen if file not found when i do import... \n", "4 In AVAP, under what circumstances is a TypeErr... \n", ".. ... \n", "95 How does the data model in AVAP™ compare to Py... \n", "96 What data types are available in AVAP™? \n", "97 AVAP strings Unicode \n", "98 AVAP data model comparison with Python data ty... \n", "99 AVAP™ data types and data structures overview \n", "\n", " reference_contexts \\\n", "0 [Execution Model in AVAP\\n4.1. Structure of a ... \n", "1 [Execution Model in AVAP\\n4.1. Structure of a ... \n", "2 [Execution Model in AVAP\\n4.1. Structure of a ... \n", "3 [Execution Model in AVAP\\n4.1. Structure of a ... \n", "4 [Execution Model in AVAP\\n4.1. Structure of a ... \n", ".. ... \n", "95 [Introduction\\nThe data model in AVAP™ defines... \n", "96 [Introduction\\nThe data model in AVAP™ defines... \n", "97 [Introduction\\nThe data model in AVAP™ defines... \n", "98 [Introduction\\nThe data model in AVAP™ defines... \n", "99 [Introduction\\nThe data model in AVAP™ defines... \n", "\n", " reference persona_name \\\n", "0 In AVAP, when a division by zero occurs—whethe... Carlos Menendez \n", "1 In AVAP, control flow structures include condi... Carlos Menendez \n", "2 In AVAP, when you perform a division by zero, ... Carlos Medina \n", "3 When an import statement is executed in AVAP, ... Carlos Medina \n", "4 In AVAP, a TypeError exception is raised in tw... Carlos Menendez \n", ".. ... ... \n", "95 The data model in AVAP™ is very similar to Pyt... Carlos Menendez \n", "96 In AVAP™, the most common data types include i... Carlos Medina \n", "97 In AVAP™, strings (str) represent sequences of... Carlos Medina \n", "98 The data model in AVAP is similar to Python in... 
Carlos Mendieta \n", "99 AVAP™ uses a flexible and dynamic data model s... Carlos Mendieta \n", "\n", " query_style query_length synthesizer_name \n", "0 MISSPELLED LONG single_hop_specific_query_synthesizer \n", "1 PERFECT_GRAMMAR LONG single_hop_specific_query_synthesizer \n", "2 POOR_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", "3 POOR_GRAMMAR SHORT single_hop_specific_query_synthesizer \n", "4 PERFECT_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", ".. ... ... ... \n", "95 PERFECT_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", "96 PERFECT_GRAMMAR SHORT single_hop_specific_query_synthesizer \n", "97 WEB_SEARCH_LIKE SHORT single_hop_specific_query_synthesizer \n", "98 WEB_SEARCH_LIKE MEDIUM single_hop_specific_query_synthesizer \n", "99 WEB_SEARCH_LIKE SHORT single_hop_specific_query_synthesizer \n", "\n", "[100 rows x 7 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "synthetic_dataset" ] }, { "cell_type": "code", "execution_count": 4, "id": "ab1932b7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_716860/244266171.py:1: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n", " synth = SingleHopSpecificQuerySynthesizer(llm=LangchainLLMWrapper(llm))\n", "/tmp/ipykernel_716860/244266171.py:3: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. 
Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n", " generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n", "/tmp/ipykernel_716860/244266171.py:3: DeprecationWarning: LangchainEmbeddingsWrapper is deprecated and will be removed in a future version. Use the modern embedding providers instead: embedding_factory('openai', model='text-embedding-3-small', client=openai_client) or from ragas.embeddings import OpenAIEmbeddings, GoogleEmbeddings, HuggingFaceEmbeddings\n", " generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8ec6ef79b1964c44b78a75ca539f816b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Applying SummaryExtractor: 0%| | 0/24 [00:00\n", "Traceback (most recent call last):\n", " File \"/home/acano/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/asyncio/events.py\", line 84, in _run\n", " self._context.run(self._callback, *self._args)\n", "RuntimeError: cannot enter context: <_contextvars.Context object at 0x74fe3aa80780> is already entered\n", "Task was destroyed but it is pending!\n", "task: .run_in_context() done, defined at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/ipykernel/utils.py:57> wait_for= cb=[Task.__wakeup()]> cb=[ZMQStream._run_callback.._log_error() at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/zmq/eventloop/zmqstream.py:563]>\n", "/home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/json_schema.py:335: RuntimeWarning: coroutine 'Kernel.shell_main' was never awaited\n", " mapping[key] = getattr(self, method_name)\n", "RuntimeWarning: Enable tracemalloc to get the object allocation 
traceback\n", "Task was destroyed but it is pending!\n", "task: cb=[Task.__wakeup()]>\n", "Exception in callback Task.__step()\n", "handle: \n", "Traceback (most recent call last):\n", " File \"/home/acano/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/asyncio/events.py\", line 84, in _run\n", " self._context.run(self._callback, *self._args)\n", "RuntimeError: cannot enter context: <_contextvars.Context object at 0x74fe3aa80780> is already entered\n", "Task was destroyed but it is pending!\n", "task: .run_in_context() done, defined at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/ipykernel/utils.py:57> wait_for= cb=[Task.__wakeup()]> cb=[ZMQStream._run_callback.._log_error() at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/zmq/eventloop/zmqstream.py:563]>\n", "/home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/main.py:716: RuntimeWarning: coroutine 'Kernel.shell_main' was never awaited\n", " return cls.__pydantic_validator__.validate_python(\n", "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n", "Task was destroyed but it is pending!\n", "task: cb=[Task.__wakeup()]>\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_inputretrieved_contextsresponsereferencefaithfulnessanswer_relevancycontext_precisioncontext_recallcontext_entity_recallanswer_similarityanswer_correctness
0How does AVAP handel a ZeroDivisionError when ...[Execution Model in AVAP 4.1. Structure of a P...AVAP (Advanced Virtual Application Platform) i...In AVAP, when a division by zero occurs—whethe...0.0833330.0000001.0000001.0000000.3000000.8336700.363590
1As a backend developer who is learning AVAP an...[SECTION III: Control Logic and Decision Struc...I can provide information on the if statement ...In AVAP, control flow structures include condi...0.9047620.8375641.0000000.4545450.1578950.8093110.531596
2hey so in AVAP when i do division by zero what...[Execution Model in AVAP 4.1. Structure of a P...In AVAP, when a division operation is attempte...In AVAP, when you perform a division by zero, ...0.3125000.8922551.0000001.0000000.2000000.9238800.472905
3what happen if file not found when i do import...[Execution Model in AVAP 4.1. Structure of a P...In AVAP, the equivalent of a `FileNotFoundErro...When an import statement is executed in AVAP, ...0.1428570.9120311.0000001.0000001.0000000.6420440.248746
4In AVAP, under what circumstances is a TypeErr...[Execution Model in AVAP 4.1. Structure of a P...I don't have information on the specific circu...In AVAP, a TypeError exception is raised in tw...0.0000000.0000001.0000001.0000001.0000000.8068480.201712
....................................
95How does the data model in AVAP™ compare to Py...[Introduction The data model in AVAP™ defines ...AVAP (Advanced Vector Analysis Platform) is a ...The data model in AVAP™ is very similar to Pyt...0.2894740.9255460.8333330.8000000.5000000.7368160.422299
96What data types are available in AVAP™?[Chapter 5: Data Types In this chapter, we wil...I don't know the specific details about the da...In AVAP™, the most common data types include i...0.8181820.0000000.5000001.0000000.5000000.8564020.658545
97AVAP strings Unicode[Chapter 2: Notation in AVAP™ Introduction Not...AVAP (African Vertebrate Anatomy Project) uses...In AVAP™, strings (str) represent sequences of...0.0000000.9187400.0000000.0000000.5000000.6862810.271570
98AVAP data model comparison with Python data ty...[Introduction The data model in AVAP™ defines ...Here's a comparison of the AVAP data model wit...The data model in AVAP is similar to Python in...0.3437500.9549940.8333331.0000000.5555560.8244490.587930
99AVAP™ data types and data structures overview[Introduction The data model in AVAP™ defines ...AVAP (Advanced Visual Analytics Platform) is a...AVAP™ uses a flexible and dynamic data model s...0.0000000.8557191.0000001.0000000.1000000.8561070.323783
\n", "

100 rows × 11 columns

\n", "
" ], "text/plain": [ " user_input \\\n", "0 How does AVAP handel a ZeroDivisionError when ... \n", "1 As a backend developer who is learning AVAP an... \n", "2 hey so in AVAP when i do division by zero what... \n", "3 what happen if file not found when i do import... \n", "4 In AVAP, under what circumstances is a TypeErr... \n", ".. ... \n", "95 How does the data model in AVAP™ compare to Py... \n", "96 What data types are available in AVAP™? \n", "97 AVAP strings Unicode \n", "98 AVAP data model comparison with Python data ty... \n", "99 AVAP™ data types and data structures overview \n", "\n", " retrieved_contexts \\\n", "0 [Execution Model in AVAP 4.1. Structure of a P... \n", "1 [SECTION III: Control Logic and Decision Struc... \n", "2 [Execution Model in AVAP 4.1. Structure of a P... \n", "3 [Execution Model in AVAP 4.1. Structure of a P... \n", "4 [Execution Model in AVAP 4.1. Structure of a P... \n", ".. ... \n", "95 [Introduction The data model in AVAP™ defines ... \n", "96 [Chapter 5: Data Types In this chapter, we wil... \n", "97 [Chapter 2: Notation in AVAP™ Introduction Not... \n", "98 [Introduction The data model in AVAP™ defines ... \n", "99 [Introduction The data model in AVAP™ defines ... \n", "\n", " response \\\n", "0 AVAP (Advanced Virtual Application Platform) i... \n", "1 I can provide information on the if statement ... \n", "2 In AVAP, when a division operation is attempte... \n", "3 In AVAP, the equivalent of a `FileNotFoundErro... \n", "4 I don't have information on the specific circu... \n", ".. ... \n", "95 AVAP (Advanced Vector Analysis Platform) is a ... \n", "96 I don't know the specific details about the da... \n", "97 AVAP (African Vertebrate Anatomy Project) uses... \n", "98 Here's a comparison of the AVAP data model wit... \n", "99 AVAP (Advanced Visual Analytics Platform) is a... \n", "\n", " reference faithfulness \\\n", "0 In AVAP, when a division by zero occurs—whethe... 
0.083333 \n", "1 In AVAP, control flow structures include condi... 0.904762 \n", "2 In AVAP, when you perform a division by zero, ... 0.312500 \n", "3 When an import statement is executed in AVAP, ... 0.142857 \n", "4 In AVAP, a TypeError exception is raised in tw... 0.000000 \n", ".. ... ... \n", "95 The data model in AVAP™ is very similar to Pyt... 0.289474 \n", "96 In AVAP™, the most common data types include i... 0.818182 \n", "97 In AVAP™, strings (str) represent sequences of... 0.000000 \n", "98 The data model in AVAP is similar to Python in... 0.343750 \n", "99 AVAP™ uses a flexible and dynamic data model s... 0.000000 \n", "\n", " answer_relevancy context_precision context_recall \\\n", "0 0.000000 1.000000 1.000000 \n", "1 0.837564 1.000000 0.454545 \n", "2 0.892255 1.000000 1.000000 \n", "3 0.912031 1.000000 1.000000 \n", "4 0.000000 1.000000 1.000000 \n", ".. ... ... ... \n", "95 0.925546 0.833333 0.800000 \n", "96 0.000000 0.500000 1.000000 \n", "97 0.918740 0.000000 0.000000 \n", "98 0.954994 0.833333 1.000000 \n", "99 0.855719 1.000000 1.000000 \n", "\n", " context_entity_recall answer_similarity answer_correctness \n", "0 0.300000 0.833670 0.363590 \n", "1 0.157895 0.809311 0.531596 \n", "2 0.200000 0.923880 0.472905 \n", "3 1.000000 0.642044 0.248746 \n", "4 1.000000 0.806848 0.201712 \n", ".. ... ... ... 
\n", "95 0.500000 0.736816 0.422299 \n", "96 0.500000 0.856402 0.658545 \n", "97 0.500000 0.686281 0.271570 \n", "98 0.555556 0.824449 0.587930 \n", "99 0.100000 0.856107 0.323783 \n", "\n", "[100 rows x 11 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Metrics scored for every sample in `dataset`.\n",
"metrics = [\n",
"    faithfulness,\n",
"    answer_relevancy,\n",
"    context_precision,\n",
"    context_recall,\n",
"    context_entity_recall,\n",
"    answer_similarity,\n",
"    answer_correctness\n",
"]\n",
"\n",
"# `llm` and `embeddings` are handed to evaluate() for the metrics to use.\n",
"result = evaluate(\n",
"    dataset=dataset,\n",
"    metrics=metrics,\n",
"    llm=llm,\n",
"    embeddings=embeddings,\n",
")\n",
"\n",
"# One row per evaluated sample, one column per metric (plus the sample fields).\n",
"result_df = result.to_pandas()\n",
"result_df"
] },
{ "cell_type": "code", "execution_count": 14, "id": "20c3fa64", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "faithfulness 0.254643\n", "answer_relevancy 0.609250\n", "context_precision 0.862500\n", "context_recall 0.906242\n", "context_entity_recall 0.354178\n", "answer_similarity 0.781973\n", "answer_correctness 0.359654\n", "dtype: float64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Average of every numeric column across all samples.\n",
"result_df.mean(numeric_only=True)"
] },
{ "cell_type": "code", "execution_count": 16, "id": "350755fd", "metadata": {}, "outputs": [], "source": [
"# Build the output path from RAW_DIR instead of a machine-specific absolute path.\n",
"# Assumes RAW_DIR is the project's data/raw directory, so this resolves to\n",
"# data/interim/embedding_eval_results/retrieve_eval_results/ragas_eval.csv.\n",
"eval_csv_path = (\n",
"    Path(RAW_DIR).parent\n",
"    / \"interim\"\n",
"    / \"embedding_eval_results\"\n",
"    / \"retrieve_eval_results\"\n",
"    / \"ragas_eval.csv\"\n",
")\n",
"# to_csv does not create missing directories, so create the target folder first.\n",
"eval_csv_path.parent.mkdir(parents=True, exist_ok=True)\n",
"result_df.to_csv(eval_csv_path, index=False)"
] } ], "metadata": { "kernelspec": { "display_name": "assistance-engine", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }