{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "8fed4518", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing faithfulness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import faithfulness\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing answer_relevancy from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_relevancy\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing context_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_recall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing context_precision from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_precision\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing context_entity_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_entity_recall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing answer_similarity from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. 
Example: from ragas.metrics.collections import answer_similarity\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing answer_correctness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_correctness\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing NonLLMContextRecall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextRecall\n", " from ragas.metrics import (\n", "/tmp/ipykernel_207610/259165526.py:21: DeprecationWarning: Importing NonLLMContextPrecisionWithReference from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextPrecisionWithReference\n", " from ragas.metrics import (\n" ] } ], "source": [ "import sys\n", "from pathlib import Path\n", "\n", "# Ensure the project root is on the path so `src` is importable\n", "_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n", "if _project_root not in sys.path:\n", " sys.path.insert(0, _project_root)\n", "\n", "import pandas as pd\n", "from datasets import Dataset\n", "from langchain_core.documents import Document\n", "from langchain_classic.chains.retrieval_qa.base import RetrievalQA\n", "from langchain_elasticsearch import ElasticsearchStore\n", "from langchain_core.messages import HumanMessage\n", "from ragas import evaluate, SingleTurnSample\n", "from ragas.llms import LangchainLLMWrapper\n", "from ragas.embeddings import LangchainEmbeddingsWrapper\n", "from ragas.testset import TestsetGenerator\n", "from ragas.testset.persona import Persona\n", "from ragas.testset.synthesizers.single_hop.specific 
import SingleHopSpecificQuerySynthesizer\n", "from ragas.metrics import (\n", " faithfulness,\n", " answer_relevancy,\n", " context_recall,\n", " context_precision,\n", " context_entity_recall,\n", " answer_similarity,\n", " answer_correctness,\n", " NonLLMContextRecall,\n", " NonLLMContextPrecisionWithReference\n", ")\n", "\n", "from src.utils.llm_factory import create_chat_model\n", "from src.utils.emb_factory import create_embedding_model\n", "from src.config import (\n", " ELASTICSEARCH_LOCAL_URL,\n", " ELASTICSEARCH_INDEX,\n", " OLLAMA_MODEL_NAME,\n", " OLLAMA_EMB_MODEL_NAME,\n", " OLLAMA_LOCAL_URL,\n", " RAW_DIR,\n", " INTERIM_DIR\n", ")" ] }, { "cell_type": "code", "execution_count": 2, "id": "4426d6c0", "metadata": {}, "outputs": [], "source": [ "llm = create_chat_model(\n", " provider=\"bedrock\",\n", " model=\"global.anthropic.claude-sonnet-4-6\",\n", " temperature=0,\n", ")\n", "embeddings = create_embedding_model(\n", " provider=\"ollama\",\n", " model=OLLAMA_EMB_MODEL_NAME,\n", ")\n", "agent_llm = create_chat_model(\n", " provider=\"ollama\",\n", " model=OLLAMA_MODEL_NAME,\n", " temperature=0,\n", " validate_model_on_init=True,\n", ")\n", "vector_store = ElasticsearchStore(\n", " es_url=ELASTICSEARCH_LOCAL_URL,\n", " index_name=ELASTICSEARCH_INDEX,\n", " embedding=embeddings,\n", " query_field=\"text\",\n", " vector_query_field=\"vector\",\n", ")" ] }, { "cell_type": "markdown", "id": "d2a6ab91", "metadata": {}, "source": [ "### Build langgraph" ] }, { "cell_type": "code", "execution_count": 5, "id": "7f9fc4de", "metadata": {}, "outputs": [], "source": [ "from typing import TypedDict, List, Annotated\n", "\n", "from langchain_core.messages import SystemMessage\n", "from langgraph.graph.message import add_messages\n", "from langchain_elasticsearch import ElasticsearchStore\n", "from langgraph.graph import StateGraph, END\n", "\n", "from src.utils.llm_factory import create_chat_model\n", "from src.utils.emb_factory import create_embedding_model\n", "from 
src.config import (\n", " ELASTICSEARCH_LOCAL_URL,\n", " ELASTICSEARCH_INDEX,\n", " OLLAMA_MODEL_NAME,\n", " OLLAMA_EMB_MODEL_NAME\n", ")\n", "\n", "class AgentState(TypedDict, total=False):\n", " messages: Annotated[list, add_messages]\n", " reformulated_query: str\n", " context: str \n", " contexts: List[str]\n", "\n", "REFORMULATE_PROMPT = SystemMessage(\n", " content=(\n", " \"You are a deterministic lexical query rewriter used for vector retrieval.\\n\"\n", " \"Your task is to rewrite user questions into optimized keyword search queries.\\n\\n\"\n", "\n", " \"CRITICAL RULES (ABSOLUTE):\\n\"\n", " \"1. NEVER answer the question.\\n\"\n", " \"2. NEVER expand acronyms.\\n\"\n", " \"3. NEVER introduce new terms not present in the original query.\\n\"\n", " \"4. NEVER infer missing information.\\n\"\n", " \"5. NEVER add explanations, definitions, or interpretations.\\n\"\n", " \"6. Preserve all technical tokens exactly as written.\\n\"\n", " \"7. Only remove filler words (e.g., what, does, is, explain, tell me, please).\\n\"\n", " \"8. You may reorder terms for better retrieval.\\n\"\n", " \"9. Output must be a single-line plain keyword query.\\n\"\n", " \"10. If the query is already optimal, return it unchanged.\\n\\n\"\n", " \"11. If you receive something that looks like code, do NOT attempt to rewrite it. 
Return it verbatim.\\n\\n\"\n", "\n", " \"ALLOWED OPERATIONS:\\n\"\n", " \"- Remove interrogative phrasing.\\n\"\n", " \"- Remove stopwords.\\n\"\n", " \"- Reorder words.\\n\"\n", " \"- Convert to noun phrase form.\\n\\n\"\n", "\n", " \"FORBIDDEN OPERATIONS:\\n\"\n", " \"- Expanding abbreviations.\\n\"\n", " \"- Paraphrasing into unseen vocabulary.\\n\"\n", " \"- Adding definitions.\\n\"\n", " \"- Answering implicitly.\\n\\n\"\n", "\n", " \"Examples:\\n\"\n", " \"Input: What does AVAP stand for?\\n\"\n", " \"Output: AVAP stand for\\n\"\n", "\n", " \"Input: Hey, I'm trying to understand how AVAP handels a ZeroDivisionError when doing divison or modulus operatoins. Can you explane what situatoins cause a ZeroDivisionError to be raised and how I can catch it in my AVAP scripts?\\n\"\n", " \"Output: AVAP ZeroDivisionError division / modulus % catch try except\\n\"\n", " \n", " \"Input: What does AVAP stand for?\\n\"\n", " \"Output: AVAP stand for\\n\"\n", "\n", " \"Input: Please explain how the import statement works in AVAP scripts.\\n\"\n", " \"Output: AVAP import statement syntax behavior\\n\\n\"\n", "\n", " \"Return only the rewritten query.\"\n", " )\n", ")\n", "\n", "GENERATE_PROMPT = SystemMessage(\n", " content=(\n", " \"\"\"You are a retrieval-grounded assistant.\n", "\n", " CRITICAL SCOPE RULES\n", " - AVAP (Advanced Virtual API Programming) is a new/unknown programming language.\n", " - If the user message is AVAP-related (explicitly mentions \"AVAP\" case-insensitive OR the provided context is AVAP code/docs), you MUST answer using ONLY .\n", " - You MUST NOT use general programming knowledge to interpret AVAP.\n", " - You MUST NOT assume AVAP behaves like any other language even if syntax looks similar.\n", " - You MUST NOT infer missing details. 
Only state what is explicitly present in .\n", "\n", " GROUNDING REQUIREMENTS (AVAP-RELATED)\n", " 1) Every non-trivial factual claim MUST be directly supported by an EXACT QUOTE from .\n", " 2) If a claim is not supported by a quote, DO NOT include it.\n", " 3) If does not contain enough information to answer, reply with EXACTLY:\n", " \"I don't have enough information in the provided context to answer that.\"\n", "\n", " WORKFLOW (AVAP-RELATED) — FOLLOW IN ORDER\n", " A) Identify the specific question(s) being asked.\n", " B) Extract the minimum necessary quotes from that answer those question(s).\n", " C) Write the answer using ONLY those quotes (paraphrase is allowed, but every statement must be backed by at least one quote).\n", " D) Verify: for EACH sentence in your answer, confirm there is a supporting quote. If any sentence lacks a quote, delete it or refuse.\n", "\n", " OUTPUT FORMAT (AVAP-RELATED ONLY)\n", " Answer:\n", " \n", "\n", " Evidence:\n", " - \"\"\n", " - \"\"\n", " (Include only quotes you actually used. 
Prefer the smallest quotes that fully support the statements.)\n", "\n", " NON-AVAP QUESTIONS\n", " - If the question is clearly not AVAP-related, answer normally using general knowledge.\n", "\n", " \n", " {context}\n", " \"\"\"\n", " )\n", ")\n", "\n", "retrieve_kwargs = {\n", " \"k\": 3\n", "}\n", "\n", "def format_context(docs: List[Document]) -> str:\n", " chunks: List[str] = []\n", " for i, doc in enumerate(docs, 1):\n", " source = (doc.metadata or {}).get(\"source\", \"Untitled\")\n", " source_id = (doc.metadata or {}).get(\"id\", f\"chunk-{i}\")\n", " text = doc.page_content or \"\"\n", " chunks.append(f\"[{i}] id={source_id} source={source}\\n{text}\")\n", " return \"\\n\\n\".join(chunks)\n", "\n", "def reformulate(state: AgentState) -> AgentState:\n", " \"\"\"Use the LLM to rewrite the user query for better retrieval.\"\"\"\n", " user_msg = state[\"messages\"][-1]\n", " resp = agent_llm.invoke([REFORMULATE_PROMPT, user_msg])\n", " reformulated = resp.content.strip()\n", " print(f\"[reformulate] '{user_msg.content}' → '{reformulated}'\")\n", " return {\"reformulated_query\": reformulated}\n", "\n", "\n", "def retrieve(state: AgentState) -> AgentState:\n", " \"\"\"Retrieve context using the reformulated query.\"\"\"\n", " query = state[\"reformulated_query\"]\n", " docs = vector_store.as_retriever(\n", " search_type=\"similarity\",\n", " search_kwargs=retrieve_kwargs,\n", " ).invoke(query)\n", "\n", " context = format_context(docs)\n", " contexts = [d.page_content or \"\" for d in docs] # <-- for Dataset\n", "\n", " print(f\"[retrieve] {len(docs)} docs fetched\")\n", " return {\"context\": context, \"contexts\": contexts}\n", "\n", "\n", "def generate(state: AgentState) -> AgentState:\n", " \"\"\"Generate the final answer using retrieved context.\"\"\"\n", " prompt = SystemMessage(\n", " content=GENERATE_PROMPT.content.format(context=state[\"context\"])\n", " )\n", " resp = agent_llm.invoke([prompt] + state[\"messages\"])\n", " return {\"messages\": 
[resp]}\n", "\n", "\n", "graph_builder = StateGraph(AgentState)\n", "\n", "graph_builder.add_node(\"reformulate\", reformulate)\n", "graph_builder.add_node(\"retrieve\", retrieve)\n", "graph_builder.add_node(\"generate\", generate)\n", "\n", "graph_builder.set_entry_point(\"reformulate\")\n", "graph_builder.add_edge(\"reformulate\", \"retrieve\")\n", "graph_builder.add_edge(\"retrieve\", \"generate\")\n", "graph_builder.add_edge(\"generate\", END)\n", "\n", "graph = graph_builder.compile()" ] }, { "cell_type": "markdown", "id": "9b723a42", "metadata": {}, "source": [ "### Create synthetic data (question, context, answer with SoTA model)" ] }, { "cell_type": "code", "execution_count": 6, "id": "fe524d14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded 28 documents from /home/acano/PycharmProjects/assistance-engine/data/raw\n" ] } ], "source": [ "docs: list[Document] = []\n", "for txt_file in sorted((RAW_DIR / \"docs\").glob(\"*.txt\")):\n", " text = txt_file.read_text(encoding=\"utf-8\")\n", " docs.append(Document(page_content=text, metadata={\"source\": txt_file.name}))\n", "\n", "print(f\"Loaded {len(docs)} documents from {RAW_DIR}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "ab1932b7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_102010/1545617568.py:1: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n", " synth = SingleHopSpecificQuerySynthesizer(llm=LangchainLLMWrapper(llm))\n", "/tmp/ipykernel_102010/1545617568.py:3: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. 
Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n", " generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n", "/tmp/ipykernel_102010/1545617568.py:3: DeprecationWarning: LangchainEmbeddingsWrapper is deprecated and will be removed in a future version. Use the modern embedding providers instead: embedding_factory('openai', model='text-embedding-3-small', client=openai_client) or from ragas.embeddings import OpenAIEmbeddings, GoogleEmbeddings, HuggingFaceEmbeddings\n", " generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "78fe99b4108b4731845407d26779bf25", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Applying SummaryExtractor: 0%| | 0/28 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_inputreference_contextsreferencepersona_namequery_stylequery_lengthsynthesizer_name
0So I been trying to understand how AVAP do the...['6. Expressions in AVAP\\nThis chapter explain...In AVAP, when an arithmetic operator is descri...Evelyn TorresPOOR_GRAMMARLONGsingle_hop_specific_query_synthesizer
1Hey, I'm trying to undrestand how arithmatic c...['6. Expressions in AVAP\\nThis chapter explain...In AVAP, when an arithmetic operator is descri...Evelyn TorresMISSPELLEDLONGsingle_hop_specific_query_synthesizer
2Hey, I'm trying to undrestand how sliceing wor...['6. Expressions in AVAP\\nThis chapter explain...In AVAP, when you perform a slice operation, t...Carlos MedinaMISSPELLEDLONGsingle_hop_specific_query_synthesizer
3How does the __call__ method relate to callabl...['6. Expressions in AVAP\\nThis chapter explain...In AVAP, any object that has a __call__() meth...Evelyn TorresPERFECT_GRAMMARSHORTsingle_hop_specific_query_synthesizer
4How does Decimel work in Python value comparis...['Binary Arithmetic Operations\\nBinary arithme...In Python, decimal.Decimal (from the standard ...Evelyn TorresMISSPELLEDSHORTsingle_hop_specific_query_synthesizer
........................
95What are the diffrent data typs available in A...['Introduction\\nThe data model in AVAP™ define...In AVAP™, just like in Python, data types are ...Evelyn TorresMISSPELLEDMEDIUMsingle_hop_specific_query_synthesizer
96In Python and AVAP how do you does conversion ...['Chapter 5: Data Types\\nIn this chapter, we w...In AVAP™, just like in Python, it is possible ...Evelyn TorresPOOR_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
97How does AVAP compare to Python in terms of ba...['Chapter 5: Data Types\\nIn this chapter, we w...AVAP shares several similarities with Python w...Carlos MendietaPERFECT_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
98Hey so I been trying to learn AVAP and I notic...['Chapter 5: Data Types\\nIn this chapter, we w...In AVAP, just like in Python, there are severa...Carlos MedinaPOOR_GRAMMARLONGsingle_hop_specific_query_synthesizer
99How AVAP data types is similar to Python?['Chapter 5: Data Types\\nIn this chapter, we w...In AVAP™, like in Python, there are several ba...Carlos MedinaPOOR_GRAMMARSHORTsingle_hop_specific_query_synthesizer
\n", "

100 rows × 7 columns

\n", "" ], "text/plain": [ " user_input \\\n", "0 So I been trying to understand how AVAP do the... \n", "1 Hey, I'm trying to undrestand how arithmatic c... \n", "2 Hey, I'm trying to undrestand how sliceing wor... \n", "3 How does the __call__ method relate to callabl... \n", "4 How does Decimel work in Python value comparis... \n", ".. ... \n", "95 What are the diffrent data typs available in A... \n", "96 In Python and AVAP how do you does conversion ... \n", "97 How does AVAP compare to Python in terms of ba... \n", "98 Hey so I been trying to learn AVAP and I notic... \n", "99 How AVAP data types is similar to Python? \n", "\n", " reference_contexts \\\n", "0 ['6. Expressions in AVAP\\nThis chapter explain... \n", "1 ['6. Expressions in AVAP\\nThis chapter explain... \n", "2 ['6. Expressions in AVAP\\nThis chapter explain... \n", "3 ['6. Expressions in AVAP\\nThis chapter explain... \n", "4 ['Binary Arithmetic Operations\\nBinary arithme... \n", ".. ... \n", "95 ['Introduction\\nThe data model in AVAP™ define... \n", "96 ['Chapter 5: Data Types\\nIn this chapter, we w... \n", "97 ['Chapter 5: Data Types\\nIn this chapter, we w... \n", "98 ['Chapter 5: Data Types\\nIn this chapter, we w... \n", "99 ['Chapter 5: Data Types\\nIn this chapter, we w... \n", "\n", " reference persona_name \\\n", "0 In AVAP, when an arithmetic operator is descri... Evelyn Torres \n", "1 In AVAP, when an arithmetic operator is descri... Evelyn Torres \n", "2 In AVAP, when you perform a slice operation, t... Carlos Medina \n", "3 In AVAP, any object that has a __call__() meth... Evelyn Torres \n", "4 In Python, decimal.Decimal (from the standard ... Evelyn Torres \n", ".. ... ... \n", "95 In AVAP™, just like in Python, data types are ... Evelyn Torres \n", "96 In AVAP™, just like in Python, it is possible ... Evelyn Torres \n", "97 AVAP shares several similarities with Python w... Carlos Mendieta \n", "98 In AVAP, just like in Python, there are severa... 
Carlos Medina \n", "99 In AVAP™, like in Python, there are several ba... Carlos Medina \n", "\n", " query_style query_length synthesizer_name \n", "0 POOR_GRAMMAR LONG single_hop_specific_query_synthesizer \n", "1 MISSPELLED LONG single_hop_specific_query_synthesizer \n", "2 MISSPELLED LONG single_hop_specific_query_synthesizer \n", "3 PERFECT_GRAMMAR SHORT single_hop_specific_query_synthesizer \n", "4 MISSPELLED SHORT single_hop_specific_query_synthesizer \n", ".. ... ... ... \n", "95 MISSPELLED MEDIUM single_hop_specific_query_synthesizer \n", "96 POOR_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", "97 PERFECT_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", "98 POOR_GRAMMAR LONG single_hop_specific_query_synthesizer \n", "99 POOR_GRAMMAR SHORT single_hop_specific_query_synthesizer \n", "\n", "[100 rows x 7 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "synthetic_dataset = pd.read_csv(INTERIM_DIR / \"retrieve_eval_results/synthetic_dataset.csv\")\n", "synthetic_dataset" ] }, { "cell_type": "markdown", "id": "698e060c", "metadata": {}, "source": [ "### Answer questions with agent" ] }, { "cell_type": "code", "execution_count": 10, "id": "344a1266", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[reformulate] 'So I been trying to understand how AVAP do the arithmetic conversions when you got different numeric types like complex and float and integer, can someone explain me what happens step by step when the arguments is different types because I want to make sure I not getting bugs in my code from wrong type conversions?' 
→ 'AVAP arithmetic conversions numeric types complex float integer step by step bug prevention conversion behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey, I'm trying to undrestand how arithmatic conversions work in AVAP when you have mixed numberic types like complex and floats — can you explane the full set of rules for how AVAP converts numberic arguments to a comon type when using arithmatic operators?' → 'AVAP numeric conversion rules arithmetic mixed types complex float conversion behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey, I'm trying to undrestand how sliceing works in AVAP and how the __getitem__ methd is used internaly when you do a slice operaton - can you explane the full semantcs of how slice keys are constructd and passed to __getitem__?' → 'AVAP slicing __getitem__ semantics slice key syntax'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the __call__ method relate to callable objects in AVAP?' → 'AVAP __call__ relation callable object method behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does Decimel work in Python value comparisions?' → 'Decimel work python value comparison compare evaluate'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the Decimal type from the standrd library work with value comparisions and other numberic types in AVAP, and what are the limitatoins?' → 'AVAP Decimal type comparison numeric types limitations stdlib'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Under what circumstances does a TypeError occur during comparison operations?' → 'TypeError compare operation circumstance compareTo compareToIgnoreCase'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'According to the AVAP documentation, what does PEP 8 recommend regarding comparisons for singleton objects such as None and NotImplemented?' 
→ 'AVAP PEP 8 comparisons None NotImplemented recommendations'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'hey so i been reading about expression statements in AVAP and i dont really get how repr() work when you in interactive mode, can someone explain what it do with the value?' → 'repr() work in interactive mode for a given value to return its string representation.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP programming language how do simple statements work and what types of simple statements are available in AVAP syntax' → 'AVAP simple statement syntax behavior types'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'what types of simple statements AVAP have?' → 'AVAP simple statements type'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How AVAP future statement is related to Python?' → 'AVAP future statement python relation'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so in AVAP when I use True in a literal pattern how does it work exactly, like what it match against?' → 'AVAP True literal pattern match regex string'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the literal patern for False work in AVAP match statments, and what are the other literal paterns availble alongside it?' → 'AVAP False literal pattern match statements available literals'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does a class pattern involving MyClass work in AVAP's match statements?' → 'MyClass match statement pattern'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the literal pattern for False work in AVAP's match statements, and what is the expected behavior when a value matches it?' 
→ 'False literal pattern match behavior in AVAP'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how does main.avap import and execute functions from other files in AVAP' → 'AVAP import execute function files'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP import operations.avap example how functions are used after import' → 'AVAP import operations example functions use after'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'What happens in AVAP when the file specifed in an import statment is not found - does it raise a FileNotFoundEror?' → 'AVAP import file not found error raise FileNotFoundError?'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'hey so i been trying to understand like what exactly happen when you use a variable name in AVAP but it not defined nowhere in the scope, like does it throw a NameError or something else and also what about if you call a function before it is defined in the file does same thing happen with NameError too?' → 'AVAP undefined variable NameError scope catch try except'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the IF-THEN-ESLE statment work in AVAP™?' → 'AVAP if then else statement logic behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been trying to understand how IF-THEN-ELSE work in AVAP, like what happen when the condition is true and what happen when it false? Can someone explain me the whole flow?' → 'IF THEN ELSE flow AVAP condition true false behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the IF-THEN-ELSE statement work in AVAP™?' → 'AVAP IF THEN ELSE statement logic behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP IF-THEN-ELSE statement how does API result work with conditions' → 'AVAP if then else api condition working scenario'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'what addVar do in the loop example?' 
→ 'addVar in loop example'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the loop statement in AVAP interact with the API result, and what function is used to add a variable's value to the API response?' → 'AVAP loop statement API result interaction variable addition function addToAPIresponse use case'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'endLoop() AVAP what happens after loop ends' → 'endLoop() AVAP behavior exit continue'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'hey so i been trying to understand how addResult work in AVAP, like after a loop finish and you want to send back the value to the API response, how does addResult do that and can you show me the example from the loop code because i not sure how it connect to the final result that the API give back?' → 'addResult work in AVAP, after a loop finishes, sends back the value to the API response. Can you provide an example of this connection from the loop code so I can understand better how it relates to the final result sent back by the API?'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'As a developer working with the AVAP programming language, I would like to understand how the addParam() function operates internally and what considerations I should keep in mind when using it to construct API calls in my applications.' → 'AVAP addParam function internal operation consider API call construction'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the addParam() function work internally when constructing an API call in AVAP?' → 'AVAP addParam function internal api call construction'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'As a developer working with AVAP™, I would like to understand how the addParam() function operates internally and what important considerations I should keep in mind when using it to construct API calls in my applications.' 
→ 'AVAP addParam function internals considerations API call construction'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been trying to learn AVAP™ and I want to know like what is the addParam() function doing internally when you use it in a API call and also what things I need to be careful about when I using it because I dont want to mess up my application?' → 'AVAP addParam function API call parameters safety precautions'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the syntax for including files in AVAP compare to the syntax used in C, and what are the two main methods available for including files in AVAP?' → 'AVAP file inclusion syntax comparison with C, methods for including files'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'In the AVAP programming language, how would I include a project-specific file named utils.avap into my main project, and what are the key advantages of doing so?' → 'AVAP include project-specific file utils.avap main project key-advantages'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does AVAP™ utilize includes and Function Products to promote modularity and extend the capabilities of the base language?' → 'AVAP™ utilizes includes and Function Products for modularity and capability extension.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how do AVS servers relate to function products and function libraries in AVAP development' → 'AVS servers relation function products function libraries AVAP development'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how functions work in AVAP™ and what they do?' → 'AVAP function work syntax behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How AVAP functions is similar to Python?' 
→ 'AVAP function python similarity behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How do functons work in AVAP™ and what are the technicall fetures like paramters, return valeus, and scoping rules that I need to undrestand as a devloper lerning the langauge?' → 'AVAP functions parameters return scope rules understand programming language development'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How do u define functons in AVAP™?' → 'AVAP function definition syntax behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the getDateTime() comand use pytx for timezones?' → 'getDateTime pytx timezone command use'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how does getDateTime command use pytz library time zones to convert current date and time in different formats' → 'getDateTime pytz library timezones conversion format change'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how to extract value from JSON object by key python variableFromJSON' → 'Python json.loads()'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the getDateTime() command handle UTC as a time zone parameter?' → 'getDateTime UTC timezone parameter handling'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP how to build JSON response with multiple variables using addResult and addVar' → 'AVAP build JSON response addResult addVar'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the registerEndpont command handle HTTP methods that dont match the request?' → 'registerEndpoint handles HTTP methods not matching request by'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does AVAP handle arithmatic like Pyhton?' → 'AVAP arithmetic handling python equivalent'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the registerEndpoint command handle a POST method request that does not match the specified HTTP verb?' 
→ 'registerEndpoint POST method request match handler'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'As a developer learning AVAP, I would like to understand how the addParam command interacts with a JSON body in an incoming HTTP request and what the cascading search priority mechanism looks like when extracting parameters from multiple sources.' → 'AVAP addParam JSON body HTTP request cascading search priority parameter extraction mechanism'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how AVAP handle when same parameter sent multiple times?' → 'AVAP handle duplicate parameters'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'In the context of the AVAP programming language, how does the addParam command handle encoding when extracting parameters from an incoming HTTP request, particularly with respect to UTF-8, and what happens if the requested parameter is not found in any of the available sources?' → 'AVAP addParam encoding UTF-8 extract parameters HTTP request sources find missing source handling'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the addParam comand in AVAP handle ASCI decoding when it recieves paramaters from an HTTP request, and what hapens if the paramater is not found in any sorce?' → 'AVAP addParam ASCI decode HTTP request parameter find missing param error handling'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the loop structure in AVAP reference data captured in Section II, and what is the mechanism for processing lists within a startLoop block?' → 'AVAP loop reference data section II processing lists startLoop block mechanism'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP RequestGet usage inside try catch error handling example' → 'AVAP RequestGet usage try except error handling example'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how AVAP handle HTTP request when it fail?' 
→ 'AVAP handle HTTP request failure'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'what is Section V about in the try catch example?' → 'Section V about try catch example'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP™ virtuality attribute explained' → 'AVAP™ virtuality attribute explanation'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been learning AVAP and I want to know how does OpenAI work with the language like what it do exactly and how it help with databases and stuff because I not really understanding the integration part can you explain it to me in detail?' → 'OpenAI language model integration database mechanism explanation'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the virtuality attribute in Advance Virtual API Programming (AVAP) enable dynamic code construction, and what benefits does this provide for API development?' → 'Virtuality attribute in AVAP enables dynamic code construction by allowing flexibility in how APIs are built and modified. 
This feature provides several benefits:\n", "\n", "- **Flexibility**: Enables the creation of highly customizable and adaptable APIs.\n", "- **Scalability**: Facilitates easy scaling of applications as they grow or change requirements.\n", "- **Security**: Enhances security through runtime checks and dynamic code validation.\n", "- **Performance Optimization**: Allows for optimization based on real-time performance data, improving efficiency.\n", "\n", "This attribute is crucial in modern API development, enabling developers to create APIs that can evolve with the application's needs without needing to rewrite existing code.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP programming language Europe availability features syntax virtualization APIs what makes it special' → 'AVAP Europe availability features syntax virtualization APIs what makes it special?'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I trying to understand, if I want to query like multiple databases in Asia region at same time, how AVAP handle the parallel execution and what happen if one take too long?' → 'AVAP handle parallel execution across Asia regions concurrently; if a database operation takes too long, it may cause delays.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey, I'm trying to understand how RequstPost works inside an async thred in AVAP — like, can you explane what happens when you use RequestPost inside a go_async block and what the output looks like if you dont use gather before addResult?' → 'RequstPost async thread go_async block output gather not gathered'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been trying to understand how go_async work in AVAP and like what happen with the variables when you launch a new thread, does the new thread get access to same variables or it get its own copy? Also what happen to the main flow after go_async is called, does it wait or keep going? 
I need to know all this stuff because I building an app that need to send emails in background without making user wait' → 'AVAP go_async thread variable access main flow concurrency background email threading'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'what hapens with variabels when go_async runs?' → 'go_async variable behavior'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP ormDirect how to use for SQL statements that don't return rows' → 'AVAP ORM Direct SQL Statements Use Cases'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how does AVAP avapConnector work with Slack API integration and what methods are exposed for third-party connectors' → 'AVAP avapConnector Slack API integration methods expose'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the ormAccessSelect command work in AVAP, and what type of data structure does it return when performing filtered queries?' → 'ormAccessSelect command work AVAP data structure returned filtered queries'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so in AVAP how do the ormDirect and the .query() method is different from each other, like when you supposed to use which one?' → 'AVAP ORM direct query method comparison usage'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP getDateTime timeDelta token expiration' → 'getDateTime timeDelta token expiration'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How do I use the getDateTyme function in AVAP to get the curent UTC timestamp and also calculte an experation date using UTC timezone?' → 'AVAP getDateTime getCurrentUTC timestamp calculate expirationDate UTC timezone use'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the randonString function work in AVAP and what are its main aplications?' 
→ 'AVAP randomString function application usage'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP encodeSHA256 command how does SHA256 hashing work for secure password comparison without storing plaintext' → 'AVAP SHA256 encoding secure password comparison encryption algorithm security comparison mechanism'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'As a developer learning AVAP, I would like to understand how functions in AVAP manage variable scope and memory isolation to prevent side effects, and how the return command facilitates data transfer and cleanup within the function architecture.' → 'AVAP variable scope memory isolation return command function management data transfer cleanup'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP function local scope and variable isolation how does it work' → 'AVAP function local scope variable isolation mechanism'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'how do AVAP functions work as middleware for API key verification' → 'AVAP middleware apikey verification function working'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'hey so i been trying to learn AVAP and i want to know like when you make a function and it finish running what happen to the variables that was inside it and also how does the return command work exactly because i not sure if the local variables stay or get deleted after the function done executing?' → 'AVAP function variable scope return command behavior locals() delete exit state'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'What does SECTION I handle in the master example's real flow?' 
→ 'SECTION I handle real flow master example'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'SECTION II in master example what does it handle registration input response' → 'SECTION II handles registration, input, and response.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'SECTION III AVAP validation example' → 'AVAP validation example section'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'In the Master Example that combines multiple sections, how does SECTION III handle validation and what specific expression syntax is used to check whether a required parameter is missing?' → 'SECTION III validate parameter missing check expr'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been trying to understand AVAP™ better and I wanna know like what is the thing with the runtime interpreter in AVAP™ because I heard it does something special with code during execution and how is it different from just running code line by line and also what does it mean that language specifications is isolated from the interpreter can someone explain all of this to me?' → 'AVAP runtime interpreter, isolation language specifications, run-time interpretation, line-by-line execution, isolated interpreter'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does AVAP™ handl memmory managment automaticaly without devlopers needing to do it manualy?' → 'AVAP™ memory management automatically, no manual intervention needed'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'what make AVAP™ runtime interpreter different from normal ones?' 
→ 'AVAP™ runtime interpreter differs by what makes it unique compared to others.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP™ runtime interpreter dynamic code construction how does it work' → 'AVAP™ runtime interpreter dynamic code construction working mechanism'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does the BNF grammar notation used in AVAP™ define the structure of an expression and its supported operators?' → 'BNF Grammar Expression Structure Operators Supported'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'what operators AVAP™ support in expressions?' → 'AVAP™ operators expressions support'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does AVAP use a modified Backus-Naur fourm (BNF) grammer notation to define its syntax and what operators does it support?' → 'AVAP uses modified Backus-Naur Fourer (BNF) grammar notation for defining its syntax, including the operators it supports.'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'As a software developer learning AVAP, I would like to understand how the modified Backus–Naur form grammar notation is used in AVAP to define the language's lexical analysis and syntax, and specifically, how does it describe the structure of a program, statements, and expressions?' → 'AVAP Modified Backus-Naur Form Grammar Lexical Analysis Syntax Describes Program Statements Expressions Structure'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been reading about AVAP keywords and I see ormCheckTable listed there, can you tell me what it is and why I cant use it as variable name or something?' → 'ormCheckTable keyword not a valid variable name'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP keywords list' → 'AVAP Keywords List'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'In the AVAP programming language, what is the role of ormCheckTable, and how is it classified within the language's lexical components?' 
→ 'AVAP ormCheckTable function classification'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'RequestPost keyword in AVAP lexical analysis what is it' → 'AVAP RequestPost keyword meaning'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP™ data model data types and data structures complete list with examples' → 'AVAP data model data types & structures complete list with examples'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Python like data types and data structures in AVAP language' → 'AVAP equivalent Python data types & structures'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'AVAP data model compared to Python data types and structures' → 'AVAP data model Python data types structures compare'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'What are the diffrent data typs available in AVAP™ and how do they compair to Python's data types?' → 'AVAP data types compare to Python's data types'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'In Python and AVAP how do you does conversion between data types work, like if I got a string and want to make it a number or something?' → 'Python AVAP convert type string int float conversion example'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How does AVAP compare to Python in terms of basic data types and data type conversion?' → 'AVAP Basic Data Types Data Type Conversion comparison Python'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'Hey so I been trying to learn AVAP and I notice it look a lot like Python in many ways, so can you tell me what are all the basic data types that AVAP have and how they is similar to Python when it come to converting between them and doing operations?' → 'AVAP Basic Data Types Similarities Python Data Types Conversion Operations'\n", "[retrieve] 3 docs fetched\n", "[reformulate] 'How AVAP data types is similar to Python?' 
→ 'AVAP data types Python equivalent'\n", "[retrieve] 3 docs fetched\n" ] } ], "source": [ "questions = synthetic_dataset[\"user_input\"]\n", "ground_truths = synthetic_dataset[\"reference\"]\n", "ground_truth_contexts = synthetic_dataset[\"reference_contexts\"]\n", "\n", "answers, contexts = [], []\n", "\n", "for q in questions:\n", " out = graph.invoke({\"messages\": [HumanMessage(content=q)]})\n", "\n", " # final assistant message content\n", " answers.append(out[\"messages\"][-1].content)\n", "\n", " # contexts captured from retrieval node\n", " contexts.append(out.get(\"contexts\", []))\n", "\n", "dataset = Dataset.from_dict({\n", " \"question\": questions,\n", " \"answer\": answers,\n", " \"contexts\": contexts,\n", " \"ground_truth\": ground_truths,\n", " \"ground_truth_contexts\": ground_truth_contexts,\n", "})" ] }, { "cell_type": "code", "execution_count": 11, "id": "97c1d1af", "metadata": {}, "outputs": [], "source": [ "dataset.to_pandas().to_csv(INTERIM_DIR / \"retrieve_eval_results/full_synthetic_dataset.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 12, "id": "a9011f94", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ae996f346b47423684e3dac7bec7455a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Evaluating: 0%| | 0/700 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_inputretrieved_contextsresponsereferencefaithfulnessanswer_relevancycontext_precisioncontext_recallcontext_entity_recallanswer_similarityanswer_correctness
0So I been trying to understand how AVAP do the...[6. Expressions in AVAP This chapter explains ...Sure! When dealing with different numeric type...In AVAP, when an arithmetic operator is descri...0.4782610.9202981.0000001.00.2500000.7876920.378741
1Hey, I'm trying to undrestand how arithmatic c...[6. Expressions in AVAP This chapter explains ...Certainly! When working with different numeric...In AVAP, when an arithmetic operator is descri...0.3125000.9057301.0000001.00.2500000.8215410.341749
2Hey, I'm trying to undrestand how sliceing wor...[6. Expressions in AVAP This chapter explains ...Certainly! In AVAP, slicing (or indexing) an a...In AVAP, when you perform a slice operation, t...0.0000000.8511091.0000001.00.0526320.7512330.187808
3How does the __call__ method relate to callabl...[6. Expressions in AVAP This chapter explains ...In AVAP, a `callable` object is an instance of...In AVAP, any object that has a __call__() meth...0.0000000.9747541.0000001.00.2857140.7749800.418745
4How does Decimel work in Python value comparis...[Appendix Function Glossary randomString() The...In Python, comparisons for singletons (such as...In Python, decimal.Decimal (from the standard ...0.2666670.7005060.5000001.00.5000000.4675700.116892
....................................
95What are the diffrent data typs available in A...[Chapter 5: Data Types In this chapter, we wil...In AVAP™, there are several basic data types t...In AVAP™, just like in Python, data types are ...0.8235290.9724161.0000001.00.5238100.8668740.544843
96In Python and AVAP how do you does conversion ...[Chapter 5: Data Types In this chapter, we wil...In Python, type conversion is handled by the b...In AVAP™, just like in Python, it is possible ...0.5000000.8690961.0000001.00.4000000.8355810.603632
97How does AVAP compare to Python in terms of ba...[Appendix Function Glossary randomString() The...AVAP and Python share many similarities when i...AVAP shares several similarities with Python w...0.9230770.9543460.5833331.00.6153850.9116390.703520
98Hey so I been trying to learn AVAP and I notic...[Chapter 5: Data Types In this chapter, we wil...Certainly! In AVAP™ (which stands for \"AVirtua...In AVAP, just like in Python, there are severa...0.7800000.9221750.8333331.00.3333330.8365570.483787
99How AVAP data types is similar to Python?[Chapter 5: Data Types In this chapter, we wil...AVAP's basic data types are similar to those o...In AVAP™, like in Python, there are several ba...0.7272730.9624901.0000001.00.2222220.8566840.577807
\n", "

100 rows × 11 columns

\n", "" ], "text/plain": [ " user_input \\\n", "0 So I been trying to understand how AVAP do the... \n", "1 Hey, I'm trying to undrestand how arithmatic c... \n", "2 Hey, I'm trying to undrestand how sliceing wor... \n", "3 How does the __call__ method relate to callabl... \n", "4 How does Decimel work in Python value comparis... \n", ".. ... \n", "95 What are the diffrent data typs available in A... \n", "96 In Python and AVAP how do you does conversion ... \n", "97 How does AVAP compare to Python in terms of ba... \n", "98 Hey so I been trying to learn AVAP and I notic... \n", "99 How AVAP data types is similar to Python? \n", "\n", " retrieved_contexts \\\n", "0 [6. Expressions in AVAP This chapter explains ... \n", "1 [6. Expressions in AVAP This chapter explains ... \n", "2 [6. Expressions in AVAP This chapter explains ... \n", "3 [6. Expressions in AVAP This chapter explains ... \n", "4 [Appendix Function Glossary randomString() The... \n", ".. ... \n", "95 [Chapter 5: Data Types In this chapter, we wil... \n", "96 [Chapter 5: Data Types In this chapter, we wil... \n", "97 [Appendix Function Glossary randomString() The... \n", "98 [Chapter 5: Data Types In this chapter, we wil... \n", "99 [Chapter 5: Data Types In this chapter, we wil... \n", "\n", " response \\\n", "0 Sure! When dealing with different numeric type... \n", "1 Certainly! When working with different numeric... \n", "2 Certainly! In AVAP, slicing (or indexing) an a... \n", "3 In AVAP, a `callable` object is an instance of... \n", "4 In Python, comparisons for singletons (such as... \n", ".. ... \n", "95 In AVAP™, there are several basic data types t... \n", "96 In Python, type conversion is handled by the b... \n", "97 AVAP and Python share many similarities when i... \n", "98 Certainly! In AVAP™ (which stands for \"AVirtua... \n", "99 AVAP's basic data types are similar to those o... \n", "\n", " reference faithfulness \\\n", "0 In AVAP, when an arithmetic operator is descri... 
0.478261 \n", "1 In AVAP, when an arithmetic operator is descri... 0.312500 \n", "2 In AVAP, when you perform a slice operation, t... 0.000000 \n", "3 In AVAP, any object that has a __call__() meth... 0.000000 \n", "4 In Python, decimal.Decimal (from the standard ... 0.266667 \n", ".. ... ... \n", "95 In AVAP™, just like in Python, data types are ... 0.823529 \n", "96 In AVAP™, just like in Python, it is possible ... 0.500000 \n", "97 AVAP shares several similarities with Python w... 0.923077 \n", "98 In AVAP, just like in Python, there are severa... 0.780000 \n", "99 In AVAP™, like in Python, there are several ba... 0.727273 \n", "\n", " answer_relevancy context_precision context_recall \\\n", "0 0.920298 1.000000 1.0 \n", "1 0.905730 1.000000 1.0 \n", "2 0.851109 1.000000 1.0 \n", "3 0.974754 1.000000 1.0 \n", "4 0.700506 0.500000 1.0 \n", ".. ... ... ... \n", "95 0.972416 1.000000 1.0 \n", "96 0.869096 1.000000 1.0 \n", "97 0.954346 0.583333 1.0 \n", "98 0.922175 0.833333 1.0 \n", "99 0.962490 1.000000 1.0 \n", "\n", " context_entity_recall answer_similarity answer_correctness \n", "0 0.250000 0.787692 0.378741 \n", "1 0.250000 0.821541 0.341749 \n", "2 0.052632 0.751233 0.187808 \n", "3 0.285714 0.774980 0.418745 \n", "4 0.500000 0.467570 0.116892 \n", ".. ... ... ... 
\n", "95 0.523810 0.866874 0.544843 \n", "96 0.400000 0.835581 0.603632 \n", "97 0.615385 0.911639 0.703520 \n", "98 0.333333 0.836557 0.483787 \n", "99 0.222222 0.856684 0.577807 \n", "\n", "[100 rows x 11 columns]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "metrics = [\n", " faithfulness,\n", " answer_relevancy,\n", " context_precision,\n", " context_recall,\n", " context_entity_recall,\n", " answer_similarity,\n", " answer_correctness\n", "]\n", "\n", "result = evaluate(\n", " dataset=dataset, \n", " metrics=metrics,\n", " llm=llm,\n", " embeddings=embeddings,\n", ")\n", "\n", "result_df = result.to_pandas()\n", "result_df" ] }, { "cell_type": "code", "execution_count": 13, "id": "20c3fa64", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "faithfulness 0.462742\n", "answer_relevancy 0.859341\n", "context_precision 0.805000\n", "context_recall 0.844470\n", "context_entity_recall 0.371588\n", "answer_similarity 0.790180\n", "answer_correctness 0.454399\n", "dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result_df.mean(numeric_only=True)" ] }, { "cell_type": "code", "execution_count": 14, "id": "350755fd", "metadata": {}, "outputs": [], "source": [ "# Create the export folder first so the write also succeeds on a fresh checkout.\n", "# NOTE(review): assumes INTERIM_DIR is a pathlib.Path - confirm against the config cell.\n", "(INTERIM_DIR / \"retrieve_eval_results\").mkdir(parents=True, exist_ok=True)\n", "result_df.to_csv(INTERIM_DIR / \"retrieve_eval_results/ragas_eval.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 15, "id": "71743384", "metadata": {}, "outputs": [], "source": [ "# NOTE(review): this import rebinds the name `Dataset`, which the first cell imported from\n", "# `datasets`. Once this cell has run, re-executing the earlier cell that calls\n", "# `Dataset.from_dict(...)` resolves to evidently's class instead; restart the kernel before\n", "# re-running the RAGAS section.\n", "from evidently import Dataset, DataDefinition, Report\n", "# NOTE(review): wildcard imports kept as-is because the names used by later cells are not\n", "# visible from here; switch to explicit imports once they are known.\n", "from evidently.descriptors import *\n", "from evidently.llm.options import OllamaOptions\n", "from evidently.presets import TextEvals\n", "from evidently.metrics import *\n", "from evidently.tests import *" ] }, { "cell_type": "code", "execution_count": 17, "id": "4a1210cb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontextsground_truthground_truth_contexts
0So I been trying to understand how AVAP do the...Sure! When dealing with different numeric type...['6. Expressions in AVAP This chapter explains...In AVAP, when an arithmetic operator is descri...['6. Expressions in AVAP\\nThis chapter explain...
1Hey, I'm trying to undrestand how arithmatic c...Certainly! When working with different numeric...['6. Expressions in AVAP This chapter explains...In AVAP, when an arithmetic operator is descri...['6. Expressions in AVAP\\nThis chapter explain...
2Hey, I'm trying to undrestand how sliceing wor...Certainly! In AVAP, slicing (or indexing) an a...['6. Expressions in AVAP This chapter explains...In AVAP, when you perform a slice operation, t...['6. Expressions in AVAP\\nThis chapter explain...
3How does the __call__ method relate to callabl...In AVAP, a `callable` object is an instance of...['6. Expressions in AVAP This chapter explains...In AVAP, any object that has a __call__() meth...['6. Expressions in AVAP\\nThis chapter explain...
4How does Decimel work in Python value comparis...In Python, comparisons for singletons (such as...['Appendix Function Glossary randomString() Th...In Python, decimal.Decimal (from the standard ...['Binary Arithmetic Operations\\nBinary arithme...
5How does the Decimal type from the standrd lib...The Decimal type from the standard library wor...['Appendix Function Glossary randomString() Th...According to the context, decimal.Decimal is o...['Binary Arithmetic Operations\\nBinary arithme...
6Under what circumstances does a TypeError occu...A `TypeError` occurs during comparison operati...['Binary Arithmetic Operations Binary arithmet...A TypeError is generated when attempting to pe...['Binary Arithmetic Operations\\nBinary arithme...
7According to the AVAP documentation, what does...PEP 8 recommends that comparisons should not b...['Appendix Function Glossary randomString() Th...PEP 8 advises that comparisons for singletons,...['Binary Arithmetic Operations\\nBinary arithme...
8hey so i been reading about expression stateme...In AVAP (Advanced Virtual Application Platform...['Appendix Function Glossary randomString() Th...In interactive mode in AVAP, when an expressio...['Simple Statements\\nIn AVAP, a simple stateme...
9AVAP programming language how do simple statem...In AVAP (Advanced Virtual Application Programm...['Simple Statements In AVAP, a simple statemen...In AVAP, a simple statement consists of a sing...['Simple Statements\\nIn AVAP, a simple stateme...
\n", "
" ], "text/plain": [ " question \\\n", "0 So I been trying to understand how AVAP do the... \n", "1 Hey, I'm trying to undrestand how arithmatic c... \n", "2 Hey, I'm trying to undrestand how sliceing wor... \n", "3 How does the __call__ method relate to callabl... \n", "4 How does Decimel work in Python value comparis... \n", "5 How does the Decimal type from the standrd lib... \n", "6 Under what circumstances does a TypeError occu... \n", "7 According to the AVAP documentation, what does... \n", "8 hey so i been reading about expression stateme... \n", "9 AVAP programming language how do simple statem... \n", "\n", " answer \\\n", "0 Sure! When dealing with different numeric type... \n", "1 Certainly! When working with different numeric... \n", "2 Certainly! In AVAP, slicing (or indexing) an a... \n", "3 In AVAP, a `callable` object is an instance of... \n", "4 In Python, comparisons for singletons (such as... \n", "5 The Decimal type from the standard library wor... \n", "6 A `TypeError` occurs during comparison operati... \n", "7 PEP 8 recommends that comparisons should not b... \n", "8 In AVAP (Advanced Virtual Application Platform... \n", "9 In AVAP (Advanced Virtual Application Programm... \n", "\n", " contexts \\\n", "0 ['6. Expressions in AVAP This chapter explains... \n", "1 ['6. Expressions in AVAP This chapter explains... \n", "2 ['6. Expressions in AVAP This chapter explains... \n", "3 ['6. Expressions in AVAP This chapter explains... \n", "4 ['Appendix Function Glossary randomString() Th... \n", "5 ['Appendix Function Glossary randomString() Th... \n", "6 ['Binary Arithmetic Operations Binary arithmet... \n", "7 ['Appendix Function Glossary randomString() Th... \n", "8 ['Appendix Function Glossary randomString() Th... \n", "9 ['Simple Statements In AVAP, a simple statemen... \n", "\n", " ground_truth \\\n", "0 In AVAP, when an arithmetic operator is descri... \n", "1 In AVAP, when an arithmetic operator is descri... 
\n", "2 In AVAP, when you perform a slice operation, t... \n", "3 In AVAP, any object that has a __call__() meth... \n", "4 In Python, decimal.Decimal (from the standard ... \n", "5 According to the context, decimal.Decimal is o... \n", "6 A TypeError is generated when attempting to pe... \n", "7 PEP 8 advises that comparisons for singletons,... \n", "8 In interactive mode in AVAP, when an expressio... \n", "9 In AVAP, a simple statement consists of a sing... \n", "\n", " ground_truth_contexts \n", "0 ['6. Expressions in AVAP\\nThis chapter explain... \n", "1 ['6. Expressions in AVAP\\nThis chapter explain... \n", "2 ['6. Expressions in AVAP\\nThis chapter explain... \n", "3 ['6. Expressions in AVAP\\nThis chapter explain... \n", "4 ['Binary Arithmetic Operations\\nBinary arithme... \n", "5 ['Binary Arithmetic Operations\\nBinary arithme... \n", "6 ['Binary Arithmetic Operations\\nBinary arithme... \n", "7 ['Binary Arithmetic Operations\\nBinary arithme... \n", "8 ['Simple Statements\\nIn AVAP, a simple stateme... \n", "9 ['Simple Statements\\nIn AVAP, a simple stateme... " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reload the exported question/answer table and keep only its first ten rows.\n", "full_synthetic_dataset = pd.read_csv(INTERIM_DIR / \"retrieve_eval_results/full_synthetic_dataset.csv\").iloc[:10]\n", "full_synthetic_dataset" ] }, { "cell_type": "code", "execution_count": 18, "id": "e1ac1a41", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontextsground_truthground_truth_contextsContextQualityContextQuality scoreContextQuality reasoningContextRelevanceContextRelevance scoresFaithfulnessFaithfulness scoreFaithfulness reasoningCompletenessCompleteness scoreCompleteness reasoningCorrectnessCorrectness scoreCorrectness reasoning
0So I been trying to understand how AVAP do the...Sure! When dealing with different numeric type...['6. Expressions in AVAP This chapter explains...In AVAP, when an arithmetic operator is descri...['6. Expressions in AVAP\\nThis chapter explain...VALID0.85The text contains the key information needed t...0.828481[0.8284809]UNFAITHFUL0.75The response contains several inaccuracies tha...INCOMPLETE0.85The output contains significant inaccuracies a...INCORRECT0.90The OUTPUT contains significant inaccuracies a...
1Hey, I'm trying to undrestand how arithmatic c...Certainly! When working with different numeric...['6. Expressions in AVAP This chapter explains...In AVAP, when an arithmetic operator is descri...['6. Expressions in AVAP\\nThis chapter explain...VALID0.95The first chunk of text explicitly addresses t...0.828277[0.82827663]UNFAITHFUL0.90The response contradicts the source in several...INCOMPLETE0.85The output contains significant inaccuracies a...INCORRECT0.95The OUTPUT contains numerous inaccuracies comp...
2Hey, I'm trying to undrestand how sliceing wor...Certainly! In AVAP, slicing (or indexing) an a...['6. Expressions in AVAP This chapter explains...In AVAP, when you perform a slice operation, t...['6. Expressions in AVAP\\nThis chapter explain...VALID0.92The text contains a dedicated 'Slices' section...0.647847[0.6478467]UNFAITHFUL0.95The response contains multiple claims that con...INCOMPLETE0.95The output text is largely fabricated and does...INCORRECT0.95The output contains numerous inaccuracies and ...
3How does the __call__ method relate to callabl...In AVAP, a `callable` object is an instance of...['6. Expressions in AVAP This chapter explains...In AVAP, any object that has a __call__() meth...['6. Expressions in AVAP\\nThis chapter explain...VALID0.85The text contains a relevant section titled 'C...0.682974[0.6829743]UNFAITHFUL0.85The response adds significant information not ...INCOMPLETE0.85The output focuses narrowly on the `__call__` ...INCORRECT0.70The OUTPUT introduces an inaccurate narrowing ...
4How does Decimel work in Python value comparis...In Python, comparisons for singletons (such as...['Appendix Function Glossary randomString() Th...In Python, decimal.Decimal (from the standard ...['Binary Arithmetic Operations\\nBinary arithme...INVALID0.20The text contains only a brief mention of deci...0.524859[0.5248593]UNFAITHFUL0.85The text contains several critical inaccuracie...INCOMPLETE0.80The output contains several inaccuracies and m...INCORRECT1.00The OUTPUT is completely unrelated to the REFE...
5How does the Decimal type from the standrd lib...The Decimal type from the standard library wor...['Appendix Function Glossary randomString() Th...According to the context, decimal.Decimal is o...['Binary Arithmetic Operations\\nBinary arithme...VALID0.72The third section contains a relevant passage ...0.532482[0.53248215]UNFAITHFUL0.85The text contains multiple contradictions with...INCOMPLETE0.90The output contains significant inaccuracies a...INCORRECT0.90The OUTPUT contains multiple significant inacc...
6Under what circumstances does a TypeError occu...A `TypeError` occurs during comparison operati...['Binary Arithmetic Operations Binary arithmet...A TypeError is generated when attempting to pe...['Binary Arithmetic Operations\\nBinary arithme...VALID0.85The text contains relevant information about T...0.632650[0.6326503]UNFAITHFUL0.90The response makes multiple claims that contra...INCOMPLETE0.85The output discusses TypeError in comparison o...INCORRECT0.85The OUTPUT introduces numerous claims not pres...
7According to the AVAP documentation, what does...PEP 8 recommends that comparisons should not b...['Appendix Function Glossary randomString() Th...PEP 8 advises that comparisons for singletons,...['Binary Arithmetic Operations\\nBinary arithme...INVALID0.00The question specifically asks about what PEP ...0.511431[0.5114307]UNFAITHFUL1.00The text discusses PEP 8 recommendations about...UNKNOWN0.50The text provided discusses PEP 8 recommendati...INCORRECT0.95The output fundamentally contradicts the refer...
8hey so i been reading about expression stateme...In AVAP (Advanced Virtual Application Platform...['Appendix Function Glossary randomString() Th...In interactive mode in AVAP, when an expressio...['Simple Statements\\nIn AVAP, a simple stateme...INVALID0.00The provided text contains extensive AVAP docu...0.638876[0.6388764]UNFAITHFUL1.00The response discusses `repr()` function in th...INCOMPLETE0.95The output describes `repr()` functionality in...INCORRECT0.85The OUTPUT introduces several inaccuracies and...
9AVAP programming language how do simple statem...In AVAP (Advanced Virtual Application Programm...['Simple Statements In AVAP, a simple statemen...In AVAP, a simple statement consists of a sing...['Simple Statements\\nIn AVAP, a simple stateme...VALID0.95The text provides comprehensive information ab...0.883411[0.8834107]UNFAITHFUL0.85The response contains several significant devi...INCOMPLETE0.90The OUTPUT is significantly incomplete compare...INCORRECT0.95The OUTPUT significantly contradicts and omits...
\n", "
" ], "text/plain": [ " question \\\n", "0 So I been trying to understand how AVAP do the... \n", "1 Hey, I'm trying to undrestand how arithmatic c... \n", "2 Hey, I'm trying to undrestand how sliceing wor... \n", "3 How does the __call__ method relate to callabl... \n", "4 How does Decimel work in Python value comparis... \n", "5 How does the Decimal type from the standrd lib... \n", "6 Under what circumstances does a TypeError occu... \n", "7 According to the AVAP documentation, what does... \n", "8 hey so i been reading about expression stateme... \n", "9 AVAP programming language how do simple statem... \n", "\n", " answer \\\n", "0 Sure! When dealing with different numeric type... \n", "1 Certainly! When working with different numeric... \n", "2 Certainly! In AVAP, slicing (or indexing) an a... \n", "3 In AVAP, a `callable` object is an instance of... \n", "4 In Python, comparisons for singletons (such as... \n", "5 The Decimal type from the standard library wor... \n", "6 A `TypeError` occurs during comparison operati... \n", "7 PEP 8 recommends that comparisons should not b... \n", "8 In AVAP (Advanced Virtual Application Platform... \n", "9 In AVAP (Advanced Virtual Application Programm... \n", "\n", " contexts \\\n", "0 ['6. Expressions in AVAP This chapter explains... \n", "1 ['6. Expressions in AVAP This chapter explains... \n", "2 ['6. Expressions in AVAP This chapter explains... \n", "3 ['6. Expressions in AVAP This chapter explains... \n", "4 ['Appendix Function Glossary randomString() Th... \n", "5 ['Appendix Function Glossary randomString() Th... \n", "6 ['Binary Arithmetic Operations Binary arithmet... \n", "7 ['Appendix Function Glossary randomString() Th... \n", "8 ['Appendix Function Glossary randomString() Th... \n", "9 ['Simple Statements In AVAP, a simple statemen... \n", "\n", " ground_truth \\\n", "0 In AVAP, when an arithmetic operator is descri... \n", "1 In AVAP, when an arithmetic operator is descri... 
\n", "2 In AVAP, when you perform a slice operation, t... \n", "3 In AVAP, any object that has a __call__() meth... \n", "4 In Python, decimal.Decimal (from the standard ... \n", "5 According to the context, decimal.Decimal is o... \n", "6 A TypeError is generated when attempting to pe... \n", "7 PEP 8 advises that comparisons for singletons,... \n", "8 In interactive mode in AVAP, when an expressio... \n", "9 In AVAP, a simple statement consists of a sing... \n", "\n", " ground_truth_contexts ContextQuality \\\n", "0 ['6. Expressions in AVAP\\nThis chapter explain... VALID \n", "1 ['6. Expressions in AVAP\\nThis chapter explain... VALID \n", "2 ['6. Expressions in AVAP\\nThis chapter explain... VALID \n", "3 ['6. Expressions in AVAP\\nThis chapter explain... VALID \n", "4 ['Binary Arithmetic Operations\\nBinary arithme... INVALID \n", "5 ['Binary Arithmetic Operations\\nBinary arithme... VALID \n", "6 ['Binary Arithmetic Operations\\nBinary arithme... VALID \n", "7 ['Binary Arithmetic Operations\\nBinary arithme... INVALID \n", "8 ['Simple Statements\\nIn AVAP, a simple stateme... INVALID \n", "9 ['Simple Statements\\nIn AVAP, a simple stateme... VALID \n", "\n", " ContextQuality score ContextQuality reasoning \\\n", "0 0.85 The text contains the key information needed t... \n", "1 0.95 The first chunk of text explicitly addresses t... \n", "2 0.92 The text contains a dedicated 'Slices' section... \n", "3 0.85 The text contains a relevant section titled 'C... \n", "4 0.20 The text contains only a brief mention of deci... \n", "5 0.72 The third section contains a relevant passage ... \n", "6 0.85 The text contains relevant information about T... \n", "7 0.00 The question specifically asks about what PEP ... \n", "8 0.00 The provided text contains extensive AVAP docu... \n", "9 0.95 The text provides comprehensive information ab... 
\n", "\n", " ContextRelevance ContextRelevance scores Faithfulness Faithfulness score \\\n", "0 0.828481 [0.8284809] UNFAITHFUL 0.75 \n", "1 0.828277 [0.82827663] UNFAITHFUL 0.90 \n", "2 0.647847 [0.6478467] UNFAITHFUL 0.95 \n", "3 0.682974 [0.6829743] UNFAITHFUL 0.85 \n", "4 0.524859 [0.5248593] UNFAITHFUL 0.85 \n", "5 0.532482 [0.53248215] UNFAITHFUL 0.85 \n", "6 0.632650 [0.6326503] UNFAITHFUL 0.90 \n", "7 0.511431 [0.5114307] UNFAITHFUL 1.00 \n", "8 0.638876 [0.6388764] UNFAITHFUL 1.00 \n", "9 0.883411 [0.8834107] UNFAITHFUL 0.85 \n", "\n", " Faithfulness reasoning Completeness \\\n", "0 The response contains several inaccuracies tha... INCOMPLETE \n", "1 The response contradicts the source in several... INCOMPLETE \n", "2 The response contains multiple claims that con... INCOMPLETE \n", "3 The response adds significant information not ... INCOMPLETE \n", "4 The text contains several critical inaccuracie... INCOMPLETE \n", "5 The text contains multiple contradictions with... INCOMPLETE \n", "6 The response makes multiple claims that contra... INCOMPLETE \n", "7 The text discusses PEP 8 recommendations about... UNKNOWN \n", "8 The response discusses `repr()` function in th... INCOMPLETE \n", "9 The response contains several significant devi... INCOMPLETE \n", "\n", " Completeness score Completeness reasoning \\\n", "0 0.85 The output contains significant inaccuracies a... \n", "1 0.85 The output contains significant inaccuracies a... \n", "2 0.95 The output text is largely fabricated and does... \n", "3 0.85 The output focuses narrowly on the `__call__` ... \n", "4 0.80 The output contains several inaccuracies and m... \n", "5 0.90 The output contains significant inaccuracies a... \n", "6 0.85 The output discusses TypeError in comparison o... \n", "7 0.50 The text provided discusses PEP 8 recommendati... \n", "8 0.95 The output describes `repr()` functionality in... \n", "9 0.90 The OUTPUT is significantly incomplete compare... 
\n", "\n", " Correctness Correctness score \\\n", "0 INCORRECT 0.90 \n", "1 INCORRECT 0.95 \n", "2 INCORRECT 0.95 \n", "3 INCORRECT 0.70 \n", "4 INCORRECT 1.00 \n", "5 INCORRECT 0.90 \n", "6 INCORRECT 0.85 \n", "7 INCORRECT 0.95 \n", "8 INCORRECT 0.85 \n", "9 INCORRECT 0.95 \n", "\n", " Correctness reasoning \n", "0 The OUTPUT contains significant inaccuracies a... \n", "1 The OUTPUT contains numerous inaccuracies comp... \n", "2 The output contains numerous inaccuracies and ... \n", "3 The OUTPUT introduces an inaccurate narrowing ... \n", "4 The OUTPUT is completely unrelated to the REFE... \n", "5 The OUTPUT contains multiple significant inacc... \n", "6 The OUTPUT introduces numerous claims not pres... \n", "7 The output fundamentally contradicts the refer... \n", "8 The OUTPUT introduces several inaccuracies and... \n", "9 The OUTPUT significantly contradicts and omits... " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "context_based_evals = Dataset.from_pandas(\n", " full_synthetic_dataset,\n", " descriptors=[ContextQualityLLMEval(question=\"question\", column_name=\"contexts\", provider=\"bedrock\", \n", " model=\"global.anthropic.claude-sonnet-4-6\", include_score=True, alias=\"ContextQuality\"),\n", " ContextRelevance(input=\"question\", contexts=\"contexts\", output_scores=True, \n", " aggregation_method=\"mean\", alias=\"ContextRelevance\"),\n", " FaithfulnessLLMEval(column_name=\"answer\", context=\"contexts\", provider=\"bedrock\", \n", " model=\"global.anthropic.claude-sonnet-4-6\", include_score=True, alias=\"Faithfulness\"),\n", " CompletenessLLMEval(column_name=\"answer\", context=\"contexts\", provider=\"bedrock\", \n", " model=\"global.anthropic.claude-sonnet-4-6\", include_score=True, alias=\"Completeness\"),\n", " CorrectnessLLMEval(column_name=\"answer\", target_output=\"ground_truth\", provider=\"bedrock\", \n", " model=\"global.anthropic.claude-sonnet-4-6\", include_score=True, 
alias=\"Correctness\")\n", " ],\n", " # options=OllamaOptions(api_url=OLLAMA_LOCAL_URL)\n", ")\n", "context_based_evals.as_dataframe()" ] }, { "cell_type": "code", "execution_count": null, "id": "9ff2705b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "assistance-engine", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.13" } }, "nbformat": 4, "nbformat_minor": 5 }