assistance-engine/scratches/pseco/agent/n00 Run Evaluation.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9f97dd1e",
   "metadata": {},
   "source": [
    "# Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9e974df6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import json\n",
    "from datetime import datetime\n",
    "from pathlib import Path\n",
    "\n",
    "from src.utils.llm_factory import create_chat_model\n",
    "from src.utils.emb_factory import create_embedding_model\n",
    "\n",
    "from Docker.src.evaluate import run_evaluation\n",
    "from elasticsearch import Elasticsearch\n",
    "\n",
    "# Ensure the project root is on the path so `src` is importable\n",
    "_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n",
    "if _project_root not in sys.path:\n",
    "    sys.path.insert(0, _project_root)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "d2f66015",
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.config import settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2e2ceb6f",
   "metadata": {},
   "outputs": [],
   "source": [
    "emb = [\"harrier-oss-v1:0.6b\"]\n",
    "index = [\"avap-knowledge-v2-qwen\", \"avap-docs-test-v4\",\"avap-knowledge-v2-bge\", \"avap-docs-test-v4-bge\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "30edcecc",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = create_chat_model(\n",
    "    provider=\"ollama\",\n",
    "    model=settings.ollama_model_name,\n",
    "    temperature=0,\n",
    "    validate_model_on_init=True,\n",
    ")\n",
    "\n",
    "es_url = settings.elasticsearch_local_url\n",
    "\n",
    "es_kwargs: dict = {\"hosts\": [es_url], \"request_timeout\": 60}\n",
    "\n",
    "es_client  = Elasticsearch(**es_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "dceb22fc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PosixPath('/home/pseco/VsCodeProjects/assistance-engine')"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "settings.proj_root"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "94f5178d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(str(settings.proj_root/\"Docker/src\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ad8d341e",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2feda85fcc744b91b55fd72c4bf0540b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[117]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[137]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_112342.json\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "554aa243310546fc99980133ad1db56e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[77]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_113404.json\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "25a2ecc2a7a64ac4b463fb35b8430f88",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[89]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[117]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[145]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[165]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_114111.json\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
      "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b63fdd0278034fe0bc6136f21179d789",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[145]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
      "Exception raised in Job[157]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_114819.json\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_112342.json',\n",
       " '/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_113404.json',\n",
       " '/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_114111.json',\n",
       " '/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_114819.json']"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "saved_files = []\n",
    "\n",
    "for embedding_model, index_group in zip(emb, [index[:2], index[2:]]):\n",
    "    embeddings = create_embedding_model(\n",
    "        provider=\"ollama\",\n",
    "        model=embedding_model,\n",
    "    )\n",
    "\n",
    "    for index_name in index_group:\n",
    "        result = run_evaluation(\n",
    "            es_client=es_client,\n",
    "            llm=llm,\n",
    "            embeddings=embeddings,\n",
    "            index_name=index_name,\n",
    "        )\n",
    "\n",
    "        timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
    "        safe_embedding = embedding_model.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
    "        safe_index = index_name.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
    "        output_path = (\n",
    "            settings.proj_root\n",
    "            / \"output\"\n",
    "            / f\"evaluation_{safe_embedding}_{safe_index}_{timestamp}.json\"\n",
    "        )\n",
    "        output_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "        payload = {\n",
    "            \"generated_at\": datetime.now().isoformat(),\n",
    "            \"embedding_model\": embedding_model,\n",
    "            \"index_name\": index_name,\n",
    "            \"result\": result,\n",
    "        }\n",
    "\n",
    "        with output_path.open(\"w\", encoding=\"utf-8\") as fp:\n",
    "            json.dump(payload, fp, ensure_ascii=False, indent=2)\n",
    "\n",
    "        saved_files.append(str(output_path))\n",
    "        print(f\"Saved: {output_path}\")\n",
    "\n",
    "saved_files"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "assistance-engine",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}