assistance-engine/scratches/pseco/agent/n00 Run Evaluation.ipynb

214 lines
5.8 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "9f97dd1e",
"metadata": {},
"source": [
"# Libraries"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "9e974df6",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import sys\n",
"from datetime import datetime\n",
"from pathlib import Path\n",
"\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"# Put the project root on sys.path BEFORE the project-local imports below;\n",
"# otherwise `src` and `Docker` are not importable on a fresh kernel.\n",
"_start_dir = Path(__file__).resolve().parent if \"__file__\" in globals() else Path.cwd()\n",
"\n",
"# Prefer the nearest ancestor that contains both imported top-level packages,\n",
"# so the cell does not depend on the directory the kernel was started from.\n",
"_project_root = next(\n",
"    (\n",
"        str(candidate)\n",
"        for candidate in (_start_dir, *_start_dir.parents)\n",
"        if (candidate / \"src\").is_dir() and (candidate / \"Docker\").is_dir()\n",
"    ),\n",
"    None,\n",
")\n",
"if _project_root is None:\n",
"    # Fallback: the previous fixed-depth guess (two levels above the start directory).\n",
"    _project_root = str(_start_dir.parents[1])\n",
"if _project_root not in sys.path:\n",
"    sys.path.insert(0, _project_root)\n",
"\n",
"from src.utils.emb_factory import create_embedding_model\n",
"from src.utils.llm_factory import create_chat_model\n",
"\n",
"from Docker.src.evaluate import run_evaluation"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "d2f66015",
"metadata": {},
"outputs": [],
"source": [
"from src.config import settings"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "2e2ceb6f",
"metadata": {},
"outputs": [],
"source": [
"# Embedding model name per index group; the evaluation loop pairs entry i\n",
"# with the i-th slice of `index` (index[:2], then index[2:]).\n",
"# NOTE(review): both entries are the same model, while the first index name\n",
"# says \"qwen\" and only the last two carry \"bge\" - confirm this is intended.\n",
"emb = [\n",
"    \"bge-m3:latest\",\n",
"    \"bge-m3:latest\",\n",
"]\n",
"\n",
"# Elasticsearch index names to evaluate, in the order the loop slices them.\n",
"index = [\n",
"    \"avap-knowledge-v2-qwen\",\n",
"    \"avap-docs-test-v4\",\n",
"    \"avap-knowledge-v2-bge\",\n",
"    \"avap-docs-test-v4-bge\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "30edcecc",
"metadata": {},
"outputs": [],
"source": [
"# Chat model served through Ollama; the model name comes from project settings.\n",
"_llm_options = dict(\n",
"    provider=\"ollama\",\n",
"    model=settings.ollama_model_name,\n",
"    temperature=0,\n",
"    validate_model_on_init=True,\n",
")\n",
"llm = create_chat_model(**_llm_options)\n",
"\n",
"# Elasticsearch client pointed at the locally configured URL.\n",
"es_url = settings.elasticsearch_local_url\n",
"es_kwargs: dict = dict(hosts=[es_url], request_timeout=60)\n",
"es_client = Elasticsearch(**es_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "dceb22fc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PosixPath('/home/pseco/VsCodeProjects/assistance-engine')"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"settings.proj_root"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "94f5178d",
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"\n",
"# Expose <proj_root>/Docker/src on sys.path. The membership check keeps the\n",
"# cell idempotent: re-running it no longer appends a duplicate entry.\n",
"# NOTE(review): this runs after `Docker.src.evaluate` has already been imported\n",
"# in the first cell - confirm whether it needs to happen before that import.\n",
"_docker_src = str(settings.proj_root / \"Docker/src\")\n",
"if _docker_src not in sys.path:\n",
"    sys.path.append(_docker_src)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ad8d341e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2feda85fcc744b91b55fd72c4bf0540b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Evaluating: 0%| | 0/200 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
]
}
],
"source": [
"def _filename_safe(name: str) -> str:\n",
"    \"\"\"Return `name` with ':' and '/' replaced by '-' for use inside a file name.\"\"\"\n",
"    return name.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
"\n",
"\n",
"# Pair each embedding model with the indexes it is evaluated against:\n",
"# first entry of `emb` -> index[:2], second entry -> index[2:].\n",
"index_groups = [index[:2], index[2:]]\n",
"\n",
"# The output directory does not change between runs, so create it once up front.\n",
"output_dir = settings.proj_root / \"output\"\n",
"output_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"saved_files = []\n",
"\n",
"# strict=True raises ValueError if `emb` and `index_groups` differ in length,\n",
"# instead of silently skipping the unpaired entries.\n",
"for embedding_model, index_group in zip(emb, index_groups, strict=True):\n",
"    embeddings = create_embedding_model(\n",
"        provider=\"ollama\",\n",
"        model=embedding_model,\n",
"    )\n",
"\n",
"    for index_name in index_group:\n",
"        result = run_evaluation(\n",
"            es_client=es_client,\n",
"            llm=llm,\n",
"            embeddings=embeddings,\n",
"            index_name=index_name,\n",
"        )\n",
"\n",
"        # Take the timestamp once so the file name and `generated_at` always agree.\n",
"        generated_at = datetime.now()\n",
"        output_path = output_dir / (\n",
"            f\"evaluation_{_filename_safe(embedding_model)}_{_filename_safe(index_name)}\"\n",
"            f\"_{generated_at:%Y%m%d_%H%M%S}.json\"\n",
"        )\n",
"\n",
"        payload = {\n",
"            \"generated_at\": generated_at.isoformat(),\n",
"            \"embedding_model\": embedding_model,\n",
"            \"index_name\": index_name,\n",
"            \"result\": result,\n",
"        }\n",
"\n",
"        # NOTE(review): json.dump writes non-finite floats as NaN/Infinity tokens by\n",
"        # default, which strict JSON parsers reject; the recorded run logged NaN\n",
"        # errors, so confirm whether `result` can contain NaN values.\n",
"        with output_path.open(\"w\", encoding=\"utf-8\") as fp:\n",
"            json.dump(payload, fp, ensure_ascii=False, indent=2)\n",
"\n",
"        saved_files.append(str(output_path))\n",
"        print(f\"Saved: {output_path}\")\n",
"\n",
"saved_files"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "assistance-engine",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}