369 lines
13 KiB
Plaintext
369 lines
13 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9f97dd1e",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Libraries"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"id": "9e974df6",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"import json\n",
"import sys\n",
"from datetime import datetime\n",
"from pathlib import Path\n",
"\n",
"from elasticsearch import Elasticsearch\n",
"\n",
"# Put the project root on sys.path BEFORE the project imports below: `src` and\n",
"# `Docker` cannot be resolved on a fresh kernel unless the root is importable.\n",
"# When `__file__` is not defined, the root is taken to be the grandparent of\n",
"# the current working directory.\n",
"_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n",
"if _project_root not in sys.path:\n",
"    sys.path.insert(0, _project_root)\n",
"\n",
"# Project-local imports: these rely on the sys.path entry added above.\n",
"from src.utils.llm_factory import create_chat_model  # noqa: E402\n",
"from src.utils.emb_factory import create_embedding_model  # noqa: E402\n",
"\n",
"from Docker.src.evaluate import run_evaluation  # noqa: E402"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 16,
|
|
"id": "d2f66015",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from src.config import settings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "2e2ceb6f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Embedding models to evaluate. The evaluation loop zips each entry with one\n",
"# half of `index` (the first two names, then the last two).\n",
"# NOTE(review): one model is listed for two index groups - confirm which model\n",
"# belongs to the second group.\n",
"emb = [\n",
"    \"harrier-oss-v1:0.6b\",\n",
"]\n",
"\n",
"# Elasticsearch index names, in the order the evaluation loop slices them.\n",
"index = [\n",
"    \"avap-knowledge-v2-qwen\",\n",
"    \"avap-docs-test-v4\",\n",
"    \"avap-knowledge-v2-bge\",\n",
"    \"avap-docs-test-v4-bge\",\n",
"]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"id": "30edcecc",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"# Chat model built by the project factory (Ollama provider, temperature 0).\n",
"llm = create_chat_model(\n",
"    provider=\"ollama\", model=settings.ollama_model_name, temperature=0, validate_model_on_init=True\n",
")\n",
"\n",
"# Elasticsearch client for the local URL from settings, request timeout of 60.\n",
"es_url = settings.elasticsearch_local_url\n",
"es_kwargs: dict = dict(hosts=[es_url], request_timeout=60)\n",
"es_client = Elasticsearch(**es_kwargs)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 19,
|
|
"id": "dceb22fc",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"PosixPath('/home/pseco/VsCodeProjects/assistance-engine')"
|
|
]
|
|
},
|
|
"execution_count": 19,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Display the resolved project root; evaluation reports are written under\n",
"# its `output` directory by the evaluation loop.\n",
"settings.proj_root"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"id": "94f5178d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
"import sys\n",
"\n",
"# Expose Docker/src as an additional import root. The membership check keeps\n",
"# this cell idempotent: re-running it no longer piles duplicate entries onto\n",
"# sys.path (same guard as the project-root insert in the imports cell).\n",
"_docker_src = str(settings.proj_root/\"Docker/src\")\n",
"if _docker_src not in sys.path:\n",
"    sys.path.append(_docker_src)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"id": "ad8d341e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "2feda85fcc744b91b55fd72c4bf0540b",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Evaluating: 0%| | 0/200 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[117]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[137]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_112342.json\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "554aa243310546fc99980133ad1db56e",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Evaluating: 0%| | 0/200 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[77]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_113404.json\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "25a2ecc2a7a64ac4b463fb35b8430f88",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Evaluating: 0%| | 0/200 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[89]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[117]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[145]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[165]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_114111.json\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
|
|
"[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "b63fdd0278034fe0bc6136f21179d789",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Evaluating: 0%| | 0/200 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[97]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[121]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[145]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[153]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n",
|
|
"Exception raised in Job[157]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Saved: /home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_114819.json\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"['/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_112342.json',\n",
|
|
" '/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_113404.json',\n",
|
|
" '/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_114111.json',\n",
|
|
" '/home/pseco/VsCodeProjects/assistance-engine/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_114819.json']"
|
|
]
|
|
},
|
|
"execution_count": 21,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
"saved_files = []\n",
"\n",
"# Each embedding model is paired with one group of indices: the first two\n",
"# names in `index`, then the last two.\n",
"index_groups = [index[:2], index[2:]]\n",
"\n",
"# zip() stops at the shorter input, so a missing embedding model would skip a\n",
"# whole index group without any message. Fail before the evaluation starts.\n",
"if len(emb) != len(index_groups):\n",
"    raise ValueError(\n",
"        f\"Expected one embedding model per index group: got {len(emb)} \"\n",
"        f\"model(s) for {len(index_groups)} group(s).\"\n",
"    )\n",
"\n",
"# Loop-invariant: every report goes to the same directory.\n",
"output_dir = settings.proj_root / \"output\"\n",
"output_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"for embedding_model, index_group in zip(emb, index_groups):\n",
"    embeddings = create_embedding_model(\n",
"        provider=\"ollama\",\n",
"        model=embedding_model,\n",
"    )\n",
"\n",
"    for index_name in index_group:\n",
"        result = run_evaluation(\n",
"            es_client=es_client,\n",
"            llm=llm,\n",
"            embeddings=embeddings,\n",
"            index_name=index_name,\n",
"        )\n",
"\n",
"        # Read the clock once so the file name and `generated_at` agree.\n",
"        generated_at = datetime.now()\n",
"        timestamp = generated_at.strftime(\"%Y%m%d_%H%M%S\")\n",
"        # ':' and '/' are replaced with '-' before the names go into a file name.\n",
"        safe_embedding = embedding_model.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
"        safe_index = index_name.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
"        output_path = output_dir / f\"evaluation_{safe_embedding}_{safe_index}_{timestamp}.json\"\n",
"\n",
"        payload = {\n",
"            \"generated_at\": generated_at.isoformat(),\n",
"            \"embedding_model\": embedding_model,\n",
"            \"index_name\": index_name,\n",
"            \"result\": result,\n",
"        }\n",
"\n",
"        # NOTE(review): json.dump writes float NaN as a bare `NaN` token by\n",
"        # default, which strict JSON parsers reject - confirm whether failed\n",
"        # jobs can put NaN scores into `result`.\n",
"        with output_path.open(\"w\", encoding=\"utf-8\") as fp:\n",
"            json.dump(payload, fp, ensure_ascii=False, indent=2)\n",
"\n",
"        saved_files.append(str(output_path))\n",
"        print(f\"Saved: {output_path}\")\n",
"\n",
"saved_files"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "assistance-engine",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|