working on evaluating embeddings

This commit is contained in:
pseco 2026-02-24 16:53:37 +01:00
parent 0d6c08e341
commit d6ac7aa1ca
4 changed files with 168 additions and 121 deletions

File diff suppressed because one or more lines are too long

View File

@ -174,11 +174,33 @@
"# Evaluar métricas (NDCG, MAP, Recall, Precision)\n", "# Evaluar métricas (NDCG, MAP, Recall, Precision)\n",
"ndcg, _map, recall, precision = evaluator.evaluate(\n", "ndcg, _map, recall, precision = evaluator.evaluate(\n",
" qrels, results, [1, 3, 5, 10]\n", " qrels, results, [1, 3, 5, 10]\n",
")\n", ")"
"\n", ]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "5c0f9845",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Resultados para CodeXGLUE:\n",
"NDCG: {'NDCG@1': 0.94971, 'NDCG@3': 0.96956, 'NDCG@5': 0.97166, 'NDCG@10': 0.97342}\n",
"MAP: {'MAP@1': 0.94971, 'MAP@3': 0.96504, 'MAP@5': 0.9662, 'MAP@10': 0.96694}\n",
"Recall: {'Recall@1': 0.94971, 'Recall@3': 0.98251, 'Recall@5': 0.98761, 'Recall@10': 0.99297}\n",
"Precision: {'P@1': 0.94971, 'P@3': 0.3275, 'P@5': 0.19752, 'P@10': 0.0993}\n"
]
}
],
"source": [
"print(f\"Resultados para CodeXGLUE:\")\n", "print(f\"Resultados para CodeXGLUE:\")\n",
"print(\"NDCG@10:\", ndcg[\"NDCG@10\"])\n", "print(\"NDCG:\", ndcg)\n",
"print(\"Recall@10:\", recall[\"Recall@10\"])" "print(\"MAP:\", _map)\n",
"print(\"Recall:\", recall)\n",
"print(\"Precision:\", precision)"
] ]
}, },
{ {
@ -191,43 +213,99 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"id": "5ced1c25", "id": "5ced1c25",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"name": "stdout",
"output_type": "stream",
"text": [
"NDCG: {'NDCG@1': 0.02333, 'NDCG@3': 0.03498, 'NDCG@5': 0.0404, 'NDCG@10': 0.04619, 'NDCG@100': 0.07768}\n",
"MAP: {'MAP@1': 0.02083, 'MAP@3': 0.03083, 'MAP@5': 0.03375, 'MAP@10': 0.03632, 'MAP@100': 0.04123}\n",
"Recall: {'Recall@1': 0.02083, 'Recall@3': 0.04417, 'Recall@5': 0.0575, 'Recall@10': 0.07417, 'Recall@100': 0.23144}\n",
"Precision: {'P@1': 0.02333, 'P@3': 0.01556, 'P@5': 0.01267, 'P@10': 0.00833, 'P@100': 0.00277}\n"
]
}
],
"source": [ "source": [
"model = BEIROllamaEmbeddings(\n", "model_q2 = BEIROllamaEmbeddings(\n",
" base_url=\"http://localhost:11434\",\n", " base_url=\"http://localhost:11434\",\n",
" model=\"qwen2.5:1.5b\",\n", " model=\"qwen2.5:1.5b\",\n",
" batch_size=64,\n", " batch_size=64,\n",
")\n", ")\n",
"\n", "\n",
"# Inicializar buscador y evaluador\n", "# Inicializar buscador y evaluador\n",
"retriever = DenseRetrievalExactSearch(model, batch_size=64)\n", "retriever_q2 = DenseRetrievalExactSearch(model_q2, batch_size=64)\n",
"evaluator = EvaluateRetrieval(retriever, score_function=\"cos_sim\")\n", "evaluator_q2 = EvaluateRetrieval(retriever_q2, score_function=\"cos_sim\")\n",
"\n", "\n",
"# Ejecutar recuperación\n", "# Ejecutar recuperación\n",
"results = evaluator.retrieve(corpus, queries)\n", "results_q2 = evaluator_q2.retrieve(corpus, queries)\n",
"\n", "\n",
"# Evaluar métricas (NDCG, MAP, Recall, Precision)\n", "# Evaluar métricas (NDCG, MAP, Recall, Precision)\n",
"ndcg, _map, recall, precision = evaluator.evaluate(\n", "ndcg_qwen_2, _map_qwen_2, recall_qwen_2, precision_qwen_2 = evaluator_q2.evaluate(\n",
" qrels, results, [1, 3, 5, 10]\n", " qrels, results_q2, [1, 3, 5, 10]\n",
")\n", ")"
"\n", ]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "6a95189e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Resultados para CodeXGLUE:\n",
"NDCG: {'NDCG@1': 0.00031, 'NDCG@3': 0.00061, 'NDCG@5': 0.00086, 'NDCG@10': 0.00118}\n",
"MAP: {'MAP@1': 0.00031, 'MAP@3': 0.00051, 'MAP@5': 0.00065, 'MAP@10': 0.00078}\n",
"Recall: {'Recall@1': 0.00031, 'Recall@3': 0.00088, 'Recall@5': 0.00151, 'Recall@10': 0.0025}\n",
"Precision: {'P@1': 0.00031, 'P@3': 0.00029, 'P@5': 0.0003, 'P@10': 0.00025}\n"
]
}
],
"source": [
"print(f\"Resultados para CodeXGLUE:\")\n", "print(f\"Resultados para CodeXGLUE:\")\n",
"print(\"NDCG@10:\", ndcg[\"NDCG@10\"])\n", "print(\"NDCG:\", ndcg_qwen_2)\n",
"print(\"Recall@10:\", recall[\"Recall@10\"])" "print(\"MAP:\", _map_qwen_2)\n",
"print(\"Recall:\", recall_qwen_2)\n",
"print(\"Precision:\", precision_qwen_2)"
]
},
{
"cell_type": "markdown",
"id": "3dad9811",
"metadata": {},
"source": [
"# Save data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f875dd8d",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Resultados guardados en /home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_CodeXGlue_results.json\n"
]
}
],
"source": [
"results_data = {\n",
" \"qwen3-0.6B-emb:latest\": {\n",
" \"NDCG\": ndcg,\n",
" \"MAP\": _map,\n",
" \"Recall\": recall,\n",
" \"Precision\": precision,\n",
" },\n",
" \"qwen2.5:1.5b\": {\n",
" \"NDCG\": ndcg_qwen_2,\n",
" \"MAP\": _map_qwen_2,\n",
" \"Recall\": recall_qwen_2,\n",
" \"Precision\": precision_qwen_2,\n",
" }\n",
"}\n",
"\n",
"output_file = \"/home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_CodeXGlue_results.json\"\n",
"with open(output_file, \"w\") as f:\n",
" json.dump(results_data, f, indent=2)\n",
"\n",
"print(f\"Resultados guardados en {output_file}\")"
] ]
} }
], ],

View File

@ -10,7 +10,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 15,
"id": "c01c19dc", "id": "c01c19dc",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -252,6 +252,51 @@
"print(\"Recall:\", recall_qwen_2)\n", "print(\"Recall:\", recall_qwen_2)\n",
"print(\"Precision:\", precision_qwen_2)" "print(\"Precision:\", precision_qwen_2)"
] ]
},
{
"cell_type": "markdown",
"id": "b9402837",
"metadata": {},
"source": [
"# Save Data"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "c281d5e1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Resultados guardados en /home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_Scifact_results.json\n"
]
}
],
"source": [
"results_data = {\n",
" \"qwen3-0.6B-emb:latest\": {\n",
" \"NDCG\": ndcg,\n",
" \"MAP\": _map,\n",
" \"Recall\": recall,\n",
" \"Precision\": precision,\n",
" },\n",
" \"qwen2.5:1.5b\": {\n",
" \"NDCG\": ndcg_qwen_2,\n",
" \"MAP\": _map_qwen_2,\n",
" \"Recall\": recall_qwen_2,\n",
" \"Precision\": precision_qwen_2,\n",
" }\n",
"}\n",
"\n",
"output_file = \"/home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_Scifact_results.json\"\n",
"with open(output_file, \"w\") as f:\n",
" json.dump(results_data, f, indent=2)\n",
"\n",
"print(f\"Resultados guardados en {output_file}\")"
]
} }
], ],
"metadata": { "metadata": {

View File

@ -261,14 +261,6 @@
"print(\"Recall:\", recall_qwen_2)\n", "print(\"Recall:\", recall_qwen_2)\n",
"print(\"Precision:\", precision_qwen_2)" "print(\"Precision:\", precision_qwen_2)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1db7d110",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {