working on evaluating embeddings
This commit is contained in:
parent
0d6c08e341
commit
d6ac7aa1ca
File diff suppressed because one or more lines are too long
|
|
@ -174,11 +174,33 @@
|
|||
"# Evaluar métricas (NDCG, MAP, Recall, Precision)\n",
|
||||
"ndcg, _map, recall, precision = evaluator.evaluate(\n",
|
||||
" qrels, results, [1, 3, 5, 10]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "5c0f9845",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Resultados para CodeXGLUE:\n",
|
||||
"NDCG: {'NDCG@1': 0.94971, 'NDCG@3': 0.96956, 'NDCG@5': 0.97166, 'NDCG@10': 0.97342}\n",
|
||||
"MAP: {'MAP@1': 0.94971, 'MAP@3': 0.96504, 'MAP@5': 0.9662, 'MAP@10': 0.96694}\n",
|
||||
"Recall: {'Recall@1': 0.94971, 'Recall@3': 0.98251, 'Recall@5': 0.98761, 'Recall@10': 0.99297}\n",
|
||||
"Precision: {'P@1': 0.94971, 'P@3': 0.3275, 'P@5': 0.19752, 'P@10': 0.0993}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Resultados para CodeXGLUE:\")\n",
|
||||
"print(\"NDCG@10:\", ndcg[\"NDCG@10\"])\n",
|
||||
"print(\"Recall@10:\", recall[\"Recall@10\"])"
|
||||
"print(\"NDCG:\", ndcg)\n",
|
||||
"print(\"MAP:\", _map)\n",
|
||||
"print(\"Recall:\", recall)\n",
|
||||
"print(\"Precision:\", precision)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -191,43 +213,99 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"id": "5ced1c25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"NDCG: {'NDCG@1': 0.02333, 'NDCG@3': 0.03498, 'NDCG@5': 0.0404, 'NDCG@10': 0.04619, 'NDCG@100': 0.07768}\n",
|
||||
"MAP: {'MAP@1': 0.02083, 'MAP@3': 0.03083, 'MAP@5': 0.03375, 'MAP@10': 0.03632, 'MAP@100': 0.04123}\n",
|
||||
"Recall: {'Recall@1': 0.02083, 'Recall@3': 0.04417, 'Recall@5': 0.0575, 'Recall@10': 0.07417, 'Recall@100': 0.23144}\n",
|
||||
"Precision: {'P@1': 0.02333, 'P@3': 0.01556, 'P@5': 0.01267, 'P@10': 0.00833, 'P@100': 0.00277}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model = BEIROllamaEmbeddings(\n",
|
||||
"model_q2 = BEIROllamaEmbeddings(\n",
|
||||
" base_url=\"http://localhost:11434\",\n",
|
||||
" model=\"qwen2.5:1.5b\",\n",
|
||||
" batch_size=64,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Inicializar buscador y evaluador\n",
|
||||
"retriever = DenseRetrievalExactSearch(model, batch_size=64)\n",
|
||||
"evaluator = EvaluateRetrieval(retriever, score_function=\"cos_sim\")\n",
|
||||
"retriever_q2 = DenseRetrievalExactSearch(model_q2, batch_size=64)\n",
|
||||
"evaluator_q2 = EvaluateRetrieval(retriever_q2, score_function=\"cos_sim\")\n",
|
||||
"\n",
|
||||
"# Ejecutar recuperación\n",
|
||||
"results = evaluator.retrieve(corpus, queries)\n",
|
||||
"results_q2 = evaluator_q2.retrieve(corpus, queries)\n",
|
||||
"\n",
|
||||
"# Evaluar métricas (NDCG, MAP, Recall, Precision)\n",
|
||||
"ndcg, _map, recall, precision = evaluator.evaluate(\n",
|
||||
" qrels, results, [1, 3, 5, 10]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"ndcg_qwen_2, _map_qwen_2, recall_qwen_2, precision_qwen_2 = evaluator_q2.evaluate(\n",
|
||||
" qrels, results_q2, [1, 3, 5, 10]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "6a95189e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Resultados para CodeXGLUE:\n",
|
||||
"NDCG: {'NDCG@1': 0.00031, 'NDCG@3': 0.00061, 'NDCG@5': 0.00086, 'NDCG@10': 0.00118}\n",
|
||||
"MAP: {'MAP@1': 0.00031, 'MAP@3': 0.00051, 'MAP@5': 0.00065, 'MAP@10': 0.00078}\n",
|
||||
"Recall: {'Recall@1': 0.00031, 'Recall@3': 0.00088, 'Recall@5': 0.00151, 'Recall@10': 0.0025}\n",
|
||||
"Precision: {'P@1': 0.00031, 'P@3': 0.00029, 'P@5': 0.0003, 'P@10': 0.00025}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(f\"Resultados para CodeXGLUE:\")\n",
|
||||
"print(\"NDCG@10:\", ndcg[\"NDCG@10\"])\n",
|
||||
"print(\"Recall@10:\", recall[\"Recall@10\"])"
|
||||
"print(\"NDCG:\", ndcg_qwen_2)\n",
|
||||
"print(\"MAP:\", _map_qwen_2)\n",
|
||||
"print(\"Recall:\", recall_qwen_2)\n",
|
||||
"print(\"Precision:\", precision_qwen_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "3dad9811",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Save data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "f875dd8d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Resultados guardados en /home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_CodeXGlue_results.json\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results_data = {\n",
|
||||
" \"qwen3-0.6B-emb:latest\": {\n",
|
||||
" \"NDCG\": ndcg,\n",
|
||||
" \"MAP\": _map,\n",
|
||||
" \"Recall\": recall,\n",
|
||||
" \"Precision\": precision,\n",
|
||||
" },\n",
|
||||
" \"qwen2.5:1.5b\": {\n",
|
||||
" \"NDCG\": ndcg_qwen_2,\n",
|
||||
" \"MAP\": _map_qwen_2,\n",
|
||||
" \"Recall\": recall_qwen_2,\n",
|
||||
" \"Precision\": precision_qwen_2,\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"output_file = \"/home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_CodeXGlue_results.json\"\n",
|
||||
"with open(output_file, \"w\") as f:\n",
|
||||
" json.dump(results_data, f, indent=2)\n",
|
||||
"\n",
|
||||
"print(f\"Resultados guardados en {output_file}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 15,
|
||||
"id": "c01c19dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -252,6 +252,51 @@
|
|||
"print(\"Recall:\", recall_qwen_2)\n",
|
||||
"print(\"Precision:\", precision_qwen_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b9402837",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Save Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"id": "c281d5e1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Resultados guardados en /home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_Scifact_results.json\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"results_data = {\n",
|
||||
" \"qwen3-0.6B-emb:latest\": {\n",
|
||||
" \"NDCG\": ndcg,\n",
|
||||
" \"MAP\": _map,\n",
|
||||
" \"Recall\": recall,\n",
|
||||
" \"Precision\": precision,\n",
|
||||
" },\n",
|
||||
" \"qwen2.5:1.5b\": {\n",
|
||||
" \"NDCG\": ndcg_qwen_2,\n",
|
||||
" \"MAP\": _map_qwen_2,\n",
|
||||
" \"Recall\": recall_qwen_2,\n",
|
||||
" \"Precision\": precision_qwen_2,\n",
|
||||
" }\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"output_file = \"/home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_Scifact_results.json\"\n",
|
||||
"with open(output_file, \"w\") as f:\n",
|
||||
" json.dump(results_data, f, indent=2)\n",
|
||||
"\n",
|
||||
"print(f\"Resultados guardados en {output_file}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
|
|
@ -261,14 +261,6 @@
|
|||
"print(\"Recall:\", recall_qwen_2)\n",
|
||||
"print(\"Precision:\", precision_qwen_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1db7d110",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
|
|
|||
Loading…
Reference in New Issue