{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0b7e4d15",
   "metadata": {},
   "source": [
    "# Embedding benchmark: CodeXGLUE code search with BEIR\n",
    "\n",
    "Compares Ollama-served models as dense retrievers on the `google/code_x_glue_tc_nl_code_search_adv` test split. Each docstring is used as a query and the code of the same example is its only relevant document. Metrics are NDCG, MAP, Recall and Precision at k = 1, 3, 5, 10."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "66cbbaf8",
   "metadata": {},
   "source": [
    "# Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c01c19dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from pathlib import Path\n",
    "from typing import Any, Dict, Iterable, List, Sequence, Tuple, Union\n",
    "\n",
    "import numpy as np\n",
    "from beir import util\n",
    "from beir.datasets.data_loader import GenericDataLoader\n",
    "from beir.retrieval.evaluation import EvaluateRetrieval\n",
    "from beir.retrieval.search.dense import DenseRetrievalExactSearch\n",
    "from datasets import load_dataset\n",
    "from langchain_ollama import OllamaEmbeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e4b2a6f1",
   "metadata": {},
   "source": [
    "# Config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c3f7b28",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Ollama server and retrieval settings shared by every evaluated model.\n",
    "OLLAMA_BASE_URL = \"http://localhost:11434\"\n",
    "BATCH_SIZE = 64\n",
    "K_VALUES = [1, 3, 5, 10]\n",
    "\n",
    "# Benchmark dataset on the Hugging Face Hub.\n",
    "DATASET_NAME = \"google/code_x_glue_tc_nl_code_search_adv\"\n",
    "DATASET_SPLIT = \"test\"\n",
    "\n",
    "# Ollama model tags under evaluation.\n",
    "QWEN3_EMBEDDING_MODEL = \"qwen3-0.6B-emb:latest\"\n",
    "QWEN25_MODEL = \"qwen2.5:1.5b\"\n",
    "\n",
    "\n",
    "def find_project_root(start: Path) -> Path:\n",
    "    \"\"\"Return the nearest ancestor of `start` (inclusive) that holds `data/interim`.\n",
    "\n",
    "    Falls back to `start` itself when no ancestor contains that directory.\n",
    "    \"\"\"\n",
    "    for candidate in (start, *start.parents):\n",
    "        if (candidate / \"data\" / \"interim\").is_dir():\n",
    "            return candidate\n",
    "    return start\n",
    "\n",
    "\n",
    "# Set ASSISTANCE_ENGINE_ROOT to override the auto-detected project root.\n",
    "PROJECT_ROOT = Path(os.environ.get(\"ASSISTANCE_ENGINE_ROOT\") or find_project_root(Path.cwd()))\n",
    "RESULTS_FILE = PROJECT_ROOT / \"data\" / \"interim\" / \"beir_CodeXGlue_results.json\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ac011c1c",
   "metadata": {},
   "source": [
    "# Utils"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b83e7900",
   "metadata": {},
   "outputs": [],
   "source": [
    "class BEIROllamaEmbeddings:\n",
    "    \"\"\"\n",
    "    Adapter that makes LangChain's OllamaEmbeddings compatible with BEIR.\n",
    "\n",
    "    Exposes the `encode_queries` / `encode_corpus` pair and returns the\n",
    "    embeddings as float32 NumPy arrays.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        base_url: str,\n",
    "        model: str,\n",
    "        batch_size: int = 64,\n",
    "    ) -> None:\n",
    "        \"\"\"\n",
    "        Args:\n",
    "            base_url: URL of the Ollama server.\n",
    "            model: Ollama model tag used to embed texts.\n",
    "            batch_size: Number of texts sent per `embed_documents` call.\n",
    "\n",
    "        Raises:\n",
    "            ValueError: If `batch_size` is smaller than 1.\n",
    "        \"\"\"\n",
    "        # Fail early: 0 would break range() later and a negative value\n",
    "        # would silently produce no embeddings at all.\n",
    "        if batch_size < 1:\n",
    "            raise ValueError(f\"batch_size must be a positive integer, got {batch_size}\")\n",
    "\n",
    "        self.batch_size = batch_size\n",
    "        self.embeddings = OllamaEmbeddings(\n",
    "            base_url=base_url,\n",
    "            model=model,\n",
    "        )\n",
    "\n",
    "    def _batch_embed(self, texts: List[str]) -> np.ndarray:\n",
    "        \"\"\"Embed `texts` in chunks of `self.batch_size`, preserving input order.\"\"\"\n",
    "        vectors = []\n",
    "\n",
    "        for start in range(0, len(texts), self.batch_size):\n",
    "            batch = texts[start : start + self.batch_size]\n",
    "            vectors.extend(self.embeddings.embed_documents(batch))\n",
    "\n",
    "        return np.asarray(vectors, dtype=np.float32)\n",
    "\n",
    "    def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:\n",
    "        \"\"\"\n",
    "        BEIR query encoder.\n",
    "\n",
    "        Extra keyword arguments supplied by the caller are accepted and ignored.\n",
    "        \"\"\"\n",
    "        return self._batch_embed(queries)\n",
    "\n",
    "    def encode_corpus(\n",
    "        self,\n",
    "        corpus: Union[List[Dict[str, str]], Dict[str, Dict[str, str]]],\n",
    "        **kwargs,\n",
    "    ) -> np.ndarray:\n",
    "        \"\"\"\n",
    "        BEIR corpus encoder.\n",
    "\n",
    "        Each document is embedded as \"<title>\\n<text>\" when it has a non-empty\n",
    "        title and as \"<text>\" otherwise. Both fields are stripped first and a\n",
    "        missing or None field counts as empty. Extra keyword arguments are\n",
    "        accepted and ignored.\n",
    "        \"\"\"\n",
    "        if isinstance(corpus, dict):\n",
    "            corpus = list(corpus.values())\n",
    "\n",
    "        texts = []\n",
    "        for doc in corpus:\n",
    "            title = (doc.get(\"title\") or \"\").strip()\n",
    "            text = (doc.get(\"text\") or \"\").strip()\n",
    "\n",
    "            if title:\n",
    "                texts.append(f\"{title}\\n{text}\")\n",
    "            else:\n",
    "                texts.append(text)\n",
    "\n",
    "        return self._batch_embed(texts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "af3eb66d",
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_hf_to_beir(hf_dataset: Iterable[Dict[str, Any]], strip_docstring: bool = False):\n",
    "    \"\"\"\n",
    "    Convert an iterable of code-search examples into BEIR structures.\n",
    "\n",
    "    Args:\n",
    "        hf_dataset: Iterable of dict-like rows with \"code\" and \"docstring\"\n",
    "            keys and an optional \"func_name\" key.\n",
    "        strip_docstring: When True, remove verbatim occurrences of the\n",
    "            docstring from the code before indexing it, so the query text\n",
    "            cannot appear inside its own target document. Best effort only:\n",
    "            nothing is removed when the docstring is not present verbatim.\n",
    "\n",
    "    Returns:\n",
    "        Tuple `(corpus, queries, qrels)` where row i becomes document\n",
    "        \"doc_{i}\" and query \"q_{i}\", and each query has exactly one\n",
    "        relevant document.\n",
    "    \"\"\"\n",
    "    corpus, queries, qrels = {}, {}, {}\n",
    "\n",
    "    for i, data in enumerate(hf_dataset):\n",
    "        docid = f\"doc_{i}\"\n",
    "        queryid = f\"q_{i}\"\n",
    "\n",
    "        code = data[\"code\"]\n",
    "        docstring = data[\"docstring\"]\n",
    "        if strip_docstring and docstring:\n",
    "            code = code.replace(docstring, \"\")\n",
    "\n",
    "        # The code is the document (what the agent must retrieve).\n",
    "        corpus[docid] = {\"title\": data.get(\"func_name\", \"\"), \"text\": code}\n",
    "\n",
    "        # The docstring is the query (what the user asks for).\n",
    "        queries[queryid] = docstring\n",
    "\n",
    "        # One-to-one relevance: query i targets code i.\n",
    "        qrels[queryid] = {docid: 1}\n",
    "\n",
    "    return corpus, queries, qrels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f8a1c6e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate_ollama_model(\n",
    "    model_name: str,\n",
    "    corpus: Dict[str, Dict[str, str]],\n",
    "    queries: Dict[str, str],\n",
    "    qrels: Dict[str, Dict[str, int]],\n",
    "    base_url: str,\n",
    "    batch_size: int = 64,\n",
    "    k_values: Sequence[int] = (1, 3, 5, 10),\n",
    ") -> Tuple[Dict[str, float], Dict[str, float], Dict[str, float], Dict[str, float]]:\n",
    "    \"\"\"\n",
    "    Run exact dense retrieval with one Ollama model and score it with BEIR.\n",
    "\n",
    "    Args:\n",
    "        model_name: Ollama model tag used to embed queries and documents.\n",
    "        corpus: BEIR corpus mapping doc id to {\"title\", \"text\"}.\n",
    "        queries: BEIR queries mapping query id to query text.\n",
    "        qrels: Relevance judgements mapping query id to {doc id: score}.\n",
    "        base_url: URL of the Ollama server.\n",
    "        batch_size: Batch size for both the adapter and the BEIR searcher.\n",
    "        k_values: Cut-offs at which the metrics are computed.\n",
    "\n",
    "    Returns:\n",
    "        Tuple `(ndcg, _map, recall, precision)` as returned by\n",
    "        `EvaluateRetrieval.evaluate`.\n",
    "    \"\"\"\n",
    "    model = BEIROllamaEmbeddings(\n",
    "        base_url=base_url,\n",
    "        model=model_name,\n",
    "        batch_size=batch_size,\n",
    "    )\n",
    "\n",
    "    # Build the searcher and the evaluator.\n",
    "    retriever = DenseRetrievalExactSearch(model, batch_size=batch_size)\n",
    "    evaluator = EvaluateRetrieval(retriever, score_function=\"cos_sim\")\n",
    "\n",
    "    # Run retrieval, then compute the metrics (NDCG, MAP, Recall, Precision).\n",
    "    results = evaluator.retrieve(corpus, queries)\n",
    "    return evaluator.evaluate(qrels, results, list(k_values))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c9528fb6",
   "metadata": {},
   "source": [
    "# Data\n",
    "\n",
    "**Caveat (TODO confirm):** in CodeSearchNet-derived datasets the `code` field may still contain the function's docstring, i.e. the query text itself. If that holds here, the scores below are inflated by leakage. The stored results were produced with the default `strip_docstring=False`. Re-run with `convert_hf_to_beir(raw_dataset, strip_docstring=True)` to check."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "230aae25",
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_dataset = load_dataset(DATASET_NAME, split=DATASET_SPLIT)\n",
    "corpus, queries, qrels = convert_hf_to_beir(raw_dataset)\n",
    "\n",
    "# Every example must yield exactly one document, one query and one judgement.\n",
    "assert len(corpus) == len(queries) == len(qrels) == len(raw_dataset)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "13050d31",
   "metadata": {},
   "source": [
    "# Test qwen3-0.6B-emb:latest"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "514540af",
   "metadata": {},
   "outputs": [],
   "source": [
    "ndcg, _map, recall, precision = evaluate_ollama_model(\n",
    "    QWEN3_EMBEDDING_MODEL,\n",
    "    corpus,\n",
    "    queries,\n",
    "    qrels,\n",
    "    base_url=OLLAMA_BASE_URL,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    k_values=K_VALUES,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "5c0f9845",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Resultados para CodeXGLUE:\n",
      "NDCG: {'NDCG@1': 0.94971, 'NDCG@3': 0.96956, 'NDCG@5': 0.97166, 'NDCG@10': 0.97342}\n",
      "MAP: {'MAP@1': 0.94971, 'MAP@3': 0.96504, 'MAP@5': 0.9662, 'MAP@10': 0.96694}\n",
      "Recall: {'Recall@1': 0.94971, 'Recall@3': 0.98251, 'Recall@5': 0.98761, 'Recall@10': 0.99297}\n",
      "Precision: {'P@1': 0.94971, 'P@3': 0.3275, 'P@5': 0.19752, 'P@10': 0.0993}\n"
     ]
    }
   ],
   "source": [
    "print(\"Resultados para CodeXGLUE:\")\n",
    "print(\"NDCG:\", ndcg)\n",
    "print(\"MAP:\", _map)\n",
    "print(\"Recall:\", recall)\n",
    "print(\"Precision:\", precision)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c4e643ca",
   "metadata": {},
   "source": [
    "# Test qwen2.5:1.5b\n",
    "\n",
    "**Note (TODO confirm):** `qwen2.5:1.5b` appears to be a general-purpose chat model rather than a dedicated embedding model. The near-zero scores below most likely mean its Ollama embeddings are not suited to retrieval, so they should not be read as a like-for-like comparison with the embedding model above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ced1c25",
   "metadata": {},
   "outputs": [],
   "source": [
    "ndcg_qwen_2, _map_qwen_2, recall_qwen_2, precision_qwen_2 = evaluate_ollama_model(\n",
    "    QWEN25_MODEL,\n",
    "    corpus,\n",
    "    queries,\n",
    "    qrels,\n",
    "    base_url=OLLAMA_BASE_URL,\n",
    "    batch_size=BATCH_SIZE,\n",
    "    k_values=K_VALUES,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6a95189e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Resultados para CodeXGLUE:\n",
      "NDCG: {'NDCG@1': 0.00031, 'NDCG@3': 0.00061, 'NDCG@5': 0.00086, 'NDCG@10': 0.00118}\n",
      "MAP: {'MAP@1': 0.00031, 'MAP@3': 0.00051, 'MAP@5': 0.00065, 'MAP@10': 0.00078}\n",
      "Recall: {'Recall@1': 0.00031, 'Recall@3': 0.00088, 'Recall@5': 0.00151, 'Recall@10': 0.0025}\n",
      "Precision: {'P@1': 0.00031, 'P@3': 0.00029, 'P@5': 0.0003, 'P@10': 0.00025}\n"
     ]
    }
   ],
   "source": [
    "print(\"Resultados para CodeXGLUE:\")\n",
    "print(\"NDCG:\", ndcg_qwen_2)\n",
    "print(\"MAP:\", _map_qwen_2)\n",
    "print(\"Recall:\", recall_qwen_2)\n",
    "print(\"Precision:\", precision_qwen_2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3dad9811",
   "metadata": {},
   "source": [
    "# Save data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f875dd8d",
   "metadata": {},
   "outputs": [],
   "source": [
    "results_data = {\n",
    "    QWEN3_EMBEDDING_MODEL: {\n",
    "        \"NDCG\": ndcg,\n",
    "        \"MAP\": _map,\n",
    "        \"Recall\": recall,\n",
    "        \"Precision\": precision,\n",
    "    },\n",
    "    QWEN25_MODEL: {\n",
    "        \"NDCG\": ndcg_qwen_2,\n",
    "        \"MAP\": _map_qwen_2,\n",
    "        \"Recall\": recall_qwen_2,\n",
    "        \"Precision\": precision_qwen_2,\n",
    "    },\n",
    "}\n",
    "\n",
    "output_file = RESULTS_FILE\n",
    "# Create the target directory on first use so a fresh checkout can save results.\n",
    "output_file.parent.mkdir(parents=True, exist_ok=True)\n",
    "with open(output_file, \"w\", encoding=\"utf-8\") as f:\n",
    "    json.dump(results_data, f, indent=2)\n",
    "\n",
    "print(f\"Resultados guardados en {output_file}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "assistance-engine",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}