From d6ac7aa1ca58b8877cc2c4462de6d44c7bf33b6b Mon Sep 17 00:00:00 2001 From: pseco Date: Tue, 24 Feb 2026 16:53:37 +0100 Subject: [PATCH] working on evaluating embeddings --- .../agent/n00 langgraph_agent_simple.ipynb | 102 +++----------- .../n00 Beir Analysis CodeXGlue.ipynb | 132 ++++++++++++++---- .../embeddings/n00 Beir Analysis.ipynb | 47 ++++++- .../embeddings/n00 Beir Analysis_cosqa.ipynb | 8 -- 4 files changed, 168 insertions(+), 121 deletions(-) diff --git a/scratches/pseco/agent/n00 langgraph_agent_simple.ipynb b/scratches/pseco/agent/n00 langgraph_agent_simple.ipynb index 7826a6e..b688f47 100644 --- a/scratches/pseco/agent/n00 langgraph_agent_simple.ipynb +++ b/scratches/pseco/agent/n00 langgraph_agent_simple.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "9e974df6", "metadata": {}, "outputs": [], @@ -34,19 +34,10 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "30edcecc", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DEBUG: LANGFUSE_HOST from env = http://45.77.119.180\n", - "DEBUG: Langfuse client base_url = NOT SET\n" - ] - } - ], + "outputs": [], "source": [ "ES_URL = os.getenv(\"ELASTICSEARCH_LOCAL_URL\")\n", "INDEX_NAME = os.getenv(\"ELASTICSEARCH_INDEX\")\n", @@ -81,24 +72,10 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "ad98841b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Langfuse Configuration:\n", - " Host: http://45.77.119.180\n", - " Public Key: **********\n", - " Secret Key: **********\n", - "\n", - "✓ All Langfuse environment variables are set\n", - " Tracing will be sent to Langfuse when you run the agent\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"Langfuse Configuration:\")\n", "print(f\" Host: {LANGFUSE_HOST}\")\n", @@ -123,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "5f8c88cf", "metadata": {}, "outputs": [], @@ -142,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "f9359747", "metadata": {}, "outputs": [], @@ -174,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "e5247ab9", "metadata": {}, "outputs": [], @@ -189,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "a644f6fa", "metadata": {}, "outputs": [], @@ -209,7 +186,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "36d0f54e", "metadata": {}, "outputs": [], @@ -242,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "fae46a58", "metadata": {}, "outputs": [], @@ -260,21 +237,10 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "2fec3fdb", "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAANgAAAERCAIAAACW0v5yAAAQAElEQVR4nOydB3xTVfvHn3tvRvfeLW0pZbVlCsgLgmyVWZa+TBV52fxBBUSRoYIIojhAFAUZgqCCLEFUppRZkC2ztLSldK+Upk2T+39ubglpSRdtkpPkfD98ys29Jzdp88s5zzjnORKe54FCMTcSoFAIgAqRQgRUiBQioEKkEAEVIoUIqBApRECFWAOSbiqvxeZlpxYXKzWaEl6jLt+AYYHXGDiJPH4eOABDd9DwPMMz5W+LUTb9k3jI8KBhyj+fLX9SImdkdqyjszQw3L7Fsy5AKgyNI1bJv6cUZw9m5WWXaNQajmOkdqydHccwoC4pLy5GwvAl5f+eghAZ4B/THMexavVjd+AY0EC5D4VhUXSoMP1TQsvHXwvwtcreEt+tugRUxXzRA7W6hJc7sMGNHXuN9AHCoEKsjBux+Ud2ZKiKeK8AWatnPRq2dgBLprAQ/t6Wevf6A+zR64U79BvvD8RAhVghmxYn5mYUN2rl3IO8/qOWJPxbeHBrmqpIPXhqsGcAEeYZFaJhVs287eIuHfFOMFgvp/fnnv0rI7KDW+eBnmBuqBANsGrW7eadPDr2cwcb4OvZcX3GBNRrZAdmhQqxPKtmxnWK9onq6AQ2w7dv3wlr7tx9mBeYDxYoeqx+O65VVw+bUiHyv8X1b57PvX5GAeaDCvERm5cmOrlJ2vd2A9vjhVcCDvyUCuaDCrGUhKvKnNSi4W9Zs3dSCSFN7T395T8uSQQzQYVYyoEf74dEOIMN89IbQZmpRfkZGjAHVIgCiTeLlQ/UfV7zBdvGy1++Z20ymAMqRIFjv6Y5e8nAtMyePXvnzp1QQ27fvt23b18wDh36e2elFYE5oEIUyM0sjmxv6gkBV69ehZrzZM+qJsGN7RiGOftXLpgcGkcERZZm/aI7kz9pAMYhJiZmw4YNV65c8fLyatGixdSpU/GgTZs24lUnJ6fDhw9jP/fLL7+cOXPm3r17YWFh0dHRQ4YMERt079597NixBw8e/Oeff0aNGrVx40bx/Ouvvz5ixAioazZ+mGDvwA2ZHgSmhU4Dg6un8zgJA8bh2rVr06ZNmzBhwnvvvRcXF/fll18uWLBgxYoVqM6OHTvOnTt3wIAB2OyTTz5BCc6ZMwc7pPj4+CVLlvj7+2MDvCSVSn/99dd27dqhHJ966ils8Mcff+zZsweMg4evPC1JCSaHChEy7imldsYyUc6fP29nZzdmzBiWZf38/CIiIm7duvV4s8WLFxcUFAQEBOAxdpa7du06fvy4KERUnqur64wZM8AkuPvIkm49AJNDhQhFDzQSqbF6xJYtWyqVyunTpz/99NOdO3euV6+eblDWBw2kLVu2YDeZkJAgngkMDNRdRfmCqXBw4dQaM1hr1FkBtTD52Vh/+iZNmnzxxRfe3t44KA8cOHDSpEkXLlwo10aj0eDwjQbilClTDh06FBsbi6akfgOZzHQePcsIk27B5FAhgoODRGPMIG6HDh3QFty9ezdah7m5udg7lpSU6DdAOxJdGXQ+unbt6uwsBNXz8/PBTBQq1IwZdEiFCODqLSkqNJYSz549i9YeHmCniPG/N998E0WWkpKi3yYnJwd/+viUTr+N0wJmIvO+SsqZQRVUiNCwlYtaZSwh4kA8a9as7du3Z2dnX758GQ1BVCR6xHK5HJV38uRJHIiDg4MlEgnGZfLy8tBl/vjjj9u3b19OrDqwcUZGBkZ8dNZk3ZJ5v8jBlQOTQ4UIPvWk6JtePZEHRmDkyJFoGi5btqxnz57jxo1zdHRcvXo1yg4voSuNdiH2kegUL1y48NKlS926dcMBevLkyRhERNXqQon6PPPMM+gAoRO9f/9+MAL5WcVBjcywNIcGtAW+XxAvs2NHzLbRqTc6clJLNn50Z+ryhmByaI8o8PRzXnnZKrB59m24Z+9khnEZaBxRJOI/Toe3pR7cmt7tJW+DDdDbFVMgj4M5OoXC8NxmTNatXbsWjMM6LVDDt4RB8kWLFkEFZNwr6jMmAMwBHZpLOXsg9+Te9MmfhBu8iqG++/fvG7yE8WrMnRi8hLagzheuc/K1QA3fEjpJnp6G1+zt+DolN6345XkhYA6oEB+xZt4dd2/poKmmzveTAK+Br2bequh7aAKojfiI196vfz+hKOGqGVL+Zmf1nDuR7c25WIcKsQyj3grbu848U5TNyMYPEj18ZF2GmnM5KR2ay6NUaNYsuDN8VrC7jxRsgG+xL/yPS4e+Zi72QIVoAEWOZt37ceHNnZ9/xZpXseSmq7cuT/Dwlw+ZGgjmhgqxQla/E8cwzLODvRu1tsL19j9/lpyWVNi6i+d/+hJRWYUKsTIObE6/fi5XJudCmzn1+K83WD7/nsz/52hOTlqxi6d05NsEZZKoEKvmz42p8dcKipUalmMcXaQOzpy9I8dIGXXxo+KbnIQtrdvJYCxEe4Zj1Oqyf1sGOLb8SZYVKnmqy8wLw5NCzVhebeCj4fB1VbzuuboJbJxEuIn+GREJx5aUwIN81YM8tfKBGljG3Uf2wqgAVx+y/FQqxGqjhMN70tMSihR5Jag5XlNGUizHa9Rl6w0Lf9ryM/sYluc15ZppyxVrJY1hc1Z4zGirHZdvKcJxvPrhC2Fb3afHcsJN9M/o2ktknJ096+ojb9rWOawZobVGqRAJYvjw4QsWLGjUqBHYHjTXTBAlJSXiDDEbhAqRIKgQKURAhUghApVKJZXaRDrncagQCYL2iBQioEKkEAEVIoUIqI1IIQK1Wk17RIqZQRVynHlW0JEAFSIp2LKBCFSI5ECFSCEC9FSoECnmh/aIFCKgQqQQARUihQhsOZoNVIjkQHtEChFQIVKIgAqRQgRUiBQioM4KhQhoj0ghAoZh3N2JKENjFqgQSYFl2YyMDLBVqBBJAcflcluj2RRUiKSAQlSr1WCrUCGSAu0RKURAhUghAipEChFQIVKIgAqRQgRUiBQioEKkEAEVIoUIqBApRECFSCECKkQKEXAcZ8u5ZrpNLkGgFm22U6RCJAhbHp3pzlPmp2XLlizLMoywsZlGoxEPRo8ePX36dLAZaI9ofpo0aSIKEcHRGY/r1as3bNgwsCWoEM3P4MGDZTKZ/plOnTr5+lrznuWPQ4VofoYOHRoaGqp7iBLEM2BjUCESwfDhwx0cSjewbdu2bUhICNgYVIhE0LdvX7FTxO4QRQm2B/Waa0BBLpzZn1FYgDGWMtvEsxJGo+ah7B8SfQ4Nr9E/yYjfer7M3t4MK/jIDPCpaenXrl3z9PSMaBohPJ0TPhpeU/494GsB3vex83hzBhiNxvCnKbeXBIY7RrZ3BFKhQqwumz66m5epksk5FKFGVUYIDKfdbb7sH1LYrJ4ve1K3Ib2+EBleuIANNYLABM9Z247hhDPw2IeD5wUpPyZE/CRR03wFqRm5HatS8ZwEBkwI9A6SAXlQIVaLH5cmaUqY/pMDwZK58nfe+aMZQ18P8vQnTotUiFWzeUkSy7F9/hcAlo8iB3asjJu4NAwIgzorVVCYBbkZRdahQsTJDZzdZNtWpABhUCFWwZnDmRK5Ve1M5h1kl5NeBIRBp4FVQWG+WlNiZdYLX6LUAGFQIVaBmteo1cR9bLWBF34j4r5aVIg2B5neKRWizSGEvhkgDSpEm0NI95DXKVIh2hwEdodAhVglmKljrCvGRWYIgAqxKnjG2nJP1FmxRAQVWpcQS2dVEAYVos3B8yR+s6gQbQ6hR2RpQJtiboQeUUPc4EyFWAUsK/yzJoQekQa0LQ7e6rxmnicxoE2ngVWBsHCEYCG+9/7svft2guVDhWjZXL9+FawCOjTXPQqF4udffjh95kR8/G1PD68OHZ4d8+pEOzs7vJSdnbX4o3lXrl4Mrhc6YMDQpKS7fx87tP77X0C7Te6atV+dPHUsLe1+VFTLgQNebN/+GfGG0YN6vPrKhNzcnPUbVtvb27dt858pk2d4enp17d4Gr3687INVXy/fvfMwWDK0R6wSvqam/fZft2z+cd1LL476cNFn48dPO3zkTxSQeGnpsvfvJsZ/vPSrhR98eupUDP5jH7pCX3y59JdtmwdGv7R50+5nO3ef/96sI0cPiJekUunWrRuw5Y5fD6z/ftuly+fXrf8Gz/++NwZ/zpwxt0YqpM6KZaJd71kjXhw6EpUUElJffHj58oXTZ46PH/d/2KWdPHls6pSZEU2j8Pybb7w7bHhfL28fPC4qKtr/x57hw17p328wPuz9wgB81oaN3+J9xJsEBtYbOWKMcOTkjD3ijRv/wpNCprNChVgVNU/xYQd2JvbER0vm37p9Q6x36O7ugT9vx93En1FRLcRmTk5OrVu3ww4Sj1FYxcXFqDDdTVq2eGrf77ty83JdXVzxYaNGTXWXnJ1dCgoU8KQwRAakqBDrntXffrl37w4clFFYvr5+361ZKTq2+fl5+NPR0UnX0kUrMhDMynz8OXXaa+VulZ2VKQqRqbvRFLtDDXldIhViHYPBnt17tg0ZPLxvn4HiGVFkiFwu+Cuq4mJd4+ycLPHA08sbhMF6Dg7B+nfz8fGDun+Lgt0LhEGFWAWCiViTgQzH4sLCQi8vH/EhDrjHTxwVj+vVE2p83Ym/HRoqrG9H5/rcudO+vv54HBQYLJfL8aBVyzZiY/SvUdO6EmF1CJnOCvWaq4SpkY2IBmJwcCiad8n3ktA7QTe5WVRLHJQLCgoCA4LQg0EPGi+hCj/7fLG/f2kNExTcKy+PR+/k0qXzqF30l2fMmvTZ5x9V/lqoXW9vn9jYk/+cj61+2J1mViySJ5iPOHfOh3Zyu1deHTJydPRTrduNHTsFHw4c3CPl/r1ZM+ZhFGbU6IGvvzEO/Y+oyBZSiVR81n9fGj1zxrzNW9b1G9Dl8y+WBPgHvfnmu1W+1ojhY879c2buvDc1Gste80pr31TB3nX3468UjHq3AdQF2EcqlUr0YMSHb8+ZLuEkH7y/DExIzM7UO5cUEz+um9+orqA9oknB1DD2hZhNQUVu/GHN2bOn+vcfAqaFruKzSOp28dT8+Us+Xvb+t9+tSE9PDQmuP3/uR23btAcKFWKV8BqGrzvrC4OCC9//BMwKXWBPIQJacoRCBjTXbIkImVmOyNoITwqt9GCRCDXWySviVhvo0EyhVAgVIoUIqBCrgJWARGptNiIN31gemhIoUVmbjUi9ZgrFMFSIFCKgQqwCqYyRyq3KRpRKJTJ74naOobNvqsDX30Gjtioh5uUUy+2I+9ypEKugeVdntO3jLz0AayHznrJRK2cgDCrEqmnbw+f47vtgFfz6ZaKdPft0b3cgDDpDu1pkp6h+Wp7oEWgf3NhJbs+WqPX2RWbKrCXgtV9u8QTPAKstzyo2edSQh8erB4v7NvPl71emPfPw0eMvzosBwtKXE/9/dJUFJiO5KOmmwsNfHj3RH8iDCrG63LqWuXdNklziolaV2UJMDA7r/or4UP9YvMQYWvfCPFSP2ECEr+i2eurltVUbrpy9FgAAEABJREFUxXCgdo9x/QO+dA00/+jmCPpbMjkX0tip+3AvIBIqxOoyduzYRYsW+fr6gtEYOXLk3LlzGzduDE/ExYsXp02b5uTk1KlTp+jo6EaNGoHlQG3Eqvnzzz/x53fffWdUFSJ4f3t7e3hSmjVr5unpmZKSsmXLltdff3369OkHDhwAC4H2iJWh0Wj69ev36aefPnEvZWJmzpyJ4hMrjOGbd3Nz8/Pze/7550ePHg1kQ3vECklOTi4sLFy7dq3JVIivKBZtemLatGnDcaXBapRjXl7etWvX1q1bB8RDhWiYt956S6FQODo6Gns41mfy5MmpqalQC6Kiory8yrgj3t7eBw8eBOKhQiwP9klnzpzp1auX6YfjgIAAqVQKtSAyMhK/PLqHLi4u+/fvB0uACrEMGzduxOGsdevW3bt3B5Pz1Vdf+fj4QO0ICwvTaAkNDW3btq2l+Ct00sMj9u3bl5WV5eHhAWYiKSnJ399fZ+Q9Ge3bt8exODY2VnyIIaHAwMAmTZoA2VCvWeDff/9t2rRpYmJivXr1wHz07Nnzp59+cnev4/xb165dd+3a5exMXH5ZHzo0A1pRa9asAaF+oTlVCEKh7ECZTAZ1zc6dOwcMGABkY9M9IhpSGOPYu3dv7969warBLv/DDz9ECxhIxXZ7RLSi5syZgwfkqDAhIcFI/QIaHi+//PLs2bOBVGxXiGiNLV68GEjipZdeUuvP66lTevTogXL88ssvgUhsTogFBQW//fYbHixduhQIA41UicSIcQzsFPPz87dv3w7kYVs2IqbsMPG6bdu2cukHmwLzN6jIdu3aAUnYkBAxOuPg4ODp6QmkgjZiSEgIGB90ojF4jk46EINNDM1FRUVDhw6Vy+Ukq1ClUv33v/8Fk4ABnejoaCAJ6xcixmhiYmLQIqx99syo4Pts0MB0BdZ37NhBlBatfGheuHAhxiyM6gFYLqdPn16/fv3KlSuBAKy5R1y9enVUVJSlqBB7xLt374IJQX+le/fuGOgGArBOIR46dAi0YTnSLKFKyMnJGTt2LJiWQYMGYQ4a+0UwN1YoRPQHb9++jQeurq5gOTAMExoaCiZn6tSpmAD866+/wKxYlY2Ynp7u7e196tSpp59+Gig1YdSoUe+88w6mXsBMWI8Qt2zZkpubO378eLBMMLmXkpISFBQEZqJbt27oSru4uIA5MIUQMatmgi0L0QdE65vwWXeVkJycjDkPlAKYCcz+9e/fXzSvTY8pPEqMJxtPiBgHxr7Ezs6uRYsW+EKOjo7iYkqLA23E4OBgMB/4HV61atXIkSN/+OEHMDmm6BGzsrKMJES8bV5enpubm+6Mh4eHhQqREA4cOPDHH38sWbIETIsFf2bijCl9FVo0xcXF9+7dA3ODkcXIyEjTzxazSCFiR4gOMqsFrIW4uLhZs2YBAYwePVqhUJh4tphFfpBoF2Lo64UXXsAgMFgLmAEy+6IZHW+//TaO0RgIA1NhSUJEcxYDNHggl8vB6ggPDydqxjjmoPH9JCUlgUmwJCGKNUDASkGX//59surSYixp4MCBYBLMI8SrV6/OmTNnyJAhr7322urVqx88KK1QvWvXrmHDhiUmJmJc+vnnn584cSJ6cKCdWY0/t27digmAMWPGbNiwoZbFigjkypUr8+bNA8JALZpmKaoZhIiRW8wmKZXK5cuX45/+zp07M2fOFIUllUqx28Nk8fTp0/ft29epUydsg4ljdEr2aJk0adLnn3/u5+e3adMmsC5kMhlRU6ZF8C1hl4F/djAyZhAixu7RMEcJom0eEhKCmkOpHT9+XLyKjsiIESMw6YkB3i5duqBdiAMWGoU7d+7spAXjrr169WrZsiVYF1FRUfPnzwfywHxVz549Fy1aBMbEDELEcblx48a6qTG+vr7+/v6XL1/WNRDLcGHX6ODggAc4cKMcMcamn3ho2LAhWBdoftSyJp3xQEsRPy+j1lk0w6RRVNiNGzfQBNQ/mZ2drTvGvhCVx3GcLkyIWsTwtX5ZX8zpgXWBJsrmzZsXLlwIRDJlypS5c+deunSpWbNmYATMIETMwmHsvlwx3XKTPlCLKDudE4NdI+oS/UpdA9F9sSYiIiL69esXExPTsWNHIJKDBw++++67YBzMIMT69etjsBS/WLoOLyEhoZydjq6MftYEdenj44NBbN2Z06dPg9VB8jRKNBvc3d2NF8E1g404aNAgzNF9/fXXqDaMl65Zs2bChAnx8fH6bdCJLld8o3PnzseOHTt69Choq4Vcu3YNrJGCggIMWgF53L1716iJHzMIEd1eVCEaeVOnTh07duzFixfRcca8gn4bvFquQBvGF9GsXLVqFf7E1NO4ceMAwPqWIGLEHoMGK1asAMJAIRp1lpplTwN7HDoNzEhgQBfjG8OHDwfjQOhnVqQFbBhMOKHpAsRghUNzdXjcRrQ1OnTogKYzEIONDs2oQnxjT7A23pqGZoxeiWEsIIC2bdueOXMGjAahnxlGDWmdEIye3rx5E/1oMDeJiYnGXl5IbUSiQbOMhGIVxh6XgVgh4tBsfRO9ngCMIa9fv14/EW8WTFC40RTDn5ubW01tRMxHoxCfYGGU9cVuAgIC/Pz80GJmGAbMBA7NYWFhYExMIcQnWOVkliowxIJ/vWeeeQbzouZaI4FDc9euXcGYENp/YO5/9+7dQHnIpk2btm7dCmYCh2YbtRExB22t2eQnA000c23+rVKpMjMz0TwAY0KoEDt27Ni/f3+glGX+/PmmX4SP47IJSswTKkSMWpl+u2TymTx5sukXWBk7uSdCqBAxiL9t2zaglMXHx+e7774D02KCICIQK8SUlJQrV64AxRD79+9H7wFMhU0PzZjZHDJkCFAM8dxzzxl1175y2PTQ7O/vHxERAZQKOHLkiMnq/tj00HzhwoXNmzcDpQIwsq1UKtPT08HIFBQUYNLfBDt2ESpE/BNfvHgRKBUTGBg4fvx4Y2/NYppxGcyyiq86NG/e3NfXFyiVsnHjxhMnThh13DTNuAzECtFHC1AqxdHRsUePHmBMTLZhKqFDM8ZuNmzYAJRqMG3aNONV1ExMTDTN0EyoELOzs8+dOweUarB8+fI9e/aAcTDZ0Ezohj+YZcfvovWV/LI4unTpgip3cnICI0Noj4jxAqrCGrF169aYmBjdw969e0OtwXFJKpWaQIVArBBv3br17bffAqXaYK7lm2++SUtL69Onz1NPPcWy7J07d6B2mCa5J0KoEHNzc2NjY4FSE9C9Gzx4cGpqKsMwhYWFycnJUDtMMB9WB6Hhm/DwcLG6DaX6tGrViuM48Ri/ybXfEMBkLjMQ2yO6urri+AKU6tG5c2d9FYJ2TyRx0+raQIdm4bu4cuVKoFSPo0eP1q9fHx0L3RkcnWtfCNmUQzOhQlQoFCdPngRKtdm+ffukSZMCAgLEChkYlUtJSYHaYcqhmVAbEX//qVOnAqVSrp8pUJVoZyUyKD1oFT4gakbv4zHHL1+6nJOf68Q5nfgjydm5tCY0w6A6sRXor45mtE8sF0lmWOA1aGXmRQb3vvlPEfBF4kswupbaZ5X+fNjeICzL+ATJvQJlUBVkBbTHjh0r1m3XVQNDW0epVIrb/lB0bFx0Nz9HxbKgKhY+voeSAFEgouaEY+Go9EKp/hjtUv2H7bVH2LbM0n2OY9Tq8qrQb1lWh6Ctvc/oXkX/mRIpXmOkMqbFM+7tXqisXAJZPWLz5s0fTzF7e3sDRY/Vs+O8gxyixwVD1R0NEVyOyT13KNMvRB4cUWFlM7JsxNGjR5czSrBHbNu2LVAesvqduGYdvXqM8rMUFSJRHV1HzAnbv+l+7B+5FbUhS4hubm6Ym9Iv8uLj4zNs2DCgaNm3Pk0i5aI6u4AF0ugp1/NHMiu6SpzXjLLT7xRxsG7atClQtKTeVXr5W+pOR627e6hUfLHC8FXihIgp9kGDBokxCE9PzxEjRgDlIaqiEomdBZc702ggI9XwTk0k/lYvvviiuP9PREREixYtgPKQkmK+pFgFFotGzWsqqHpZK6+5+AEc35uedrf4QX5JkRKjLQy+EsMyvEb4qXXx+dIIk9at5yRCA17n+pfGGTCcwOJTQDyB0SoN3zX0o5J6aiknWTUrjuWEZ4lPEW+ubQkMB49+K11EAco0K/0l8bdkGamUdXBhgxo6dOhr9DVplJryhEL8fV3q3esFqiKelbBoPrMyVu4o5XlRVYK4RH8D1aIR45QPZQQaIYBaJo6lpbSV9iE2kJWNdelinbpjbUsDQVBxQ8lyJyUSDgcFdbE6K1WVlph97mC23J5r0talUzRVJCnUWIj7vk+Nu6zgpIyzl1NgpEV+kHwxf/dy+sVjOZdP5LR61q19bypHEyH0IxXUva2ZEL95+w4OtSEt/Z28zFO6tE5gZExIa2GJYHpc3tmDWVdPKca8Z6I5JjYOmkssGM7kVddZSb6uXPHGLWcvxyZdgi1ahfp4h7lEdg9lOO6rN2s7Y8pEMGC+Qtp1AANQUUa5WkLMTS/ZsTo5olv9gAgrHMXqt/X3a+L91QwL0KIgQmvbBrOUqoV468KDTUsTInuEshxYKx5BjqGtA1cSr0Wet2wdCj1iBT161ULcv/5eeDsTTUozIw7uUq8Qt6/figOK0dAG9AxfqkKIq+fEO/s6y5ystzPUwzfcjZNxm5cmArEw2hCYxcJDhTZuZUI8/EtmiUoT3NwLbIaGHYKy7helxBcDkWhtRAsenJ/QWbl8PNs7tMZ7P1k6Du52u7+p7fo3IyHYiJZsJGqzEIa7xAqFeGJ3FmZNvOu7ApGcv/TXjLlPKwqyoa4Ja+OP6crcDBJ3ixYSm2Bqogf12LCxzirIV7QioEIhXj6Va+dsJfHCmiKVS/78obYrj4zBE3jN770/e+++nUAG5VbM6FOhEJUFav9GHmCTuPo4Z9wn1EysKdevXwVLwHCK78aZAomEtXcx1mz0+LsX/zj0XWLSVSdH96aNn+nVdaydnSOejzn5859H1k4cs2rDlrdT0+L8fcM7dxjWtnVf8Vl7fv8y9sJeucyhVfPnfLyMuN7Wt4FrZpKJSqUbla7d2+DPj5d9sOrr5bt3HgZhk8Mj6zesTrh7x9XVLTy88bSpb/n6lu5tVsklERxVt23/cf/+PYlJCSHB9du0aT/m1Yn6q/qrRY285luX8sFoYYKMzMRv1k1VqYqmjPvu5eFLUlJvrlo7Ua0WZnRxEmlhYf6O35a9GP3Ox++fbB7V7acdC7Nz7uOl46e3HT/9y6A+M6eN/97TPeDPQ2vAaLAyFqMkN84owML5fa9QH2zmjLmiCmPPnpq3YGavXn1+2rJ3/tyPUlNTPvviI7FlJZd0bN++5YdNa4cMHr5l855+/Qb/tnfHlq01K6ZaY69ZkVsikRprzuy5C79LOOkrw5b4eof6+YQNHTAnOeX65X+PiFfValXPrmND6jVjGKZNyz74LUxOuYHnj534qXlkd0rPGUQAAAcYSURBVJSmg4ML9pHhYW3AmHASNv0ecaNzLZ2Vtd+v6typGyoJ+7zIyOaTJr5x8uSxa9qxu5JLOi5cPNe4ccRzz/V1c3Pv22fgyhXrnm7XEWpCjW3EElX5ta51CI7L9YIiHB1LA0Me7v6eHkF3Es7rGgQHRooHDvbCKqFCZT7KMSMr0denvq5NUEATMCoaXqEgToi1TPHFxd1s0iRS97BxI2Enm2vXrlR+SUdUVIuzZ08t/fj93/fvzs3LDQwICg9vBHWEYRuRYTTGC1cVKhWJyVcx+KJ/Mi8/U+/Vy38HlEUFGo1aLnfQnZHJ7MGoMAzHGmtMqAVPvo+9QqEoKiqSyx+tvXJwEP6eDx4UVHJJ/w7YXzo4OMYcP7Jk6XsSiaRLl57j//d/Xl51s+rcsBClMgkDxgqkOTt71g9p+Vy3MlXnHB0rC1jayR1ZllOplLozRcUPwJhgH2xnT15iU3+2eg2xsxN0plQ+WrtUoNWZp4dXJZf078CyLI7I+C8+Pu7cudPrNqwuKFB8uHA5VBtGexeDlwwL0c1TmpFirIEpwLfh2Qt7w0JbsQ/f0/20OG/Pyrxg7Abc3fzj71569qFN8u/1GDAmGg3vV9/Ine4TUIuhGfuwxo2aXrnyaBsl8TisQcNKLunfAf3lRo2a1q/fIDQ0DP/lK/J/2/sr1BBeY7hMjmF5NmjhpFZVUFen1mBERqPR7Nq3vLhYmZaesGf/ik9WDE9JvVX5s1pE9bh09RAmVPD44N8bEpIug9EoVqjRRgxv4QCEwbA1s9zlcrm3t09s7Ml/zseWlJQMjH7pWMzhbdt+zMvPwzNfrfq0dau2DcOFfbEruaTjwMHf0bM+fvwoGojoyvx97GBUZM3WWFbirBjuEes3c8Bn5GcUORthMja6vTOmbD7098bPvn45LT0+OChyaPScKp2PHs++WlCQvWPvJz/8NAdH9v4vTN/88zwjVZBKu5MtlZM44YjX1LhHHDF8zPfrvj595viPm/dgdCY9I23rzxtXfPUJxgjbPNX+f2OniM0quaTjzTfeXbFy2Zy5b+Cxh4cnjtFDh4yEOqLCamDr3ktQ81yDp/3B9rh+JNE3RB49kbjffdWs24Hh9l1fCgDLZN2CWwMnBAY1NmDzVOgYtuzsWqQoAptEWaSKnkDiN1CII1r6opUKFFfhKr6WXd1O7MtK/jczsKnhdSo5uanLVgw3eMle7lRYZDgt4ecdNmVcXe5b8e6i7hVdwmwNxxn4BUODm48dVaGvd+tkiqu7DIj8uPmKZ69YBEKpT77my0nb9vI49XuFQnR28nxj0kaDl9ALkckM1wpi2TquyFjRexDehqpIJjVg40q4ynLoynzlhI/CgUwsfOWUWPvD4KXKZNGmh9uV47nxsfdD2/g9fhU7Gw938xsrdfsebvydWK+ho4Tg0oMV9SiWThXJg5fnhRTmKXNSjBs9JoSki+kcBwPI81EewQDLWHCvWFq1yBBVZ7EmLmmQdCUNrJ17VzIVmQ9e+yAUSMbyl5NCTWdo6zeZuLTB5T/vZN+z2n4x8RKqUDFhaRhQjMmTrFnRBwesKZ+G37uaGneGxAn0teT634kPshXjFlMVmgK+lrVvkMmfhIOm5NrhhPs3637JklmI/yftyoF4VzfJeKpCk1DJAvuaBVPGLAg9vT/nn8NZWYm59i523g08nNwtp7j9Q7KTC7IScpWFxVIZO3BcvYBGFvMrsKxlx7MFKnj/NY7qtXvODf/F/pVzOSY3/mwyywqz6vGvI5FxGp7X7UCkvwmMiLY+J1Om6ib/qBLKoz1qHhoSYrVP7QRdXns7/WZlGoD+PjMsD5ryRT5ZjufVwhsqKS6d2+bqKesxLDAkwsIKo2s0Fh3P1lInPaIODDHiPzy4df7BrQv5ORnFmhK+WKknRAnwJY9es7QULGqT1UqyVCaPlMiyQqVvsdyr0JgREvwPTwrnxdlD4pmH9xceimvOdbtwMRzwQvnk0odie4mUYTjG3knq4i6J/I9rQAMbXSZLMrXNc4S3dMB/QKHUDkI3haQYRCrjJFILLoglkWBE3vD7p0K0JKR2TNEDY01YNgFoQwWFGXYNLXj3GBsktKlz5n1LnZt3fFeG3J6DCjp0KkRL4tnBHviBHdxskRnXhCt53Yb6VHSVrP2aKdVhw8K7DMu26uIVEmkB4SdFDn/ur/SEa/kvvxvq6FqhgUuFaJH8/FlyZkqRRs3r7/BdbmmSbtulcmh3DmfKPansOtWHd9LF1ype9SQ20QVuHzXUvjzLCRVu7R0lz4/29wurLHFAhWjJFENhod7y84fRWu2x9gz/WOgfym3lVaogntUrqqCTlbBTWNlEgnhG3MZeVw3koZi1yQNdpFd7nuPsnaA6UCFSiICGbyhEQIVIIQIqRAoRUCFSiIAKkUIEVIgUIvh/AAAA//8K91KcAAAABklEQVQDAAPvFDLgENXIAAAAAElFTkSuQmCC", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "try:\n", " display(Image(agent_graph.get_graph().draw_mermaid_png()))\n", @@ -292,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "a7f4fbf6", "metadata": {}, "outputs": [], @@ -302,7 +268,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "8569cf39", "metadata": {}, "outputs": [], @@ -324,44 +290,10 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "53b89690", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting agent...\n", - "================================\u001b[1m Human Message \u001b[0m=================================\n", - "\n", - "What does vertical farming proposes?\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Failed to export span batch code: 404, reason:
\"Langfuse

Loading ...

\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "==================================\u001b[1m Ai Message \u001b[0m==================================\n", - "\n", - "To provide you with an accurate answer about what vertical farming proposes, I would need to call a specific function that retrieves context from Elasticsearch. However, without access to actual data points or recent developments in the field of hydroponic agriculture, it's difficult to give an exhaustive and up-to-date response.\n", - "\n", - "For precise information on current practices and innovations within vertical farming, you might want to refer to specialized agricultural news outlets, industry reports, or directly contact experts in this area. This could include websites focusing on indoor agriculture, research institutions working with hydroponics, or the latest publications from agritech companies that specialize in vertical farming technology.\n", - "\n", - "If you're interested in learning about current trends and advancements in vertical farming, I can try to provide a more comprehensive answer when I do have access to relevant data.\n", - "\n", - "Flushing traces to Langfuse...\n", - "✓ Traces flushed (check your Langfuse dashboard at http://45.77.119.180)\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Starting agent...\")\n", "stream_graph_updates(user_input)\n", diff --git a/scratches/pseco/evaluation/embeddings/n00 Beir Analysis CodeXGlue.ipynb b/scratches/pseco/evaluation/embeddings/n00 Beir Analysis CodeXGlue.ipynb index 0ba4395..a4bc78f 100644 --- a/scratches/pseco/evaluation/embeddings/n00 Beir Analysis CodeXGlue.ipynb +++ b/scratches/pseco/evaluation/embeddings/n00 Beir Analysis CodeXGlue.ipynb @@ -174,11 +174,33 @@ "# Evaluar métricas (NDCG, MAP, Recall, Precision)\n", "ndcg, _map, recall, precision = evaluator.evaluate(\n", " qrels, results, [1, 3, 5, 10]\n", - ")\n", - "\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5c0f9845", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resultados para CodeXGLUE:\n", + "NDCG: {'NDCG@1': 0.94971, 'NDCG@3': 0.96956, 'NDCG@5': 0.97166, 'NDCG@10': 0.97342}\n", + "MAP: {'MAP@1': 0.94971, 'MAP@3': 0.96504, 'MAP@5': 0.9662, 'MAP@10': 0.96694}\n", + "Recall: {'Recall@1': 0.94971, 'Recall@3': 0.98251, 'Recall@5': 0.98761, 'Recall@10': 0.99297}\n", + "Precision: {'P@1': 0.94971, 'P@3': 0.3275, 'P@5': 0.19752, 'P@10': 0.0993}\n" + ] + } + ], + "source": [ "print(f\"Resultados para CodeXGLUE:\")\n", - "print(\"NDCG@10:\", ndcg[\"NDCG@10\"])\n", - "print(\"Recall@10:\", recall[\"Recall@10\"])" + "print(\"NDCG:\", ndcg)\n", + "print(\"MAP:\", _map)\n", + "print(\"Recall:\", recall)\n", + "print(\"Precision:\", precision)" ] }, { @@ -191,43 +213,99 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "5ced1c25", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "NDCG: {'NDCG@1': 0.02333, 'NDCG@3': 0.03498, 'NDCG@5': 0.0404, 'NDCG@10': 0.04619, 'NDCG@100': 0.07768}\n", - "MAP: {'MAP@1': 0.02083, 'MAP@3': 0.03083, 'MAP@5': 0.03375, 'MAP@10': 0.03632, 'MAP@100': 0.04123}\n", - "Recall: {'Recall@1': 0.02083, 'Recall@3': 0.04417, 'Recall@5': 0.0575, 'Recall@10': 0.07417, 'Recall@100': 0.23144}\n", - "Precision: {'P@1': 0.02333, 'P@3': 0.01556, 'P@5': 0.01267, 'P@10': 0.00833, 'P@100': 0.00277}\n" - ] - } - ], + "outputs": [], "source": [ - "model = BEIROllamaEmbeddings(\n", + "model_q2 = BEIROllamaEmbeddings(\n", " base_url=\"http://localhost:11434\",\n", " model=\"qwen2.5:1.5b\",\n", " batch_size=64,\n", ")\n", "\n", "# Inicializar buscador y evaluador\n", - "retriever = DenseRetrievalExactSearch(model, batch_size=64)\n", - "evaluator = EvaluateRetrieval(retriever, score_function=\"cos_sim\")\n", + "retriever_q2 = DenseRetrievalExactSearch(model_q2, batch_size=64)\n", + "evaluator_q2 = EvaluateRetrieval(retriever_q2, score_function=\"cos_sim\")\n", "\n", "# Ejecutar recuperación\n", - "results = evaluator.retrieve(corpus, queries)\n", + "results_q2 = evaluator_q2.retrieve(corpus, queries)\n", "\n", "# Evaluar métricas (NDCG, MAP, Recall, Precision)\n", - "ndcg, _map, recall, precision = evaluator.evaluate(\n", - " qrels, results, [1, 3, 5, 10]\n", - ")\n", - "\n", + "ndcg_qwen_2, _map_qwen_2, recall_qwen_2, precision_qwen_2 = evaluator_q2.evaluate(\n", + " qrels, results_q2, [1, 3, 5, 10]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "6a95189e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resultados para CodeXGLUE:\n", + "NDCG: {'NDCG@1': 0.00031, 'NDCG@3': 0.00061, 'NDCG@5': 0.00086, 'NDCG@10': 0.00118}\n", + "MAP: {'MAP@1': 0.00031, 'MAP@3': 0.00051, 'MAP@5': 0.00065, 'MAP@10': 0.00078}\n", + "Recall: {'Recall@1': 0.00031, 'Recall@3': 0.00088, 'Recall@5': 0.00151, 'Recall@10': 0.0025}\n", + "Precision: {'P@1': 0.00031, 'P@3': 0.00029, 'P@5': 0.0003, 'P@10': 0.00025}\n" + ] + } + ], + "source": [ "print(f\"Resultados para CodeXGLUE:\")\n", - "print(\"NDCG@10:\", ndcg[\"NDCG@10\"])\n", - "print(\"Recall@10:\", recall[\"Recall@10\"])" + "print(\"NDCG:\", ndcg_qwen_2)\n", + "print(\"MAP:\", _map_qwen_2)\n", + "print(\"Recall:\", recall_qwen_2)\n", + "print(\"Precision:\", precision_qwen_2)" + ] + }, + { + "cell_type": "markdown", + "id": "3dad9811", + "metadata": {}, + "source": [ + "# Save data" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f875dd8d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resultados guardados en /home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_CodeXGlue_results.json\n" + ] + } + ], + "source": [ + "results_data = {\n", + " \"qwen3-0.6B-emb:latest\": {\n", + " \"NDCG\": ndcg,\n", + " \"MAP\": _map,\n", + " \"Recall\": recall,\n", + " \"Precision\": precision,\n", + " },\n", + " \"qwen2.5:1.5b\": {\n", + " \"NDCG\": ndcg_qwen_2,\n", + " \"MAP\": _map_qwen_2,\n", + " \"Recall\": recall_qwen_2,\n", + " \"Precision\": precision_qwen_2,\n", + " }\n", + "}\n", + "\n", + "output_file = \"/home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_CodeXGlue_results.json\"\n", + "with open(output_file, \"w\") as f:\n", + " json.dump(results_data, f, indent=2)\n", + "\n", + "print(f\"Resultados guardados en {output_file}\")" ] } ], diff --git a/scratches/pseco/evaluation/embeddings/n00 Beir Analysis.ipynb b/scratches/pseco/evaluation/embeddings/n00 Beir Analysis.ipynb index b7c1ae2..fcd0587 100644 --- a/scratches/pseco/evaluation/embeddings/n00 Beir Analysis.ipynb +++ b/scratches/pseco/evaluation/embeddings/n00 Beir Analysis.ipynb @@ -10,7 +10,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "c01c19dc", "metadata": {}, "outputs": [], @@ -252,6 +252,51 @@ "print(\"Recall:\", recall_qwen_2)\n", "print(\"Precision:\", precision_qwen_2)" ] + }, + { + "cell_type": "markdown", + "id": "b9402837", + "metadata": {}, + "source": [ + "# Save Data" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "c281d5e1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Resultados guardados en /home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_Scifact_results.json\n" + ] + } + ], + "source": [ + "results_data = {\n", + " \"qwen3-0.6B-emb:latest\": {\n", + " \"NDCG\": ndcg,\n", + " \"MAP\": _map,\n", + " \"Recall\": recall,\n", + " \"Precision\": precision,\n", + " },\n", + " \"qwen2.5:1.5b\": {\n", + " \"NDCG\": ndcg_qwen_2,\n", + " \"MAP\": _map_qwen_2,\n", + " \"Recall\": recall_qwen_2,\n", + " \"Precision\": precision_qwen_2,\n", + " }\n", + "}\n", + "\n", + "output_file = \"/home/pseco/VsCodeProjects/assistance-engine/data/interim/beir_Scifact_results.json\"\n", + "with open(output_file, \"w\") as f:\n", + " json.dump(results_data, f, indent=2)\n", + "\n", + "print(f\"Resultados guardados en {output_file}\")" + ] } ], "metadata": { diff --git a/scratches/pseco/evaluation/embeddings/n00 Beir Analysis_cosqa.ipynb b/scratches/pseco/evaluation/embeddings/n00 Beir Analysis_cosqa.ipynb index 6522862..0cf0ad1 100644 --- a/scratches/pseco/evaluation/embeddings/n00 Beir Analysis_cosqa.ipynb +++ b/scratches/pseco/evaluation/embeddings/n00 Beir Analysis_cosqa.ipynb @@ -261,14 +261,6 @@ "print(\"Recall:\", recall_qwen_2)\n", "print(\"Precision:\", precision_qwen_2)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1db7d110", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {