Add BEIR analysis notebook for CosQA and update dependencies
- Created a new Jupyter notebook for analyzing BEIR dataset with CosQA using Ollama embeddings. - Implemented a custom embedding class to integrate LangChain's OllamaEmbeddings with BEIR. - Added data loading and evaluation logic for the CosQA dataset. - Updated `uv.lock` to remove unnecessary dependencies (`mteb` and `polars`) and incremented revision number.
This commit is contained in:
parent
ff438ea6c4
commit
0d6c08e341
|
|
@ -28,7 +28,5 @@ dev = [
|
|||
"beir>=2.2.0",
|
||||
"jupyter>=1.1.1",
|
||||
"langfuse>=3.14.4",
|
||||
"mteb>=2.8.8",
|
||||
"polars>=1.38.1",
|
||||
"ruff>=0.15.1",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -235,7 +235,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"id": "c35182b8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
|
@ -277,7 +277,7 @@
|
|||
"\n",
|
||||
"\n",
|
||||
"chunks = build_chunks_from_folder(\n",
|
||||
" folder_path=\"/home/acano/PycharmProjects/assistance-engine/data\",\n",
|
||||
" folder_path=\"/home/acano/PycharmProjects/assistance-engine/data/raw/avap_docs_web\",\n",
|
||||
" chunking_strategy=\"fixed\",\n",
|
||||
" chunk_size=500,\n",
|
||||
" overlap=25,\n",
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because one or more lines are too long
|
|
@ -1,340 +0,0 @@
|
|||
query-id corpus-id score
|
||||
1 31715818 1
|
||||
3 14717500 1
|
||||
5 13734012 1
|
||||
13 1606628 1
|
||||
36 5152028 1
|
||||
36 11705328 1
|
||||
42 18174210 1
|
||||
48 13734012 1
|
||||
49 5953485 1
|
||||
50 12580014 1
|
||||
51 45638119 1
|
||||
53 45638119 1
|
||||
54 49556906 1
|
||||
56 4709641 1
|
||||
57 4709641 1
|
||||
70 5956380 1
|
||||
70 4414547 1
|
||||
72 6076903 1
|
||||
75 4387784 1
|
||||
94 1215116 1
|
||||
99 18810195 1
|
||||
100 4381486 1
|
||||
113 6157837 1
|
||||
115 33872649 1
|
||||
118 6372244 1
|
||||
124 4883040 1
|
||||
127 21598000 1
|
||||
128 8290953 1
|
||||
129 27768226 1
|
||||
130 27768226 1
|
||||
132 7975937 1
|
||||
133 38485364 1
|
||||
133 6969753 1
|
||||
133 17934082 1
|
||||
133 16280642 1
|
||||
133 12640810 1
|
||||
137 26016929 1
|
||||
141 6955746 1
|
||||
141 14437255 1
|
||||
142 10582939 1
|
||||
143 10582939 1
|
||||
146 10582939 1
|
||||
148 1084345 1
|
||||
163 18872233 1
|
||||
171 12670680 1
|
||||
179 16322674 1
|
||||
179 27123743 1
|
||||
179 23557241 1
|
||||
179 17450673 1
|
||||
180 16966326 1
|
||||
183 12827098 1
|
||||
185 18340282 1
|
||||
198 2177022 1
|
||||
208 13519661 1
|
||||
212 22038539 1
|
||||
213 13625993 1
|
||||
216 21366394 1
|
||||
217 21366394 1
|
||||
218 21366394 1
|
||||
219 21366394 1
|
||||
230 3067015 1
|
||||
232 10536636 1
|
||||
233 4388470 1
|
||||
236 4388470 1
|
||||
237 4942718 1
|
||||
238 2251426 1
|
||||
239 14079881 1
|
||||
248 1568684 1
|
||||
249 1568684 1
|
||||
261 1122279 1
|
||||
261 10697096 1
|
||||
268 970012 1
|
||||
269 970012 1
|
||||
274 11614737 1
|
||||
275 4961038 1
|
||||
275 14241418 1
|
||||
275 14819804 1
|
||||
279 14376683 1
|
||||
294 10874408 1
|
||||
295 20310709 1
|
||||
298 39381118 1
|
||||
300 3553087 1
|
||||
303 4388470 1
|
||||
312 6173523 1
|
||||
314 4347374 1
|
||||
324 2014909 1
|
||||
327 17997584 1
|
||||
338 23349986 1
|
||||
343 7873737 1
|
||||
343 5884524 1
|
||||
350 16927286 1
|
||||
354 8774475 1
|
||||
362 38587347 1
|
||||
380 19005293 1
|
||||
384 13770184 1
|
||||
385 9955779 1
|
||||
385 9767444 1
|
||||
386 16495649 1
|
||||
388 1148122 1
|
||||
399 791050 1
|
||||
410 14924526 1
|
||||
411 14924526 1
|
||||
415 6309659 1
|
||||
421 11172205 1
|
||||
431 28937856 1
|
||||
436 14637235 1
|
||||
437 18399038 1
|
||||
439 4423559 1
|
||||
440 4423559 1
|
||||
443 10165258 1
|
||||
452 12804937 1
|
||||
452 464511 1
|
||||
475 18678095 1
|
||||
478 14767844 1
|
||||
491 56893404 1
|
||||
501 17930286 1
|
||||
502 13071728 1
|
||||
507 30774694 1
|
||||
508 13980338 1
|
||||
513 13230773 1
|
||||
514 16256507 1
|
||||
516 29564505 1
|
||||
517 15663829 1
|
||||
521 34873974 1
|
||||
525 13639330 1
|
||||
527 3863543 1
|
||||
528 5476778 1
|
||||
532 12991445 1
|
||||
533 12991445 1
|
||||
535 39368721 1
|
||||
536 16056514 1
|
||||
539 13282296 1
|
||||
540 11886686 1
|
||||
540 25007443 1
|
||||
544 24221369 1
|
||||
549 9433958 1
|
||||
551 33499189 1
|
||||
552 1471041 1
|
||||
554 1049501 1
|
||||
560 40096222 1
|
||||
569 23460562 1
|
||||
575 10300888 1
|
||||
577 5289038 1
|
||||
578 8764879 1
|
||||
587 16999023 1
|
||||
589 10984005 1
|
||||
593 19675911 1
|
||||
597 12779444 1
|
||||
597 36355784 1
|
||||
597 25742130 1
|
||||
598 25742130 1
|
||||
613 9638032 1
|
||||
619 20888849 1
|
||||
619 2565138 1
|
||||
623 17000834 1
|
||||
628 24512064 1
|
||||
636 24294572 1
|
||||
637 25649714 1
|
||||
641 5912283 1
|
||||
641 31554917 1
|
||||
644 13619127 1
|
||||
649 12789595 1
|
||||
659 1215116 1
|
||||
660 1215116 1
|
||||
674 2095573 1
|
||||
684 4942718 1
|
||||
690 18750453 1
|
||||
691 10991183 1
|
||||
692 24088502 1
|
||||
693 24088502 1
|
||||
700 4350400 1
|
||||
702 4350400 1
|
||||
715 18421962 1
|
||||
716 18421962 1
|
||||
718 17587795 1
|
||||
721 1834762 1
|
||||
723 5531479 1
|
||||
727 7521113 1
|
||||
728 7521113 1
|
||||
728 36444198 1
|
||||
729 26851674 1
|
||||
742 32159283 1
|
||||
743 32159283 1
|
||||
744 8460275 1
|
||||
756 2831620 1
|
||||
759 1805641 1
|
||||
768 6421792 1
|
||||
770 15476777 1
|
||||
775 32275758 1
|
||||
781 24338780 1
|
||||
783 40632104 1
|
||||
784 2356950 1
|
||||
785 12471115 1
|
||||
793 8551160 1
|
||||
800 22543403 1
|
||||
805 22180793 1
|
||||
808 36606083 1
|
||||
811 19799455 1
|
||||
814 33387953 1
|
||||
820 8646760 1
|
||||
821 8646760 1
|
||||
823 15319019 1
|
||||
830 1897324 1
|
||||
831 1897324 1
|
||||
832 30303335 1
|
||||
834 5483793 1
|
||||
837 15928989 1
|
||||
839 1469751 1
|
||||
845 17741440 1
|
||||
847 16787954 1
|
||||
852 13843341 1
|
||||
859 1982286 1
|
||||
870 195689316 1
|
||||
873 1180972 1
|
||||
873 19307912 1
|
||||
873 27393799 1
|
||||
873 29025270 1
|
||||
873 3315558 1
|
||||
879 8426046 1
|
||||
880 8426046 1
|
||||
882 14803797 1
|
||||
887 18855191 1
|
||||
903 10648422 1
|
||||
904 7370282 1
|
||||
907 6923961 1
|
||||
911 11254556 1
|
||||
913 3203590 1
|
||||
914 3203590 1
|
||||
921 1642727 1
|
||||
922 17077004 1
|
||||
936 5483793 1
|
||||
956 12956194 1
|
||||
957 123859 1
|
||||
960 8780599 1
|
||||
967 2119889 1
|
||||
967 8997410 1
|
||||
971 46695481 1
|
||||
971 27873158 1
|
||||
971 28617573 1
|
||||
971 9764256 1
|
||||
975 5304891 1
|
||||
982 2988714 1
|
||||
985 6828370 1
|
||||
993 16472469 1
|
||||
1012 9745001 1
|
||||
1014 6277638 1
|
||||
1019 11603066 1
|
||||
1020 9433958 1
|
||||
1021 9433958 1
|
||||
1024 5373138 1
|
||||
1029 13923140 1
|
||||
1029 13940200 1
|
||||
1029 11899391 1
|
||||
1041 25254425 1
|
||||
1041 16626264 1
|
||||
1049 12486491 1
|
||||
1062 20381484 1
|
||||
1086 39281140 1
|
||||
1088 37549932 1
|
||||
1089 17628888 1
|
||||
1099 7662206 1
|
||||
1100 7662206 1
|
||||
1104 3898784 1
|
||||
1107 20532591 1
|
||||
1110 13770184 1
|
||||
1121 4456756 1
|
||||
1130 17997584 1
|
||||
1132 33499189 1
|
||||
1132 9283422 1
|
||||
1137 33370 1
|
||||
1140 12009265 1
|
||||
1144 10071552 1
|
||||
1146 13906581 1
|
||||
1150 11369420 1
|
||||
1163 15305881 1
|
||||
1175 31272411 1
|
||||
1179 31272411 1
|
||||
1180 31272411 1
|
||||
1185 16737210 1
|
||||
1187 52873726 1
|
||||
1191 30655442 1
|
||||
1194 11419230 1
|
||||
1196 25649714 1
|
||||
1197 25649714 1
|
||||
1199 16760369 1
|
||||
1200 3441524 1
|
||||
1202 3475317 1
|
||||
1204 31141365 1
|
||||
1207 18909530 1
|
||||
1213 14407673 1
|
||||
1216 24142891 1
|
||||
1221 19736671 1
|
||||
1225 9650982 1
|
||||
1226 13777138 1
|
||||
1232 13905670 1
|
||||
1241 4427392 1
|
||||
1245 7662395 1
|
||||
1259 24341590 1
|
||||
1262 44172171 1
|
||||
1266 37480103 1
|
||||
1270 13900610 1
|
||||
1271 13768432 1
|
||||
1272 17081238 1
|
||||
1273 11041152 1
|
||||
1274 12428814 1
|
||||
1274 27731651 1
|
||||
1274 4406819 1
|
||||
1278 11335781 1
|
||||
1279 11335781 1
|
||||
1280 4387784 1
|
||||
1281 4387784 1
|
||||
1282 23649163 1
|
||||
1290 4687948 1
|
||||
1292 56893404 1
|
||||
1298 11718220 1
|
||||
1303 12631697 1
|
||||
1316 27910499 1
|
||||
1319 16284655 1
|
||||
1320 16284655 1
|
||||
1332 5304891 1
|
||||
1335 27910499 1
|
||||
1336 27910499 1
|
||||
1337 20231138 1
|
||||
1339 15482274 1
|
||||
1344 9559146 1
|
||||
1352 12885341 1
|
||||
1359 11614737 1
|
||||
1362 8290953 1
|
||||
1363 8290953 1
|
||||
1368 2425364 1
|
||||
1370 2425364 1
|
||||
1379 16322674 1
|
||||
1379 27123743 1
|
||||
1379 23557241 1
|
||||
1379 17450673 1
|
||||
1382 17755060 1
|
||||
1385 306006 1
|
||||
1389 23895668 1
|
||||
1395 17717391 1
|
||||
|
|
|
@ -1,920 +0,0 @@
|
|||
query-id corpus-id score
|
||||
0 31715818 1
|
||||
2 13734012 1
|
||||
4 22942787 1
|
||||
6 2613775 1
|
||||
9 44265107 1
|
||||
10 32587939 1
|
||||
11 32587939 1
|
||||
12 33409100 1
|
||||
14 641786 1
|
||||
15 22080671 1
|
||||
17 1606628 1
|
||||
18 22942787 1
|
||||
19 3202143 1
|
||||
20 3202143 1
|
||||
21 41493639 1
|
||||
22 6490571 1
|
||||
24 3471191 1
|
||||
25 2613775 1
|
||||
26 32390525 1
|
||||
27 32390525 1
|
||||
28 12670680 1
|
||||
30 24341590 1
|
||||
32 12428497 1
|
||||
34 11705328 1
|
||||
35 5152028 1
|
||||
35 11705328 1
|
||||
37 5152028 1
|
||||
37 11705328 1
|
||||
39 13497630 1
|
||||
40 13497630 1
|
||||
41 18174210 1
|
||||
43 7224723 1
|
||||
44 56893404 1
|
||||
45 56893404 1
|
||||
46 380526 1
|
||||
47 3512154 1
|
||||
47 26996935 1
|
||||
52 45638119 1
|
||||
55 49556906 1
|
||||
58 4709641 1
|
||||
60 13899137 1
|
||||
60 13901073 1
|
||||
61 13899137 1
|
||||
61 13901073 1
|
||||
62 32587939 1
|
||||
63 40349336 1
|
||||
64 40349336 1
|
||||
66 14806256 1
|
||||
67 21295300 1
|
||||
68 21295300 1
|
||||
69 5956380 1
|
||||
69 4414547 1
|
||||
71 1127562 1
|
||||
73 6076903 1
|
||||
74 4387784 1
|
||||
76 5531479 1
|
||||
77 5531479 1
|
||||
78 5099266 1
|
||||
79 5099266 1
|
||||
80 4920376 1
|
||||
81 1797622 1
|
||||
82 3619372 1
|
||||
85 7521113 1
|
||||
85 22406695 1
|
||||
86 7521113 1
|
||||
86 22406695 1
|
||||
88 7521113 1
|
||||
88 22406695 1
|
||||
89 7521113 1
|
||||
89 22406695 1
|
||||
90 22406695 1
|
||||
91 1084345 1
|
||||
92 1084345 1
|
||||
93 2692522 1
|
||||
95 1215116 1
|
||||
96 14500725 1
|
||||
98 6540064 1
|
||||
104 40164383 1
|
||||
105 36606083 1
|
||||
106 25515907 1
|
||||
106 5151024 1
|
||||
108 6191684 1
|
||||
108 22995579 1
|
||||
108 23865182 1
|
||||
109 4319174 1
|
||||
111 13513790 1
|
||||
112 6157837 1
|
||||
114 33872649 1
|
||||
116 33872649 1
|
||||
119 14606752 1
|
||||
120 14606752 1
|
||||
121 31460499 1
|
||||
122 31460499 1
|
||||
123 4883040 1
|
||||
126 24512064 1
|
||||
134 4695046 1
|
||||
138 26016929 1
|
||||
139 22080671 1
|
||||
144 10582939 1
|
||||
149 6227220 1
|
||||
152 15488881 1
|
||||
153 4702639 1
|
||||
154 4702639 1
|
||||
155 37549932 1
|
||||
156 37549932 1
|
||||
157 13439128 1
|
||||
159 9394119 1
|
||||
160 52874170 1
|
||||
161 6903077 1
|
||||
164 5824985 1
|
||||
165 5824985 1
|
||||
166 18872233 1
|
||||
167 18872233 1
|
||||
168 5824985 1
|
||||
169 5824985 1
|
||||
172 12670680 1
|
||||
173 8126244 1
|
||||
174 1710116 1
|
||||
175 1710116 1
|
||||
176 32587939 1
|
||||
177 9669099 1
|
||||
178 16322674 1
|
||||
178 27123743 1
|
||||
178 23557241 1
|
||||
178 17450673 1
|
||||
181 16966326 1
|
||||
182 11369420 1
|
||||
184 12827098 1
|
||||
186 16855829 1
|
||||
187 16855829 1
|
||||
189 4421578 1
|
||||
196 19313533 1
|
||||
197 2177022 1
|
||||
199 2177022 1
|
||||
200 18231807 1
|
||||
201 2462673 1
|
||||
203 9558539 1
|
||||
204 7898952 1
|
||||
205 7898952 1
|
||||
205 470625 1
|
||||
209 32587939 1
|
||||
210 13794374 1
|
||||
211 13794374 1
|
||||
214 13625993 1
|
||||
220 19205437 1
|
||||
221 19205437 1
|
||||
222 19205437 1
|
||||
223 2014909 1
|
||||
224 6944800 1
|
||||
225 6944800 1
|
||||
226 6944800 1
|
||||
227 26973393 1
|
||||
228 4928057 1
|
||||
229 56893404 1
|
||||
235 4388470 1
|
||||
241 2212067 1
|
||||
241 10608822 1
|
||||
242 2212067 1
|
||||
242 10608822 1
|
||||
243 8148122 1
|
||||
244 21498497 1
|
||||
245 8447873 1
|
||||
245 3430789 1
|
||||
246 8447873 1
|
||||
246 3430789 1
|
||||
247 13578199 1
|
||||
250 1568684 1
|
||||
251 1568684 1
|
||||
253 37424881 1
|
||||
254 37424881 1
|
||||
255 5850219 1
|
||||
256 5850219 1
|
||||
258 22080671 1
|
||||
259 8883846 1
|
||||
262 14610165 1
|
||||
263 11328820 1
|
||||
263 30041340 1
|
||||
263 14853989 1
|
||||
264 11328820 1
|
||||
265 2033917 1
|
||||
266 22405338 1
|
||||
267 5912283 1
|
||||
267 31554917 1
|
||||
272 11614737 1
|
||||
277 14376683 1
|
||||
278 14376683 1
|
||||
280 25001628 1
|
||||
281 4632921 1
|
||||
283 1974176 1
|
||||
285 5548081 1
|
||||
286 4709641 1
|
||||
287 4709641 1
|
||||
290 15048300 1
|
||||
292 15048300 1
|
||||
293 10874408 1
|
||||
296 4398832 1
|
||||
299 39381118 1
|
||||
301 3553087 1
|
||||
304 14797520 1
|
||||
305 14797520 1
|
||||
306 7821634 1
|
||||
308 7821634 1
|
||||
309 7821634 1
|
||||
310 6173523 1
|
||||
313 6173523 1
|
||||
315 3701541 1
|
||||
316 712078 1
|
||||
317 4506414 1
|
||||
323 2014909 1
|
||||
325 40349336 1
|
||||
326 40349336 1
|
||||
330 9505448 1
|
||||
331 9505448 1
|
||||
332 29023309 1
|
||||
333 29023309 1
|
||||
334 25079962 1
|
||||
335 1780819 1
|
||||
336 2097256 1
|
||||
337 2097256 1
|
||||
339 23349986 1
|
||||
340 7098463 1
|
||||
341 7098463 1
|
||||
342 7873737 1
|
||||
342 5884524 1
|
||||
345 4394817 1
|
||||
346 11902109 1
|
||||
347 11902109 1
|
||||
349 13497630 1
|
||||
351 14658685 1
|
||||
352 14658685 1
|
||||
355 12800122 1
|
||||
355 38380061 1
|
||||
356 6144337 1
|
||||
357 18111172 1
|
||||
358 18111172 1
|
||||
361 38587347 1
|
||||
363 5386514 1
|
||||
364 1550937 1
|
||||
365 600437 1
|
||||
366 13956305 1
|
||||
367 27099731 1
|
||||
368 27099731 1
|
||||
369 6826100 1
|
||||
370 1550937 1
|
||||
371 1550937 1
|
||||
372 24922825 1
|
||||
375 1522647 1
|
||||
376 22401061 1
|
||||
377 18810195 1
|
||||
378 45154987 1
|
||||
378 10534299 1
|
||||
378 11886686 1
|
||||
378 25007443 1
|
||||
378 17150648 1
|
||||
379 19005293 1
|
||||
381 18340282 1
|
||||
382 11659421 1
|
||||
383 13770184 1
|
||||
389 1148122 1
|
||||
390 1148122 1
|
||||
391 1148122 1
|
||||
392 1148122 1
|
||||
393 1148122 1
|
||||
394 11360768 1
|
||||
396 1456068 1
|
||||
397 1456068 1
|
||||
398 8883846 1
|
||||
400 791050 1
|
||||
401 5633876 1
|
||||
403 1921218 1
|
||||
404 1921218 1
|
||||
406 6796297 1
|
||||
407 9889151 1
|
||||
413 6309659 1
|
||||
414 6309659 1
|
||||
416 6309659 1
|
||||
417 6309659 1
|
||||
418 16660256 1
|
||||
420 9315213 1
|
||||
422 11172205 1
|
||||
423 8595678 1
|
||||
425 33257464 1
|
||||
426 16728949 1
|
||||
428 16728949 1
|
||||
429 36540079 1
|
||||
430 28937856 1
|
||||
432 8002887 1
|
||||
434 9500590 1
|
||||
435 9500590 1
|
||||
441 2014909 1
|
||||
444 10165258 1
|
||||
445 10165258 1
|
||||
447 2052720 1
|
||||
448 2052720 1
|
||||
449 12209494 1
|
||||
449 3430789 1
|
||||
453 4200695 1
|
||||
454 4200695 1
|
||||
455 12643937 1
|
||||
456 30507607 1
|
||||
458 597790 1
|
||||
461 40096222 1
|
||||
463 19736671 1
|
||||
466 22544171 1
|
||||
469 1410197 1
|
||||
470 12685434 1
|
||||
472 7185591 1
|
||||
472 26330861 1
|
||||
472 4414481 1
|
||||
473 4373433 1
|
||||
474 4373433 1
|
||||
479 6325527 1
|
||||
480 6325527 1
|
||||
481 14706752 1
|
||||
482 10991183 1
|
||||
483 22703082 1
|
||||
484 14637235 1
|
||||
485 14637235 1
|
||||
486 14637235 1
|
||||
487 14637235 1
|
||||
488 1780819 1
|
||||
489 6625693 1
|
||||
490 56893404 1
|
||||
492 19583924 1
|
||||
493 19583924 1
|
||||
494 34873974 1
|
||||
495 17077004 1
|
||||
498 17077004 1
|
||||
499 26064662 1
|
||||
500 17930286 1
|
||||
504 10883736 1
|
||||
505 22703082 1
|
||||
506 7433668 1
|
||||
509 13980338 1
|
||||
515 29564505 1
|
||||
523 14803797 1
|
||||
524 14803797 1
|
||||
526 3863543 1
|
||||
529 10546779 1
|
||||
529 25413327 1
|
||||
529 36651210 1
|
||||
530 10546779 1
|
||||
530 25413327 1
|
||||
530 36651210 1
|
||||
530 87610599 1
|
||||
531 10546779 1
|
||||
531 25413327 1
|
||||
531 36651210 1
|
||||
537 16056514 1
|
||||
541 45154987 1
|
||||
541 11886686 1
|
||||
541 25007443 1
|
||||
542 19688024 1
|
||||
545 24221369 1
|
||||
547 10648422 1
|
||||
548 18199839 1
|
||||
550 33499189 1
|
||||
553 1471041 1
|
||||
555 1049501 1
|
||||
557 1049501 1
|
||||
559 3475317 1
|
||||
562 20101846 1
|
||||
563 2867345 1
|
||||
564 2867345 1
|
||||
565 16120395 1
|
||||
566 16120395 1
|
||||
568 23418635 1
|
||||
570 20333864 1
|
||||
571 20333864 1
|
||||
572 4447055 1
|
||||
573 10300888 1
|
||||
574 10300888 1
|
||||
576 4468861 1
|
||||
579 34139429 1
|
||||
580 23460562 1
|
||||
582 14260013 1
|
||||
584 14260013 1
|
||||
585 42291761 1
|
||||
588 16999023 1
|
||||
590 10984005 1
|
||||
591 14682243 1
|
||||
592 14682243 1
|
||||
594 19675911 1
|
||||
595 4824840 1
|
||||
600 12258338 1
|
||||
601 12258338 1
|
||||
602 3701541 1
|
||||
603 6540064 1
|
||||
606 712078 1
|
||||
607 4506414 1
|
||||
609 40096222 1
|
||||
610 40096222 1
|
||||
611 32408470 1
|
||||
612 9638032 1
|
||||
614 9638032 1
|
||||
615 9638032 1
|
||||
616 18670 1
|
||||
617 18670 1
|
||||
618 6836086 1
|
||||
620 2565138 1
|
||||
621 1642727 1
|
||||
622 17000834 1
|
||||
624 20033112 1
|
||||
625 20033112 1
|
||||
626 16355392 1
|
||||
631 5468807 1
|
||||
632 5172048 1
|
||||
633 5172048 1
|
||||
635 1686997 1
|
||||
638 25649714 1
|
||||
640 6503185 1
|
||||
642 13619127 1
|
||||
643 15535511 1
|
||||
645 12810152 1
|
||||
646 12810152 1
|
||||
647 15041758 1
|
||||
648 15041758 1
|
||||
650 12789595 1
|
||||
651 9433958 1
|
||||
652 9433958 1
|
||||
653 24384587 1
|
||||
654 57574395 1
|
||||
655 57574395 1
|
||||
657 8533245 1
|
||||
658 5293024 1
|
||||
661 37204802 1
|
||||
662 37204802 1
|
||||
663 22080671 1
|
||||
665 12580014 1
|
||||
666 4469125 1
|
||||
667 6493422 1
|
||||
668 6493422 1
|
||||
668 25148216 1
|
||||
669 6493422 1
|
||||
669 25148216 1
|
||||
670 5573975 1
|
||||
671 5573975 1
|
||||
672 15635366 1
|
||||
673 2095573 1
|
||||
676 857189 1
|
||||
677 857189 1
|
||||
679 13639330 1
|
||||
680 9315213 1
|
||||
681 9315213 1
|
||||
682 9315213 1
|
||||
683 9315213 1
|
||||
685 4452659 1
|
||||
686 4452659 1
|
||||
687 4452659 1
|
||||
688 4452659 1
|
||||
689 22080671 1
|
||||
694 1071991 1
|
||||
696 16355392 1
|
||||
698 22544171 1
|
||||
703 4350400 1
|
||||
704 14658685 1
|
||||
705 22442133 1
|
||||
709 22442133 1
|
||||
710 22442133 1
|
||||
713 18421962 1
|
||||
714 18421962 1
|
||||
717 17587795 1
|
||||
724 5531479 1
|
||||
726 7521113 1
|
||||
726 36444198 1
|
||||
730 13400643 1
|
||||
732 34469966 1
|
||||
733 34469966 1
|
||||
734 4961038 1
|
||||
736 5389095 1
|
||||
737 16562534 1
|
||||
737 6609935 1
|
||||
738 16562534 1
|
||||
738 6609935 1
|
||||
738 33912020 1
|
||||
739 4446814 1
|
||||
740 23078022 1
|
||||
745 11291348 1
|
||||
746 11291348 1
|
||||
747 11291348 1
|
||||
748 11291348 1
|
||||
749 13868795 1
|
||||
751 19800147 1
|
||||
752 19800147 1
|
||||
753 1173667 1
|
||||
755 17844478 1
|
||||
757 17123657 1
|
||||
758 14195528 1
|
||||
760 1805641 1
|
||||
761 10009203 1
|
||||
762 4695046 1
|
||||
764 7552215 1
|
||||
765 7552215 1
|
||||
766 7552215 1
|
||||
767 2488880 1
|
||||
771 15476777 1
|
||||
772 24922825 1
|
||||
774 32275758 1
|
||||
776 32275758 1
|
||||
777 32275758 1
|
||||
778 13001323 1
|
||||
779 13001323 1
|
||||
780 8246922 1
|
||||
780 24338780 1
|
||||
782 8246922 1
|
||||
787 4740447 1
|
||||
788 4740447 1
|
||||
789 15493354 1
|
||||
790 15493354 1
|
||||
791 15984735 1
|
||||
792 3610080 1
|
||||
795 8551160 1
|
||||
797 8551160 1
|
||||
798 8551160 1
|
||||
799 5293024 1
|
||||
801 22180793 1
|
||||
802 22180793 1
|
||||
803 22180793 1
|
||||
804 22180793 1
|
||||
807 36606083 1
|
||||
810 13513790 1
|
||||
812 19799455 1
|
||||
813 33387953 1
|
||||
815 8148304 1
|
||||
816 8148304 1
|
||||
817 17814815 1
|
||||
818 17814815 1
|
||||
822 15319019 1
|
||||
825 15319019 1
|
||||
826 4678846 1
|
||||
828 4678846 1
|
||||
835 15928989 1
|
||||
838 15928989 1
|
||||
840 15663829 1
|
||||
841 15663829 1
|
||||
844 17741440 1
|
||||
846 22696649 1
|
||||
848 14500725 1
|
||||
853 24922825 1
|
||||
854 12206390 1
|
||||
855 8190282 1
|
||||
856 43334921 1
|
||||
857 43334921 1
|
||||
858 1982286 1
|
||||
860 16066726 1
|
||||
861 16066726 1
|
||||
863 20568364 1
|
||||
863 16361581 1
|
||||
866 37822406 1
|
||||
867 14340571 1
|
||||
871 195689316 1
|
||||
876 195689316 1
|
||||
877 313394 1
|
||||
881 14803797 1
|
||||
883 14803797 1
|
||||
884 14803797 1
|
||||
885 6477536 1
|
||||
886 6477536 1
|
||||
890 2097256 1
|
||||
891 2097256 1
|
||||
893 13509809 1
|
||||
894 14724693 1
|
||||
895 18750453 1
|
||||
896 14338915 1
|
||||
897 14338915 1
|
||||
898 13106686 1
|
||||
898 5572127 1
|
||||
899 13106686 1
|
||||
899 5572127 1
|
||||
900 18678095 1
|
||||
901 6540064 1
|
||||
902 10648422 1
|
||||
908 6923961 1
|
||||
909 11254556 1
|
||||
910 11254556 1
|
||||
912 11254556 1
|
||||
916 18037805 1
|
||||
917 34071621 1
|
||||
919 16422880 1
|
||||
923 17077004 1
|
||||
925 17077004 1
|
||||
926 16390264 1
|
||||
927 16390264 1
|
||||
928 18174210 1
|
||||
929 18174210 1
|
||||
930 16056514 1
|
||||
933 14711483 1
|
||||
934 8563659 1
|
||||
935 5483793 1
|
||||
938 26231129 1
|
||||
939 26231129 1
|
||||
940 12258338 1
|
||||
941 12258338 1
|
||||
942 11527199 1
|
||||
944 1642727 1
|
||||
945 8428935 1
|
||||
945 26112696 1
|
||||
945 4463588 1
|
||||
945 13083189 1
|
||||
946 8428935 1
|
||||
946 26112696 1
|
||||
946 4463588 1
|
||||
946 13083189 1
|
||||
949 13578199 1
|
||||
951 21414718 1
|
||||
952 3355397 1
|
||||
953 3355397 1
|
||||
954 3355397 1
|
||||
955 2078658 1
|
||||
955 30507607 1
|
||||
959 8780599 1
|
||||
962 13931771 1
|
||||
962 935538 1
|
||||
962 4306711 1
|
||||
963 4162857 1
|
||||
963 29828242 1
|
||||
964 4162857 1
|
||||
964 29828242 1
|
||||
965 40817021 1
|
||||
969 19356271 1
|
||||
969 17368516 1
|
||||
970 19356271 1
|
||||
970 17368516 1
|
||||
972 46695481 1
|
||||
972 27873158 1
|
||||
972 28617573 1
|
||||
972 9764256 1
|
||||
973 27446873 1
|
||||
973 27873158 1
|
||||
973 28617573 1
|
||||
973 9764256 1
|
||||
976 5304891 1
|
||||
977 14075252 1
|
||||
977 39264456 1
|
||||
978 14075252 1
|
||||
979 11659421 1
|
||||
980 20128547 1
|
||||
984 6828370 1
|
||||
988 3033830 1
|
||||
989 9988425 1
|
||||
990 16472469 1
|
||||
992 16472469 1
|
||||
994 16472469 1
|
||||
996 16472469 1
|
||||
997 16472469 1
|
||||
998 16472469 1
|
||||
999 16472469 1
|
||||
1000 16472469 1
|
||||
1001 5702790 1
|
||||
1002 13639330 1
|
||||
1003 14332945 1
|
||||
1003 4319844 1
|
||||
1003 4899981 1
|
||||
1004 301838 1
|
||||
1004 2734421 1
|
||||
1004 3952288 1
|
||||
1005 301838 1
|
||||
1005 2734421 1
|
||||
1005 3952288 1
|
||||
1006 4926049 1
|
||||
1008 2547636 1
|
||||
1009 1982286 1
|
||||
1011 9745001 1
|
||||
1015 6277638 1
|
||||
1016 6277638 1
|
||||
1018 11603066 1
|
||||
1023 16927286 1
|
||||
1025 32408470 1
|
||||
1026 3113630 1
|
||||
1027 3113630 1
|
||||
1028 13923140 1
|
||||
1028 11899391 1
|
||||
1030 6441369 1
|
||||
1031 12486491 1
|
||||
1032 6836086 1
|
||||
1033 6836086 1
|
||||
1034 4547102 1
|
||||
1035 4547102 1
|
||||
1036 4547102 1
|
||||
1037 16287725 1
|
||||
1038 16287725 1
|
||||
1040 25254425 1
|
||||
1040 16626264 1
|
||||
1042 17421851 1
|
||||
1043 17671145 1
|
||||
1044 22500262 1
|
||||
1045 22500262 1
|
||||
1046 418246 1
|
||||
1046 4324278 1
|
||||
1046 16712164 1
|
||||
1047 14706752 1
|
||||
1048 12486491 1
|
||||
1050 19878070 1
|
||||
1052 18816720 1
|
||||
1053 18816720 1
|
||||
1054 10072941 1
|
||||
1055 13906581 1
|
||||
1056 4200695 1
|
||||
1058 13027590 1
|
||||
1065 20418809 1
|
||||
1067 4429668 1
|
||||
1068 4429668 1
|
||||
1069 4200695 1
|
||||
1070 25649714 1
|
||||
1072 4824840 1
|
||||
1073 4824840 1
|
||||
1074 14658685 1
|
||||
1075 14658685 1
|
||||
1081 5691302 1
|
||||
1084 5691302 1
|
||||
1085 5691302 1
|
||||
1087 39281140 1
|
||||
1090 17628888 1
|
||||
1091 2603304 1
|
||||
1096 29638116 1
|
||||
1097 26851674 1
|
||||
1098 13552682 1
|
||||
1101 3874000 1
|
||||
1102 3874000 1
|
||||
1103 3898784 1
|
||||
1105 6710713 1
|
||||
1106 6710713 1
|
||||
1109 13770184 1
|
||||
1109 8582337 1
|
||||
1111 1686881 1
|
||||
1112 1686881 1
|
||||
1114 12824568 1
|
||||
1115 44048701 1
|
||||
1118 23351136 1
|
||||
1119 5323845 1
|
||||
1119 18997216 1
|
||||
1119 13907928 1
|
||||
1120 5323845 1
|
||||
1120 18997216 1
|
||||
1120 13907928 1
|
||||
1125 21009874 1
|
||||
1126 21009874 1
|
||||
1127 27466734 1
|
||||
1128 33499189 1
|
||||
1128 9283422 1
|
||||
1133 24142891 1
|
||||
1134 33370 1
|
||||
1135 33370 1
|
||||
1136 33370 1
|
||||
1138 6796297 1
|
||||
1139 12009265 1
|
||||
1141 12009265 1
|
||||
1142 5260382 1
|
||||
1145 10071552 1
|
||||
1148 4828631 1
|
||||
1153 7370282 1
|
||||
1156 12584053 1
|
||||
1157 12584053 1
|
||||
1158 12584053 1
|
||||
1159 12584053 1
|
||||
1161 13048272 1
|
||||
1162 15305881 1
|
||||
1164 4455466 1
|
||||
1165 4455466 1
|
||||
1166 9889151 1
|
||||
1168 8563659 1
|
||||
1169 4319174 1
|
||||
1170 18956141 1
|
||||
1171 18956141 1
|
||||
1173 7370282 1
|
||||
1174 31272411 1
|
||||
1176 13910150 1
|
||||
1177 13910150 1
|
||||
1178 31272411 1
|
||||
1181 301838 1
|
||||
1181 2734421 1
|
||||
1181 39128592 1
|
||||
1181 3952288 1
|
||||
1182 14541844 1
|
||||
1183 1967017 1
|
||||
1184 16737210 1
|
||||
1186 7485455 1
|
||||
1188 4394817 1
|
||||
1190 30655442 1
|
||||
1193 20532591 1
|
||||
1195 26283293 1
|
||||
1205 5558754 1
|
||||
1206 18909530 1
|
||||
1208 10284593 1
|
||||
1209 4347374 1
|
||||
1210 4928282 1
|
||||
1211 4928282 1
|
||||
1212 6493422 1
|
||||
1212 44724517 1
|
||||
1214 6493422 1
|
||||
1214 14407673 1
|
||||
1215 16355392 1
|
||||
1218 15635366 1
|
||||
1219 9393969 1
|
||||
1219 14864285 1
|
||||
1220 13023410 1
|
||||
1223 5289038 1
|
||||
1224 21932050 1
|
||||
1224 34016987 1
|
||||
1227 25641414 1
|
||||
1228 25641414 1
|
||||
1229 1676568 1
|
||||
1230 13905670 1
|
||||
1231 13905670 1
|
||||
1234 13905670 1
|
||||
1235 17973161 1
|
||||
1236 17973161 1
|
||||
1237 3654468 1
|
||||
1238 3654468 1
|
||||
1239 21387297 1
|
||||
1239 4427392 1
|
||||
1244 18949516 1
|
||||
1246 7662395 1
|
||||
1247 5114282 1
|
||||
1248 7209559 1
|
||||
1249 7209559 1
|
||||
1253 3321943 1
|
||||
1254 16939583 1
|
||||
1255 16939583 1
|
||||
1257 581832 1
|
||||
1258 12040627 1
|
||||
1260 24341590 1
|
||||
1261 13023410 1
|
||||
1263 3981729 1
|
||||
1265 37480103 1
|
||||
1268 52072815 1
|
||||
1269 13900610 1
|
||||
1275 27731651 1
|
||||
1276 3475317 1
|
||||
1284 3578380 1
|
||||
1288 4687948 1
|
||||
1289 21239672 1
|
||||
1291 56893404 1
|
||||
1293 43329366 1
|
||||
1294 2078658 1
|
||||
1294 30507607 1
|
||||
1295 21239672 1
|
||||
1297 9167230 1
|
||||
1300 6421792 1
|
||||
1302 12631697 1
|
||||
1304 12631697 1
|
||||
1305 12631697 1
|
||||
1306 6000423 1
|
||||
1306 5836 1
|
||||
1307 18231807 1
|
||||
1308 18231807 1
|
||||
1309 18231807 1
|
||||
1310 8042158 1
|
||||
1311 13763195 1
|
||||
1312 24177706 1
|
||||
1314 13072112 1
|
||||
1314 16237005 1
|
||||
1315 13072112 1
|
||||
1315 16237005 1
|
||||
1322 16284655 1
|
||||
1323 19912367 1
|
||||
1324 19912367 1
|
||||
1325 40476126 1
|
||||
1327 24241932 1
|
||||
1327 22194407 1
|
||||
1328 3475317 1
|
||||
1330 14075252 1
|
||||
1331 14075252 1
|
||||
1333 1649738 1
|
||||
1334 13923140 1
|
||||
1334 13940200 1
|
||||
1334 11899391 1
|
||||
1340 15482274 1
|
||||
1341 15482274 1
|
||||
1342 8148122 1
|
||||
1345 9559146 1
|
||||
1346 9505402 1
|
||||
1347 19005293 1
|
||||
1348 19005293 1
|
||||
1349 5377642 1
|
||||
1350 5377642 1
|
||||
1351 28369117 1
|
||||
1353 18816720 1
|
||||
1355 5256564 1
|
||||
1356 13764090 1
|
||||
1360 11614737 1
|
||||
1361 15488881 1
|
||||
1361 15058155 1
|
||||
1364 8290953 1
|
||||
1366 4406819 1
|
||||
1367 2425364 1
|
||||
1371 16256507 1
|
||||
1372 21003930 1
|
||||
1373 21003930 1
|
||||
1374 21993510 1
|
||||
1375 21993510 1
|
||||
1376 3944632 1
|
||||
1378 2488880 1
|
||||
1380 16322674 1
|
||||
1380 23557241 1
|
||||
1380 17450673 1
|
||||
1381 13481880 1
|
||||
1383 17755060 1
|
||||
1386 306006 1
|
||||
1387 9669099 1
|
||||
1390 2890952 1
|
||||
1391 6766459 1
|
||||
1392 6766459 1
|
||||
1393 2000038 1
|
||||
1393 12440953 1
|
||||
1394 2251426 1
|
||||
1397 17717391 1
|
||||
1398 17717391 1
|
||||
1400 14706752 1
|
||||
1401 5185871 1
|
||||
1402 8126244 1
|
||||
1403 33370 1
|
||||
1403 38355793 1
|
||||
1404 33370 1
|
||||
1404 38355793 1
|
||||
1405 10504681 1
|
||||
1406 2617858 1
|
||||
1407 8087082 1
|
||||
1407 29863668 1
|
||||
|
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,295 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "66cbbaf8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Libraries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "c01c19dc",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"from typing import Dict, List, Union\n",
|
||||
"import numpy as np\n",
|
||||
"from datasets import load_dataset\n",
|
||||
"from langchain_ollama import OllamaEmbeddings\n",
|
||||
"from beir.datasets.data_loader import GenericDataLoader\n",
|
||||
"from beir.retrieval.search.dense import DenseRetrievalExactSearch\n",
|
||||
"from beir.retrieval.evaluation import EvaluateRetrieval\n",
|
||||
"from beir import util"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ac011c1c",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Utils"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b83e7900",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class BEIROllamaEmbeddings:\n",
|
||||
" \"\"\"\n",
|
||||
" Adapter that makes LangChain's OllamaEmbeddings compatible with BEIR.\n",
|
||||
" \"\"\"\n",
|
||||
"\n",
|
||||
" def __init__(\n",
|
||||
" self,\n",
|
||||
" base_url: str,\n",
|
||||
" model: str,\n",
|
||||
" batch_size: int = 64,\n",
|
||||
" ) -> None:\n",
|
||||
" self.batch_size = batch_size\n",
|
||||
" self.embeddings = OllamaEmbeddings(\n",
|
||||
" base_url=base_url,\n",
|
||||
" model=model,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" def _batch_embed(self, texts: List[str]) -> np.ndarray:\n",
|
||||
" vectors = []\n",
|
||||
"\n",
|
||||
" for i in range(0, len(texts), self.batch_size):\n",
|
||||
" batch = texts[i : i + self.batch_size]\n",
|
||||
" batch_vectors = self.embeddings.embed_documents(batch)\n",
|
||||
" vectors.extend(batch_vectors)\n",
|
||||
"\n",
|
||||
" return np.asarray(vectors, dtype=np.float32)\n",
|
||||
"\n",
|
||||
" def encode_queries(self, queries: List[str], **kwargs) -> np.ndarray:\n",
|
||||
" \"\"\"\n",
|
||||
" BEIR query encoder\n",
|
||||
" \"\"\"\n",
|
||||
" return self._batch_embed(queries)\n",
|
||||
"\n",
|
||||
" def encode_corpus(\n",
|
||||
" self,\n",
|
||||
" corpus: Union[List[Dict[str, str]], Dict[str, Dict[str, str]]],\n",
|
||||
" **kwargs,\n",
|
||||
" ) -> np.ndarray:\n",
|
||||
" \"\"\"\n",
|
||||
" BEIR corpus encoder\n",
|
||||
" \"\"\"\n",
|
||||
" if isinstance(corpus, dict):\n",
|
||||
" corpus = list(corpus.values())\n",
|
||||
"\n",
|
||||
" texts = []\n",
|
||||
" for doc in corpus:\n",
|
||||
" title = (doc.get(\"title\") or \"\").strip()\n",
|
||||
" text = (doc.get(\"text\") or \"\").strip()\n",
|
||||
"\n",
|
||||
" if title:\n",
|
||||
" texts.append(f\"{title}\\n{text}\")\n",
|
||||
" else:\n",
|
||||
" texts.append(text)\n",
|
||||
"\n",
|
||||
" return self._batch_embed(texts)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c9528fb6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "230aae25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Descargando datos de Hugging Face...\n",
|
||||
"Cargando con BEIR GenericDataLoader...\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"application/vnd.jupyter.widget-view+json": {
|
||||
"model_id": "0e67479e959248f598db3415efbb13ae",
|
||||
"version_major": 2,
|
||||
"version_minor": 0
|
||||
},
|
||||
"text/plain": [
|
||||
" 0%| | 0/20604 [00:00<?, ?it/s]"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"dataset_name = \"cosqa\"\n",
|
||||
"data_path = f\"/home/acano/PycharmProjects/assistance-engine/data/external/{dataset_name}\"\n",
|
||||
"\n",
|
||||
"os.makedirs(f\"{data_path}/qrels\", exist_ok=True)\n",
|
||||
"\n",
|
||||
"# 1. Load from Hugging Face using the correct configuration names\n",
|
||||
"print(\"Descargando datos de Hugging Face...\")\n",
|
||||
"hf_corpus = load_dataset(\"CoIR-Retrieval/cosqa\", \"corpus\", split=\"corpus\")\n",
|
||||
"hf_queries = load_dataset(\"CoIR-Retrieval/cosqa\", \"queries\", split=\"queries\")\n",
|
||||
"# The qrels live in the 'default' config\n",
|
||||
"hf_qrels = load_dataset(\"CoIR-Retrieval/cosqa\", \"default\", split=\"test\")\n",
|
||||
"\n",
|
||||
"# 2. Save the corpus\n",
|
||||
"with open(f\"{data_path}/corpus.jsonl\", \"w\") as f:\n",
|
||||
" for item in hf_corpus:\n",
|
||||
" f.write(json.dumps({\"_id\": str(item[\"_id\"]), \"text\": item[\"text\"], \"title\": \"\"}) + \"\\n\")\n",
|
||||
"\n",
|
||||
"# 3. Save the queries\n",
|
||||
"with open(f\"{data_path}/queries.jsonl\", \"w\") as f:\n",
|
||||
" for item in hf_queries:\n",
|
||||
" f.write(json.dumps({\"_id\": str(item[\"_id\"]), \"text\": item[\"text\"]}) + \"\\n\")\n",
|
||||
"\n",
|
||||
"# 4. Save the qrels (TSV format expected by BEIR)\n",
|
||||
"with open(f\"{data_path}/qrels/test.tsv\", \"w\") as f:\n",
|
||||
" f.write(\"query-id\\tcorpus-id\\tscore\\n\")\n",
|
||||
" for item in hf_qrels:\n",
|
||||
" # In the 'default' config, the fields are typically 'query-id' and 'corpus-id'\n",
|
||||
" f.write(f\"{item['query-id']}\\t{item['corpus-id']}\\t{item['score']}\\n\")\n",
|
||||
"\n",
|
||||
"print(\"Cargando con BEIR GenericDataLoader...\")\n",
|
||||
"corpus, queries, qrels = GenericDataLoader(data_path).load(split=\"test\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "13050d31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test qwen3-0.6B-emb:latest"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "514540af",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"NDCG: {'NDCG@1': 0.174, 'NDCG@3': 0.27374, 'NDCG@5': 0.33509, 'NDCG@10': 0.39086, 'NDCG@100': 0.45099}\n",
|
||||
"MAP: {'MAP@1': 0.174, 'MAP@3': 0.247, 'MAP@5': 0.2808, 'MAP@10': 0.30466, 'MAP@100': 0.31702}\n",
|
||||
"Recall: {'Recall@1': 0.174, 'Recall@3': 0.352, 'Recall@5': 0.502, 'Recall@10': 0.67, 'Recall@100': 0.952}\n",
|
||||
"Precision: {'P@1': 0.174, 'P@3': 0.11733, 'P@5': 0.1004, 'P@10': 0.067, 'P@100': 0.00952}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model = BEIROllamaEmbeddings(\n",
|
||||
" base_url=\"http://localhost:11434\",\n",
|
||||
" model=\"qwen3-0.6B-emb:latest\",\n",
|
||||
" batch_size=64,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retriever = DenseRetrievalExactSearch(model, batch_size=64)\n",
|
||||
"evaluator = EvaluateRetrieval(retriever, score_function=\"cos_sim\")\n",
|
||||
"\n",
|
||||
"results = evaluator.retrieve(corpus, queries)\n",
|
||||
"ndcg, _map, recall, precision = evaluator.evaluate(\n",
|
||||
" qrels, results, [1, 3, 5, 10, 100]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"NDCG:\", ndcg)\n",
|
||||
"print(\"MAP:\", _map)\n",
|
||||
"print(\"Recall:\", recall)\n",
|
||||
"print(\"Precision:\", precision)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c4e643ca",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Test qwen2.5:1.5b"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "5ced1c25",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"NDCG: {'NDCG@1': 0.0, 'NDCG@3': 0.0, 'NDCG@5': 0.0, 'NDCG@10': 0.0, 'NDCG@100': 0.0021}\n",
|
||||
"MAP: {'MAP@1': 0.0, 'MAP@3': 0.0, 'MAP@5': 0.0, 'MAP@10': 0.0, 'MAP@100': 0.00043}\n",
|
||||
"Recall: {'Recall@1': 0.0, 'Recall@3': 0.0, 'Recall@5': 0.0, 'Recall@10': 0.0, 'Recall@100': 0.01}\n",
|
||||
"Precision: {'P@1': 0.0, 'P@3': 0.0, 'P@5': 0.0, 'P@10': 0.0, 'P@100': 0.0001}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"model_qwen2 = BEIROllamaEmbeddings(\n",
|
||||
" base_url=\"http://localhost:11434\",\n",
|
||||
" model=\"qwen2.5:1.5b\",\n",
|
||||
" batch_size=64,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"retriever_qwen_2 = DenseRetrievalExactSearch(model_qwen2, batch_size=64)\n",
|
||||
"evaluator_qwen_2 = EvaluateRetrieval(retriever_qwen_2, score_function=\"cos_sim\")\n",
|
||||
"\n",
|
||||
"results_qwen_2 = evaluator_qwen_2.retrieve(corpus, queries)\n",
|
||||
"ndcg_qwen_2, _map_qwen_2, recall_qwen_2, precision_qwen_2 = evaluator_qwen_2.evaluate(\n",
|
||||
" qrels, results_qwen_2, [1, 3, 5, 10, 100]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(\"NDCG:\", ndcg_qwen_2)\n",
|
||||
"print(\"MAP:\", _map_qwen_2)\n",
|
||||
"print(\"Recall:\", recall_qwen_2)\n",
|
||||
"print(\"Precision:\", precision_qwen_2)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1db7d110",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "assistance-engine",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.13"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
58
uv.lock
58
uv.lock
|
|
@ -1,5 +1,5 @@
|
|||
version = 1
|
||||
revision = 2
|
||||
revision = 3
|
||||
requires-python = ">=3.11"
|
||||
resolution-markers = [
|
||||
"python_full_version >= '3.14' and sys_platform == 'win32'",
|
||||
|
|
@ -264,8 +264,6 @@ dev = [
|
|||
{ name = "beir" },
|
||||
{ name = "jupyter" },
|
||||
{ name = "langfuse" },
|
||||
{ name = "mteb" },
|
||||
{ name = "polars" },
|
||||
{ name = "ruff" },
|
||||
]
|
||||
|
||||
|
|
@ -294,8 +292,6 @@ dev = [
|
|||
{ name = "beir", specifier = ">=2.2.0" },
|
||||
{ name = "jupyter", specifier = ">=1.1.1" },
|
||||
{ name = "langfuse", specifier = ">=3.14.4" },
|
||||
{ name = "mteb", specifier = ">=2.8.8" },
|
||||
{ name = "polars", specifier = ">=1.38.1" },
|
||||
{ name = "ruff", specifier = ">=0.15.1" },
|
||||
]
|
||||
|
||||
|
|
@ -2020,30 +2016,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mteb"
|
||||
version = "2.8.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "datasets" },
|
||||
{ name = "numpy" },
|
||||
{ name = "polars" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "pytrec-eval-terrier" },
|
||||
{ name = "requests" },
|
||||
{ name = "rich" },
|
||||
{ name = "scikit-learn" },
|
||||
{ name = "scipy" },
|
||||
{ name = "sentence-transformers" },
|
||||
{ name = "torch" },
|
||||
{ name = "tqdm" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/4f/05/80bc9b33130f11c16b3ddfafd7ad847a7f5d7cfb0631c97c0933d311dc90/mteb-2.8.8.tar.gz", hash = "sha256:767198481dcb2984fb62077c8ad31941e907e0c7f2acd88bfd80ef254ee55a41", size = 3324775, upload-time = "2026-02-23T19:33:25.809Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/d8/36/377263e288395c65b6f1e35d2b46faf6e0de45287abb6050386abfd2837b/mteb-2.8.8-py3-none-any.whl", hash = "sha256:e8364baa65ba0ee42bcc80bff3d69a34271a96f0af09f31048b28be2d08e6c02", size = 5119890, upload-time = "2026-02-23T19:33:22.436Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "multidict"
|
||||
version = "6.7.1"
|
||||
|
|
@ -2953,34 +2925,6 @@ wheels = [
|
|||
{ url = "https://files.pythonhosted.org/packages/48/31/05e764397056194206169869b50cf2fee4dbbbc71b344705b9c0d878d4d8/platformdirs-4.9.2-py3-none-any.whl", hash = "sha256:9170634f126f8efdae22fb58ae8a0eaa86f38365bc57897a6c4f781d1f5875bd", size = 21168, upload-time = "2026-02-16T03:56:08.891Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars"
|
||||
version = "1.38.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "polars-runtime-32" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c6/5e/208a24471a433bcd0e9a6889ac49025fd4daad2815c8220c5bd2576e5f1b/polars-1.38.1.tar.gz", hash = "sha256:803a2be5344ef880ad625addfb8f641995cfd777413b08a10de0897345778239", size = 717667, upload-time = "2026-02-06T18:13:23.013Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/49/737c1a6273c585719858261753da0b688454d1b634438ccba8a9c4eb5aab/polars-1.38.1-py3-none-any.whl", hash = "sha256:a29479c48fed4984d88b656486d221f638cba45d3e961631a50ee5fdde38cb2c", size = 810368, upload-time = "2026-02-06T18:11:55.819Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "polars-runtime-32"
|
||||
version = "1.38.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/07/4b/04d6b3fb7cf336fbe12fbc4b43f36d1783e11bb0f2b1e3980ec44878df06/polars_runtime_32-1.38.1.tar.gz", hash = "sha256:04f20ed1f5c58771f34296a27029dc755a9e4b1390caeaef8f317e06fdfce2ec", size = 2812631, upload-time = "2026-02-06T18:13:25.206Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ae/a2/a00defbddadd8cf1042f52380dcba6b6592b03bac8e3b34c436b62d12d3b/polars_runtime_32-1.38.1-cp310-abi3-macosx_10_12_x86_64.whl", hash = "sha256:18154e96044724a0ac38ce155cf63aa03c02dd70500efbbf1a61b08cadd269ef", size = 44108001, upload-time = "2026-02-06T18:11:58.127Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a7/fb/599ff3709e6a303024efd7edfd08cf8de55c6ac39527d8f41cbc4399385f/polars_runtime_32-1.38.1-cp310-abi3-macosx_11_0_arm64.whl", hash = "sha256:c49acac34cc4049ed188f1eb67d6ff3971a39b4af7f7b734b367119970f313ac", size = 40230140, upload-time = "2026-02-06T18:12:01.181Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/8c/3ac18d6f89dc05fe2c7c0ee1dc5b81f77a5c85ad59898232c2500fe2ebbf/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fef2ef2626a954e010e006cc8e4de467ecf32d08008f130cea1c78911f545323", size = 41994039, upload-time = "2026-02-06T18:12:04.332Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f2/5a/61d60ec5cc0ab37cbd5a699edb2f9af2875b7fdfdfb2a4608ca3cc5f0448/polars_runtime_32-1.38.1-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8a5f7a8125e2d50e2e060296551c929aec09be23a9edcb2b12ca923f555a5ba", size = 45755804, upload-time = "2026-02-06T18:12:07.846Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/91/54/02cd4074c98c361ccd3fec3bcb0bd68dbc639c0550c42a4436b0ff0f3ccf/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:10d19cd9863e129273b18b7fcaab625b5c8143c2d22b3e549067b78efa32e4fa", size = 42159605, upload-time = "2026-02-06T18:12:10.919Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/8e/f3/b2a5e720cc56eaa38b4518e63aa577b4bbd60e8b05a00fe43ca051be5879/polars_runtime_32-1.38.1-cp310-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61e8d73c614b46a00d2f853625a7569a2e4a0999333e876354ac81d1bf1bb5e2", size = 45336615, upload-time = "2026-02-06T18:12:14.074Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/f1/8d/ee2e4b7de948090cfb3df37d401c521233daf97bfc54ddec5d61d1d31618/polars_runtime_32-1.38.1-cp310-abi3-win_amd64.whl", hash = "sha256:08c2b3b93509c1141ac97891294ff5c5b0c548a373f583eaaea873a4bf506437", size = 45680732, upload-time = "2026-02-06T18:12:19.097Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prometheus-client"
|
||||
version = "0.24.1"
|
||||
|
|
|
|||
Loading…
Reference in New Issue