From 48d280440cdd48835a647d38a251559181e67f4c Mon Sep 17 00:00:00 2001 From: acano Date: Fri, 27 Feb 2026 14:45:33 +0100 Subject: [PATCH] Refactor code structure for improved readability and maintainability --- pyproject.toml | 2 + scratches/acano/es_ingestion.ipynb | 4 +- scratches/acano/evaluate_retrieve.ipynb | 1009 ++++++++++++++++++ scratches/acano/langgraph_agent_simple.ipynb | 285 ++--- src/__init__.py | 0 src/emb_factory.py | 67 ++ src/llm_factory v1.py | 152 --- src/llm_factory v2.py | 179 ---- src/llm_factory.py | 72 ++ uv.lock | 149 ++- 10 files changed, 1398 insertions(+), 521 deletions(-) create mode 100644 scratches/acano/evaluate_retrieve.ipynb create mode 100644 src/__init__.py create mode 100644 src/emb_factory.py delete mode 100644 src/llm_factory v1.py delete mode 100644 src/llm_factory v2.py create mode 100644 src/llm_factory.py diff --git a/pyproject.toml b/pyproject.toml index eff7c81..9e64193 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "grpcio-reflection>=1.78.0", "grpcio-tools>=1.78.0", "langchain>=1.2.10", + "langchain-aws>=1.3.1", "langchain-community>=0.4.1", "langchain-elasticsearch>=1.0.0", "langchain-huggingface>=1.2.0", @@ -19,6 +20,7 @@ dependencies = [ "numpy>=2.4.2", "pandas>=3.0.0", "python-dotenv>=1.2.1", + "rapidfuzz>=3.14.3", "torch>=2.10.0", "torchvision>=0.25.0", "tqdm>=4.67.3", diff --git a/scratches/acano/es_ingestion.ipynb b/scratches/acano/es_ingestion.ipynb index 25991b6..7a6be88 100644 --- a/scratches/acano/es_ingestion.ipynb +++ b/scratches/acano/es_ingestion.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "0a8abbfa", "metadata": {}, "outputs": [], @@ -15,14 +15,12 @@ "import markdown\n", "from bs4 import BeautifulSoup\n", "\n", - "\n", "from langchain_core.documents import Document\n", "from langchain_elasticsearch import ElasticsearchStore\n", "import torch\n", "import torch.nn.functional as F\n", "from loguru import logger\n", "from langchain_ollama import OllamaEmbeddings\n", - "\n", "from transformers import AutoTokenizer, AutoModel, AutoConfig\n", "from elasticsearch import Elasticsearch\n", "from langchain_elasticsearch import ElasticsearchStore\n", diff --git a/scratches/acano/evaluate_retrieve.ipynb b/scratches/acano/evaluate_retrieve.ipynb new file mode 100644 index 0000000..7ed2ddb --- /dev/null +++ b/scratches/acano/evaluate_retrieve.ipynb @@ -0,0 +1,1009 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8fed4518", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing faithfulness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import faithfulness\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_relevancy from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_relevancy\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_recall\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_precision from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_precision\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing context_entity_recall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import context_entity_recall\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_similarity from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_similarity\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing answer_correctness from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import answer_correctness\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextRecall from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextRecall\n", + " from ragas.metrics import (\n", + "/tmp/ipykernel_716860/1516785970.py:18: DeprecationWarning: Importing NonLLMContextPrecisionWithReference from 'ragas.metrics' is deprecated and will be removed in v1.0. Please use 'ragas.metrics.collections' instead. Example: from ragas.metrics.collections import NonLLMContextPrecisionWithReference\n", + " from ragas.metrics import (\n" + ] + } + ], + "source": [ + "import sys\n", + "from pathlib import Path\n", + "\n", + "# Ensure the project root is on the path so `src` is importable\n", + "_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n", + "if _project_root not in sys.path:\n", + " sys.path.insert(0, _project_root)\n", + "\n", + "from langchain_core.documents import Document\n", + "from langchain_classic.chains.retrieval_qa.base import RetrievalQA\n", + "from langchain_elasticsearch import ElasticsearchStore\n", + "from ragas import evaluate, SingleTurnSample\n", + "from ragas.llms import LangchainLLMWrapper\n", + "from ragas.embeddings import LangchainEmbeddingsWrapper\n", + "from ragas.testset import TestsetGenerator\n", + "from ragas.testset.persona import Persona\n", + "from ragas.testset.synthesizers.single_hop.specific import SingleHopSpecificQuerySynthesizer\n", + "from ragas.metrics import (\n", + " faithfulness,\n", + " answer_relevancy,\n", + " context_recall,\n", + " context_precision,\n", + " context_entity_recall,\n", + " answer_similarity,\n", + " answer_correctness,\n", + " NonLLMContextRecall,\n", + " NonLLMContextPrecisionWithReference\n", + ")\n", + "\n", + "from src.llm_factory import create_chat_model\n", + "from src.emb_factory import create_embedding_model\n", + "from src.config import (\n", + " ELASTICSEARCH_LOCAL_URL,\n", + " ELASTICSEARCH_INDEX,\n", + " OLLAMA_MODEL_NAME,\n", + " OLLAMA_EMB_MODEL_NAME,\n", + " RAW_DIR\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4426d6c0", + "metadata": {}, + "outputs": [], + "source": [ + "llm = create_chat_model(\n", + " provider=\"bedrock\",\n", + " model=\"global.anthropic.claude-opus-4-6-v1\",\n", + " temperature=0,\n", + ")\n", + "embeddings = create_embedding_model(\n", + " provider=\"ollama\",\n", + " model=OLLAMA_EMB_MODEL_NAME,\n", + ")\n", + "agent_llm = create_chat_model(\n", + " provider=\"ollama\",\n", + " model=OLLAMA_MODEL_NAME,\n", + " temperature=0,\n", + " validate_model_on_init=True,\n", + ")\n", + "vector_store = ElasticsearchStore(\n", + " es_url=ELASTICSEARCH_LOCAL_URL,\n", + " index_name=ELASTICSEARCH_INDEX,\n", + " embedding=embeddings,\n", + " query_field=\"text\",\n", + " vector_query_field=\"vector\",\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "fe524d14", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loaded 24 documents from /home/acano/PycharmProjects/assistance-engine/data/raw\n" + ] + } + ], + "source": [ + "docs: list[Document] = []\n", + "for txt_file in sorted(RAW_DIR.glob(\"*.txt\")):\n", + " text = txt_file.read_text(encoding=\"utf-8\")\n", + " docs.append(Document(page_content=text, metadata={\"source\": txt_file.name}))\n", + "\n", + "print(f\"Loaded {len(docs)} documents from {RAW_DIR}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "06103178", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_inputreference_contextsreferencepersona_namequery_stylequery_lengthsynthesizer_name
0How does AVAP handel a ZeroDivisionError when ...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, when a division by zero occurs—whethe...Carlos MenendezMISSPELLEDLONGsingle_hop_specific_query_synthesizer
1As a backend developer who is learning AVAP an...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, control flow structures include condi...Carlos MenendezPERFECT_GRAMMARLONGsingle_hop_specific_query_synthesizer
2hey so in AVAP when i do division by zero what...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, when you perform a division by zero, ...Carlos MedinaPOOR_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
3what happen if file not found when i do import...[Execution Model in AVAP\\n4.1. Structure of a ...When an import statement is executed in AVAP, ...Carlos MedinaPOOR_GRAMMARSHORTsingle_hop_specific_query_synthesizer
4In AVAP, under what circumstances is a TypeErr...[Execution Model in AVAP\\n4.1. Structure of a ...In AVAP, a TypeError exception is raised in tw...Carlos MenendezPERFECT_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
........................
95How does the data model in AVAP™ compare to Py...[Introduction\\nThe data model in AVAP™ defines...The data model in AVAP™ is very similar to Pyt...Carlos MenendezPERFECT_GRAMMARMEDIUMsingle_hop_specific_query_synthesizer
96What data types are available in AVAP™?[Introduction\\nThe data model in AVAP™ defines...In AVAP™, the most common data types include i...Carlos MedinaPERFECT_GRAMMARSHORTsingle_hop_specific_query_synthesizer
97AVAP strings Unicode[Introduction\\nThe data model in AVAP™ defines...In AVAP™, strings (str) represent sequences of...Carlos MedinaWEB_SEARCH_LIKESHORTsingle_hop_specific_query_synthesizer
98AVAP data model comparison with Python data ty...[Introduction\\nThe data model in AVAP™ defines...The data model in AVAP is similar to Python in...Carlos MendietaWEB_SEARCH_LIKEMEDIUMsingle_hop_specific_query_synthesizer
99AVAP™ data types and data structures overview[Introduction\\nThe data model in AVAP™ defines...AVAP™ uses a flexible and dynamic data model s...Carlos MendietaWEB_SEARCH_LIKESHORTsingle_hop_specific_query_synthesizer
\n", + "

100 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " user_input \\\n", + "0 How does AVAP handel a ZeroDivisionError when ... \n", + "1 As a backend developer who is learning AVAP an... \n", + "2 hey so in AVAP when i do division by zero what... \n", + "3 what happen if file not found when i do import... \n", + "4 In AVAP, under what circumstances is a TypeErr... \n", + ".. ... \n", + "95 How does the data model in AVAP™ compare to Py... \n", + "96 What data types are available in AVAP™? \n", + "97 AVAP strings Unicode \n", + "98 AVAP data model comparison with Python data ty... \n", + "99 AVAP™ data types and data structures overview \n", + "\n", + " reference_contexts \\\n", + "0 [Execution Model in AVAP\\n4.1. Structure of a ... \n", + "1 [Execution Model in AVAP\\n4.1. Structure of a ... \n", + "2 [Execution Model in AVAP\\n4.1. Structure of a ... \n", + "3 [Execution Model in AVAP\\n4.1. Structure of a ... \n", + "4 [Execution Model in AVAP\\n4.1. Structure of a ... \n", + ".. ... \n", + "95 [Introduction\\nThe data model in AVAP™ defines... \n", + "96 [Introduction\\nThe data model in AVAP™ defines... \n", + "97 [Introduction\\nThe data model in AVAP™ defines... \n", + "98 [Introduction\\nThe data model in AVAP™ defines... \n", + "99 [Introduction\\nThe data model in AVAP™ defines... \n", + "\n", + " reference persona_name \\\n", + "0 In AVAP, when a division by zero occurs—whethe... Carlos Menendez \n", + "1 In AVAP, control flow structures include condi... Carlos Menendez \n", + "2 In AVAP, when you perform a division by zero, ... Carlos Medina \n", + "3 When an import statement is executed in AVAP, ... Carlos Medina \n", + "4 In AVAP, a TypeError exception is raised in tw... Carlos Menendez \n", + ".. ... ... \n", + "95 The data model in AVAP™ is very similar to Pyt... Carlos Menendez \n", + "96 In AVAP™, the most common data types include i... Carlos Medina \n", + "97 In AVAP™, strings (str) represent sequences of... Carlos Medina \n", + "98 The data model in AVAP is similar to Python in... Carlos Mendieta \n", + "99 AVAP™ uses a flexible and dynamic data model s... Carlos Mendieta \n", + "\n", + " query_style query_length synthesizer_name \n", + "0 MISSPELLED LONG single_hop_specific_query_synthesizer \n", + "1 PERFECT_GRAMMAR LONG single_hop_specific_query_synthesizer \n", + "2 POOR_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", + "3 POOR_GRAMMAR SHORT single_hop_specific_query_synthesizer \n", + "4 PERFECT_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", + ".. ... ... ... \n", + "95 PERFECT_GRAMMAR MEDIUM single_hop_specific_query_synthesizer \n", + "96 PERFECT_GRAMMAR SHORT single_hop_specific_query_synthesizer \n", + "97 WEB_SEARCH_LIKE SHORT single_hop_specific_query_synthesizer \n", + "98 WEB_SEARCH_LIKE MEDIUM single_hop_specific_query_synthesizer \n", + "99 WEB_SEARCH_LIKE SHORT single_hop_specific_query_synthesizer \n", + "\n", + "[100 rows x 7 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "synthetic_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ab1932b7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_716860/244266171.py:1: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n", + " synth = SingleHopSpecificQuerySynthesizer(llm=LangchainLLMWrapper(llm))\n", + "/tmp/ipykernel_716860/244266171.py:3: DeprecationWarning: LangchainLLMWrapper is deprecated and will be removed in a future version. Use llm_factory instead: from openai import OpenAI; from ragas.llms import llm_factory; llm = llm_factory('gpt-4o-mini', client=OpenAI(api_key='...'))\n", + " generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n", + "/tmp/ipykernel_716860/244266171.py:3: DeprecationWarning: LangchainEmbeddingsWrapper is deprecated and will be removed in a future version. Use the modern embedding providers instead: embedding_factory('openai', model='text-embedding-3-small', client=openai_client) or from ragas.embeddings import OpenAIEmbeddings, GoogleEmbeddings, HuggingFaceEmbeddings\n", + " generator = TestsetGenerator(llm=LangchainLLMWrapper(llm), embedding_model=LangchainEmbeddingsWrapper(embeddings))\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8ec6ef79b1964c44b78a75ca539f816b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Applying SummaryExtractor: 0%| | 0/24 [00:00\n", + "Traceback (most recent call last):\n", + " File \"/home/acano/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/asyncio/events.py\", line 84, in _run\n", + " self._context.run(self._callback, *self._args)\n", + "RuntimeError: cannot enter context: <_contextvars.Context object at 0x74fe3aa80780> is already entered\n", + "Task was destroyed but it is pending!\n", + "task: .run_in_context() done, defined at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/ipykernel/utils.py:57> wait_for= cb=[Task.__wakeup()]> cb=[ZMQStream._run_callback.._log_error() at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/zmq/eventloop/zmqstream.py:563]>\n", + "/home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/json_schema.py:335: RuntimeWarning: coroutine 'Kernel.shell_main' was never awaited\n", + " mapping[key] = getattr(self, method_name)\n", + "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n", + "Task was destroyed but it is pending!\n", + "task: cb=[Task.__wakeup()]>\n", + "Exception in callback Task.__step()\n", + "handle: \n", + "Traceback (most recent call last):\n", + " File \"/home/acano/.local/share/uv/python/cpython-3.11.13-linux-x86_64-gnu/lib/python3.11/asyncio/events.py\", line 84, in _run\n", + " self._context.run(self._callback, *self._args)\n", + "RuntimeError: cannot enter context: <_contextvars.Context object at 0x74fe3aa80780> is already entered\n", + "Task was destroyed but it is pending!\n", + "task: .run_in_context() done, defined at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/ipykernel/utils.py:57> wait_for= cb=[Task.__wakeup()]> cb=[ZMQStream._run_callback.._log_error() at /home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/zmq/eventloop/zmqstream.py:563]>\n", + "/home/acano/PycharmProjects/assistance-engine/.venv/lib/python3.11/site-packages/pydantic/main.py:716: RuntimeWarning: coroutine 'Kernel.shell_main' was never awaited\n", + " return cls.__pydantic_validator__.validate_python(\n", + "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n", + "Task was destroyed but it is pending!\n", + "task: cb=[Task.__wakeup()]>\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_inputretrieved_contextsresponsereferencefaithfulnessanswer_relevancycontext_precisioncontext_recallcontext_entity_recallanswer_similarityanswer_correctness
0How does AVAP handel a ZeroDivisionError when ...[Execution Model in AVAP 4.1. Structure of a P...AVAP (Advanced Virtual Application Platform) i...In AVAP, when a division by zero occurs—whethe...0.0833330.0000001.0000001.0000000.3000000.8336700.363590
1As a backend developer who is learning AVAP an...[SECTION III: Control Logic and Decision Struc...I can provide information on the if statement ...In AVAP, control flow structures include condi...0.9047620.8375641.0000000.4545450.1578950.8093110.531596
2hey so in AVAP when i do division by zero what...[Execution Model in AVAP 4.1. Structure of a P...In AVAP, when a division operation is attempte...In AVAP, when you perform a division by zero, ...0.3125000.8922551.0000001.0000000.2000000.9238800.472905
3what happen if file not found when i do import...[Execution Model in AVAP 4.1. Structure of a P...In AVAP, the equivalent of a `FileNotFoundErro...When an import statement is executed in AVAP, ...0.1428570.9120311.0000001.0000001.0000000.6420440.248746
4In AVAP, under what circumstances is a TypeErr...[Execution Model in AVAP 4.1. Structure of a P...I don't have information on the specific circu...In AVAP, a TypeError exception is raised in tw...0.0000000.0000001.0000001.0000001.0000000.8068480.201712
....................................
95How does the data model in AVAP™ compare to Py...[Introduction The data model in AVAP™ defines ...AVAP (Advanced Vector Analysis Platform) is a ...The data model in AVAP™ is very similar to Pyt...0.2894740.9255460.8333330.8000000.5000000.7368160.422299
96What data types are available in AVAP™?[Chapter 5: Data Types In this chapter, we wil...I don't know the specific details about the da...In AVAP™, the most common data types include i...0.8181820.0000000.5000001.0000000.5000000.8564020.658545
97AVAP strings Unicode[Chapter 2: Notation in AVAP™ Introduction Not...AVAP (African Vertebrate Anatomy Project) uses...In AVAP™, strings (str) represent sequences of...0.0000000.9187400.0000000.0000000.5000000.6862810.271570
98AVAP data model comparison with Python data ty...[Introduction The data model in AVAP™ defines ...Here's a comparison of the AVAP data model wit...The data model in AVAP is similar to Python in...0.3437500.9549940.8333331.0000000.5555560.8244490.587930
99AVAP™ data types and data structures overview[Introduction The data model in AVAP™ defines ...AVAP (Advanced Visual Analytics Platform) is a...AVAP™ uses a flexible and dynamic data model s...0.0000000.8557191.0000001.0000000.1000000.8561070.323783
\n", + "

100 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " user_input \\\n", + "0 How does AVAP handel a ZeroDivisionError when ... \n", + "1 As a backend developer who is learning AVAP an... \n", + "2 hey so in AVAP when i do division by zero what... \n", + "3 what happen if file not found when i do import... \n", + "4 In AVAP, under what circumstances is a TypeErr... \n", + ".. ... \n", + "95 How does the data model in AVAP™ compare to Py... \n", + "96 What data types are available in AVAP™? \n", + "97 AVAP strings Unicode \n", + "98 AVAP data model comparison with Python data ty... \n", + "99 AVAP™ data types and data structures overview \n", + "\n", + " retrieved_contexts \\\n", + "0 [Execution Model in AVAP 4.1. Structure of a P... \n", + "1 [SECTION III: Control Logic and Decision Struc... \n", + "2 [Execution Model in AVAP 4.1. Structure of a P... \n", + "3 [Execution Model in AVAP 4.1. Structure of a P... \n", + "4 [Execution Model in AVAP 4.1. Structure of a P... \n", + ".. ... \n", + "95 [Introduction The data model in AVAP™ defines ... \n", + "96 [Chapter 5: Data Types In this chapter, we wil... \n", + "97 [Chapter 2: Notation in AVAP™ Introduction Not... \n", + "98 [Introduction The data model in AVAP™ defines ... \n", + "99 [Introduction The data model in AVAP™ defines ... \n", + "\n", + " response \\\n", + "0 AVAP (Advanced Virtual Application Platform) i... \n", + "1 I can provide information on the if statement ... \n", + "2 In AVAP, when a division operation is attempte... \n", + "3 In AVAP, the equivalent of a `FileNotFoundErro... \n", + "4 I don't have information on the specific circu... \n", + ".. ... \n", + "95 AVAP (Advanced Vector Analysis Platform) is a ... \n", + "96 I don't know the specific details about the da... \n", + "97 AVAP (African Vertebrate Anatomy Project) uses... \n", + "98 Here's a comparison of the AVAP data model wit... \n", + "99 AVAP (Advanced Visual Analytics Platform) is a... \n", + "\n", + " reference faithfulness \\\n", + "0 In AVAP, when a division by zero occurs—whethe... 0.083333 \n", + "1 In AVAP, control flow structures include condi... 0.904762 \n", + "2 In AVAP, when you perform a division by zero, ... 0.312500 \n", + "3 When an import statement is executed in AVAP, ... 0.142857 \n", + "4 In AVAP, a TypeError exception is raised in tw... 0.000000 \n", + ".. ... ... \n", + "95 The data model in AVAP™ is very similar to Pyt... 0.289474 \n", + "96 In AVAP™, the most common data types include i... 0.818182 \n", + "97 In AVAP™, strings (str) represent sequences of... 0.000000 \n", + "98 The data model in AVAP is similar to Python in... 0.343750 \n", + "99 AVAP™ uses a flexible and dynamic data model s... 0.000000 \n", + "\n", + " answer_relevancy context_precision context_recall \\\n", + "0 0.000000 1.000000 1.000000 \n", + "1 0.837564 1.000000 0.454545 \n", + "2 0.892255 1.000000 1.000000 \n", + "3 0.912031 1.000000 1.000000 \n", + "4 0.000000 1.000000 1.000000 \n", + ".. ... ... ... \n", + "95 0.925546 0.833333 0.800000 \n", + "96 0.000000 0.500000 1.000000 \n", + "97 0.918740 0.000000 0.000000 \n", + "98 0.954994 0.833333 1.000000 \n", + "99 0.855719 1.000000 1.000000 \n", + "\n", + " context_entity_recall answer_similarity answer_correctness \n", + "0 0.300000 0.833670 0.363590 \n", + "1 0.157895 0.809311 0.531596 \n", + "2 0.200000 0.923880 0.472905 \n", + "3 1.000000 0.642044 0.248746 \n", + "4 1.000000 0.806848 0.201712 \n", + ".. ... ... ... \n", + "95 0.500000 0.736816 0.422299 \n", + "96 0.500000 0.856402 0.658545 \n", + "97 0.500000 0.686281 0.271570 \n", + "98 0.555556 0.824449 0.587930 \n", + "99 0.100000 0.856107 0.323783 \n", + "\n", + "[100 rows x 11 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "metrics = [\n", + " faithfulness,\n", + " answer_relevancy,\n", + " context_precision,\n", + " context_recall,\n", + " context_entity_recall,\n", + " answer_similarity,\n", + " answer_correctness\n", + "]\n", + "\n", + "result = evaluate(\n", + " dataset=dataset, \n", + " metrics=metrics,\n", + " llm=llm,\n", + " embeddings=embeddings,\n", + ")\n", + "\n", + "result_df = result.to_pandas()\n", + "result_df" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "20c3fa64", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "faithfulness 0.254643\n", + "answer_relevancy 0.609250\n", + "context_precision 0.862500\n", + "context_recall 0.906242\n", + "context_entity_recall 0.354178\n", + "answer_similarity 0.781973\n", + "answer_correctness 0.359654\n", + "dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result_df.mean(numeric_only=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "350755fd", + "metadata": {}, + "outputs": [], + "source": [ + "result_df.to_csv(\"/home/acano/PycharmProjects/assistance-engine/data/interim/embedding_eval_results/retrieve_eval_results/ragas_eval.csv\", index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "assistance-engine", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/scratches/acano/langgraph_agent_simple.ipynb b/scratches/acano/langgraph_agent_simple.ipynb index aad3c8f..d411432 100644 --- a/scratches/acano/langgraph_agent_simple.ipynb +++ b/scratches/acano/langgraph_agent_simple.ipynb @@ -10,16 +10,23 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 1, "id": "9e974df6", "metadata": {}, "outputs": [], "source": [ "import os\n", + "import sys\n", + "from pathlib import Path\n", "from typing import TypedDict, List, Optional, Annotated, Literal\n", "from IPython.display import Image, display\n", "from pydantic import BaseModel, Field\n", "\n", + "# Ensure the project root is on the path so `src` is importable\n", + "_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n", + "if _project_root not in sys.path:\n", + " sys.path.insert(0, _project_root)\n", + "\n", "from langchain_core.documents import Document\n", "from langchain_core.messages import BaseMessage, SystemMessage, AIMessage\n", "from langchain_core.tools import tool\n", @@ -29,33 +36,41 @@ "from langchain_elasticsearch import ElasticsearchStore\n", "from langgraph.graph import StateGraph, END\n", "from langgraph.prebuilt import ToolNode, tools_condition\n", - "from langfuse import Langfuse" + "from langfuse import Langfuse\n", + "\n", + "from src.llm_factory import create_chat_model\n", + "from src.emb_factory import create_embedding_model\n", + "from src.config import (\n", + " ELASTICSEARCH_LOCAL_URL,\n", + " ELASTICSEARCH_INDEX,\n", + " OLLAMA_MODEL_NAME,\n", + " OLLAMA_EMB_MODEL_NAME\n", + ")" ] }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 2, "id": "30edcecc", "metadata": {}, "outputs": [], "source": [ - "ES_URL = os.getenv(\"ELASTICSEARCH_LOCAL_URL\")\n", - "INDEX_NAME = os.getenv(\"ELASTICSEARCH_INDEX\")\n", - "MODEL_NAME = os.getenv(\"OLLAMA_MODEL_NAME\")\n", - "EMB_MODEL_NAME = os.getenv(\"OLLAMA_EMB_MODEL_NAME\")\n", - "\n", "langfuse = Langfuse()\n", "\n", - "embeddings = OllamaEmbeddings(model=EMB_MODEL_NAME)\n", - "llm = ChatOllama(\n", - " model=MODEL_NAME,\n", + "llm = create_chat_model(\n", + " provider=\"ollama\",\n", + " model=OLLAMA_MODEL_NAME,\n", " temperature=0,\n", " validate_model_on_init=True,\n", ")\n", + "embeddings = create_embedding_model(\n", + " provider=\"ollama\",\n", + " model=OLLAMA_EMB_MODEL_NAME,\n", + ")\n", "\n", "vector_store = ElasticsearchStore(\n", - " es_url=ES_URL,\n", - " index_name=INDEX_NAME,\n", + " es_url=ELASTICSEARCH_LOCAL_URL,\n", + " index_name=ELASTICSEARCH_INDEX,\n", " embedding=embeddings,\n", " query_field=\"text\",\n", " vector_query_field=\"vector\",\n", @@ -68,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 3, "id": "ad98841b", "metadata": {}, "outputs": [ @@ -97,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 4, "id": "5f8c88cf", "metadata": {}, "outputs": [], @@ -110,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 5, "id": "fd8ed542", "metadata": {}, "outputs": [], @@ -129,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 6, "id": "f0a21230", "metadata": {}, "outputs": [], @@ -139,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 7, "id": "f9359747", "metadata": {}, "outputs": [], @@ -178,16 +193,24 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 8, "id": "66ae23f0", "metadata": {}, "outputs": [], "source": [ "REFORMULATE_PROMPT = SystemMessage(\n", - " content=\"\"\"You are a query reformulation assistant.\n", - " Given the user's conversational message, rewrite it as a concise, \n", - " standalone search query optimized for vector similarity retrieval.\n", - " Output ONLY the reformulated query, nothing else.\"\"\"\n", + " content=(\n", + " \"You are a deterministic query rewriting function.\\n\"\n", + " \"You convert natural language questions into keyword search queries.\\n\\n\"\n", + " \"Strict constraints:\\n\"\n", + " \"1. Keep function names and technical tokens unchanged.\\n\"\n", + " \"2. Remove filler phrases.\\n\"\n", + " \"3. Do not answer.\\n\"\n", + " \"4. Do not explain.\\n\"\n", + " \"5. Do not generate code.\\n\"\n", + " \"6. Return a single-line query only.\\n\"\n", + " \"7. If already optimal, return unchanged.\\n\"\n", + " )\n", ")\n", "\n", "GENERATE_PROMPT = SystemMessage(\n", @@ -214,7 +237,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 9, "id": "36d0f54e", "metadata": {}, "outputs": [], @@ -252,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 10, "id": "f073edc9", "metadata": {}, "outputs": [], @@ -272,7 +295,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 11, "id": "fae46a58", "metadata": {}, "outputs": [], @@ -295,7 +318,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 12, "id": "7f57b543", "metadata": {}, "outputs": [ @@ -319,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 13, "id": "f7a0993f", "metadata": {}, "outputs": [], @@ -345,7 +368,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 14, "id": "2fec3fdb", "metadata": {}, "outputs": [ @@ -377,7 +400,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 15, "id": "8569cf39", "metadata": {}, "outputs": [], @@ -401,17 +424,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "a1a1f3cf", "metadata": {}, "outputs": [], "source": [ - "user_input = \"\"\"How does if statement work in AVAP? Respond con\"\"\"" + "user_input = \"\"\"Suppose you want to create a table called users in a database called myDatabase, with two columns: username of type VARCHAR and age of type INTEGER. How would you do that in AVAP?\"\"\"" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 21, "id": "53b89690", "metadata": {}, "outputs": [ @@ -421,35 +444,59 @@ "text": [ "================================\u001b[1m Human Message \u001b[0m=================================\n", "\n", - "What would this AVAP code returns:\n", - "addVar(selector,'yes')\n", - " IF(selector,'yes','=')\n", - " addVar(result,1) ELSE()\n", - " addVar(result,0) END()\n", - " addResult(result)\n", + "Suppose you want to create a table called users in a database called myDatabase, with two columns: username of type VARCHAR and age of type INTEGER. How would you do that in AVAP?\n", + "[reformulate] 'Suppose you want to create a table called users in a database called myDatabase, with two columns: username of type VARCHAR and age of type INTEGER. How would you do that in AVAP?' → 'CREATE TABLE myDatabase.users (username VARCHAR, age INTEGER);'\n", + "================================\u001b[1m Human Message \u001b[0m=================================\n", + "\n", + "Suppose you want to create a table called users in a database called myDatabase, with two columns: username of type VARCHAR and age of type INTEGER. How would you do that in AVAP?\n", + "[retrieve] 3 docs fetched\n", + "[1] id=chunk-1 source=21_Persistance_connectors_orm.txt\n", + "SECTION V: Persistence, Connectors, and Native ORM AVAP is designed to be database-agnostic. It enables data manipulation through three layers: the universal connector, simplified ORM commands, and direct SQL execution. 5.1 The Universal Connector (avapConnector) The avapConnector command is the entry point for any external integration. It uses a Connection Token system (Base64) that encapsulates configuration details (host, port, credentials, driver) to keep code clean and secure. Interface connector_variable = avapConnector(\"BASE64_TOKEN\") Connector Object Capabilities Once instantiated, the variable behaves as an object with dynamic methods: Database Connectors: Expose the .query(sql_string) method, which returns objects or lists depending on the result set. API Connectors (Twilio, Slack, etc.): Expose native service methods (e.g., .send_sms()). Example: Dynamic Assignment with Connectors // Instantiate the connection db = avapConnector(\"REJfQ09OTkVDVE9SM...\") // Execute query and use Section I dynamic evaluation users = db.query(\"SELECT * FROM users\") first_admin = users[0].name if users[0].role == 'admin' else 'N/A' addResult(first_admin) 5.2 Native ORM Layer (ormCheckTable / ormDirect) For quick operations on the local or default database cluster, AVAP provides system-level commands that do not require prior instantiation. 5.2.1 ormCheckTable Verifies the existence of a database structure. It is critical for installation scripts or automated migrations. Interface: ormCheckTable(table_name, target_var) Response: target_var receives the string values \"True\" or \"False\". 5.2.2 ormDirect Executes SQL statements directly. Unlike .query(), it is optimized for statements that do not necessarily return rows (such as INSERT, UPDATE, or CREATE TABLE). Interface: ormDirect(statement, target_var) Interpolation Usage Example: ormDirect(\"UPDATE users SET login = '%s' WHERE id = %s\" % (now, id), result) 5.3 Data Access Abstraction (Implicit Commands) AVAP includes specialized commands for common CRUD operations, reducing the need to write manual SQL and mitigating injection risks. ormAccessSelect Performs filtered queries returning a list-of-objects structure. Syntax: ormAccessSelect(table, filters, target) ormAccessInsert / ormAccessUpdate Manages data persistence. If used on an object that already has an ID, Update synchronizes changes; otherwise, Insert creates the record. 5.4 Dynamic Query Formatting (Injection Prevention) As detailed in Section I, the AVAP engine processes SQL strings before sending them to the database engine. The official recommendation is to always use interpolation with the % operator to ensure proper handling of data types (Strings vs Integers) by the driver. Recommended Secure Pattern sql = \"SELECT * FROM %s WHERE status = '%s'\" % (table_name, recovered_status) res = db.query(sql) 5.5 Cryptographic Security Integration (encodeSHA256) Within the persistence flow, AVAP provides native tools to secure sensitive data before it is written to disk. Interface encodeSHA256(source_text, target_variable) Complete Registration Flow (Final Example) This example integrates Sections I, II, III, and V: // II: Input capture addParam(\"pass\", p) addParam(\"user\", u) // I & V: Processing and security encodeSHA256(p, secure_pass) // V: Insertion sql = \"INSERT INTO users (username, password) VALUES ('%s', '%s')\" % (u, secure_pass) ormDirect(sql, db_result) // III & II: Response if(db_result, \"Success\", \"=\") addVar(msg, \"User created\") addResult(msg) end() Examples 1. Connector Instantiation Code snippet my_db = avapConnector(\"VE9LRU5fREVCX0RFU0FSUk9MTE8=\") 2. Record Retrieval Code snippet rows = my_db.query(\"SELECT id, name FROM users\") addResult(rows) 3. Direct Command Execution Code snippet ormDirect(\"TRUNCATE TABLE temp_cache\", status) 4. Structure Verification Code snippet ormCheckTable(\"inventory\", exists) if(exists, \"False\", \"==\") ormDirect(\"CREATE TABLE inventory...\", r) end() 5. Secure Update (Interpolation) Code snippet sql = \"UPDATE users SET login_count = %s WHERE email = '%s'\" % (count, email) ormDirect(sql, res) 6. JSON/DB Object Navigation Code snippet found_id = query_result[0].id addResult(found_id) 7. ORM Select with Filter Code snippet ormAccessSelect(\"orders\", {\"status\": \"pending\"}, list_result) addResult(list_result) 8. Processing Database Results Code snippet records = db.query(\"SELECT...\") startLoop(i, 0, len(records)) name = records[i].name endLoop() 9. Cryptographic Persistence Code snippet encodeSHA256(password_raw, hashed) ormDirect(\"INSERT INTO logins (hash) VALUES ('%s')\" % hashed, r) 10. Third-Party Connector (e.g., Slack) Code snippet slack_api = avapConnector(\"U0xBQ0tfQVBJX1RPS0VO\")\n", + "\n", + "[2] id=chunk-2 source=16_Appendix.txt\n", + "Appendix Function Glossary randomString() The randomString() command generates a random string based on a specified pattern and stores it in a target variable. It is especially useful when random strings are needed to conform to a specific format, such as passwords or identifiers. Parameters Pattern Type: var Description: A regular expression (regex) pattern that defines the characters and structure of the string to be generated. It can be a direct value or a variable containing the pattern. For example, [a-zA-Z0-9] will generate a string that includes uppercase letters, lowercase letters, and numbers. Length Type: var Description: An integer value specifying the length of the random string to be generated. It can be a direct value or a variable containing the desired length. This value determines how many characters the resulting string will have. TargetVariable Type: var Description: The variable where the generated string will be stored. This variable should be used later in the program. Unlike the other parameters, this must be a variable and not a direct value. Usage Example // Direct call with values: randomString('[a-zA-Z0-9]', 8, generatedPassword) // Call using variables: pattern = '[a-zA-Z0-9]' length = 8 randomString(pattern, length, generatedPassword) stampToDatetime() The stampToDatetime() command converts a timestamp value to a date and time according to a specified format, applying a possible time difference, and stores the result in a target variable. It is useful for manipulating and formatting time values into different representations. Parameters timestamp Type: var Description: A value representing a timestamp, which can be provided directly or through a variable. This value is the starting point for conversion to a date and time format. Format Type: var Description: A format string that defines how the resulting date and time should be presented. This string follows the same conventions used in Python for formatting dates and times. Common symbols include: %Y: Year with four digits (e.g., 2024) %m: Month with two digits (01 to 12) %d: Day of the month with two digits (01 to 31) %H: Hour in 24-hour format (00 to 23) %M: Minutes (00 to 59) %S: Seconds (00 to 59) For example, the format %Y-%m-%d %H:%M:%S converts a timestamp into a string like 2024-08-25 14:30:00. It can be a direct value or a variable containing the desired format. TimeDelta Type: var Description: An optional value representing a time adjustment (positive or negative) applied to the timestamp before conversion. This value can be provided directly or through a variable and is expressed in seconds. TargetVariable Type: var Description: The variable where the resulting date and time from the conversion will be stored. Unlike the other parameters, this must be a variable and not a direct value. Usage Example // Direct call with values: stampToDatetime(1692966600, '%Y-%m-%d %H:%M:%S', 3600, convertedDatetime) // Call using variables: timestamp = 1692966600 format = '%Y-%m-%d %H:%M:%S' adjustment = 3600 stampToDatetime(timestamp, format, adjustment, convertedDatetime) In the first example, a timestamp is converted to a date and time in the format \"%Y-%m-%d %H:%M:%S\", applying a 3600-second (1-hour) adjustment, and the result is stored in the variable convertedDatetime. In the second example, variables are used to define the timestamp, format, and adjustment. getTimeStamp() The getTimeStamp() command converts a date and time string, given in a specific format, to a timestamp value. Additionally, it allows for an optional time adjustment before storing the result in a target variable. This command is useful for converting human-readable date and time representations to a numeric timestamp format, which can be used in calculations or time comparisons. Parameters DateString Type: var Description: A string representing a date and time. This string must follow the format specified in the Format parameter. It can be a direct value or a variable containing the date string. Format Type: var Description: A format string that defines how to interpret the date and time string (DateString). This string follows Python's conventions for formatting and parsing dates and times. Some common symbols include: %Y: Year with four digits (e.g., 2024) %m: Month with two digits (01 to 12) %d: Day of the month with two digits (01 to 31) %H: Hour in 24-hour format (00 to 23) %M: Minutes (00 to 59) %S: Seconds (00 to 59) For example, to interpret the string \"2024-08-25 14:30:00\", the format %Y-%m-%d %H:%M:%S would be used. It can be a direct value or a variable containing the format. TimeDelta Type: var Description: An optional value representing a time adjustment (positive or negative) applied to the timestamp after conversion. This value can be provided directly or through a variable and is expressed in seconds. TargetVariable Type: var Description: The variable where the resulting timestamp from the conversion will be stored. Unlike the other parameters, this must be a variable and not a direct value. Usage Example // Direct call with values: getTimeStamp('2024-08-25 14:30:00', '%Y-%m-%d %H:%M:%S', 3600, generatedTimestamp) // Call using variables: date = '2024-08-25 14:30:00' format = '%Y-%m-%d %H:%M:%S' adjustment = 3600 getTimeStamp(date, format, adjustment, generatedTimestamp) In the first example, the date and time string \"2024-08-25 14:30:00\" is converted to a timestamp, applying a 3600-second (1-hour) adjustment, and the result is stored in the variable generatedTimestamp. In the second example, variables are used to define the date, format, and adjustment. getRegex() The getRegex() command searches for matches in a source string using a regular expression (regex) pattern and stores the result in a target variable. This command is useful for extracting specific parts of a string that match a defined pattern, such as email addresses, phone numbers, or any other structure defined by a regex. Parameters SourceVariable Type: variable Description: The variable containing the source string in which to search for regex pattern matches. This string is the text on which the regex search will be applied. rePattern Type: variable Description: The variable containing the regular expression (regex) pattern that defines what to search for in the source string. This pattern should follow standard regex rules, allowing the specification of sequences of characters to identify in the source string. TargetVariable Type: variable Description: The variable where the search result will be stored. Depending on the context and the pattern used, the result could be the first match found, all matches, or even specific groups within the match. Usage Example // Direct call with values: sourceText = \"Email: user@example.com and phone: 123-456-7890\" pattern = r\"\\b\\d{3}-\\d{3}-\\d{4}\\b\" getRegex(sourceText, pattern, phoneNumber) // Call using variables: sourceText = \"Visit our website at https://www.example.com for more information.\" regexPattern = r\"https?://\\S+\" getRegex(sourceText, regexPattern, foundURL) In the first example, a phone number in the format 123-456-7890 is searched in the sourceText string and the result is stored in the phoneNumber variable. In the second example, a URL is extracted from the sourceText string using a regex that identifies URL patterns, and the result is stored in the foundURL variable. getDateTime() The getDateTime() command retrieves the current date and time, formats it according to a specified format, applies an optional time adjustment, and converts it to a specific time zone before storing the result in a target variable. It is useful for obtaining and manipulating the current date and time in different formats and time zones. Parameters Format Type: var Description: A format string that defines how the resulting date and time should be presented. This string follows the date and time formatting conventions used in Python. Some of the most common symbols include: %Y: Year with four digits (e.g., 2024) %m: Month with two digits (01 to 12) %d: Day of the month with two digits (01 to 31) %H: Hour in 24-hour format (00 to 23) %M: Minutes (00 to 59) %S: Seconds (00 to 59) For example, the format \"%Y-%m-%d %H:%M:%S\" will present the date and time as 2024-08-25 14:30:00. It can be a direct value or a variable containing the desired format. TimeDelta Type: var Description: An optional value representing a time adjustment (positive or negative) applied to the current date and time before conversion. This value can be provided directly or through a variable and is expressed in seconds. TimeZone Type: var Description: The time zone to which the date and time should be converted. This value can be a time zone identifier provided directly or through a variable. Some common time zones include: \"UTC\": Coordinated Universal Time \"America/New_York\": U.S. Eastern Time (EST/EDT) \"America/Los_Angeles\": U.S. Pacific Time (PST/PDT) \"Europe/London\": London Time (GMT/BST) \"Europe/Madrid\": Madrid Time (CET/CEST) \"Asia/Tokyo\": Tokyo Time (JST) \"Australia/Sydney\": Sydney Time (AEST/AEDT) You can use any time zone recognized by the pytz library in Python, which includes most time zones worldwide. TargetVariable Type: var Description: The variable in which the resulting date and time from the operation will be stored. Unlike the other parameters, this must be a variable and not a direct value. Usage Example // Direct call with values: getDateTime('%Y-%m-%d %H:%M:%S', 3600, 'UTC', currentTime) // Call using variables: format = '%Y-%m-%d %H:%M:%S' adjustment = 3600 timeZone = 'America/New_York' getDateTime(format, adjustment, timeZone, currentDateTime) In the first example, the current date and time are retrieved, adjusted by 3600 seconds (1 hour), converted to UTC, and stored in the variable currentTime. In the second example, variables are used to define the format, time adjustment, and time zone, with the result stored in the currentDateTime variable. encodeMD5() The encodeMD5() command generates an MD5 hash of the provided string and stores the result in a target variable. MD5 is a cryptographic hash function that produces a 128-bit value (32 hexadecimal characters), commonly used to verify data integrity. Parameters SourceVariable Type: var Description: The variable containing the text string to be encoded in MD5. It can be a direct value or a variable storing the input string. TargetVariable Type: var Description: The variable in which the resulting MD5 hash will be stored. Unlike the SourceVariable parameter, this must be a variable and not a direct value. Usage Example // Direct call with values: encodeMD5('example_string', md5Hash) // Call using variables: text = 'example_string' hashVariable = 'md5Hash' encodeMD5(text, hashVariable) In the first example, an MD5 hash is generated from the string 'example_string' and stored in the md5Hash variable. In the second example, a variable text is used to define the input string and another variable hashVariable is used to store the resulting MD5 hash. encodeSHA256() The encodeSHA256() command generates a SHA-256 hash of the provided string and stores the result in a target variable. SHA-256 is a cryptographic hash function that produces a 256-bit value (64 hexadecimal characters), offering greater security compared to MD5. Parameters SourceVariable Type: var Description: The variable containing the text string to be encoded in SHA-256. It can be a direct value or a variable storing the input string. TargetVariable Type: var Description: The variable in which the resulting SHA-256 hash will be stored. Unlike the SourceVariable parameter, this must be a variable and not a direct value. Usage Example // Direct call with values: encodeSHA256('example_string', sha256Hash) // Call using variables: text = 'example_string' hashVariable = 'sha256Hash' encodeSHA256(text, hashVariable) In the first example, a SHA-256 hash is generated from the string 'example_string' and stored in the sha256Hash variable. In the second example, a variable text is used to define the input string, and another variable hashVariable is used to store the resulting SHA-256 hash. getQueryParamList() The getQueryParamList() command extracts the query parameters from the current HTTP request and stores a list of these parameters in a target variable. This is useful for handling and processing query parameters in web applications. Parameters TargetVariable Type: var Description: The variable in which the extracted query parameter list will be stored. This should be a variable where the command's result will be saved. Command Flow Parameter Extraction: Accesses the query parameters from the current HTTP request. List Construction: Creates a list containing dictionaries, where each dictionary represents a query parameter and its associated value. Result Storage: Saves the list of parameters in the variable specified by TargetVariable. Usage Example Suppose the HTTP query has the following parameters: ?user=alice&age=30. // Define the variable to store the result queryParamsList = [] // Call the command to extract query parameters getQueryParamList(queryParamsList) // Return the list of query parameters via addResult addResult(queryParamsList) Given the query string ?user=alice&age=30, the getQueryParamList() command will generate the following list of parameters: [ {\"user\": \"alice\"}, {\"age\": \"30\"} ] getListLen() The getListLen() command calculates the length of a list and stores the result in a target variable. This command is useful for determining the number of elements in a list. Parameters SourceVariable Type: var Description: The variable containing the list whose length you want to calculate. It can be a variable that stores the list or a direct value representing the list. TargetVariable Type: var Description: The variable where the result of the list length will be stored. This should be a variable that will receive the integer value representing the number of elements in the list. Command Flow Retrieve the List: Access the list stored in the SourceVariable. Calculate the Length: Calculate the number of elements in the list. Store the Result: Save the calculated length in the variable specified by TargetVariable. Usage Example Suppose the list in myList is ['apple', 'banana', 'cherry']. // Variable definitions myList = ['apple', 'banana', 'cherry'] listLength = 0 // Call the command to calculate the length of the list getListLen(myList, listLength) // Return the list length through addResult addResult(listLength) Since the list myList has 3 elements, the getListLen() command will calculate that the length is 3. This value will be stored in the listLength variable and returned through addResult(listLength), resulting in the following output: 3 itemFromList() The itemFromList() command extracts a specific element from a list based on a given index and stores the result in a target variable. This is useful for accessing individual elements within a list. Parameters SourceVariable Type: var Description: The variable containing the list from which an element is to be extracted. It can be a variable that stores the list or a direct value representing the list. index Type: value Description: The index of the element to be extracted from the list. It must be an integer value that indicates the position of the element within the list. TargetVariable Type: var Description: The variable where the extracted element will be stored. It must be a variable that will receive the value of the element at the specified index position. Command Flow Access the List: Access the list stored in the SourceVariable. Extract the Element: Retrieve the element at the position specified by the index. Store the Result: Save the extracted element in the variable specified by TargetVariable. Usage Example Suppose the list in myList is ['apple', 'banana', 'cherry'] and you want to extract the element at index 1. // Variable definitions myList = ['apple', 'banana', 'cherry'] element = '' // Call the command to extract the element at index 1 itemFromList(myList, 1, element) // Return the extracted element through addResult addResult(element) Since index 1 corresponds to the element 'banana' in the myList, the itemFromList() command will extract 'banana' and store it in the variable element. The element variable will be returned through addResult(element), resulting in the following output: \"banana\" variableFromJSON() The variableFromJSON() command extracts the value associated with a specific key from a JSON object and stores the result in a target variable. This command is useful for accessing values within a JSON object. Parameters SourceVariable Type: var Description: The variable containing the JSON object from which a value is to be extracted. It can be a variable that stores the JSON object or a direct value representing the JSON object. key Type: value Description: The key whose value is to be extracted from the JSON object. It must be a value that represents the key within the JSON object. TargetVariable Type: var Description: The variable where the extracted value will be stored. It must be a variable that will receive the value associated with the specified key in the JSON object. Command Flow Access the JSON Object: Access the JSON object stored in the SourceVariable. Extract the Value: Retrieve the value associated with the key within the JSON object. Store the Result: Save the extracted value in the variable specified by TargetVariable. Usage Example Suppose the JSON object in jsonData is \"name\": \"Alice\", \"age\": 30 and you want to extract the value associated with the key \"name\". // Variable definitions jsonData = {\"name\": \"Alice\", \"age\": 30} nameValue = '' // Call the command to extract the value associated with the key \"name\" variableFromJSON(jsonData, \"name\", nameValue) // Return the extracted value through addResult addResult(nameValue) Since the value associated with the key \"name\" in the JSON object jsonData is \"Alice\", the variableFromJSON() command will extract \"Alice\" and store it in the variable nameValue. The nameValue variable will be returned through addResult(nameValue), resulting in the following output: \"Alice\" AddVariableToJSON() The AddVariableToJSON() command adds a new key and its corresponding value to a JSON object and stores the result in a target variable. This command is useful for updating a JSON object with new key-value pairs. Parameters Key Type: variable Description: The key to be added to the JSON object. It must be a variable that stores the key to be added. Value Type: variable Description: The value associated with the key to be added to the JSON object. It must be a variable that stores the corresponding value. TargetVariable Type: variable Description: The variable where the updated JSON object will be stored. It must be a variable that will receive the JSON object with the new key and its added value. Command Flow Access the JSON Object: Access the JSON object stored in the TargetVariable. Add the Key and Value: Add the new key and its associated value to the JSON object. Store the Result: Save the updated JSON object in the variable specified by TargetVariable. Usage Example Suppose the initial JSON object in jsonData is \"name\": \"Alice\", \"age\": 30, and you want to add a new key \"email\" with the value \"alice@example.com\". // Variable definitions jsonData = {\"name\": \"Alice\", \"age\": 30} newKey = \"email\" newValue = \"alice@example.com\" // Call the command to add the new key and value to the JSON object AddVariableToJSON(newKey, newValue, jsonData) // Return the updated JSON object through addResult addResult(jsonData) This updated JSON object will be stored in the variable jsonData and will be returned through addResult(jsonData), resulting in the following output: { \"name\": \"Alice\", \"age\": 30, \"email\": \"alice@example.com\" } variableToList() The variableToList() command converts an element into a list that contains only that element and stores the resulting list in a target variable. This command is useful to ensure that a single value is handled as a list in subsequent processing. Parameters element Type: variable Description: The variable that contains the element to be converted into a list. It can be any type of value that you want to include as the only item in the list. TargetVariable Type: variable Description: The variable in which the resulting list will be stored. It must be a variable that will receive the list with the included element. Command Flow Access the Element: Access the element stored in the element variable. Create the List: Create a list that contains only the provided element. Store the Result: Save the resulting list in the variable specified by TargetVariable. Usage Example Suppose the element in myElement is \"apple\" and you want to convert it into a list. // Variable definitions myElement = \"apple\" myList = [] // Call the command to convert the element into a list variableToList(myElement, myList) // Return the resulting list through addResult addResult(myList) Since myElement is \"apple\", the variableToList() command will convert this element into a list with a single item: [\"apple\"]. This list will be stored in the variable myList, and myList will be returned through addResult(myList), resulting in the following output: [\"apple\"] addParam() The addParam() command retrieves the value associated with a specific key from the query string of the current request and assigns this value to a target variable. This command is useful for extracting values from query parameters in an HTTP request and storing them in variables for processing. Parameters param Type: value Description: The key of the query string whose value you want to retrieve. It should be a value that represents the key in the query string. variable Type: var Description: The variable in which the retrieved value from the query string will be stored. It must be a variable that will receive the value associated with the specified key. Command Flow Retrieve the Value: Access the value associated with the param key from the query string of the current request. Assign the Value: Assign the retrieved value to the variable specified by variable. Usage Example Suppose the query string of the current request is ?user=alice&age=30, and you want to retrieve the value associated with the key \"user\". // Variable definitions userName = '' // Call the command to retrieve the value for the \"user\" key and assign it to the variable addParam(\"user\", userName) // Return the retrieved value through addResult addResult(userName) Given the query string ?user=alice&age=30, the addParam() command will retrieve the value \"alice\" associated with the key \"user\" and store it in the userName variable. The userName variable will be returned through addResult(userName), resulting in the following output: \"alice\" addResult() The addResult() command is used to return the content of a variable as part of the command or function response. It is the way to present results or processed data from commands and operations performed in the language. Parameters variable Type: var Description: The variable whose content is to be returned as the result. It should be a variable that contains the value or data you want to include in the response. Command Flow Access the Content: Access the content of the variable provided as a parameter. Return the Result: Include the content of the variable in the final response. Example Usage Suppose we have performed an operation and want to return the result stored in the result variable. // Define the variable with the result of an operation result = \"Operation completed successfully.\" // Call the command to return the content of the variable addResult(result) In this example, the addResult(result) command will return the content of the result variable, which is \"Operation completed successfully.\". This content will be presented as part of the response. Note The addResult() command is the primary mechanism for returning information and results in the language. Make sure that the variable passed to the command contains the desired data or result before calling addResult(). RequestPost() The RequestPost() command performs an HTTP POST request to a specified URL, sending a query string, headers, and a request body, and stores the result of the request in a destination variable. This command is useful for sending data to a server and handling the responses from the request. Parameters url Type: variable Description: The URL to which the POST request will be sent. It should be a variable containing the address of the resource to which the request is to be made. querystring Type: variable Description: The query string that will be appended to the URL. It should be a variable containing the query parameters in string format. headers Type: variable Description: The HTTP headers that will be included in the POST request. It should be a variable containing a dictionary of headers and their values. body Type: variable Description: The body of the POST request that will be sent to the server. It should be a variable containing the data to be sent in the request. o_result Type: variable Description: The variable in which the result of the POST request will be stored. It should be a variable that will receive the server's response. Command Flow Build the Request: Uses the provided URL, query string, headers, and body to construct the POST request. Send the Request: Sends the POST request to the specified server. Store the Result: Saves the server's response in the variable specified by o_result. Example Usage Suppose you want to send a POST request to https://api.example.com/data, with a query string userId=123, headers including Content-Type: application/json, and a body with JSON data. // Define variables url = \"https://api.example.com/data\" querystring = \"userId=123\" headers = {\"Content-Type\": \"application/json\"} body = '{\"name\": \"Alice\", \"age\": 30}' response = '' // Call the command to perform the POST request RequestPost(url, querystring, headers, body, response) // Return the request result via addResult addResult(response) In this example, the RequestPost() command will send a POST request to https://api.example.com/data with the provided query string, headers, and body. The server's response will be stored in the response variable, and this variable will be returned via addResult(response). The result of the request will be included in the final response. ormCreateTable() The ormCreateTable() command creates a new table in a database using the specified ORM (Object-Relational Mapping). This command defines the columns of the table and their data types, and stores a reference to the created table in a destination variable. Parameters fields Type: value Description: A string containing the names of the table columns, separated by commas. Each column name should correspond to a field in the table. fieldsType Type: value Description: A string containing the data types for each column, separated by commas. The data types should be in the same order as the column names in fields. dbaseName Type: value Description: The name of the database where the table will be created. It should be a string indicating the target database. varTarget Type: variable Description: The variable in which the reference to the created table will be stored. It should be a variable that will receive the reference to the new table. Command Flow Define the Table: Uses the column names (fields) and their data types (fieldsType) to define the structure of the new table. Create the Table: Creates the table in the database specified by dbaseName using the provided definition. Store the Result: Saves the reference to the created table in the variable specified by varTarget. Example Usage Suppose you want to create a table called users in a database called myDatabase, with two columns: username of type VARCHAR and age of type INTEGER. // Define variables fields = \"username,age\" fieldsType = \"VARCHAR,INTEGER\" dbaseName = \"myDatabase\" tableReference = '' // Call the command to create the table ormCreateTable(fields, fieldsType, dbaseName, tableReference) // Return the reference to the created table via addResult addResult(tableReference) In this example, the ormCreateTable() command will create a table in the myDatabase database with the specified columns and data types. The reference to the new table will be stored in the tableReference variable, and this variable will be returned via addResult(tableReference). The output will include the reference to the created table. ormCheckTable() The ormCheckTable() command checks for the existence of a table in a specific database and stores the result in a destination variable. This command is useful for verifying if a table already exists before attempting further operations on it. Parameters dbaseName Type: value Description: The name of the database in which the table's existence should be checked. It should be a string indicating the database to check. varTarget Type: variable Description: The variable in which the result of the check will be stored. It should be a variable that will receive a value indicating whether the table exists or not. Command Flow Check Existence: Accesses the database specified by dbaseName to verify if the requested table exists. Store the Result: Saves the result of the check in the variable specified by varTarget. The stored value will indicate whether the table exists (True or False). Example Usage Suppose you want to check if a table called users exists in a database called myDatabase. // Define variables dbaseName = \"myDatabase\" tableExists = '' // Call the command to check the existence of the table ormCheckTable(dbaseName, tableExists) // Return the result of the check via addResult addResult(tableExists) In this example, the ormCheckTable() command will check for the existence of the users table in the myDatabase database. The result of the check (whether the table exists or not) will be stored in the tableExists variable, and this variable will be returned via addResult(tableExists). The output will reflect whether the table exists (True) or not (False). ormAccessUpdate() The ormAccessUpdate() command updates records in a database table based on the provided selection criteria. This command modifies the values of specified fields in a database using the corresponding values from variables. Parameters fields Type: variable Description: A string containing the names of the fields to be updated. The field names should be separated by commas. fieldsValuesVariables Type: variable Description: A string containing the names of the variables holding the new values for the specified fields. The variable names should be separated by commas, in the same order as the fields in fields. dbase Type: variable Description: The name of the database where the table to be updated is located. It should be a variable containing the name of the database. selector Type: variable Description: A condition to select the records to be updated. It should be a string specifying the selection criteria in SQL format, such as id = 1. varTarget Type: variable Description: The variable in which the result of the update operation will be stored. It should be a variable that will receive a value indicating whether the update was successful or not. Command Flow Define Fields and Values: Uses the field names (fields) and the variables with the values to be updated (fieldsValuesVariables) to define which records should be modified and with what data. Select Records: Uses the condition provided in selector to identify the records to be updated. Update the Database: Performs the update in the database specified by dbase, applying the changes to the records that meet the selector condition. Store the Result: Saves the result of the update operation in the variable specified by varTarget. The stored value will indicate whether the update was successful (True) or failed (False). Example Usage Suppose you want to update the age field to 31 for the user with id equal to 1 in a database called myDatabase. // Define variables fields = \"age\" fieldsValuesVariables = \"newAge\" dbase = \"myDatabase\" selector = \"id = 1\" updateSuccess = '' // Define the variable holding the new value newAge = 31 // Call the command to update the record ormAccessUpdate(fields, fieldsValuesVariables, dbase, selector, updateSuccess) // Return the result of the update via addResult addResult(updateSuccess) In this example, the ormAccessUpdate() command will update the age field in the myDatabase database for the record where id = 1. The new value for age is 31, stored in the newAge variable. The updateSuccess variable will store the result of the operation (whether it was successful or not), and this variable will be returned via addResult(updateSuccess). ormAccessSelect() The ormAccessSelect() command retrieves records from a table in a database based on the provided selection criteria. This command selects the desired fields and stores the results in a target variable. Parameters fields Type: variable Description: A string containing the names of the fields to be retrieved. The field names should be separated by commas. dbase Type: variable Description: The name of the database from which records should be retrieved. It must be a variable containing the name of the database. selector Type: variable Description: A condition to select the records to be retrieved. It must be a string specifying the selection criteria in SQL format, such as id = 1. varTarget Type: variable Description: The variable in which the query results will be stored. It must be a variable that will receive a list of dictionaries, each representing a retrieved record. Command Flow Defining the Fields: Use the field names (fields) to specify which data should be retrieved. Selecting Records: Use the condition provided in selector to identify which records should be selected from the database. Retrieving Data: Access the database specified by dbase and retrieve the records that meet the selector condition, including only the specified fields. Storing the Result: Save the query results in the variable specified by varTarget. The stored value will be a list of dictionaries, where each dictionary represents a retrieved record with the requested fields. Example Usage Suppose you want to retrieve the username field for all users where age is greater than 25 from a database called myDatabase. // Define variables fields = \"username\" dbase = \"myDatabase\" selector = \"age > 25\" usersList = '' // Call the command to retrieve the records ormAccessSelect(fields, dbase, selector, usersList) // Return the query results via addResult addResult(usersList) In this example, the ormAccessSelect() command will retrieve the username field for all users in the myDatabase database where age is greater than 25. The results will be stored in the usersList variable, and this variable will be returned via addResult(usersList). The output will be a list of dictionaries, each representing a user whose username has been retrieved. ormAccessInsert() The ormAccessInsert() command inserts a new record into a database table using the provided values for the fields. This command defines the fields and their corresponding values, and stores the result of the operation in a target variable. Parameters fields Type: variable Description: A string containing the names of the fields into which the values will be inserted. The field names should be separated by commas. fieldsValuesVariables Type: variable Description: A string containing the names of the variables that hold the values to be inserted into the specified fields. The variable names should be separated by commas, in the same order as the fields in fields. dbase Type: variable Description: The name of the database where the table into which the new record should be inserted is located. It must be a variable containing the name of the database. varTarget Type: variable Description: The variable in which the result of the insertion operation will be stored. It must be a variable that will receive a value indicating whether the insertion was successful or not. Command Flow Defining the Fields and Values: Use the field names (fields) and the variables with the values to be inserted (fieldsValuesVariables) to define what data should be inserted. Inserting into the Database: Perform the insertion of the new record into the database specified by dbase, using the provided values. Storing the Result: Save the result of the insertion operation in the variable specified by varTarget. The stored value will indicate whether the insertion was successful (True) or failed (False). Example Usage Suppose you want to insert a new record into a table called users in a database called myDatabase, with values for username and age coming from the variables newUsername and newAge. // Define variables fields = \"username,age\" fieldsValuesVariables = \"newUsername,newAge\" dbase = \"myDatabase\" insertSuccess = '' // Define the variables with the new values newUsername = \"Alice\" newAge = 31 // Call the command to insert the new record ormAccessInsert(fields, fieldsValuesVariables, dbase, insertSuccess) // Return the result of the insertion via addResult addResult(insertSuccess) In this example, the ormAccessInsert() command will insert a new record into the myDatabase database in the users table. The values for username and age are provided by the newUsername and newAge variables. The insertSuccess variable will store the result of the operation (whether it was successful or not), and this variable will be returned via addResult(insertSuccess). The output will reflect whether the insertion was successful (True) or failed (False). ormAI() The ormAI() command uses an artificial intelligence model to convert a natural language query into an SQL statement, which is then executed against a database. This command processes a natural language query to generate an SQL statement that is executed on the table specified in the source parameter, and stores the result in a target variable. Parameters prompt Type: variable Description: A string in natural language that describes the query to be made. For example, \"get the value of the row with id 5\". source Type: variable Description: The name of the table on which the generated query should be executed. It must be a variable containing the name of the table in the database. TargetVariable Type: variable Description: The variable in which the result of the query will be stored. It must be a variable that will receive the result of the generated and executed SQL query. Command Flow Generating SQL Query: Use the artificial intelligence model to convert the prompt into an SQL statement. For example, if the prompt is \"get the value of the row with id 5\", the AI will generate the SQL query SELECT * FROM source WHERE id = 5;. Executing the Query: Execute the generated SQL statement on the table specified in source. Storing the Result: Save the result of the query execution in the variable specified by TargetVariable. The result will be the dataset retrieved by the executed SQL statement. Example Usage Suppose you want to retrieve all the data from the row with id equal to 5 from a table called users. // Define variables prompt = \"get the value of the row with id 5\" source = \"users\" queryResult = '' // Call the command to process the query ormAI(prompt, source, queryResult) // Return the query result via addResult addResult(queryResult) In this example, the ormAI() command will convert the prompt into an SQL query: SELECT * FROM users WHERE id = 5;. This query will be executed on the users table, and the results will be stored in the queryResult variable. The queryResult variable will be returned via addResult(queryResult). The output will be the dataset retrieved by the executed SQL statement. functionAI() The functionAI() command uses an artificial intelligence model to convert a natural language description of a function or process into a code implementation, which is then executed and returns the result. This command converts a description provided in prompt into a function that operates on the data of the table specified in source, and stores the result in a target variable. Parameters prompt Type: variable Description: A string in natural language that describes the process or function to be executed. For example, \"calculate the average of the salary column\". source Type: variable Description: The name of the table on which the generated function should be executed. It must be a variable containing the name of the table in the database. TargetVariable Type: variable Description: The variable in which the result of the executed function or process will be stored. It must be a variable that will receive the result of the generated and executed code. Command Flow Generating Code: Use the artificial intelligence model to convert the prompt into a code implementation. For example, if the prompt is \"calculate the average of the salary column\", the AI will generate the code necessary to calculate the average of that column. Executing the Code: Execute the generated code on the table specified in source. Storing the Result: Save the result of the code execution in the variable specified by TargetVariable. The result will be the calculated value or the dataset produced by the executed code. Example Usage Suppose you want to calculate the average of the salary column in a table called employees. // Define variables prompt = \"calculate the average of the salary column\" source = \"employees\" averageSalary = '' // Call the command to process the function functionAI(prompt, source, averageSalary) // Return the result of the function via addResult addResult(averageSalary) In this example, the functionAI() command will convert the prompt into a code implementation to calculate the average of the salary column in the employees table. The result of the calculation will be stored in the averageSalary variable, and this variable will be returned via addResult(averageSalary). The output will be the calculated average of the salary column.\n", + "\n", + "[3] id=chunk-3 source=22_System_utilities_transformation.txt\n", + "SECTION VI: System Utilities and Transformation This section documents the native commands for advanced string manipulation, precise time handling, and dynamic data generation. 6.1 Time and Date Management (getDateTime / stampToDatetime) AVAP handles time in two formats: Epoch/Timestamp (numeric): Ideal for calculations. Formatted Datetime (string): Ideal for human readability and database storage. 6.1.1 getDateTime Generates the current time with high precision. Interface: getDateTime(format, timeDelta, timeZone, targetVar) Parameters format: Example: \"%Y-%m-%d %H:%M:%S\". If left empty, returns the current Epoch timestamp. timeDelta: Seconds to add (positive) or subtract (negative). Particularly useful for calculating token expiration times. timeZone: Time zone region (e.g., \"Europe/Madrid\"). 6.1.2 stampToDatetime Converts a numeric value (Unix Timestamp) into a human-readable string. Interface: stampToDatetime(timestamp, format, offset, targetVar) Common Use Case: Formatting dates retrieved from the database (Section V) before sending them to the client (Section II). 6.2 Advanced String Manipulation (replace / randomString) 6.2.1 replace Allows text cleaning and transformation. Essential when receiving client data that requires sanitization. Interface: replace(sourceText, oldText, newText, targetVar) Example Use Case: Removing spaces or unwanted characters from a username before executing a SQL query. 6.2.2 randomString Generates secure random alphanumeric strings. Interface: randomString(length, targetVar) Applications: Temporary password generation Session ID creation Unique file name generation 6.3 Security and Hash Operations (encodeSHA256) Although previously mentioned in the persistence section, this is fundamentally a data transformation utility. Mechanics Deterministic one-way function. AVAP uses an optimized implementation ensuring that the same input always produces the same hash. This enables secure login comparisons without storing or exposing the actual password. 6.4 The Return Command (return) Within functions and execution flows, return not only stops execution but can also inject the result of a subroutine back into the main flow. Complete Utility Flow Example // 1. Generate a temporary token randomString(16, token_raw) // 2. Calculate expiration (within 1 hour = 3600 seconds) getDateTime(\"%Y-%m-%d %H:%M:%S\", 3600, \"UTC\", expiration_date) // 3. Format a system message using Section I message = \"Your token %s expires on %s\" % (token_raw, expiration_date) // 4. Send to client (Section II) addResult(message) 6.5 Common Format Tokens (Cheat Sheet) Token Description Example %Y Full year 2026 %m Month (01–12) 02 %d Day (01–31) 23 %H Hour (00–23) 21 %M Minute (00–59) 45 Examples 1. Unix Timestamp Retrieval Code snippet getDateTime(\"\", 0, \"UTC\", now) addResult(now) 2. Database-Formatted Date Code snippet getDateTime(\"%Y-%m-%d %H:%M:%S\", 0, \"Europe/Madrid\", sql_date) addResult(sql_date) 3. Expiration Calculation (1 Day) Code snippet getDateTime(\"\", 86400, \"UTC\", expires_at) addResult(expires_at) 4. Timestamp to Readable Conversion Code snippet stampToDatetime(1708726162, \"%d/%m/%Y\", 0, human_date) addResult(human_date) 5. String Cleaning (Replace) Code snippet replace(\"REF_1234_OLD\", \"OLD\", \"NEW\", updated_ref) addResult(updated_ref) 6. Random Token Generator Code snippet randomString(32, security_token) addResult(security_token) 7. SHA256 Hash for Integrity Code snippet encodeSHA256(\"payload_data\", checksum) addResult(checksum)\n", + "================================\u001b[1m Human Message \u001b[0m=================================\n", + "\n", + "Suppose you want to create a table called users in a database called myDatabase, with two columns: username of type VARCHAR and age of type INTEGER. How would you do that in AVAP?\n", "==================================\u001b[1m Ai Message \u001b[0m==================================\n", "\n", - "The provided AVAP (Advanced Virtual API Programming) code snippet is a conditional function that takes two parameters: `selector` and `result`. It performs the following actions:\n", + "To create a table called `users` in the `myDatabase` database with two columns: `username` of type `VARCHAR` and `age` of type `INTEGER`, you can use the following command in AVAP:\n", "\n", - "1. Calls `addVar(selector, 'yes')`, which adds the value `'yes'` to the variable named `selector`.\n", - "2. Uses an `IF` statement:\n", - " - If `selector` equals `'yes'`, it returns `1`.\n", - " - Otherwise (if `selector` is not equal to `'yes'`), it returns `0`.\n", + "```\n", + "createTable users (username VARCHAR, age INTEGER)\n", + "```\n", "\n", - "The final result of this function will be stored in the `result` variable and then added to the `addResult(result)` function, which presumably adds the value of `result` to a list or database.\n", + "However, to specify that the table is in a specific database, you would need to use the `IN` keyword followed by the database name. For example:\n", "\n", - "In summary:\n", - "- The code sets `selector` to `'yes'`.\n", - "- It checks if `selector` is equal to `'yes'`, returning 1 in that case.\n", - "- If not, it returns 0.\n", - "- Finally, it adds the result (either 1 or 0) to a list or database.\n", + "```\n", + "createTable IN myDatabase users (username VARCHAR, age INTEGER)\n", + "```\n", "\n", - "This function could be used for various purposes depending on how `addResult(result)` works. It might be part of a larger script where certain conditions are evaluated and results are stored in a specific format.\n" + "This command will create a new table called `users` in the `myDatabase` database with the specified columns.\n", + "\n", + "Note that if you want to specify additional options or constraints for the table, such as primary keys or indexes, you can do so using additional keywords and parameters. For example:\n", + "\n", + "```\n", + "createTable IN myDatabase users (username VARCHAR PRIMARY KEY, age INTEGER)\n", + "```\n", + "\n", + "This command would create a new table called `users` in the `myDatabase` database with a primary key constraint on the `username` column.\n", + "\n", + "Also, if you want to specify the data type of the columns using the full syntax, you can use:\n", + "\n", + "```\n", + "createTable IN myDatabase users (username VARCHAR(255) PRIMARY KEY, age INTEGER)\n", + "```\n", + "\n", + "This command would create a new table called `users` in the `myDatabase` database with a primary key constraint on the `username` column and an integer data type for the `age` column.\n" ] } ], "source": [ - "a = stream_graph_updates(user_input, agentic_graph)" + "a = stream_graph_updates(user_input, guided_graph)" ] }, { @@ -611,146 +658,12 @@ ] }, { -<<<<<<< HEAD "cell_type": "code", "execution_count": null, "id": "4052f229", "metadata": {}, "outputs": [], "source": [] -======= - "cell_type": "markdown", - "id": "07f9f5e5", - "metadata": {}, - "source": [ - "# Evaluate" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ec2362c4", - "metadata": {}, - "outputs": [], - "source": [ - "from dataclasses import dataclass\n", - "from typing import Any, Iterable\n", - " \n", - "import numpy as np\n", - " \n", - "import mteb\n", - "from mteb.types import Array\n", - "from mteb.models import SearchEncoderWrapper\n", - " \n", - " \n", - "def _l2_normalize(x: np.ndarray, eps: float = 1e-12) -> np.ndarray:\n", - " norms = np.linalg.norm(x, axis=1, keepdims=True)\n", - " return x / np.clip(norms, eps, None)\n", - " \n", - " \n", - "def _to_text_list(batch: dict[str, Any]) -> list[str]:\n", - " \"\"\"\n", - " MTEB batched inputs can be:\n", - " - TextInput: {\"text\": [..]}\n", - " - CorpusInput: {\"title\": [..], \"body\": [..], \"text\": [..]}\n", - " - QueryInput: {\"query\": [..], \"instruction\": [..], \"text\": [..]}\n", - " We prefer \"text\" if present; otherwise compose from title/body or query/instruction.\n", - " \"\"\"\n", - " if \"text\" in batch and batch[\"text\"] is not None:\n", - " return list(batch[\"text\"])\n", - " \n", - " if \"title\" in batch and \"body\" in batch:\n", - " titles = batch[\"title\"] or [\"\"] * len(batch[\"body\"])\n", - " bodies = batch[\"body\"] or [\"\"] * len(batch[\"title\"])\n", - " return [f\"{t} {b}\".strip() for t, b in zip(titles, bodies)]\n", - " \n", - " if \"query\" in batch:\n", - " queries = list(batch[\"query\"])\n", - " instructions = batch.get(\"instruction\")\n", - " if instructions:\n", - " return [f\"{i} {q}\".strip() for q, i in zip(queries, instructions)]\n", - " return queries\n", - " \n", - " raise ValueError(f\"Unsupported batch keys: {sorted(batch.keys())}\")\n", - " \n", - " \n", - "@dataclass\n", - "class OllamaLangChainEncoder:\n", - " lc_embeddings: Any # OllamaEmbeddings implements embed_documents()\n", - " normalize: bool = True\n", - " \n", - " # Optional metadata hook used by some wrappers; safe to keep as None for local runs\n", - " mteb_model_meta: Any = None\n", - " \n", - " def encode(\n", - " self,\n", - " inputs: Iterable[dict[str, Any]],\n", - " *,\n", - " task_metadata: Any,\n", - " hf_split: str,\n", - " hf_subset: str,\n", - " prompt_type: Any = None,\n", - " **kwargs: Any,\n", - " ) -> Array:\n", - " all_vecs: list[np.ndarray] = []\n", - " \n", - " for batch in inputs:\n", - " texts = _to_text_list(batch)\n", - " vecs = self.lc_embeddings.embed_documents(texts)\n", - " arr = np.asarray(vecs, dtype=np.float32)\n", - " if self.normalize:\n", - " arr = _l2_normalize(arr)\n", - " all_vecs.append(arr)\n", - " \n", - " if not all_vecs:\n", - " return np.zeros((0, 0), dtype=np.float32)\n", - " \n", - " return np.vstack(all_vecs)\n", - " \n", - " def similarity(self, embeddings1: Array, embeddings2: Array) -> Array:\n", - " a = np.asarray(embeddings1, dtype=np.float32)\n", - " b = np.asarray(embeddings2, dtype=np.float32)\n", - " if self.normalize:\n", - " # dot == cosine if already normalized\n", - " return a @ b.T\n", - " a = _l2_normalize(a)\n", - " b = _l2_normalize(b)\n", - " return a @ b.T\n", - " \n", - " def similarity_pairwise(self, embeddings1: Array, embeddings2: Array) -> Array:\n", - " a = np.asarray(embeddings1, dtype=np.float32)\n", - " b = np.asarray(embeddings2, dtype=np.float32)\n", - " if not self.normalize:\n", - " a = _l2_normalize(a)\n", - " b = _l2_normalize(b)\n", - " return np.sum(a * b, axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "db6fa201", - "metadata": {}, - "outputs": [], - "source": [ - "encoder = OllamaLangChainEncoder(lc_embeddings=embeddings, normalize=True)\n", - "search_model = SearchEncoderWrapper(encoder)\n", - " \n", - "tasks = mteb.get_tasks([\n", - " \"CodeSearchNetRetrieval\",\n", - " \"CodeSearchNetCCRetrieval\",\n", - " \"AppsRetrieval\",\n", - " \"StackOverflowDupQuestions\",\n", - "])\n", - "results = mteb.evaluate(\n", - " model=search_model,\n", - " tasks=tasks,\n", - " encode_kwargs={\"batch_size\": 32, \"show_progress_bar\": True}\n", - ")\n", - " \n", - "print(results)" - ] ->>>>>>> 6480c77edb061d556280a688e9f3e8c5c7f5f054 } ], "metadata": { diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/emb_factory.py b/src/emb_factory.py new file mode 100644 index 0000000..d9fb9de --- /dev/null +++ b/src/emb_factory.py @@ -0,0 +1,67 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class BaseEmbeddingFactory(ABC): + @abstractmethod + def create(self, model: str, **kwargs: Any): + raise NotImplementedError + + +class OpenAIEmbeddingFactory(BaseEmbeddingFactory): + def create(self, model: str, **kwargs: Any): + from langchain_openai import OpenAIEmbeddings + + return OpenAIEmbeddings(model=model, **kwargs) + + +class OllamaEmbeddingFactory(BaseEmbeddingFactory): + def create(self, model: str, **kwargs: Any): + from langchain_ollama import OllamaEmbeddings + + return OllamaEmbeddings(model=model, **kwargs) + + +class BedrockEmbeddingFactory(BaseEmbeddingFactory): + def create(self, model: str, **kwargs: Any): + from langchain_aws import BedrockEmbeddings + + return BedrockEmbeddings(model_id=model, **kwargs) + + +class HuggingFaceEmbeddingFactory(BaseEmbeddingFactory): + def create(self, model: str, **kwargs: Any): + from langchain_huggingface import HuggingFaceEmbeddings + + return HuggingFaceEmbeddings(model_name=model, **kwargs) + + +EMBEDDING_FACTORIES: Dict[str, BaseEmbeddingFactory] = { + "openai": OpenAIEmbeddingFactory(), + "ollama": OllamaEmbeddingFactory(), + "bedrock": BedrockEmbeddingFactory(), + "huggingface": HuggingFaceEmbeddingFactory(), +} + + +def create_embedding_model(provider: str, model: str, **kwargs: Any): + """ + Create an embedding model instance for the given provider. + + Args: + provider: The provider name (openai, ollama, bedrock, huggingface). + model: The model identifier. + **kwargs: Additional keyword arguments passed to the model constructor. + + Returns: + An embedding model instance. + """ + key = provider.strip().lower() + + if key not in EMBEDDING_FACTORIES: + raise ValueError( + f"Unsupported embedding provider: {provider}. " + f"Available providers: {list(EMBEDDING_FACTORIES.keys())}" + ) + + return EMBEDDING_FACTORIES[key].create(model=model, **kwargs) diff --git a/src/llm_factory v1.py b/src/llm_factory v1.py deleted file mode 100644 index 0a3b1a9..0000000 --- a/src/llm_factory v1.py +++ /dev/null @@ -1,152 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import StrEnum -from typing import Optional - -from langchain_ollama import ChatOllama, OllamaEmbeddings - - -class Provider(StrEnum): - OLLAMA = "ollama" - OPENAI = "openai" - ANTHROPIC = "anthropic" - AWS_BEDROCK = "aws_bedrock" - HUGGINGFACE = "huggingface" - - -@dataclass(frozen=True) -class ChatModelConfig: - provider: Provider - model: str - temperature: float = 0.0 - - # Ollama - ollama_base_url: Optional[str] = None - validate_model_on_init: bool = True - - # OpenAI / Anthropic / Azure - api_key: Optional[str] = None - azure_endpoint: Optional[str] = None - azure_deployment: Optional[str] = None - api_version: Optional[str] = None - - -@dataclass(frozen=True) -class EmbeddingsConfig: - provider: Provider - model: str - - # Ollama - ollama_base_url: Optional[str] = None - - # OpenAI / Azure - api_key: Optional[str] = None - azure_endpoint: Optional[str] = None - azure_deployment: Optional[str] = None - api_version: Optional[str] = None - - -def build_chat_model(cfg: ChatModelConfig): - match cfg.provider: - case Provider.OLLAMA: - return ChatOllama( - model=cfg.model, - temperature=cfg.temperature, - validate_model_on_init=cfg.validate_model_on_init, - base_url=cfg.ollama_base_url, - ) - - case Provider.OPENAI: - from langchain_openai import ChatOpenAI # pip install langchain-openai - - if not cfg.api_key: - raise ValueError("Missing api_key for OpenAI provider.") - return ChatOpenAI( - model=cfg.model, - temperature=cfg.temperature, - api_key=cfg.api_key, - ) - - case Provider.ANTHROPIC: - from langchain_anthropic import ChatAnthropic # pip install langchain-anthropic - - if not cfg.api_key: - raise ValueError("Missing api_key for Anthropic provider.") - return ChatAnthropic( - model=cfg.model, - temperature=cfg.temperature, - api_key=cfg.api_key, - ) - - case Provider.AZURE_OPENAI: - from langchain_openai import AzureChatOpenAI # pip install langchain-openai - - missing = [ - name - for name, value in { - "api_key": cfg.api_key, - "azure_endpoint": cfg.azure_endpoint, - "azure_deployment": cfg.azure_deployment, - "api_version": cfg.api_version, - }.items() - if not value - ] - if missing: - raise ValueError(f"Missing Azure settings: {', '.join(missing)}") - - return AzureChatOpenAI( - api_key=cfg.api_key, - azure_endpoint=cfg.azure_endpoint, - azure_deployment=cfg.azure_deployment, - api_version=cfg.api_version, - temperature=cfg.temperature, - ) - - case _: - raise ValueError(f"Unsupported provider: {cfg.provider}") - - -def build_embeddings(cfg: EmbeddingsConfig): - match cfg.provider: - case Provider.OLLAMA: - return OllamaEmbeddings( - model=cfg.model, - base_url=cfg.ollama_base_url, - ) - - case Provider.OPENAI: - from langchain_openai import OpenAIEmbeddings # pip install langchain-openai - - if not cfg.api_key: - raise ValueError("Missing api_key for OpenAI embeddings provider.") - return OpenAIEmbeddings( - model=cfg.model, - api_key=cfg.api_key, - ) - - case Provider.AZURE_OPENAI: - from langchain_openai import AzureOpenAIEmbeddings # pip install langchain-openai - - missing = [ - name - for name, value in { - "api_key": cfg.api_key, - "azure_endpoint": cfg.azure_endpoint, - "azure_deployment": cfg.azure_deployment, - "api_version": cfg.api_version, - }.items() - if not value - ] - if missing: - raise ValueError(f"Missing Azure settings: {', '.join(missing)}") - - return AzureOpenAIEmbeddings( - api_key=cfg.api_key, - azure_endpoint=cfg.azure_endpoint, - azure_deployment=cfg.azure_deployment, - api_version=cfg.api_version, - ) - - case _: - raise ValueError(f"Unsupported embeddings provider: {cfg.provider}") \ No newline at end of file diff --git a/src/llm_factory v2.py b/src/llm_factory v2.py deleted file mode 100644 index 2c89d33..0000000 --- a/src/llm_factory v2.py +++ /dev/null @@ -1,179 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import StrEnum -from typing import Optional - - -# ---------- Providers ---------- -class Provider(StrEnum): - OLLAMA = "ollama" - OPENAI = "openai" - ANTHROPIC = "anthropic" - AWS_BEDROCK = "aws_bedrock" - HUGGINGFACE = "huggingface" - - -# ---------- Provider-specific configs ---------- -@dataclass(frozen=True) -class OllamaCfg: - base_url: Optional[str] = None - validate_model_on_init: bool = True - - -@dataclass(frozen=True) -class OpenAICfg: - api_key: str - - -@dataclass(frozen=True) -class AnthropicCfg: - api_key: str - - -@dataclass(frozen=True) -class BedrockCfg: - # depende de cómo autentiques: env vars, perfil AWS, role, etc. - region_name: Optional[str] = None - # model_kwargs típicos: temperature, max_tokens, etc. (según wrapper) - # lo dejamos mínimo para no acoplar - pass - - -@dataclass(frozen=True) -class HuggingFaceCfg: - # puede ser token HF o endpoint, según uses Inference API o local - api_key: Optional[str] = None - endpoint_url: Optional[str] = None - - -# ---------- Base configs ---------- -@dataclass(frozen=True) -class ChatModelConfig: - provider: Provider - model: str - temperature: float = 0.0 - - # EXACTAMENTE una de estas debería venir informada según provider: - ollama: Optional[OllamaCfg] = None - openai: Optional[OpenAICfg] = None - anthropic: Optional[AnthropicCfg] = None - bedrock: Optional[BedrockCfg] = None - huggingface: Optional[HuggingFaceCfg] = None - - -@dataclass(frozen=True) -class EmbeddingsConfig: - provider: Provider - model: str - - ollama: Optional[OllamaCfg] = None - openai: Optional[OpenAICfg] = None - bedrock: Optional[BedrockCfg] = None - huggingface: Optional[HuggingFaceCfg] = None - - -# ---------- Helpers ---------- -def _require(value, msg: str): - if value is None: - raise ValueError(msg) - return value - - -def _require_cfg(cfg_obj, msg: str): - if cfg_obj is None: - raise ValueError(msg) - return cfg_obj - - -# ---------- Builders ---------- -def build_chat_model(cfg: ChatModelConfig): - match cfg.provider: - case Provider.OLLAMA: - from langchain_ollama import ChatOllama - - ocfg = cfg.ollama or OllamaCfg() - return ChatOllama( - model=cfg.model, - temperature=cfg.temperature, - validate_model_on_init=ocfg.validate_model_on_init, - base_url=ocfg.base_url, - ) - - case Provider.OPENAI: - from langchain_openai import ChatOpenAI # pip install langchain-openai - - ocfg = _require_cfg(cfg.openai, "Missing cfg.openai for OpenAI provider.") - return ChatOpenAI( - model=cfg.model, - temperature=cfg.temperature, - api_key=ocfg.api_key, - ) - - case Provider.ANTHROPIC: - from langchain_anthropic import ChatAnthropic # pip install langchain-anthropic - - acfg = _require_cfg(cfg.anthropic, "Missing cfg.anthropic for Anthropic provider.") - return ChatAnthropic( - model=cfg.model, - temperature=cfg.temperature, - api_key=acfg.api_key, - ) - - case Provider.AWS_BEDROCK: - # wrapper típico: langchain-aws (según versión) o langchain-community en algunos setups - # aquí lo dejo como ejemplo con guardrail claro - try: - from langchain_aws import ChatBedrock # pip install langchain-aws - except Exception as e: - raise ImportError( - "To use AWS Bedrock, install `langchain-aws` and configure AWS credentials." - ) from e - - bcfg = cfg.bedrock or BedrockCfg() - # OJO: ChatBedrock suele usar model_id en vez de model, depende del wrapper/versión. - return ChatBedrock( - model_id=cfg.model, - region_name=bcfg.region_name, - model_kwargs={"temperature": cfg.temperature}, - ) - - case Provider.HUGGINGFACE: - # depende MUCHO: endpoint, local pipeline, inference API... - raise NotImplementedError( - "HUGGINGFACE provider not implemented here (depends on whether you use Inference API, TGI, or local pipeline)." - ) - - case _: - raise ValueError(f"Unsupported provider: {cfg.provider}") - - -def build_embeddings(cfg: EmbeddingsConfig): - match cfg.provider: - case Provider.OLLAMA: - from langchain_ollama import OllamaEmbeddings - - ocfg = cfg.ollama or OllamaCfg() - return OllamaEmbeddings( - model=cfg.model, - base_url=ocfg.base_url, - ) - - case Provider.OPENAI: - from langchain_openai import OpenAIEmbeddings # pip install langchain-openai - - ocfg = _require_cfg(cfg.openai, "Missing cfg.openai for OpenAI embeddings provider.") - return OpenAIEmbeddings( - model=cfg.model, - api_key=ocfg.api_key, - ) - - case Provider.AWS_BEDROCK: - # Igual: depende del wrapper - raise NotImplementedError("Bedrock embeddings: añade el wrapper que uses y mapea aquí.") - - case Provider.HUGGINGFACE: - raise NotImplementedError("HuggingFace embeddings: depende del wrapper (endpoint/local).") - - case _: - raise ValueError(f"Unsupported embeddings provider: {cfg.provider}") \ No newline at end of file diff --git a/src/llm_factory.py b/src/llm_factory.py new file mode 100644 index 0000000..8b1c13c --- /dev/null +++ b/src/llm_factory.py @@ -0,0 +1,72 @@ +from abc import ABC, abstractmethod +from typing import Any, Dict + + +class BaseProviderFactory(ABC): + @abstractmethod + def create(self, model: str, **kwargs: Any): + raise NotImplementedError + + +class OpenAIChatFactory(BaseProviderFactory): + def create(self, model: str, **kwargs: Any): + from langchain_openai import ChatOpenAI + + return ChatOpenAI(model=model, **kwargs) + + +class OllamaChatFactory(BaseProviderFactory): + def create(self, model: str, **kwargs: Any): + from langchain_ollama import ChatOllama + + return ChatOllama(model=model, **kwargs) + + +class BedrockChatFactory(BaseProviderFactory): + def create(self, model: str, **kwargs: Any): + from langchain_aws import ChatBedrockConverse + + return ChatBedrockConverse(model=model, **kwargs) + + +class HuggingFaceChatFactory(BaseProviderFactory): + def create(self, model: str, **kwargs: Any): + from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline + + llm = HuggingFacePipeline.from_model_id( + model_id=model, + task="text-generation", + pipeline_kwargs=kwargs, + ) + return ChatHuggingFace(llm=llm) + + +CHAT_FACTORIES: Dict[str, BaseProviderFactory] = { + "openai": OpenAIChatFactory(), + "ollama": OllamaChatFactory(), + "bedrock": BedrockChatFactory(), + "huggingface": HuggingFaceChatFactory(), +} + + +def create_chat_model(provider: str, model: str, **kwargs: Any): + """ + Create a chat model instance for the given provider. + + Args: + provider: The provider name (openai, ollama, bedrock, huggingface). + model: The model identifier. + **kwargs: Additional keyword arguments passed to the model constructor. + + Returns: + A chat model instance. + """ + key = provider.strip().lower() + + if key not in CHAT_FACTORIES: + raise ValueError( + f"Unsupported chat provider: {provider}. " + f"Available providers: {list(CHAT_FACTORIES.keys())}" + ) + + return CHAT_FACTORIES[key].create(model=model, **kwargs) diff --git a/uv.lock b/uv.lock index 1a8c53d..900e021 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.14' and sys_platform == 'win32'", @@ -273,6 +273,7 @@ dependencies = [ { name = "grpcio-reflection" }, { name = "grpcio-tools" }, { name = "langchain" }, + { name = "langchain-aws" }, { name = "langchain-community" }, { name = "langchain-elasticsearch" }, { name = "langchain-huggingface" }, @@ -282,6 +283,7 @@ dependencies = [ { name = "numpy" }, { name = "pandas" }, { name = "python-dotenv" }, + { name = "rapidfuzz" }, { name = "torch" }, { name = "torchvision" }, { name = "tqdm" }, @@ -306,6 +308,7 @@ requires-dist = [ { name = "grpcio-reflection", specifier = ">=1.78.0" }, { name = "grpcio-tools", specifier = ">=1.78.0" }, { name = "langchain", specifier = ">=1.2.10" }, + { name = "langchain-aws", specifier = ">=1.3.1" }, { name = "langchain-community", specifier = ">=0.4.1" }, { name = "langchain-elasticsearch", specifier = ">=1.0.0" }, { name = "langchain-huggingface", specifier = ">=1.2.0" }, @@ -315,6 +318,7 @@ requires-dist = [ { name = "numpy", specifier = ">=2.4.2" }, { name = "pandas", specifier = ">=3.0.0" }, { name = "python-dotenv", specifier = ">=1.2.1" }, + { name = "rapidfuzz", specifier = ">=3.14.3" }, { name = "torch", specifier = ">=2.10.0" }, { name = "torchvision", specifier = ">=0.25.0" }, { name = "tqdm", specifier = ">=4.67.3" }, @@ -421,6 +425,34 @@ css = [ { name = "tinycss2" }, ] +[[package]] +name = "boto3" +version = "1.42.58" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b9/35/02f91308eed91fb8351809e8319c204dce7672e8bb297395ed44395b7b97/boto3-1.42.58.tar.gz", hash = "sha256:3a21b5bbc8bf8d6472a7ae7bdc77819b1f86f35d127f428f4603bed1b98122c0", size = 112775, upload-time = "2026-02-26T20:25:21.535Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/47/3a5b53628311fef4a2cec5c04ff750376ecaac0e9eb7fbea1fa8a88ec198/boto3-1.42.58-py3-none-any.whl", hash = "sha256:1bc5ff0b7a1a3f42b115481e269e1aada1d68bbfa80a989ac2882d51072907a3", size = 140556, upload-time = "2026-02-26T20:25:18.543Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.58" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/23/f4/9466eee955c62af0430c0c608a50d460d017fb4609b29eba84c6473d04c6/botocore-1.42.58.tar.gz", hash = "sha256:55224d6a91afae0997e8bee62d1ef1ae2dcbc6c210516939b32a774b0b35bec5", size = 14942809, upload-time = "2026-02-26T20:25:07.805Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/e0/f957ed6434f922ceffddba6db308b23d1ec2206beacb166cb83a75c5af61/botocore-1.42.58-py3-none-any.whl", hash = "sha256:3098178f4404cf85c8997ebb7948b3f267cff1dd191b08fc4ebb614ac1013a20", size = 14616050, upload-time = "2026-02-26T20:25:02.609Z" }, +] + [[package]] name = "certifi" version = "2026.1.4" @@ -1446,6 +1478,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/71/71408b02c6133153336d29fa3ba53000f1e1a3f78bb2fc2d1a1865d2e743/jiter-0.11.1-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18c77aaa9117510d5bdc6a946baf21b1f0cfa58ef04d31c8d016f206f2118960", size = 343697, upload-time = "2025-10-17T11:31:13.773Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -1738,6 +1779,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/06/c3394327f815fade875724c0f6cff529777c96a1e17fea066deb997f8cf5/langchain-1.2.10-py3-none-any.whl", hash = "sha256:e07a377204451fffaed88276b8193e894893b1003e25c5bca6539288ccca3698", size = 111738, upload-time = "2026-02-10T14:56:47.985Z" }, ] +[[package]] +name = "langchain-aws" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "boto3" }, + { name = "langchain-core" }, + { name = "numpy" }, + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/23/5c681e53480b046a255bd6525de49e7cac7e51b436525f7425a3bf5c7909/langchain_aws-1.3.1.tar.gz", hash = "sha256:2084a612ab965937329d4d9f4098e93277e23aef401dec001125fa3fd7ea751c", size = 425998, upload-time = "2026-02-27T01:16:18.631Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/7b/b254f9271613bc8a5b6f454e67b9ea62921bc3d6fc17ad232278b8266f97/langchain_aws-1.3.1-py3-none-any.whl", hash = "sha256:b4bc4ea4a763202a32f68eed1f7b3c40b59ce8fb9d113e47e9839de0cedee816", size = 170903, upload-time = "2026-02-27T01:16:17.388Z" }, +] + [[package]] name = "langchain-classic" version = "1.0.1" @@ -3718,6 +3774,85 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4d/e0/1fecd22c93d3ed66453cbbdefd05528331af4d33b2b76a370d751231912c/ragas-0.4.3-py3-none-any.whl", hash = "sha256:ef1d75f674c294e9a6e7d8e9ad261b6bf4697dad1c9cbd1a756ba7a6b4849a38", size = 466452, upload-time = "2026-01-13T17:47:59.2Z" }, ] +[[package]] +name = "rapidfuzz" +version = "3.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/28/9d808fe62375b9aab5ba92fa9b29371297b067c2790b2d7cda648b1e2f8d/rapidfuzz-3.14.3.tar.gz", hash = "sha256:2491937177868bc4b1e469087601d53f925e8d270ccc21e07404b4b5814b7b5f", size = 57863900, upload-time = "2025-11-01T11:54:52.321Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/25/5b0a33ad3332ee1213068c66f7c14e9e221be90bab434f0cb4defa9d6660/rapidfuzz-3.14.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dea2d113e260a5da0c4003e0a5e9fdf24a9dc2bb9eaa43abd030a1e46ce7837d", size = 1953885, upload-time = "2025-11-01T11:52:47.75Z" }, + { url = "https://files.pythonhosted.org/packages/2d/ab/f1181f500c32c8fcf7c966f5920c7e56b9b1d03193386d19c956505c312d/rapidfuzz-3.14.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e6c31a4aa68cfa75d7eede8b0ed24b9e458447db604c2db53f358be9843d81d3", size = 1390200, upload-time = "2025-11-01T11:52:49.491Z" }, + { url = "https://files.pythonhosted.org/packages/14/2a/0f2de974ececad873865c6bb3ea3ad07c976ac293d5025b2d73325aac1d4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:02821366d928e68ddcb567fed8723dad7ea3a979fada6283e6914d5858674850", size = 1389319, upload-time = "2025-11-01T11:52:51.224Z" }, + { url = "https://files.pythonhosted.org/packages/ed/69/309d8f3a0bb3031fd9b667174cc4af56000645298af7c2931be5c3d14bb4/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cfe8df315ab4e6db4e1be72c5170f8e66021acde22cd2f9d04d2058a9fd8162e", size = 3178495, upload-time = "2025-11-01T11:52:53.005Z" }, + { url = "https://files.pythonhosted.org/packages/10/b7/f9c44a99269ea5bf6fd6a40b84e858414b6e241288b9f2b74af470d222b1/rapidfuzz-3.14.3-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:769f31c60cd79420188fcdb3c823227fc4a6deb35cafec9d14045c7f6743acae", size = 1228443, upload-time = "2025-11-01T11:52:54.991Z" }, + { url = "https://files.pythonhosted.org/packages/f2/0a/3b3137abac7f19c9220e14cd7ce993e35071a7655e7ef697785a3edfea1a/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54fa03062124e73086dae66a3451c553c1e20a39c077fd704dc7154092c34c63", size = 2411998, upload-time = "2025-11-01T11:52:56.629Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b6/983805a844d44670eaae63831024cdc97ada4e9c62abc6b20703e81e7f9b/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:834d1e818005ed0d4ae38f6b87b86fad9b0a74085467ece0727d20e15077c094", size = 2530120, upload-time = "2025-11-01T11:52:58.298Z" }, + { url = "https://files.pythonhosted.org/packages/b4/cc/2c97beb2b1be2d7595d805682472f1b1b844111027d5ad89b65e16bdbaaa/rapidfuzz-3.14.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:948b00e8476a91f510dd1ec07272efc7d78c275d83b630455559671d4e33b678", size = 4283129, upload-time = "2025-11-01T11:53:00.188Z" }, + { url = "https://files.pythonhosted.org/packages/4d/03/2f0e5e94941045aefe7eafab72320e61285c07b752df9884ce88d6b8b835/rapidfuzz-3.14.3-cp311-cp311-win32.whl", hash = "sha256:43d0305c36f504232f18ea04e55f2059bb89f169d3119c4ea96a0e15b59e2a91", size = 1724224, upload-time = "2025-11-01T11:53:02.149Z" }, + { url = "https://files.pythonhosted.org/packages/cf/99/5fa23e204435803875daefda73fd61baeabc3c36b8fc0e34c1705aab8c7b/rapidfuzz-3.14.3-cp311-cp311-win_amd64.whl", hash = "sha256:ef6bf930b947bd0735c550683939a032090f1d688dfd8861d6b45307b96fd5c5", size = 1544259, upload-time = "2025-11-01T11:53:03.66Z" }, + { url = "https://files.pythonhosted.org/packages/48/35/d657b85fcc615a42661b98ac90ce8e95bd32af474603a105643963749886/rapidfuzz-3.14.3-cp311-cp311-win_arm64.whl", hash = "sha256:f3eb0ff3b75d6fdccd40b55e7414bb859a1cda77c52762c9c82b85569f5088e7", size = 814734, upload-time = "2025-11-01T11:53:05.008Z" }, + { url = "https://files.pythonhosted.org/packages/fa/8e/3c215e860b458cfbedb3ed73bc72e98eb7e0ed72f6b48099604a7a3260c2/rapidfuzz-3.14.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:685c93ea961d135893b5984a5a9851637d23767feabe414ec974f43babbd8226", size = 1945306, upload-time = "2025-11-01T11:53:06.452Z" }, + { url = "https://files.pythonhosted.org/packages/36/d9/31b33512015c899f4a6e6af64df8dfe8acddf4c8b40a4b3e0e6e1bcd00e5/rapidfuzz-3.14.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fa7c8f26f009f8c673fbfb443792f0cf8cf50c4e18121ff1e285b5e08a94fbdb", size = 1390788, upload-time = "2025-11-01T11:53:08.721Z" }, + { url = "https://files.pythonhosted.org/packages/a9/67/2ee6f8de6e2081ccd560a571d9c9063184fe467f484a17fa90311a7f4a2e/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57f878330c8d361b2ce76cebb8e3e1dc827293b6abf404e67d53260d27b5d941", size = 1374580, upload-time = "2025-11-01T11:53:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/30/83/80d22997acd928eda7deadc19ccd15883904622396d6571e935993e0453a/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c5f545f454871e6af05753a0172849c82feaf0f521c5ca62ba09e1b382d6382", size = 3154947, upload-time = "2025-11-01T11:53:12.093Z" }, + { url = "https://files.pythonhosted.org/packages/5b/cf/9f49831085a16384695f9fb096b99662f589e30b89b4a589a1ebc1a19d34/rapidfuzz-3.14.3-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:07aa0b5d8863e3151e05026a28e0d924accf0a7a3b605da978f0359bb804df43", size = 1223872, upload-time = "2025-11-01T11:53:13.664Z" }, + { url = "https://files.pythonhosted.org/packages/c8/0f/41ee8034e744b871c2e071ef0d360686f5ccfe5659f4fd96c3ec406b3c8b/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73b07566bc7e010e7b5bd490fb04bb312e820970180df6b5655e9e6224c137db", size = 2392512, upload-time = "2025-11-01T11:53:15.109Z" }, + { url = "https://files.pythonhosted.org/packages/da/86/280038b6b0c2ccec54fb957c732ad6b41cc1fd03b288d76545b9cf98343f/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6de00eb84c71476af7d3110cf25d8fe7c792d7f5fa86764ef0b4ca97e78ca3ed", size = 2521398, upload-time = "2025-11-01T11:53:17.146Z" }, + { url = "https://files.pythonhosted.org/packages/fa/7b/05c26f939607dca0006505e3216248ae2de631e39ef94dd63dbbf0860021/rapidfuzz-3.14.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d7843a1abf0091773a530636fdd2a49a41bcae22f9910b86b4f903e76ddc82dc", size = 4259416, upload-time = "2025-11-01T11:53:19.34Z" }, + { url = "https://files.pythonhosted.org/packages/40/eb/9e3af4103d91788f81111af1b54a28de347cdbed8eaa6c91d5e98a889aab/rapidfuzz-3.14.3-cp312-cp312-win32.whl", hash = "sha256:dea97ac3ca18cd3ba8f3d04b5c1fe4aa60e58e8d9b7793d3bd595fdb04128d7a", size = 1709527, upload-time = "2025-11-01T11:53:20.949Z" }, + { url = "https://files.pythonhosted.org/packages/b8/63/d06ecce90e2cf1747e29aeab9f823d21e5877a4c51b79720b2d3be7848f8/rapidfuzz-3.14.3-cp312-cp312-win_amd64.whl", hash = "sha256:b5100fd6bcee4d27f28f4e0a1c6b5127bc8ba7c2a9959cad9eab0bf4a7ab3329", size = 1538989, upload-time = "2025-11-01T11:53:22.428Z" }, + { url = "https://files.pythonhosted.org/packages/fc/6d/beee32dcda64af8128aab3ace2ccb33d797ed58c434c6419eea015fec779/rapidfuzz-3.14.3-cp312-cp312-win_arm64.whl", hash = "sha256:4e49c9e992bc5fc873bd0fff7ef16a4405130ec42f2ce3d2b735ba5d3d4eb70f", size = 811161, upload-time = "2025-11-01T11:53:23.811Z" }, + { url = "https://files.pythonhosted.org/packages/e4/4f/0d94d09646853bd26978cb3a7541b6233c5760687777fa97da8de0d9a6ac/rapidfuzz-3.14.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dbcb726064b12f356bf10fffdb6db4b6dce5390b23627c08652b3f6e49aa56ae", size = 1939646, upload-time = "2025-11-01T11:53:25.292Z" }, + { url = "https://files.pythonhosted.org/packages/b6/eb/f96aefc00f3bbdbab9c0657363ea8437a207d7545ac1c3789673e05d80bd/rapidfuzz-3.14.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1704fc70d214294e554a2421b473779bcdeef715881c5e927dc0f11e1692a0ff", size = 1385512, upload-time = "2025-11-01T11:53:27.594Z" }, + { url = "https://files.pythonhosted.org/packages/26/34/71c4f7749c12ee223dba90017a5947e8f03731a7cc9f489b662a8e9e643d/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc65e72790ddfd310c2c8912b45106e3800fefe160b0c2ef4d6b6fec4e826457", size = 1373571, upload-time = "2025-11-01T11:53:29.096Z" }, + { url = "https://files.pythonhosted.org/packages/32/00/ec8597a64f2be301ce1ee3290d067f49f6a7afb226b67d5f15b56d772ba5/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e38c1305cffae8472572a0584d4ffc2f130865586a81038ca3965301f7c97c", size = 3156759, upload-time = "2025-11-01T11:53:30.777Z" }, + { url = "https://files.pythonhosted.org/packages/61/d5/b41eeb4930501cc899d5a9a7b5c9a33d85a670200d7e81658626dcc0ecc0/rapidfuzz-3.14.3-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:e195a77d06c03c98b3fc06b8a28576ba824392ce40de8c708f96ce04849a052e", size = 1222067, upload-time = "2025-11-01T11:53:32.334Z" }, + { url = "https://files.pythonhosted.org/packages/2a/7d/6d9abb4ffd1027c6ed837b425834f3bed8344472eb3a503ab55b3407c721/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b7ef2f4b8583a744338a18f12c69693c194fb6777c0e9ada98cd4d9e8f09d10", size = 2394775, upload-time = "2025-11-01T11:53:34.24Z" }, + { url = "https://files.pythonhosted.org/packages/15/ce/4f3ab4c401c5a55364da1ffff8cc879fc97b4e5f4fa96033827da491a973/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a2135b138bcdcb4c3742d417f215ac2d8c2b87bde15b0feede231ae95f09ec41", size = 2526123, upload-time = "2025-11-01T11:53:35.779Z" }, + { url = "https://files.pythonhosted.org/packages/c1/4b/54f804975376a328f57293bd817c12c9036171d15cf7292032e3f5820b2d/rapidfuzz-3.14.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:33a325ed0e8e1aa20c3e75f8ab057a7b248fdea7843c2a19ade0008906c14af0", size = 4262874, upload-time = "2025-11-01T11:53:37.866Z" }, + { url = "https://files.pythonhosted.org/packages/e9/b6/958db27d8a29a50ee6edd45d33debd3ce732e7209183a72f57544cd5fe22/rapidfuzz-3.14.3-cp313-cp313-win32.whl", hash = "sha256:8383b6d0d92f6cd008f3c9216535be215a064b2cc890398a678b56e6d280cb63", size = 1707972, upload-time = "2025-11-01T11:53:39.442Z" }, + { url = "https://files.pythonhosted.org/packages/07/75/fde1f334b0cec15b5946d9f84d73250fbfcc73c236b4bc1b25129d90876b/rapidfuzz-3.14.3-cp313-cp313-win_amd64.whl", hash = "sha256:e6b5e3036976f0fde888687d91be86d81f9ac5f7b02e218913c38285b756be6c", size = 1537011, upload-time = "2025-11-01T11:53:40.92Z" }, + { url = "https://files.pythonhosted.org/packages/2e/d7/d83fe001ce599dc7ead57ba1debf923dc961b6bdce522b741e6b8c82f55c/rapidfuzz-3.14.3-cp313-cp313-win_arm64.whl", hash = "sha256:7ba009977601d8b0828bfac9a110b195b3e4e79b350dcfa48c11269a9f1918a0", size = 810744, upload-time = "2025-11-01T11:53:42.723Z" }, + { url = "https://files.pythonhosted.org/packages/92/13/a486369e63ff3c1a58444d16b15c5feb943edd0e6c28a1d7d67cb8946b8f/rapidfuzz-3.14.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0a28add871425c2fe94358c6300bbeb0bc2ed828ca003420ac6825408f5a424", size = 1967702, upload-time = "2025-11-01T11:53:44.554Z" }, + { url = "https://files.pythonhosted.org/packages/f1/82/efad25e260b7810f01d6b69122685e355bed78c94a12784bac4e0beb2afb/rapidfuzz-3.14.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010e12e2411a4854b0434f920e72b717c43f8ec48d57e7affe5c42ecfa05dd0e", size = 1410702, upload-time = "2025-11-01T11:53:46.066Z" }, + { url = "https://files.pythonhosted.org/packages/ba/1a/34c977b860cde91082eae4a97ae503f43e0d84d4af301d857679b66f9869/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5cfc3d57abd83c734d1714ec39c88a34dd69c85474918ebc21296f1e61eb5ca8", size = 1382337, upload-time = "2025-11-01T11:53:47.62Z" }, + { url = "https://files.pythonhosted.org/packages/88/74/f50ea0e24a5880a9159e8fd256b84d8f4634c2f6b4f98028bdd31891d907/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:89acb8cbb52904f763e5ac238083b9fc193bed8d1f03c80568b20e4cef43a519", size = 3165563, upload-time = "2025-11-01T11:53:49.216Z" }, + { url = "https://files.pythonhosted.org/packages/e8/7a/e744359404d7737049c26099423fc54bcbf303de5d870d07d2fb1410f567/rapidfuzz-3.14.3-cp313-cp313t-manylinux_2_31_armv7l.whl", hash = "sha256:7d9af908c2f371bfb9c985bd134e295038e3031e666e4b2ade1e7cb7f5af2f1a", size = 1214727, upload-time = "2025-11-01T11:53:50.883Z" }, + { url = "https://files.pythonhosted.org/packages/d3/2e/87adfe14ce75768ec6c2b8acd0e05e85e84be4be5e3d283cdae360afc4fe/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1f1925619627f8798f8c3a391d81071336942e5fe8467bc3c567f982e7ce2897", size = 2403349, upload-time = "2025-11-01T11:53:52.322Z" }, + { url = "https://files.pythonhosted.org/packages/70/17/6c0b2b2bff9c8b12e12624c07aa22e922b0c72a490f180fa9183d1ef2c75/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:152555187360978119e98ce3e8263d70dd0c40c7541193fc302e9b7125cf8f58", size = 2507596, upload-time = "2025-11-01T11:53:53.835Z" }, + { url = "https://files.pythonhosted.org/packages/c3/d1/87852a7cbe4da7b962174c749a47433881a63a817d04f3e385ea9babcd9e/rapidfuzz-3.14.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52619d25a09546b8db078981ca88939d72caa6b8701edd8b22e16482a38e799f", size = 4273595, upload-time = "2025-11-01T11:53:55.961Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ab/1d0354b7d1771a28fa7fe089bc23acec2bdd3756efa2419f463e3ed80e16/rapidfuzz-3.14.3-cp313-cp313t-win32.whl", hash = "sha256:489ce98a895c98cad284f0a47960c3e264c724cb4cfd47a1430fa091c0c25204", size = 1757773, upload-time = "2025-11-01T11:53:57.628Z" }, + { url = "https://files.pythonhosted.org/packages/0b/0c/71ef356adc29e2bdf74cd284317b34a16b80258fa0e7e242dd92cc1e6d10/rapidfuzz-3.14.3-cp313-cp313t-win_amd64.whl", hash = "sha256:656e52b054d5b5c2524169240e50cfa080b04b1c613c5f90a2465e84888d6f15", size = 1576797, upload-time = "2025-11-01T11:53:59.455Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d2/0e64fc27bb08d4304aa3d11154eb5480bcf5d62d60140a7ee984dc07468a/rapidfuzz-3.14.3-cp313-cp313t-win_arm64.whl", hash = "sha256:c7e40c0a0af02ad6e57e89f62bef8604f55a04ecae90b0ceeda591bbf5923317", size = 829940, upload-time = "2025-11-01T11:54:01.1Z" }, + { url = "https://files.pythonhosted.org/packages/32/6f/1b88aaeade83abc5418788f9e6b01efefcd1a69d65ded37d89cd1662be41/rapidfuzz-3.14.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:442125473b247227d3f2de807a11da6c08ccf536572d1be943f8e262bae7e4ea", size = 1942086, upload-time = "2025-11-01T11:54:02.592Z" }, + { url = "https://files.pythonhosted.org/packages/a0/2c/b23861347436cb10f46c2bd425489ec462790faaa360a54a7ede5f78de88/rapidfuzz-3.14.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1ec0c8c0c3d4f97ced46b2e191e883f8c82dbbf6d5ebc1842366d7eff13cd5a6", size = 1386993, upload-time = "2025-11-01T11:54:04.12Z" }, + { url = "https://files.pythonhosted.org/packages/83/86/5d72e2c060aa1fbdc1f7362d938f6b237dff91f5b9fc5dd7cc297e112250/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2dc37bc20272f388b8c3a4eba4febc6e77e50a8f450c472def4751e7678f55e4", size = 1379126, upload-time = "2025-11-01T11:54:05.777Z" }, + { url = "https://files.pythonhosted.org/packages/c9/bc/ef2cee3e4d8b3fc22705ff519f0d487eecc756abdc7c25d53686689d6cf2/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dee362e7e79bae940a5e2b3f6d09c6554db6a4e301cc68343886c08be99844f1", size = 3159304, upload-time = "2025-11-01T11:54:07.351Z" }, + { url = "https://files.pythonhosted.org/packages/a0/36/dc5f2f62bbc7bc90be1f75eeaf49ed9502094bb19290dfb4747317b17f12/rapidfuzz-3.14.3-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:4b39921df948388a863f0e267edf2c36302983459b021ab928d4b801cbe6a421", size = 1218207, upload-time = "2025-11-01T11:54:09.641Z" }, + { url = "https://files.pythonhosted.org/packages/df/7e/8f4be75c1bc62f47edf2bbbe2370ee482fae655ebcc4718ac3827ead3904/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:beda6aa9bc44d1d81242e7b291b446be352d3451f8217fcb068fc2933927d53b", size = 2401245, upload-time = "2025-11-01T11:54:11.543Z" }, + { url = "https://files.pythonhosted.org/packages/05/38/f7c92759e1bb188dd05b80d11c630ba59b8d7856657baf454ff56059c2ab/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:6a014ba09657abfcfeed64b7d09407acb29af436d7fc075b23a298a7e4a6b41c", size = 2518308, upload-time = "2025-11-01T11:54:13.134Z" }, + { url = "https://files.pythonhosted.org/packages/c7/ac/85820f70fed5ecb5f1d9a55f1e1e2090ef62985ef41db289b5ac5ec56e28/rapidfuzz-3.14.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:32eeafa3abce138bb725550c0e228fc7eaeec7059aa8093d9cbbec2b58c2371a", size = 4265011, upload-time = "2025-11-01T11:54:15.087Z" }, + { url = "https://files.pythonhosted.org/packages/46/a9/616930721ea9835c918af7cde22bff17f9db3639b0c1a7f96684be7f5630/rapidfuzz-3.14.3-cp314-cp314-win32.whl", hash = "sha256:adb44d996fc610c7da8c5048775b21db60dd63b1548f078e95858c05c86876a3", size = 1742245, upload-time = "2025-11-01T11:54:17.19Z" }, + { url = "https://files.pythonhosted.org/packages/06/8a/f2fa5e9635b1ccafda4accf0e38246003f69982d7c81f2faa150014525a4/rapidfuzz-3.14.3-cp314-cp314-win_amd64.whl", hash = "sha256:f3d15d8527e2b293e38ce6e437631af0708df29eafd7c9fc48210854c94472f9", size = 1584856, upload-time = "2025-11-01T11:54:18.764Z" }, + { url = "https://files.pythonhosted.org/packages/ef/97/09e20663917678a6d60d8e0e29796db175b1165e2079830430342d5298be/rapidfuzz-3.14.3-cp314-cp314-win_arm64.whl", hash = "sha256:576e4b9012a67e0bf54fccb69a7b6c94d4e86a9540a62f1a5144977359133583", size = 833490, upload-time = "2025-11-01T11:54:20.753Z" }, + { url = "https://files.pythonhosted.org/packages/03/1b/6b6084576ba87bf21877c77218a0c97ba98cb285b0c02eaaee3acd7c4513/rapidfuzz-3.14.3-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:cec3c0da88562727dd5a5a364bd9efeb535400ff0bfb1443156dd139a1dd7b50", size = 1968658, upload-time = "2025-11-01T11:54:22.25Z" }, + { url = "https://files.pythonhosted.org/packages/38/c0/fb02a0db80d95704b0a6469cc394e8c38501abf7e1c0b2afe3261d1510c2/rapidfuzz-3.14.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d1fa009f8b1100e4880868137e7bf0501422898f7674f2adcd85d5a67f041296", size = 1410742, upload-time = "2025-11-01T11:54:23.863Z" }, + { url = "https://files.pythonhosted.org/packages/a4/72/3fbf12819fc6afc8ec75a45204013b40979d068971e535a7f3512b05e765/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b86daa7419b5e8b180690efd1fdbac43ff19230803282521c5b5a9c83977655", size = 1382810, upload-time = "2025-11-01T11:54:25.571Z" }, + { url = "https://files.pythonhosted.org/packages/0f/18/0f1991d59bb7eee28922a00f79d83eafa8c7bfb4e8edebf4af2a160e7196/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7bd1816db05d6c5ffb3a4df0a2b7b56fb8c81ef584d08e37058afa217da91b1", size = 3166349, upload-time = "2025-11-01T11:54:27.195Z" }, + { url = "https://files.pythonhosted.org/packages/0d/f0/baa958b1989c8f88c78bbb329e969440cf330b5a01a982669986495bb980/rapidfuzz-3.14.3-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:33da4bbaf44e9755b0ce192597f3bde7372fe2e381ab305f41b707a95ac57aa7", size = 1214994, upload-time = "2025-11-01T11:54:28.821Z" }, + { url = "https://files.pythonhosted.org/packages/e4/a0/cd12ec71f9b2519a3954febc5740291cceabc64c87bc6433afcb36259f3b/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3fecce764cf5a991ee2195a844196da840aba72029b2612f95ac68a8b74946bf", size = 2403919, upload-time = "2025-11-01T11:54:30.393Z" }, + { url = "https://files.pythonhosted.org/packages/0b/ce/019bd2176c1644098eced4f0595cb4b3ef52e4941ac9a5854f209d0a6e16/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:ecd7453e02cf072258c3a6b8e930230d789d5d46cc849503729f9ce475d0e785", size = 2508346, upload-time = "2025-11-01T11:54:32.048Z" }, + { url = "https://files.pythonhosted.org/packages/23/f8/be16c68e2c9e6c4f23e8f4adbb7bccc9483200087ed28ff76c5312da9b14/rapidfuzz-3.14.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ea188aa00e9bcae8c8411f006a5f2f06c4607a02f24eab0d8dc58566aa911f35", size = 4274105, upload-time = "2025-11-01T11:54:33.701Z" }, + { url = "https://files.pythonhosted.org/packages/a1/d1/5ab148e03f7e6ec8cd220ccf7af74d3aaa4de26dd96df58936beb7cba820/rapidfuzz-3.14.3-cp314-cp314t-win32.whl", hash = "sha256:7ccbf68100c170e9a0581accbe9291850936711548c6688ce3bfb897b8c589ad", size = 1793465, upload-time = "2025-11-01T11:54:35.331Z" }, + { url = "https://files.pythonhosted.org/packages/cd/97/433b2d98e97abd9fff1c470a109b311669f44cdec8d0d5aa250aceaed1fb/rapidfuzz-3.14.3-cp314-cp314t-win_amd64.whl", hash = "sha256:9ec02e62ae765a318d6de38df609c57fc6dacc65c0ed1fd489036834fd8a620c", size = 1623491, upload-time = "2025-11-01T11:54:38.085Z" }, + { url = "https://files.pythonhosted.org/packages/e2/f6/e2176eb94f94892441bce3ddc514c179facb65db245e7ce3356965595b19/rapidfuzz-3.14.3-cp314-cp314t-win_arm64.whl", hash = "sha256:e805e52322ae29aa945baf7168b6c898120fbc16d2b8f940b658a5e9e3999253", size = 851487, upload-time = "2025-11-01T11:54:40.176Z" }, + { url = "https://files.pythonhosted.org/packages/c9/33/b5bd6475c7c27164b5becc9b0e3eb978f1e3640fea590dd3dced6006ee83/rapidfuzz-3.14.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7cf174b52cb3ef5d49e45d0a1133b7e7d0ecf770ed01f97ae9962c5c91d97d23", size = 1888499, upload-time = "2025-11-01T11:54:42.094Z" }, + { url = "https://files.pythonhosted.org/packages/30/d2/89d65d4db4bb931beade9121bc71ad916b5fa9396e807d11b33731494e8e/rapidfuzz-3.14.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:442cba39957a008dfc5bdef21a9c3f4379e30ffb4e41b8555dbaf4887eca9300", size = 1336747, upload-time = "2025-11-01T11:54:43.957Z" }, + { url = "https://files.pythonhosted.org/packages/85/33/cd87d92b23f0b06e8914a61cea6850c6d495ca027f669fab7a379041827a/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1faa0f8f76ba75fd7b142c984947c280ef6558b5067af2ae9b8729b0a0f99ede", size = 1352187, upload-time = "2025-11-01T11:54:45.518Z" }, + { url = "https://files.pythonhosted.org/packages/22/20/9d30b4a1ab26aac22fff17d21dec7e9089ccddfe25151d0a8bb57001dc3d/rapidfuzz-3.14.3-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e6eefec45625c634926a9fd46c9e4f31118ac8f3156fff9494422cee45207e6", size = 3101472, upload-time = "2025-11-01T11:54:47.255Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ad/fa2d3e5c29a04ead7eaa731c7cd1f30f9ec3c77b3a578fdf90280797cbcb/rapidfuzz-3.14.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56fefb4382bb12250f164250240b9dd7772e41c5c8ae976fd598a32292449cc5", size = 1511361, upload-time = "2025-11-01T11:54:49.057Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -4042,6 +4177,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + [[package]] name = "safetensors" version = "0.7.0"