feat: implement graph-based agent with LLM integration and dynamic prompts

2026-03-03 17:56:39 +01:00 · 2026-03-03 17:56:39 +01:00 · 6c8261f2b2
parent 5024bde8fb
commit 6c8261f2b2
10 changed files with 493 additions and 78 deletions
--- a/Docker/.dockerignore
+++ b/Docker/.dockerignore
@ -0,0 +1,53 @@
 # Documentation
 *.md
 documentation/
 # Build and dependency files
 Makefile
 *.pyc
 __pycache__/
 *.egg-info/
 dist/
 build/
 # Development and testing
 .venv/
 venv/
 env/
 .pytest_cache/
 .coverage
 # Git and version control
 .git/
 .gitignore
 .gitattributes
 # IDE and editor files
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 .DS_Store
 # Environment files
 .env
 .env.local
 .env.*.local
 # Docker files (no copy Docker files into the image)
 Dockerfile
 docker-compose.yaml
 # CI/CD
 .github/
 .gitlab-ci.yml
 # Temporary files
 *.tmp
 *.log
 scratches/
 # Node modules (if any)
 node_modules/
 npm-debug.log
--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@ -3,7 +3,6 @@ FROM python:3.11-slim
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 WORKDIR /app
 COPY ./requirements.txt .
--- a/Docker/docker-compose.yaml
+++ b/Docker/docker-compose.yaml
@ -4,17 +4,18 @@ services:
  brunix-engine:
    build: .
    container_name: brunix-assistance-engine
    env_file: .env
    ports:
      - "50052:50051"
    environment:
-      - ELASTICSEARCH_URL=http://host.docker.internal:9200
+      ELASTICSEARCH_URL: ${ELASTICSEARCH_URL}
-      - DATABASE_URL=postgresql://postgres:brunix_pass@host.docker.internal:5432/postgres
+      ELASTICSEARCH_INDEX: ${ELASTICSEARCH_INDEX}
-
+      POSTGRES_URL: ${POSTGRES_URL}
-      - LANGFUSE_HOST=http://45.77.119.180
+      LANGFUSE_HOST: ${LANGFUSE_HOST}
-      - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY}
+      LANGFUSE_PUBLIC_KEY: ${LANGFUSE_PUBLIC_KEY}
-      - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY}
+      LANGFUSE_SECRET_KEY: ${LANGFUSE_SECRET_KEY}
-      - LLM_BASE_URL=http://host.docker.internal:11434
+      OLLAMA_URL: ${OLLAMA_URL}
      OLLAMA_MODEL_NAME: ${OLLAMA_MODEL_NAME}
      OLLAMA_EMB_MODEL_NAME: ${OLLAMA_EMB_MODEL_NAME}
    extra_hosts:
      - "host.docker.internal:host-gateway"
--- a/Docker/requirements.txt
+++ b/Docker/requirements.txt
@ -1,5 +1,5 @@
 # This file was autogenerated by uv via the following command:
-#    uv export --format requirements-txt --no-hashes --no-dev -o requirements.txt
+#    uv export --format requirements-txt --no-hashes --no-dev -o Docker/requirements.txt
 aiohappyeyeballs==2.6.1
    # via aiohttp
 aiohttp==3.13.3
@ -12,6 +12,12 @@ anyio==4.12.1
    # via httpx
 attrs==25.4.0
    # via aiohttp
 boto3==1.42.58
    # via langchain-aws
 botocore==1.42.58
    # via
    #   boto3
    #   s3transfer
 certifi==2026.1.4
    # via
    #   elastic-transport
@ -20,8 +26,11 @@ certifi==2026.1.4
    #   requests
 charset-normalizer==3.4.4
    # via requests
 click==8.3.1
    # via nltk
 colorama==0.4.6 ; sys_platform == 'win32'
    # via
    #   click
    #   loguru
    #   tqdm
 dataclasses-json==0.6.7
@ -30,72 +39,98 @@ elastic-transport==8.17.1
    # via elasticsearch
 elasticsearch==8.19.3
    # via langchain-elasticsearch
 filelock==3.24.3
    # via huggingface-hub
 frozenlist==1.8.0
    # via
    #   aiohttp
    #   aiosignal
-greenlet==3.3.1 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
+fsspec==2025.10.0
    # via huggingface-hub
 greenlet==3.3.2 ; platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'
    # via sqlalchemy
-grpcio==1.78.0
+grpcio==1.78.1
    # via
    #   assistance-engine
    #   grpcio-reflection
    #   grpcio-tools
-grpcio-reflection==1.78.0
+grpcio-reflection==1.78.1
    # via assistance-engine
-grpcio-tools==1.78.0
+grpcio-tools==1.78.1
    # via assistance-engine
 h11==0.16.0
    # via httpcore
 hf-xet==1.3.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
    # via huggingface-hub
 httpcore==1.0.9
    # via httpx
 httpx==0.28.1
    # via
    #   langgraph-sdk
    #   langsmith
    #   ollama
 httpx-sse==0.4.3
    # via langchain-community
 huggingface-hub==0.36.2
    # via
    #   langchain-huggingface
    #   tokenizers
 idna==3.11
    # via
    #   anyio
    #   httpx
    #   requests
    #   yarl
 jmespath==1.1.0
    # via
    #   boto3
    #   botocore
 joblib==1.5.3
    # via nltk
 jsonpatch==1.33
    # via langchain-core
 jsonpointer==3.0.0
    # via jsonpatch
 langchain==1.2.10
    # via assistance-engine
 langchain-aws==1.3.1
    # via assistance-engine
 langchain-classic==1.0.1
    # via langchain-community
 langchain-community==0.4.1
    # via assistance-engine
-langchain-core==1.2.11
+langchain-core==1.2.15
    # via
    #   langchain
    #   langchain-aws
    #   langchain-classic
    #   langchain-community
    #   langchain-elasticsearch
    #   langchain-huggingface
    #   langchain-ollama
    #   langchain-text-splitters
    #   langgraph
    #   langgraph-checkpoint
    #   langgraph-prebuilt
 langchain-elasticsearch==1.0.0
    # via assistance-engine
-langchain-text-splitters==1.1.0
+langchain-huggingface==1.2.0
    # via assistance-engine
 langchain-ollama==1.0.1
    # via assistance-engine
 langchain-text-splitters==1.1.1
    # via langchain-classic
-langgraph==1.0.8
+langgraph==1.0.9
    # via langchain
 langgraph-checkpoint==4.0.0
    # via
    #   langgraph
    #   langgraph-prebuilt
-langgraph-prebuilt==1.0.7
+langgraph-prebuilt==1.0.8
    # via langgraph
-langgraph-sdk==0.3.5
+langgraph-sdk==0.3.8
    # via langgraph
-langsmith==0.7.1
+langsmith==0.7.6
    # via
    #   langchain-classic
    #   langchain-community
@ -110,24 +145,30 @@ multidict==6.7.1
    #   yarl
 mypy-extensions==1.1.0
    # via typing-inspect
 nltk==3.9.3
    # via assistance-engine
 numpy==2.4.2
    # via
    #   assistance-engine
    #   elasticsearch
    #   langchain-aws
    #   langchain-community
    #   pandas
 ollama==0.6.1
    # via langchain-ollama
 orjson==3.11.7
    # via
    #   langgraph-sdk
    #   langsmith
 ormsgpack==1.12.2
    # via langgraph-checkpoint
-packaging==26.0
+packaging==24.2
    # via
    #   huggingface-hub
    #   langchain-core
    #   langsmith
    #   marshmallow
-pandas==3.0.0
+pandas==3.0.1
    # via assistance-engine
 propcache==0.4.1
    # via
@ -140,17 +181,20 @@ protobuf==6.33.5
 pydantic==2.12.5
    # via
    #   langchain
    #   langchain-aws
    #   langchain-classic
    #   langchain-core
    #   langgraph
    #   langsmith
    #   ollama
    #   pydantic-settings
 pydantic-core==2.41.5
    # via pydantic
-pydantic-settings==2.12.0
+pydantic-settings==2.13.1
    # via langchain-community
 python-dateutil==2.9.0.post0
    # via
    #   botocore
    #   elasticsearch
    #   pandas
 python-dotenv==1.2.1
@ -159,20 +203,28 @@ python-dotenv==1.2.1
    #   pydantic-settings
 pyyaml==6.0.3
    # via
    #   huggingface-hub
    #   langchain-classic
    #   langchain-community
    #   langchain-core
 rapidfuzz==3.14.3
    # via assistance-engine
 regex==2026.2.19
    # via nltk
 requests==2.32.5
    # via
    #   huggingface-hub
    #   langchain-classic
    #   langchain-community
    #   langsmith
    #   requests-toolbelt
 requests-toolbelt==1.0.0
    # via langsmith
 s3transfer==0.16.0
    # via boto3
 setuptools==82.0.0
    # via grpcio-tools
-simsimd==6.5.12
+simsimd==6.5.13
    # via elasticsearch
 six==1.17.0
    # via python-dateutil
@ -184,14 +236,20 @@ tenacity==9.1.4
    # via
    #   langchain-community
    #   langchain-core
 tokenizers==0.22.2
    # via langchain-huggingface
 tqdm==4.67.3
-    # via assistance-engine
+    # via
    #   assistance-engine
    #   huggingface-hub
    #   nltk
 typing-extensions==4.15.0
    # via
    #   aiosignal
    #   anyio
    #   elasticsearch
    #   grpcio
    #   huggingface-hub
    #   langchain-core
    #   pydantic
    #   pydantic-core
@ -208,9 +266,10 @@ tzdata==2025.3 ; sys_platform == 'emscripten' or sys_platform == 'win32'
    # via pandas
 urllib3==2.6.3
    # via
    #   botocore
    #   elastic-transport
    #   requests
-uuid-utils==0.14.0
+uuid-utils==0.14.1
    # via
    #   langchain-core
    #   langsmith
--- a/Docker/src/graph.py
+++ b/Docker/src/graph.py
@ -0,0 +1,60 @@
 # graph.py
 import logging
 from langchain_core.documents import Document
 from langchain_core.messages import SystemMessage
 from langgraph.graph import END, StateGraph
 from langgraph.graph.state import CompiledStateGraph
 from prompts import GENERATE_PROMPT, REFORMULATE_PROMPT
 from state import AgentState
 logger = logging.getLogger(__name__)
 def format_context(docs: list[Document]) -> str:
    chunks = []
    for i, doc in enumerate(docs, 1):
        source = (doc.metadata or {}).get("source", "Untitled")
        source_id = (doc.metadata or {}).get("id", f"chunk-{i}")
        text = doc.page_content or ""
        chunks.append(f"[{i}] id={source_id} source={source}\n{text}")
    return "\n\n".join(chunks)
 def build_graph(llm, vector_store) -> CompiledStateGraph:
    def reformulate(state: AgentState) -> AgentState:
        user_msg = state["messages"][-1]
        resp = llm.invoke([REFORMULATE_PROMPT, user_msg])
        reformulated = resp.content.strip()
        logger.info(f"[reformulate] '{user_msg.content}' → '{reformulated}'")
        return {"reformulated_query": reformulated}
    def retrieve(state: AgentState) -> AgentState:
        query = state["reformulated_query"]
        docs = vector_store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},
        ).invoke(query)
        context = format_context(docs)
        logger.info(f"[retrieve] {len(docs)} docs fetched")
        logger.info(context)
        return {"context": context}
    def generate(state: AgentState) -> AgentState:
        prompt = SystemMessage(
            content=GENERATE_PROMPT.content.format(context=state["context"])
        )
        resp = llm.invoke([prompt] + state["messages"])
        return {"messages": [resp]}
    graph_builder = StateGraph(AgentState)
    graph_builder.add_node("reformulate", reformulate)
    graph_builder.add_node("retrieve", retrieve)
    graph_builder.add_node("generate", generate)
    graph_builder.set_entry_point("reformulate")
    graph_builder.add_edge("reformulate", "retrieve")
    graph_builder.add_edge("retrieve", "generate")
    graph_builder.add_edge("generate", END)
    return graph_builder.compile()
--- a/Docker/src/prompts.py
+++ b/Docker/src/prompts.py
@ -0,0 +1,89 @@
 from langchain_core.messages import SystemMessage
 REFORMULATE_PROMPT = SystemMessage(
    content=(
        "You are a deterministic lexical query rewriter used for vector retrieval.\n"
        "Your task is to rewrite user questions into optimized keyword search queries.\n\n"
        "CRITICAL RULES (ABSOLUTE):\n"
        "1. NEVER answer the question.\n"
        "2. NEVER expand acronyms.\n"
        "3. NEVER introduce new terms not present in the original query.\n"
        "4. NEVER infer missing information.\n"
        "5. NEVER add explanations, definitions, or interpretations.\n"
        "6. Preserve all technical tokens exactly as written.\n"
        "7. Only remove filler words (e.g., what, does, is, explain, tell me, please).\n"
        "8. You may reorder terms for better retrieval.\n"
        "9. Output must be a single-line plain keyword query.\n"
        "10. If the query is already optimal, return it unchanged.\n\n"
        "11. If you receive something that looks like code, do NOT attempt to rewrite it. Return it verbatim.\n\n"
        "ALLOWED OPERATIONS:\n"
        "- Remove interrogative phrasing.\n"
        "- Remove stopwords.\n"
        "- Reorder words.\n"
        "- Convert to noun phrase form.\n\n"
        "FORBIDDEN OPERATIONS:\n"
        "- Expanding abbreviations.\n"
        "- Paraphrasing into unseen vocabulary.\n"
        "- Adding definitions.\n"
        "- Answering implicitly.\n\n"
        "Examples:\n"
        "Input: What does AVAP stand for?\n"
        "Output: AVAP stand for\n"
        "Input: Hey, I'm trying to understand how AVAP handels a ZeroDivisionError when doing divison or modulus operatoins. Can you explane what situatoins cause a ZeroDivisionError to be raised and how I can catch it in my AVAP scripts?\n"
        "Output: AVAP ZeroDivisionError division / modulus % catch try except\n"
        "Input: What does AVAP stand for?\n"
        "Output: AVAP stand for\n"
        "Input: Please explain how the import statement works in AVAP scripts.\n"
        "Output: AVAP import statement syntax behavior\n\n"
        "Return only the rewritten query."
    )
 )
 GENERATE_PROMPT = SystemMessage(
    content=(
        """You are a retrieval-grounded assistant.
        CRITICAL SCOPE RULES
        - AVAP (Advanced Virtual API Programming) is a new/unknown programming language.
        - If the user message is AVAP-related (explicitly mentions "AVAP" case-insensitive OR the provided context is AVAP code/docs), you MUST answer using ONLY <CONTEXT>.
        - You MUST NOT use general programming knowledge to interpret AVAP.
        - You MUST NOT assume AVAP behaves like any other language even if syntax looks similar.
        - You MUST NOT infer missing details. Only state what is explicitly present in <CONTEXT>.
        GROUNDING REQUIREMENTS (AVAP-RELATED)
        1) Every non-trivial factual claim MUST be directly supported by an EXACT QUOTE from <CONTEXT>.
        2) If a claim is not supported by a quote, DO NOT include it.
        3) If <CONTEXT> does not contain enough information to answer, reply with EXACTLY:
        "I don't have enough information in the provided context to answer that."
        WORKFLOW (AVAP-RELATED) — FOLLOW IN ORDER
        A) Identify the specific question(s) being asked.
        B) Extract the minimum necessary quotes from <CONTEXT> that answer those question(s).
        C) Write the answer using ONLY those quotes (paraphrase is allowed, but every statement must be backed by at least one quote).
        D) Verify: for EACH sentence in your answer, confirm there is a supporting quote. If any sentence lacks a quote, delete it or refuse.
        OUTPUT FORMAT (AVAP-RELATED ONLY)
        Answer:
        <short, direct answer; no extra speculation; no unrelated tips>
        Evidence:
        - "<exact quote 1>"
        - "<exact quote 2>"
        (Include only quotes you actually used. Prefer the smallest quotes that fully support the statements.)
        NON-AVAP QUESTIONS
        - If the question is clearly not AVAP-related, answer normally using general knowledge.
        <CONTEXT>
        {context}
        </CONTEXT>"""
    )
 )
--- a/Docker/src/server.py
+++ b/Docker/src/server.py
@ -1,70 +1,75 @@
 import os
 import grpc
 import logging
 import os
 from concurrent import futures
-from grpc_reflection.v1alpha import reflection
+
 import brunix_pb2
 import brunix_pb2_grpc
-
+import grpc
-from langchain_community.llms import Ollama
+from grpc_reflection.v1alpha import reflection
 from langchain_community.embeddings import OllamaEmbeddings
 from langchain_elasticsearch import ElasticsearchStore
-from langchain_core.prompts import ChatPromptTemplate
+
 from utils.llm_factory import create_chat_model
 from utils.emb_factory import create_embedding_model
 from graph import build_graph
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger("brunix-engine")
 class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
    def __init__(self):
-
+        self.llm = create_chat_model(
-        self.base_url = os.getenv("LLM_BASE_URL", "http://ollama-light-service:11434")
+            provider="ollama",
-        self.model_name = os.getenv("LLM_MODEL", "qwen2.5:1.5b")
+            model=os.getenv("OLLAMA_MODEL_NAME"),
-        
+            base_url=os.getenv("OLLAMA_URL"),
-        logger.info(f"Starting server")
+            temperature=0,
-
+            validate_model_on_init=True,
        self.llm = Ollama(base_url=self.base_url, model=self.model_name)
        self.embeddings = OllamaEmbeddings(base_url=self.base_url, model="nomic-embed-text")
        es_url = os.getenv("ELASTICSEARCH_URL", "http://elasticsearch:9200")
        logger.info(f"ElasticSearch on: {es_url}")
        self.vector_store = ElasticsearchStore(
            es_url=es_url,
            index_name="avap_manuals",
            embedding=self.embeddings
        )
        self.embeddings = create_embedding_model(
            provider="ollama",
            model=os.getenv("OLLAMA_EMB_MODEL_NAME"),
            base_url=os.getenv("OLLAMA_URL"),
        )
        self.vector_store = ElasticsearchStore(
            es_url=os.getenv("ELASTICSEARCH_URL"),
            index_name=os.getenv("ELASTICSEARCH_INDEX"),
            embedding=self.embeddings,
            query_field="text",
            vector_query_field="vector",
        )
        self.graph = build_graph(
            llm=self.llm,
            vector_store=self.vector_store
        )
        logger.info("Brunix Engine initializing.")
    def AskAgent(self, request, context):
        logger.info(f"request {request.session_id}): {request.query[:50]}.")
        try:
-            context_text = "AVAP is a virtual programming language for API development."
+            final_state = self.graph.invoke({"messages": [{"role": "user", 
-            # 4. Prompt Engineering
+                                                           "content": request.query}]})
            prompt = ChatPromptTemplate.from_template("""
            You are Brunix, the 101OBEX artificial intelligence for the AVAP Sphere platform. Respond in a professional manner.
-            CONTEXT:
+            messages = final_state.get("messages", [])
-            {context}
+            last_msg = messages[-1] if messages else None
            result_text = getattr(last_msg, "content", str(last_msg)) if last_msg else ""
            QUESTION:
            {question}
            """)
            chain = prompt | self.llm
            for chunk in chain.stream({"context": context_text, "question": request.query}):
            yield brunix_pb2.AgentResponse(
-                    text=str(chunk),
+                text=result_text,
                avap_code="AVAP-2026",
-                    is_final=False
+                is_final=True,
            )
            yield brunix_pb2.AgentResponse(text="", avap_code="", is_final=True)
        except Exception as e:
-            logger.error(f"Error in AskAgent: {str(e)}")
+            logger.error(f"Error in AskAgent: {str(e)}", exc_info=True)
-            yield brunix_pb2.AgentResponse(text=f"[Error Motor]: {str(e)}", is_final=True)
+            yield brunix_pb2.AgentResponse(
                text=f"[Error Motor]: {str(e)}",
                is_final=True,
            )
 def serve():
@ -73,15 +78,16 @@ def serve():
    brunix_pb2_grpc.add_AssistanceEngineServicer_to_server(BrunixEngine(), server)
    SERVICE_NAMES = (
-        brunix_pb2.DESCRIPTOR.services_by_name['AssistanceEngine'].full_name,
+        brunix_pb2.DESCRIPTOR.services_by_name["AssistanceEngine"].full_name,
        reflection.SERVICE_NAME,
    )
    reflection.enable_server_reflection(SERVICE_NAMES, server)
-    server.add_insecure_port('[::]:50051')
+    server.add_insecure_port("[::]:50051")
    logger.info("Brunix Engine on port 50051")
    server.start()
    server.wait_for_termination()
-if __name__ == '__main__':
+
 if __name__ == "__main__":
    serve()
--- a/Docker/src/state.py
+++ b/Docker/src/state.py
@ -0,0 +1,9 @@
 from typing import TypedDict, Annotated
 from langgraph.graph.message import add_messages
 class AgentState(TypedDict):
    messages: Annotated[list, add_messages]
    reformulated_query: str
    context: str
--- a/Docker/src/utils/emb_factory.py
+++ b/Docker/src/utils/emb_factory.py
@ -0,0 +1,67 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict
 class BaseEmbeddingFactory(ABC):
    @abstractmethod
    def create(self, model: str, **kwargs: Any):
        raise NotImplementedError
 class OpenAIEmbeddingFactory(BaseEmbeddingFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_openai import OpenAIEmbeddings
        return OpenAIEmbeddings(model=model, **kwargs)
 class OllamaEmbeddingFactory(BaseEmbeddingFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_ollama import OllamaEmbeddings
        return OllamaEmbeddings(model=model, **kwargs)
 class BedrockEmbeddingFactory(BaseEmbeddingFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_aws import BedrockEmbeddings
        return BedrockEmbeddings(model_id=model, **kwargs)
 class HuggingFaceEmbeddingFactory(BaseEmbeddingFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_huggingface import HuggingFaceEmbeddings
        return HuggingFaceEmbeddings(model_name=model, **kwargs)
 EMBEDDING_FACTORIES: Dict[str, BaseEmbeddingFactory] = {
    "openai": OpenAIEmbeddingFactory(),
    "ollama": OllamaEmbeddingFactory(),
    "bedrock": BedrockEmbeddingFactory(),
    "huggingface": HuggingFaceEmbeddingFactory(),
 }
 def create_embedding_model(provider: str, model: str, **kwargs: Any):
    """
    Create an embedding model instance for the given provider.
    Args:
        provider: The provider name (openai, ollama, bedrock, huggingface).
        model: The model identifier.
        **kwargs: Additional keyword arguments passed to the model constructor.
    Returns:
        An embedding model instance.
    """
    key = provider.strip().lower()
    if key not in EMBEDDING_FACTORIES:
        raise ValueError(
            f"Unsupported embedding provider: {provider}. "
            f"Available providers: {list(EMBEDDING_FACTORIES.keys())}"
        )
    return EMBEDDING_FACTORIES[key].create(model=model, **kwargs)
--- a/Docker/src/utils/llm_factory.py
+++ b/Docker/src/utils/llm_factory.py
@ -0,0 +1,72 @@
 from abc import ABC, abstractmethod
 from typing import Any, Dict
 class BaseProviderFactory(ABC):
    @abstractmethod
    def create(self, model: str, **kwargs: Any):
        raise NotImplementedError
 class OpenAIChatFactory(BaseProviderFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_openai import ChatOpenAI
        return ChatOpenAI(model=model, **kwargs)
 class OllamaChatFactory(BaseProviderFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_ollama import ChatOllama
        return ChatOllama(model=model, **kwargs)
 class BedrockChatFactory(BaseProviderFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_aws import ChatBedrockConverse
        return ChatBedrockConverse(model=model, **kwargs)
 class HuggingFaceChatFactory(BaseProviderFactory):
    def create(self, model: str, **kwargs: Any):
        from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
        llm = HuggingFacePipeline.from_model_id(
            model_id=model,
            task="text-generation",
            pipeline_kwargs=kwargs,
        )
        return ChatHuggingFace(llm=llm)
 CHAT_FACTORIES: Dict[str, BaseProviderFactory] = {
    "openai": OpenAIChatFactory(),
    "ollama": OllamaChatFactory(),
    "bedrock": BedrockChatFactory(),
    "huggingface": HuggingFaceChatFactory(),
 }
 def create_chat_model(provider: str, model: str, **kwargs: Any):
    """
    Create a chat model instance for the given provider.
    Args:
        provider: The provider name (openai, ollama, bedrock, huggingface).
        model: The model identifier.
        **kwargs: Additional keyword arguments passed to the model constructor.
    Returns:
        A chat model instance.
    """
    key = provider.strip().lower()
    if key not in CHAT_FACTORIES:
        raise ValueError(
            f"Unsupported chat provider: {provider}. "
            f"Available providers: {list(CHAT_FACTORIES.keys())}"
        )
    return CHAT_FACTORIES[key].create(model=model, **kwargs)