Update changelog for version 1.2.0: add new modules, refactor server integration, and enhance dependency management
This commit is contained in:
parent
f15266f345
commit
183c04829c
24
changelog
24
changelog
|
|
@ -4,6 +4,30 @@ All notable changes to the **Brunix Assistance Engine** will be documented in th
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
## [1.2.0] - 2026-03-04
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- IMPLEMENTED:
|
||||||
|
- `utils/`: factory modules created for embedding model and LLM generation.
|
||||||
|
- `graph.py`: workflow graph orchestration module added.
|
||||||
|
- `prompts.py`: centralized prompt definitions added.
|
||||||
|
- `state.py`: shared state management module added.
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
- REFACTORED: `server.py` updated to integrate the new graph/state/prompt and utils-based architecture.
|
||||||
|
- DEPENDENCIES: `requirements.txt` updated with new libraries required by the new modules.
|
||||||
|
- BUILD/OPS: `Makefile` updated with commands:
|
||||||
|
- `ollama_local`
|
||||||
|
- `tunnels_down`
|
||||||
|
- `sync_data_down`
|
||||||
|
- `sync_data_up`
|
||||||
|
|
||||||
|
### Fixed
|
||||||
|
- RESOLVED: Gaps in command coverage and dependency inconsistencies affecting local execution and data sync workflows.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
## [1.1.0] - 2026-02-16
|
## [1.1.0] - 2026-02-16
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,412 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "0a8abbfa",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"True"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"import re\n",
|
||||||
|
"import uuid\n",
|
||||||
|
"from dataclasses import dataclass\n",
|
||||||
|
"from pathlib import Path\n",
|
||||||
|
"from typing import Any, Dict, List, Optional, Tuple\n",
|
||||||
|
"\n",
|
||||||
|
"import nltk\n",
|
||||||
|
"from elasticsearch import Elasticsearch\n",
|
||||||
|
"from langchain_core.documents import Document\n",
|
||||||
|
"from langchain_elasticsearch import ElasticsearchStore\n",
|
||||||
|
"from langchain_ollama import OllamaEmbeddings\n",
|
||||||
|
"from lark import Lark, Token, Transformer, Tree\n",
|
||||||
|
"from transformers import AutoConfig\n",
|
||||||
|
"\n",
|
||||||
|
"from src.config import (DATA_DIR, ELASTICSEARCH_CODE_INDEX,\n",
|
||||||
|
" ELASTICSEARCH_DOCS_INDEX, ELASTICSEARCH_INDEX,\n",
|
||||||
|
" ELASTICSEARCH_URL, HF_EMB_MODEL_NAME,\n",
|
||||||
|
" OLLAMA_EMB_MODEL_NAME, OLLAMA_LOCAL_URL,\n",
|
||||||
|
" OLLAMA_MODEL_NAME, OLLAMA_URL, PROJ_ROOT)\n",
|
||||||
|
"\n",
|
||||||
|
"nltk.download(\"punkt\", quiet=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"id": "5c9d292b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"config = AutoConfig.from_pretrained(HF_EMB_MODEL_NAME)\n",
|
||||||
|
"embedding_dim = config.hidden_size"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 20,
|
||||||
|
"id": "0e1cd9b9",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"grammar = (DATA_DIR / \"raw\" / \"code\" / \"EBNF_v2.txt\").read_text(\n",
|
||||||
|
" encoding=\"utf-8\"\n",
|
||||||
|
")\n",
|
||||||
|
"code = (DATA_DIR / \"raw\" / \"code\" / \"Code_Snippets_v1.txt\").read_text(\n",
|
||||||
|
" encoding=\"utf-8\"\n",
|
||||||
|
")\n",
|
||||||
|
"parser = Lark(grammar=grammar, parser=\"lalr\", propagate_positions=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "baa779f3",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Functions"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "89be8bf6",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"@dataclass\n",
|
||||||
|
"class Chunk:\n",
|
||||||
|
" text: str\n",
|
||||||
|
" kind: str\n",
|
||||||
|
" metadata: Dict[str, Any]\n",
|
||||||
|
"\n",
|
||||||
|
"def _span(node: Tree) -> Optional[Tuple[int, int]]:\n",
|
||||||
|
" m = node.meta\n",
|
||||||
|
" s = getattr(m, \"start_pos\", None)\n",
|
||||||
|
" e = getattr(m, \"end_pos\", None)\n",
|
||||||
|
" if s is None or e is None:\n",
|
||||||
|
" return None\n",
|
||||||
|
" return s, e\n",
|
||||||
|
"\n",
|
||||||
|
"def _iter_trees(t: Tree):\n",
|
||||||
|
" yield t\n",
|
||||||
|
" for c in t.children:\n",
|
||||||
|
" if isinstance(c, Tree):\n",
|
||||||
|
" yield from _iter_trees(c)\n",
|
||||||
|
"\n",
|
||||||
|
"def _cmd_name(line: str) -> Optional[str]:\n",
|
||||||
|
" m = re.match(r\"^\\s*([A-Za-z_][A-Za-z0-9_]*)\\s*\\(\", line)\n",
|
||||||
|
" return m.group(1) if m else None\n",
|
||||||
|
"\n",
|
||||||
|
"def chunk_atomic_lines(code: str) -> List[Chunk]:\n",
|
||||||
|
" tree = parser.parse(code)\n",
|
||||||
|
" chunks: List[Chunk] = []\n",
|
||||||
|
"\n",
|
||||||
|
" for node in _iter_trees(tree):\n",
|
||||||
|
" if node.data == \"stmt_line\":\n",
|
||||||
|
" sp = _span(node)\n",
|
||||||
|
" if not sp:\n",
|
||||||
|
" continue\n",
|
||||||
|
" s, e = sp\n",
|
||||||
|
" text = code[s:e].strip()\n",
|
||||||
|
" if not text:\n",
|
||||||
|
" continue\n",
|
||||||
|
"\n",
|
||||||
|
" chunks.append(\n",
|
||||||
|
" Chunk(\n",
|
||||||
|
" text=text,\n",
|
||||||
|
" kind=\"line\",\n",
|
||||||
|
" metadata={\n",
|
||||||
|
" \"granularity\": \"atomic\",\n",
|
||||||
|
" \"command\": _cmd_name(text)\n",
|
||||||
|
" }\n",
|
||||||
|
" )\n",
|
||||||
|
" )\n",
|
||||||
|
" return chunks\n",
|
||||||
|
"\n",
|
||||||
|
"def chunk_blocks(code: str) -> List[Chunk]:\n",
|
||||||
|
" tree = parser.parse(code)\n",
|
||||||
|
" chunks: List[Chunk] = []\n",
|
||||||
|
"\n",
|
||||||
|
" for node in _iter_trees(tree):\n",
|
||||||
|
" if node.data in (\"if_block\", \"loop_block\", \"try_block\", \"go_async_block\", \"function_block\"):\n",
|
||||||
|
" sp = _span(node)\n",
|
||||||
|
" if not sp:\n",
|
||||||
|
" continue\n",
|
||||||
|
" s, e = sp\n",
|
||||||
|
" text = code[s:e].strip()\n",
|
||||||
|
" if not text:\n",
|
||||||
|
" continue\n",
|
||||||
|
"\n",
|
||||||
|
" chunks.append(\n",
|
||||||
|
" Chunk(\n",
|
||||||
|
" text=text,\n",
|
||||||
|
" kind=node.data,\n",
|
||||||
|
" metadata={\"granularity\": \"block\"}\n",
|
||||||
|
" )\n",
|
||||||
|
" )\n",
|
||||||
|
" return chunks\n",
|
||||||
|
"\n",
|
||||||
|
"def chunk_avap_code(code: str) -> List[Chunk]:\n",
|
||||||
|
" # Keep original offsets: do NOT lstrip. Grammar already accepts leading _NL.\n",
|
||||||
|
" blocks = chunk_blocks(code)\n",
|
||||||
|
" lines = chunk_atomic_lines(code)\n",
|
||||||
|
" return blocks + lines"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "23a92e13",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# BNF "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 33,
|
||||||
|
"id": "19253100",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"code = \"\"\"\n",
|
||||||
|
" addVar(base, 1000)\n",
|
||||||
|
" addVar(copia, $base) // copia toma el valor 1000, no la cadena \"$base\"\n",
|
||||||
|
" addResult(copia)\n",
|
||||||
|
"\"\"\"\n",
|
||||||
|
"tree = parser.parse(code)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 34,
|
||||||
|
"id": "04bf9223",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'program\\n simple_stmt\\t addVar(base, 1000)\\n simple_stmt\\t addVar(copia, $base) // copia toma el valor 1000, no la cadena \"$base\"\\n simple_stmt\\t addResult(copia)\\n'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 34,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"tree.pretty()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 35,
|
||||||
|
"id": "b2999a98",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"chunks = chunk_avap_code(code)\n",
|
||||||
|
"\n",
|
||||||
|
"for c in chunks:\n",
|
||||||
|
" print(\"----\")\n",
|
||||||
|
" print(\"TYPE:\", c.kind)\n",
|
||||||
|
" print(\"TEXT:\\n\", c.text)\n",
|
||||||
|
" print(\"META:\", c.metadata)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "77f6c552",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Elasticsearch"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 51,
|
||||||
|
"id": "09ce3e29",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"es = Elasticsearch(\n",
|
||||||
|
" ELASTICSEARCH_URL,\n",
|
||||||
|
" request_timeout=120,\n",
|
||||||
|
" max_retries=5,\n",
|
||||||
|
" retry_on_timeout=True,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 52,
|
||||||
|
"id": "d575c386",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"if es.indices.exists(index=ELASTICSEARCH_CODE_INDEX):\n",
|
||||||
|
" es.indices.delete(index=ELASTICSEARCH_CODE_INDEX)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 56,
|
||||||
|
"id": "40ea0af8",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"avap-code\n",
|
||||||
|
"avap-docs-test\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"for index in es.indices.get(index=\"*\"):\n",
|
||||||
|
" print(index)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 54,
|
||||||
|
"id": "4e091b39",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"OllamaEmbeddings(model='qwen3-0.6B-emb:latest', validate_model_on_init=False, base_url='http://localhost:11434', client_kwargs={}, async_client_kwargs={}, sync_client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 54,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"embeddings = OllamaEmbeddings(base_url=OLLAMA_LOCAL_URL, model=OLLAMA_EMB_MODEL_NAME)\n",
|
||||||
|
"embeddings"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 55,
|
||||||
|
"id": "5aff21c0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# index into Elasticsearch\n",
|
||||||
|
"db = ElasticsearchStore.from_documents(\n",
|
||||||
|
" code_chunks,\n",
|
||||||
|
" embeddings,\n",
|
||||||
|
" client=es,\n",
|
||||||
|
" index_name=ELASTICSEARCH_CODE_INDEX,\n",
|
||||||
|
" distance_strategy=\"COSINE\",\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "74c0a377",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"response = es.search(\n",
|
||||||
|
" index=ELASTICSEARCH_CODE_INDEX,\n",
|
||||||
|
" body={\n",
|
||||||
|
" \"query\": {\"match_all\": {}},\n",
|
||||||
|
" \"size\": 10 \n",
|
||||||
|
" }\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"for hit in response[\"hits\"][\"hits\"]:\n",
|
||||||
|
" print(\"ID:\", hit[\"_id\"])\n",
|
||||||
|
" print(\"Source:\", hit[\"_source\"])\n",
|
||||||
|
" print(\"-\" * 40)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "d823650e",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Retrieve"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "5732a27d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"base_retriever = db.as_retriever(\n",
|
||||||
|
" search_type=\"similarity\",\n",
|
||||||
|
" search_kwargs={\"k\": 5}\n",
|
||||||
|
" ) \n",
|
||||||
|
"\n",
|
||||||
|
"docs = base_retriever.invoke(\"What reserved words does AVAP have?\")\n",
|
||||||
|
"docs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8706506f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"embeddings = OllamaEmbeddings(base_url=OLLAMA_URL, model=OLLAMA_EMB_MODEL_NAME)\n",
|
||||||
|
"\n",
|
||||||
|
"vector_store = ElasticsearchStore(\n",
|
||||||
|
" client=es,\n",
|
||||||
|
" index_name=ELASTICSEARCH_DOCS_INDEX,\n",
|
||||||
|
" embedding=embeddings,\n",
|
||||||
|
" query_field=\"text\",\n",
|
||||||
|
" vector_query_field=\"vector\",\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"results = vector_store.similarity_search_with_score(\n",
|
||||||
|
" query=\"What data types does AVAP have?\",\n",
|
||||||
|
" k=50\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"results"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "assistance-engine",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.12.11"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
||||||
|
|
@ -10,6 +10,8 @@ OLLAMA_URL=os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
||||||
OLLAMA_LOCAL_URL=os.getenv("OLLAMA_LOCAL_URL", "http://localhost:11434")
|
OLLAMA_LOCAL_URL=os.getenv("OLLAMA_LOCAL_URL", "http://localhost:11434")
|
||||||
OLLAMA_MODEL_NAME=os.getenv("OLLAMA_MODEL_NAME", "qwen3-0.6B:latest")
|
OLLAMA_MODEL_NAME=os.getenv("OLLAMA_MODEL_NAME", "qwen3-0.6B:latest")
|
||||||
OLLAMA_EMB_MODEL_NAME=os.getenv("OLLAMA_EMB_MODEL_NAME", "qwen3-0.6B-emb:latest")
|
OLLAMA_EMB_MODEL_NAME=os.getenv("OLLAMA_EMB_MODEL_NAME", "qwen3-0.6B-emb:latest")
|
||||||
|
ELASTICSEARCH_DOCS_INDEX = os.getenv("ELASTICSEARCH_DOCS_INDEX")
|
||||||
|
ELASTICSEARCH_CODE_INDEX = os.getenv("ELASTICSEARCH_CODE_INDEX")
|
||||||
|
|
||||||
LANGFUSE_HOST=os.getenv("LANGFUSE_HOST", "http://45.77.119.180")
|
LANGFUSE_HOST=os.getenv("LANGFUSE_HOST", "http://45.77.119.180")
|
||||||
LANGFUSE_PUBLIC_KEY=os.getenv("LANGFUSE_PUBLIC_KEY", "pk-lf-0e6db694-3e95-4dd4-aedf-5a2694267058")
|
LANGFUSE_PUBLIC_KEY=os.getenv("LANGFUSE_PUBLIC_KEY", "pk-lf-0e6db694-3e95-4dd4-aedf-5a2694267058")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue