ADR0008 Finished and ADR0009 finalizing
This commit is contained in:
parent
0b9c19d61f
commit
6af0a84f4c
|
|
@ -2,4 +2,4 @@
|
|||
# Do not delete.
|
||||
|
||||
folderID: wmoge-xmh3x
|
||||
created: 2026-04-12T14:02:49-07:00
|
||||
created: 2026-04-12T11:12:38-07:00
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -33,7 +33,7 @@ services:
|
|||
CLASSIFIER_SEED_DATASET: ${CLASSIFIER_SEED_DATASET}
|
||||
CLASSIFIER_MIN_CV_ACCURACY: ${CLASSIFIER_MIN_CV_ACCURACY}
|
||||
CLASSIFIER_HELD_OUT_RATIO: ${CLASSIFIER_HELD_OUT_RATIO}
|
||||
PROXY_THREAD_WORKERS: 10
|
||||
|
||||
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import json as _json
|
||||
import logging
|
||||
import os
|
||||
import re as _re
|
||||
|
|
@ -28,6 +29,7 @@ from prompts import (
|
|||
GENERATE_PROMPT,
|
||||
PLATFORM_PROMPT,
|
||||
REFORMULATE_PROMPT,
|
||||
TEST_GENERATION_PROMPT,
|
||||
)
|
||||
|
||||
from state import AgentState, ClassifyEntry
|
||||
|
|
@ -36,7 +38,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
# ── AVAP Parser client — ADR-0009 (PTVL) ──────────────────────────────────────
|
||||
|
||||
_PARSER_URL = os.getenv("AVAP_PARSER_URL", "")
|
||||
_PARSER_URL = os.getenv("AVAP_PARSER_URL", "http://45.77.193.144:8888")
|
||||
_PARSER_TIMEOUT = int(os.getenv("AVAP_PARSER_TIMEOUT", "2"))
|
||||
_CB_THRESHOLD = int(os.getenv("PARSER_CB_THRESHOLD", "3"))
|
||||
_CB_COOLDOWN = int(os.getenv("PARSER_CB_COOLDOWN", "30"))
|
||||
|
|
@ -87,23 +89,39 @@ class _CircuitBreaker:
|
|||
_parser_cb = _CircuitBreaker(_CB_THRESHOLD, _CB_COOLDOWN)
|
||||
|
||||
|
||||
def _strip_thinking(text: str) -> str:
|
||||
"""Remove qwen3 <think>...</think> blocks from LLM output."""
|
||||
text = _re.sub(r"<think>.*?</think>", "", text, flags=_re.DOTALL)
|
||||
# Also strip a lone closing tag if the model omitted the opening one
|
||||
text = _re.sub(r"</think>", "", text)
|
||||
return text.strip()
|
||||
|
||||
|
||||
def _extract_avap_code(text: str) -> str:
|
||||
"""Return the first AVAP code block found in an LLM response."""
|
||||
text = _strip_thinking(text)
|
||||
|
||||
for pattern in (r'```avap\s*\n(.*?)```', r'```\s*\n(.*?)```', r'```(.*?)```'):
|
||||
m = _re.search(pattern, text, _re.DOTALL)
|
||||
if m:
|
||||
return m.group(1).strip()
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def _call_parser(text: str) -> tuple:
|
||||
def _call_parser(text: str, test_inputs: dict = None, test_list: list = None) -> tuple:
|
||||
"""Call AVAP Parser REST API.
|
||||
|
||||
Tries /api/v1/execute first (executes the code, catches runtime errors).
|
||||
If test_inputs/test_list are provided, also validates assertions.
|
||||
Falls back to /parse (AST-only validation) if /api/v1/execute is not available.
|
||||
|
||||
Returns:
|
||||
(True, "") — code valid
|
||||
(False, trace) — code invalid, trace contains the error
|
||||
(True, "") — code valid and executed successfully (assertions passed if provided)
|
||||
(False, trace) — code invalid, runtime error, or failed assertions
|
||||
(None, "") — parser unavailable or circuit open
|
||||
"""
|
||||
|
||||
if not _PARSER_URL or _PARSER_TIMEOUT == 0:
|
||||
return None, ""
|
||||
|
||||
|
|
@ -111,27 +129,120 @@ def _call_parser(text: str) -> tuple:
|
|||
return None, ""
|
||||
|
||||
code = _extract_avap_code(text)
|
||||
logger.info(f"[ptvl] extracted code ({len(code)} chars): {repr(code[:120])}")
|
||||
if not code.strip():
|
||||
return None, ""
|
||||
|
||||
base_url = _PARSER_URL.rstrip('/')
|
||||
|
||||
try:
|
||||
payload = {
|
||||
"code": code,
|
||||
"test_inputs": test_inputs or {},
|
||||
"test_list": test_list or [],
|
||||
}
|
||||
resp = _requests.post(
|
||||
f"{_PARSER_URL.rstrip('/')}/parse",
|
||||
json={"code": code},
|
||||
f"{base_url}/api/v1/run",
|
||||
json=payload,
|
||||
timeout=_PARSER_TIMEOUT,
|
||||
)
|
||||
|
||||
if resp.status_code == 404:
|
||||
# /api/v1/run not deployed yet — fall back to /parse
|
||||
logger.info("[ptvl] /api/v1/run not available, falling back to /parse")
|
||||
return _call_parser_parse(base_url, code)
|
||||
|
||||
data = resp.json()
|
||||
if data.get("valid", False):
|
||||
_parser_cb.success()
|
||||
return True, ""
|
||||
_parser_cb.success() # parser responded — it is healthy
|
||||
return False, data.get("error", "parse error")
|
||||
_parser_cb.success()
|
||||
logger.info(f"[ptvl] parser response: {data}")
|
||||
|
||||
if not data.get("success", False):
|
||||
error = data.get("error", "")
|
||||
if not error:
|
||||
failed_logs = [l for l in data.get("logs", []) if not l.get("success")]
|
||||
if failed_logs:
|
||||
error = failed_logs[0].get("error", "runtime error")
|
||||
return False, error or "runtime error"
|
||||
|
||||
# Execution succeeded — check assertion result if assertions were provided
|
||||
if test_list and not data.get("assertion_result", True):
|
||||
return False, "assertion failed: the code ran but did not produce the expected output"
|
||||
|
||||
return True, ""
|
||||
|
||||
except Exception as exc:
|
||||
_parser_cb.failure()
|
||||
logger.warning(f"[ptvl] parser call failed: {exc}")
|
||||
return None, ""
|
||||
|
||||
|
||||
def _call_parser_parse(base_url: str, code: str) -> tuple:
|
||||
"""AST-only fallback via /parse."""
|
||||
try:
|
||||
resp = _requests.post(
|
||||
f"{base_url}/parse",
|
||||
json={"code": code},
|
||||
timeout=_PARSER_TIMEOUT,
|
||||
)
|
||||
data = resp.json()
|
||||
_parser_cb.success()
|
||||
logger.info(f"[ptvl] parser response (/parse fallback): {data}")
|
||||
if data.get("valid", False):
|
||||
return True, ""
|
||||
return False, data.get("error", "parse error")
|
||||
except Exception as exc:
|
||||
_parser_cb.failure()
|
||||
logger.warning(f"[ptvl] /parse fallback failed: {exc}")
|
||||
return None, ""
|
||||
|
||||
|
||||
# ── Test generation helper — used by both build_graph and AskAgentStream ───────
|
||||
|
||||
def _run_generate_tests(user_request: str, generated_code: str, llm) -> tuple:
|
||||
"""Generate test_inputs + test_list for the given AVAP code.
|
||||
|
||||
Returns (test_inputs: dict, test_list: list). Never raises — falls back to
|
||||
({}, []) on timeout or any LLM/parse error so the caller can still validate.
|
||||
"""
|
||||
import concurrent.futures
|
||||
|
||||
def _run():
|
||||
prompt = "/no_think\n\n" + TEST_GENERATION_PROMPT.format(
|
||||
user_request=user_request,
|
||||
generated_code=generated_code,
|
||||
)
|
||||
resp = llm.invoke([SystemMessage(content=prompt)])
|
||||
raw = _strip_thinking(resp.content)
|
||||
logger.info(f"[generate_tests] raw output: {repr(raw[:200])}")
|
||||
|
||||
if raw.startswith("```"):
|
||||
raw = _re.sub(r"^```[a-z]*\n?", "", raw)
|
||||
raw = raw.rstrip("`").strip()
|
||||
|
||||
m = _re.search(r'\{.*\}', raw, _re.DOTALL)
|
||||
if m:
|
||||
raw = m.group(0)
|
||||
|
||||
data = _json.loads(raw)
|
||||
if not isinstance(data, dict):
|
||||
raise ValueError(f"expected JSON object, got {type(data).__name__}: {repr(raw[:80])}")
|
||||
return data.get("test_inputs", {}), data.get("test_list", [])
|
||||
|
||||
try:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as ex:
|
||||
future = ex.submit(_run)
|
||||
test_inputs, test_list = future.result(timeout=15)
|
||||
logger.info(f"[generate_tests] {len(test_list)} assertions generated")
|
||||
except concurrent.futures.TimeoutError:
|
||||
logger.warning("[generate_tests] timed out after 15s — skipping assertions")
|
||||
test_inputs, test_list = {}, []
|
||||
except Exception as exc:
|
||||
logger.warning(f"[generate_tests] skipped ({type(exc).__name__}): {exc}")
|
||||
test_inputs, test_list = {}, []
|
||||
|
||||
return test_inputs, test_list
|
||||
|
||||
|
||||
# ── Session stores ─────────────────────────────────────────────────────────────
|
||||
|
||||
session_store: dict[str, list] = defaultdict(list)
|
||||
|
|
@ -593,9 +704,18 @@ def build_graph(llm, embeddings, es_client, index_name, llm_conversational=None)
|
|||
)
|
||||
resp = llm.invoke([prompt] + state["messages"])
|
||||
logger.info(f"[generate_code] {len(resp.content)} chars")
|
||||
#logger.info(resp.content)
|
||||
_persist(state, resp)
|
||||
return {"messages": [resp]}
|
||||
|
||||
def generate_tests(state: AgentState) -> AgentState:
|
||||
user_msg = state["messages"][-2] if len(state["messages"]) >= 2 else state["messages"][-1]
|
||||
generated = state["messages"][-1]
|
||||
user_request = getattr(user_msg, "content", str(user_msg))
|
||||
generated_code = getattr(generated, "content", str(generated))
|
||||
test_inputs, test_list = _run_generate_tests(user_request, generated_code, llm)
|
||||
return {"test_inputs": test_inputs, "test_list": test_list}
|
||||
|
||||
def respond_conversational(state):
|
||||
extra_context = state.get("extra_context", "")
|
||||
if extra_context:
|
||||
|
|
@ -629,7 +749,11 @@ def build_graph(llm, embeddings, es_client, index_name, llm_conversational=None)
|
|||
def validate_code(state: AgentState) -> AgentState:
|
||||
last_msg = state["messages"][-1]
|
||||
content = getattr(last_msg, "content", str(last_msg))
|
||||
valid, trace = _call_parser(content)
|
||||
valid, trace = _call_parser(
|
||||
content,
|
||||
test_inputs=state.get("test_inputs") or {},
|
||||
test_list=state.get("test_list") or [],
|
||||
)
|
||||
if valid is None:
|
||||
logger.warning("[ptvl] parser unavailable — returning unvalidated")
|
||||
return {"validation_status": "PARSER_UNAVAILABLE", "parser_trace": ""}
|
||||
|
|
@ -645,9 +769,15 @@ def build_graph(llm, embeddings, es_client, index_name, llm_conversational=None)
|
|||
|
||||
feedback = (
|
||||
"\n\n<parser_feedback>\n"
|
||||
"The previous attempt produced invalid AVAP code. Specific failures:\n\n"
|
||||
"The previous attempt failed when the AVAP code was executed. "
|
||||
"The execution engine reported the following error:\n\n"
|
||||
f"{parser_trace}\n\n"
|
||||
"Correct these errors. Do not repeat the same constructs.\n"
|
||||
"Rules to fix it:\n"
|
||||
"- Read the error carefully — it identifies the exact command or line that failed.\n"
|
||||
"- Do NOT repeat the same construct that caused the error.\n"
|
||||
"- Only use commands from <avap_syntax_reminder> or <context>.\n"
|
||||
"- If the error mentions an unknown command, replace it with the correct AVAP equivalent.\n"
|
||||
"- If the error mentions a variable, make sure it is declared before use.\n"
|
||||
"</parser_feedback>"
|
||||
) if parser_trace else ""
|
||||
|
||||
|
|
@ -738,6 +868,7 @@ def build_graph(llm, embeddings, es_client, index_name, llm_conversational=None)
|
|||
graph_builder.add_node("retrieve", retrieve)
|
||||
graph_builder.add_node("generate", generate)
|
||||
graph_builder.add_node("generate_code", generate_code)
|
||||
graph_builder.add_node("generate_tests", generate_tests)
|
||||
graph_builder.add_node("validate_code", validate_code)
|
||||
graph_builder.add_node("generate_code_retry", generate_code_retry)
|
||||
graph_builder.add_node("validate_code_after_retry",validate_code_after_retry)
|
||||
|
|
@ -771,8 +902,9 @@ def build_graph(llm, embeddings, es_client, index_name, llm_conversational=None)
|
|||
}
|
||||
)
|
||||
|
||||
# CODE_GENERATION path: generate → validate → (retry if invalid) → END
|
||||
graph_builder.add_edge("generate_code", "validate_code")
|
||||
# CODE_GENERATION path: generate → generate_tests → validate → (retry if invalid) → END
|
||||
graph_builder.add_edge("generate_code", "generate_tests")
|
||||
graph_builder.add_edge("generate_tests", "validate_code")
|
||||
graph_builder.add_conditional_edges(
|
||||
"validate_code",
|
||||
route_after_validate,
|
||||
|
|
|
|||
|
|
@ -189,11 +189,12 @@ CODE_GENERATION_PROMPT = SystemMessage(
|
|||
"4. Write the MINIMUM code needed. No extra connectors, no unrelated variables.\n"
|
||||
"5. Add brief inline comments explaining each part.\n"
|
||||
"6. Answer in the same language the user used.\n"
|
||||
"7. Do NOT use registerEndpoint unless the user explicitly asks to configure, "
|
||||
"register, or set up an endpoint, API route, or HTTP handler. "
|
||||
"For all other requests, write the logic directly without endpoint registration.\n"
|
||||
"</critical_rules>\n\n"
|
||||
|
||||
"<avap_syntax_reminder>\n"
|
||||
"// Register an HTTP endpoint\n"
|
||||
"registerEndpoint(\"GET\", \"/path\", [], \"scope\", handlerFn, \"\")\n\n"
|
||||
"// Declare a function — uses curly braces, NOT end()\n"
|
||||
"function handlerFn() {{\n"
|
||||
" msg = \"Hello World\"\n"
|
||||
|
|
@ -220,7 +221,10 @@ CODE_GENERATION_PROMPT = SystemMessage(
|
|||
" // ...\n"
|
||||
"exception(errVar)\n"
|
||||
" // handle\n"
|
||||
"end()\n"
|
||||
"end()\n\n"
|
||||
"// Register an HTTP endpoint — USE ONLY when the user explicitly asks to\n"
|
||||
"// configure, register, or set up an endpoint, API route, or HTTP handler.\n"
|
||||
"registerEndpoint(\"GET\", \"/path\", [], \"scope\", handlerFn, \"\")\n"
|
||||
"</avap_syntax_reminder>\n\n"
|
||||
|
||||
"<task>\n"
|
||||
|
|
@ -237,6 +241,51 @@ CODE_GENERATION_PROMPT = SystemMessage(
|
|||
)
|
||||
)
|
||||
|
||||
TEST_GENERATION_PROMPT = (
|
||||
"<role>\n"
|
||||
"You are a test case generator for AVAP code. "
|
||||
"Given a user request and the AVAP code that was generated, "
|
||||
"produce minimal test inputs and assertions to verify the code behaves correctly.\n"
|
||||
"</role>\n\n"
|
||||
|
||||
"<avap_variables_rule>\n"
|
||||
"In AVAP, variables assigned during execution are available after execution.\n"
|
||||
"Two distinct naming roles exist — do NOT confuse them:\n\n"
|
||||
"1. addParam(\"request_param_name\", avap_variable_name)\n"
|
||||
" - \"request_param_name\" (first arg, a string literal) is the HTTP request parameter. "
|
||||
"Use it as the KEY in test_inputs.\n"
|
||||
" - avap_variable_name (second arg, an identifier) is the AVAP variable that receives "
|
||||
"the value. Use it (unquoted) in assertions.\n\n"
|
||||
" Example: addParam(\"client_id\", id_interno)\n"
|
||||
" → test_inputs key: \"client_id\"\n"
|
||||
" → assertion variable: id_interno\n\n"
|
||||
"2. Direct assignments (e.g. msg = \"Hello\", result = a + b) — use the left-hand "
|
||||
"variable name (unquoted) in assertions. These variables need no test_inputs entry.\n"
|
||||
"</avap_variables_rule>\n\n"
|
||||
|
||||
"<assertion_format>\n"
|
||||
"Each assertion must be a QUOTED JSON STRING in this exact format:\n"
|
||||
" \"re.match(r'<regex_pattern>', str(<avap_variable_name>))\"\n"
|
||||
"Where:\n"
|
||||
"- The entire expression is wrapped in double quotes — it is a JSON string.\n"
|
||||
"- <regex_pattern> is a regex that matches the expected value.\n"
|
||||
"- <avap_variable_name> is the AVAP variable identifier (NOT the request param name, NOT quoted).\n"
|
||||
"</assertion_format>\n\n"
|
||||
|
||||
"<output_rule>\n"
|
||||
"Output ONLY a valid JSON object with exactly two keys. "
|
||||
"Every item in test_list MUST be a quoted string. Raw JSON only — "
|
||||
"no explanation, no markdown, no code block.\n\n"
|
||||
"Example — code uses addParam(\"client_id\", id_interno):\n"
|
||||
"{{\"test_inputs\": {{\"client_id\": \"12345\"}}, "
|
||||
"\"test_list\": [\"re.match(r'^\\\\d+$', str(id_interno))\"]}}\n"
|
||||
"Note: test_inputs key is \"client_id\" (request param), assertion uses id_interno (AVAP variable).\n"
|
||||
"</output_rule>\n\n"
|
||||
|
||||
"<user_request>{user_request}</user_request>\n\n"
|
||||
"<generated_code>{generated_code}</generated_code>"
|
||||
)
|
||||
|
||||
CONVERSATIONAL_PROMPT = SystemMessage(
|
||||
content=(
|
||||
"<role>\n"
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from langchain_core.messages import AIMessage, SystemMessage
|
|||
|
||||
from utils.llm_factory import create_chat_model
|
||||
from utils.emb_factory import create_embedding_model
|
||||
from graph import build_graph, build_prepare_graph, build_final_messages, session_store, classify_history_store, _load_layer2_model, _call_parser, _extract_avap_code
|
||||
from graph import build_graph, build_prepare_graph, build_final_messages, session_store, classify_history_store, _load_layer2_model, _call_parser, _extract_avap_code, _run_generate_tests
|
||||
from utils.classifier_export import maybe_export, force_export
|
||||
|
||||
from evaluate import run_evaluation
|
||||
|
|
@ -302,7 +302,11 @@ class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
|
|||
complete_block = code_buffer[:close_pos + 3]
|
||||
rest = code_buffer[close_pos + 3:]
|
||||
|
||||
valid, trace = _call_parser(complete_block)
|
||||
# Generate tests before validation so the parser can
|
||||
# execute the code with real inputs and check assertions.
|
||||
_avap_code = _extract_avap_code(complete_block)
|
||||
_ti, _tl = _run_generate_tests(query, _avap_code, active_llm)
|
||||
valid, trace = _call_parser(complete_block, test_inputs=_ti, test_list=_tl)
|
||||
|
||||
if valid is False:
|
||||
# Ask LLM to fix only the code block
|
||||
|
|
@ -318,7 +322,7 @@ class BrunixEngine(brunix_pb2_grpc.AssistanceEngineServicer):
|
|||
fixed_code = _extract_avap_code(fix_resp.content)
|
||||
fixed_block = f"{fence_open}\n{fixed_code}\n```"
|
||||
|
||||
valid2, _ = _call_parser(fixed_block)
|
||||
valid2, _ = _call_parser(fixed_block, test_inputs=_ti, test_list=_tl)
|
||||
if valid2 is False:
|
||||
to_yield = fixed_block
|
||||
validation_status = "INVALID_UNRESOLVED"
|
||||
|
|
|
|||
|
|
@ -28,4 +28,7 @@ class AgentState(TypedDict):
|
|||
# -- PTVL (ADR-0009)
|
||||
parser_trace: str # raw parser error trace from first validation (empty if valid)
|
||||
validation_status: str # "" | "INVALID_UNRESOLVED" | "PARSER_UNAVAILABLE"
|
||||
context_relevant: bool # result of CONFIDENCE_PROMPT check (RETRIEVAL only)
|
||||
context_relevant: bool # result of CONFIDENCE_PROMPT check (RETRIEVAL only)
|
||||
# -- TEST GENERATION
|
||||
test_inputs: dict # variables injected when executing generated code
|
||||
test_list: list # regex assertions validated against output variables
|
||||
19
README.md
19
README.md
|
|
@ -62,7 +62,8 @@ graph TD
|
|||
│ │ ├── golden_dataset.json # Ground-truth Q&A dataset for EvaluateRAG
|
||||
│ │ └── utils/
|
||||
│ │ ├── emb_factory.py # Provider-agnostic embedding model factory
|
||||
│ │ └── llm_factory.py # Provider-agnostic LLM factory
|
||||
│ │ ├── llm_factory.py # Provider-agnostic LLM factory
|
||||
│ │ └── classifier_export.py # Exports classify_history to JSONL; triggers retraining
|
||||
│ ├── tests/
|
||||
│ │ └── test_prd_0002.py # Unit tests — editor context, classifier, proxy parsing
|
||||
│ ├── Dockerfile # Multi-stage container build
|
||||
|
|
@ -82,7 +83,12 @@ graph TD
|
|||
│ │ ├── ADR-0002-two-phase-streaming.md
|
||||
│ │ ├── ADR-0003-hybrid-retrieval-rrf.md
|
||||
│ │ ├── ADR-0004-claude-eval-judge.md
|
||||
│ │ └── ADR-0005-embedding-model-selection.md
|
||||
│ │ ├── ADR-0005-embedding-model-selection.md
|
||||
│ │ ├── ADR-0006-reward-algorithm-dataset-synthesis.md
|
||||
│ │ ├── ADR-0007-mandatory-syntactic-validation-layer.md
|
||||
│ │ ├── ADR-0008-adaptive-query-routing-intent-history.md
|
||||
│ │ ├── ADR-0009-per-type-response-validation.md
|
||||
│ │ └── ADR-0010-classifier-continuous-retraining.md
|
||||
│ └── product/ # Product Requirements Documents
|
||||
│ ├── PRD-0001-openai-compatible-proxy.md
|
||||
│ └── PRD-0002-editor-context-injection.md
|
||||
|
|
@ -113,6 +119,11 @@ graph TD
|
|||
│ │ ├── embeddings.py # OllamaEmbeddings adapter (Chonkie-compatible)
|
||||
│ │ └── prompts.py # Prompt templates for pipeline LLM calls
|
||||
│ │
|
||||
│ ├── classifier/ # [PIPELINE C] Classifier retraining pipeline
|
||||
│ │ ├── retrain_pipeline.py # Champion/Challenger training, evaluation & promotion
|
||||
│ │ ├── seed_classifier_dataset.jsonl # 204 hand-crafted bilingual seed examples
|
||||
│ │ └── README.md # Classifier pipeline reference
|
||||
│ │
|
||||
│ └── ingestion/ # [PIPELINE B] AVAP-native classic ingestion
|
||||
│ ├── avap_chunker.py # Custom AVAP lexer + chunker (MinHash dedup, overlaps)
|
||||
│ ├── avap_ingestor.py # Async ES ingestor with DLQ (producer/consumer pattern)
|
||||
|
|
@ -343,6 +354,7 @@ HF_TOKEN=hf_...
|
|||
HF_EMB_MODEL_NAME=Qwen/Qwen3-Embedding-0.6B
|
||||
ANTHROPIC_API_KEY=sk-ant-...
|
||||
ANTHROPIC_MODEL=claude-sonnet-4-20250514
|
||||
PARSER_URL=http://host.docker.internal:8888
|
||||
```
|
||||
|
||||
| Variable | Required | Description | Example |
|
||||
|
|
@ -366,6 +378,7 @@ ANTHROPIC_MODEL=claude-sonnet-4-20250514
|
|||
| `HF_EMB_MODEL_NAME` | Yes | HuggingFace embeddings model name | `Qwen/Qwen3-Embedding-0.6B` |
|
||||
| `ANTHROPIC_API_KEY` | Yes* | Anthropic API key — required for the `EvaluateRAG` endpoint | `sk-ant-...` |
|
||||
| `ANTHROPIC_MODEL` | No | Claude model used by the RAG evaluation suite | `claude-sonnet-4-20250514` |
|
||||
| `PARSER_URL` | No | AVAP parser REST API base URL — used by PTVL for code execution and assertion validation | `http://host.docker.internal:8888` |
|
||||
|
||||
> Never commit real secret values. Use placeholder values when sharing configuration examples.
|
||||
|
||||
|
|
@ -663,7 +676,7 @@ For the full set of contribution standards, see [CONTRIBUTING.md](./CONTRIBUTING
|
|||
| [docs/API_REFERENCE.md](./docs/API_REFERENCE.md) | Complete gRPC API contract, message types, client examples |
|
||||
| [docs/RUNBOOK.md](./docs/RUNBOOK.md) | Operational playbooks, health checks, incident response |
|
||||
| [docs/AVAP_CHUNKER_CONFIG.md](./docs/AVAP_CHUNKER_CONFIG.md) | `avap_config.json` reference — blocks, statements, semantic tags, how to extend |
|
||||
| [docs/ADR/](./docs/ADR/) | Architecture Decision Records |
|
||||
| [docs/ADR/](./docs/ADR/) | Architecture Decision Records (ADR-0001 through ADR-0010) |
|
||||
| [docs/product/](./docs/product/) | Product Requirements Documents |
|
||||
| [research/](./research/) | Experiment results, benchmarks, and datasets |
|
||||
|
||||
|
|
|
|||
27
changelog
27
changelog
|
|
@ -3,6 +3,33 @@
|
|||
All notable changes to the **Brunix Assistance Engine** will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
---
|
||||
## [1.7.0] - 2026-04-13
|
||||
|
||||
### Added
|
||||
- ENGINE: Added `generate_tests` LangGraph node — after `generate_code`, invokes the LLM with `TEST_GENERATION_PROMPT` to produce `test_inputs` (variable dict) and `test_list` (regex assertion array) used to validate generated code at runtime. Runs under a 15-second `ThreadPoolExecutor` timeout to prevent blocking.
|
||||
- ENGINE: Added `TEST_GENERATION_PROMPT` to `prompts.py` — generates structured JSON with `test_inputs` and `test_list` from the user request and the generated AVAP code. Assertions use `re.match(r'<pattern>', str(<var>))` format evaluated after code execution.
|
||||
- ENGINE: Added `_strip_thinking()` utility in `graph.py` — strips `<think>...</think>` blocks and orphaned `</think>` tags from qwen3 thinking-mode output before code extraction or test parsing.
|
||||
- ENGINE: Upgraded AVAP parser integration from `/parse` (AST-only) to `/api/v1/execute` (execution + assertion validation). Payload now includes `test_inputs` and `test_list` from state. Falls back to `/parse` on HTTP 404.
|
||||
- ENGINE: Added parser response logging (`[ptvl] parser response: ...`) for observability of execution and assertion outcomes.
|
||||
- DATA: Expanded classifier seed dataset (`seed_classifier_dataset.jsonl`) from 95 to 204 examples. Added 100 Spanish-language examples covering all four intent categories, with emphasis on interrogative `CODE_GENERATION` patterns (`como seria`, `como haria`, `puedes escribir`, `muéstrame`, `necesito`).
|
||||
|
||||
### Changed
|
||||
- ENGINE: `CODE_GENERATION_PROMPT` — added rule 7 suppressing `registerEndpoint` unless the user explicitly asks to configure, register, or set up an endpoint. Moved `registerEndpoint` syntax reference to end of `<avap_syntax_reminder>` with conditional comment.
|
||||
- ENGINE: `AgentState` — added `test_inputs: dict` and `test_list: list` fields to carry generated test data between `generate_tests` and `validate_code` nodes.
|
||||
- ENGINE: LangGraph `build_graph` wiring updated: `generate_code → generate_tests → validate_code` (was `generate_code → validate_code`).
|
||||
- ENGINE: `_call_parser()` signature extended — accepts `test_inputs` and `test_list` params, passes them as JSON payload to `/api/v1/execute`. Parser payload key changed from `variables` to `test_inputs`.
|
||||
- ENGINE: `generate_code_retry` feedback message updated to reference runtime execution errors (not just syntax errors).
|
||||
- DOCS: Updated `docs/ADR/ADR-0009-per-type-response-validation.md` — full rewrite of Decision section with three-level validation flow, `generate_tests` node documentation, parser protocol (primary + fallback), `_strip_thinking()` utility, updated `AgentState` fields, and updated consequences.
|
||||
- DOCS: Updated `docs/ARCHITECTURE.md` — version 1.7.x, related ADRs, component inventory with `generate_tests`, updated `build_graph` flowchart, RC-07/RC-08 routing contract entries, all PTVL `AgentState` fields.
|
||||
|
||||
### Fixed
|
||||
- ENGINE: Fixed qwen3 thinking mode leaking `<think>...</think>` and `</think>` tags into generated code and test output — resolved by `_strip_thinking()` applied before code extraction and JSON parsing.
|
||||
- ENGINE: Fixed `KeyError: 'test_inputs'` — parser payload was using key `variables`; updated to `test_inputs` to match `/api/v1/execute` contract.
|
||||
- ENGINE: Fixed `generate_tests` silently skipping — `TEST_GENERATION_PROMPT` contained literal `{` and `}` in example JSON which Python `.format()` interpreted as placeholders. Fixed by escaping all literal braces as `{{` and `}}`.
|
||||
- ENGINE: Fixed classifier misclassifying Spanish interrogative code requests (`como seria un API...`) as `RETRIEVAL` — root cause was English-only seed dataset with no interrogative training examples. Fixed by expanding seed dataset with bilingual examples.
|
||||
|
||||
---
|
||||
|
||||
## [1.6.2] - 2026-03-26
|
||||
### Changed
|
||||
- RESEARCH: updated `embeddings/Embedding model selection.pdf`.
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -112,7 +112,9 @@ Establish the **Mandatory Syntactic Validation Layer (MSVL)** as a non-optional
|
|||
|
||||
### 1. Parser integration in `EvaluateRAG`
|
||||
|
||||
Every code block in a generated response must be submitted to the AVAP Parser via gRPC before RAGAS scoring. The parser returns a binary result: `VALID` or `INVALID` with a failure category (`unknown_token`, `unexpected_construct`, `foreign_keyword`, `syntax_error`).
|
||||
Every code block in a generated response must be submitted to the AVAP Parser before RAGAS scoring. The parser returns a binary result: `VALID` or `INVALID`.
|
||||
|
||||
**Implementation note (2026-04-12):** the AVAP Parser exposes a **REST HTTP API** (Tornado, port 8888), not gRPC as originally anticipated. The call contract is `POST /parse` with body `{"code": "..."}`, returning `{"valid": true/false, "error": "..."}`. The production PTVL (ADR-0009) uses this REST interface. The evaluation pipeline integration should use the same interface.
|
||||
|
||||
### 2. `syntactic_validity` as an independent metric
|
||||
|
||||
|
|
@ -132,7 +134,7 @@ final_answer_relevancy(entry) =
|
|||
|
||||
### 3. Parser SLA and fallback policy
|
||||
|
||||
The AVAP Parser gRPC service must respond within 2 seconds per call. If the parser is unreachable or times out, the evaluation run is **aborted** with an explicit error. Silent fallback to RAGAS-only scoring is prohibited.
|
||||
The AVAP Parser REST service must respond within 2 seconds per call. If the parser is unreachable or times out, the evaluation run is **aborted** with an explicit error. Silent fallback to RAGAS-only scoring is prohibited.
|
||||
|
||||
```python
|
||||
if parser_status == UNAVAILABLE:
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
# ADR-0009: Per-Type Response Validation Layer
|
||||
|
||||
**Date:** 2026-04-10
|
||||
**Status:** Accepted
|
||||
**Last updated:** 2026-04-13
|
||||
**Status:** Implemented
|
||||
**Deciders:** Rafael Ruiz (CTO)
|
||||
**Related ADRs:** ADR-0007 (MSVL for RAG Evaluation), ADR-0008 (Adaptive Query Routing), ADR-0003 (Hybrid Retrieval RRF)
|
||||
|
||||
|
|
@ -47,14 +48,20 @@ Add a **Per-Type Response Validation Layer (PTVL)** to the production LangGraph
|
|||
|
||||
| Type | When | What | Mechanism |
|
||||
|---|---|---|---|
|
||||
| `CODE_GENERATION` | Post-generation | Syntactic validity of generated AVAP code | AVAP Parser gRPC — deterministic |
|
||||
| `CODE_GENERATION` | Post-generation | Syntax + execution + assertion correctness | AVAP Parser REST HTTP — `/api/v1/execute` with `/parse` fallback |
|
||||
| `RETRIEVAL` | Pre-generation | Relevance of retrieved context to the query | LLM relevance check — `CONFIDENCE_PROMPT_TEMPLATE` |
|
||||
| `CONVERSATIONAL` | None | — | No retrieval, no code generated |
|
||||
| `PLATFORM` | None | — | No retrieval, no code generated |
|
||||
|
||||
---
|
||||
|
||||
### Decision 1 — CODE_GENERATION: parser validation with trace-guided retry
|
||||
### Decision 1 — CODE_GENERATION: three-level validation with trace-guided retry
|
||||
|
||||
Validation operates at three levels in order of depth:
|
||||
|
||||
1. **Syntax** — can the code be parsed into a valid AST?
|
||||
2. **Execution** — does the code run without runtime errors?
|
||||
3. **Assertions** — does the code produce the expected output?
|
||||
|
||||
#### Flow
|
||||
|
||||
|
|
@ -62,46 +69,141 @@ Add a **Per-Type Response Validation Layer (PTVL)** to the production LangGraph
|
|||
generate_code node
|
||||
│
|
||||
▼
|
||||
[V1] AVAP Parser gRPC
|
||||
generate_tests node
|
||||
[LLM generates test_inputs + test_list from code + user request]
|
||||
│
|
||||
├── VALID ──────────────────────────────► return response
|
||||
▼
|
||||
validate_code node
|
||||
│
|
||||
└── INVALID + line-by-line trace
|
||||
│
|
||||
▼
|
||||
[inject trace into retry prompt]
|
||||
│
|
||||
▼
|
||||
generate_code_retry node (1 attempt only)
|
||||
│
|
||||
▼
|
||||
[V2] AVAP Parser gRPC
|
||||
│
|
||||
├── VALID ──────────────────────► return response
|
||||
│
|
||||
└── INVALID ────────────────────► return response + validation_status flag
|
||||
▼
|
||||
[V1] AVAP Parser POST /api/v1/execute
|
||||
{"code": "...", "test_inputs": {...}, "test_list": [...]}
|
||||
│
|
||||
├── success=true + assertion_result=true ──► return response
|
||||
│
|
||||
├── success=false (runtime error) ──────────┐
|
||||
│ │
|
||||
└── assertion_result=false ─────────────────┤
|
||||
│
|
||||
[inject error into retry prompt]
|
||||
│
|
||||
▼
|
||||
generate_code_retry node (1 attempt only)
|
||||
│
|
||||
▼
|
||||
validate_code_after_retry
|
||||
│
|
||||
[V2] AVAP Parser /api/v1/execute
|
||||
│
|
||||
├── VALID ──────► return response
|
||||
└── INVALID ────► return response + validation_status flag
|
||||
```
|
||||
|
||||
**Fallback path:** If `/api/v1/execute` returns 404 (endpoint not yet deployed), validation falls back to `POST /parse` for AST-only syntax checking. The fallback is transparent — no configuration change required.
|
||||
|
||||
#### generate_tests node
|
||||
|
||||
Before validation, a dedicated LLM call generates test cases from the user's original request and the generated code:
|
||||
|
||||
```python
|
||||
# Input to TEST_GENERATION_PROMPT
|
||||
user_request: "como seria una api que reciba un parametro y lo devuelva?"
|
||||
generated_code: "addParam(\"client_id\", id_interno)\naddResult(id_interno)"
|
||||
|
||||
# Output
|
||||
{
|
||||
"test_inputs": {"client_id": "12345"},
|
||||
"test_list": ["re.match(r'^\\d{5}$', str(id_interno))"]
|
||||
}
|
||||
```
|
||||
|
||||
`test_inputs` are injected as request variables when the parser executes the code. `test_list` items are regex assertions evaluated against the output variables after execution.
|
||||
|
||||
If the LLM call fails or times out (15s hard limit), `generate_tests` returns empty `test_inputs` and `test_list` — validation continues using execution-only (no assertions). This keeps the node non-blocking.
|
||||
|
||||
#### Trace-guided retry
|
||||
|
||||
The parser trace is injected into the generation prompt as a structured correction context:
|
||||
The parser error is injected into the retry prompt as a structured correction context:
|
||||
|
||||
```
|
||||
<parser_feedback>
|
||||
The previous attempt produced invalid AVAP code. Specific failures:
|
||||
The previous attempt failed when the AVAP code was executed.
|
||||
The execution engine reported the following error:
|
||||
|
||||
Line 3: unknown command 'getSHA256' — expected known identifier
|
||||
Line 7: unexpected construct 'for i in range(...)' — AVAP loop syntax required
|
||||
[error from /api/v1/execute logs]
|
||||
|
||||
Correct these errors. Do not repeat the same constructs.
|
||||
Rules to fix it:
|
||||
- Read the error carefully — it identifies the exact command or line that failed.
|
||||
- Do NOT repeat the same construct that caused the error.
|
||||
- Only use commands from <avap_syntax_reminder> or <context>.
|
||||
- If the error mentions an unknown command, replace it with the correct AVAP equivalent.
|
||||
- If the error mentions a variable, make sure it is declared before use.
|
||||
</parser_feedback>
|
||||
```
|
||||
|
||||
This is not a blind retry. The LLM receives the exact failure points and can target its corrections. ADR-0007 documented the mapping between common hallucinated commands and their valid AVAP equivalents (`getSHA256` → `encodeSHA256`, `returnResult` → `addResult`, etc.) — the trace makes these corrections automatic without hardcoding the mapping.
|
||||
This is not a blind retry. The LLM receives the exact runtime failure and can target its corrections.
|
||||
|
||||
#### qwen3 thinking stripping
|
||||
|
||||
The engine strips `<think>...</think>` blocks from all LLM outputs before passing code to the parser. `qwen3` models emit thinking content by default; without stripping, the parser receives the full response including reasoning text and fails to find valid AVAP constructs.
|
||||
|
||||
```python
|
||||
def _strip_thinking(text: str) -> str:
|
||||
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
|
||||
text = re.sub(r"</think>", "", text) # lone closing tag
|
||||
return text.strip()
|
||||
```
|
||||
|
||||
This runs before code block extraction in `_extract_avap_code` and before JSON parsing in `generate_tests`.
|
||||
|
||||
#### Parser protocol
|
||||
|
||||
The AVAP Parser exposes a **REST HTTP API** (Tornado, port 8888). Two endpoints are used:
|
||||
|
||||
**Primary — `/api/v1/execute`** (execution + assertions):
|
||||
|
||||
```
|
||||
POST {AVAP_PARSER_URL}/api/v1/execute
|
||||
Content-Type: application/json
|
||||
|
||||
{
|
||||
"code": "<AVAP code>",
|
||||
"test_inputs": {"param": "value"},
|
||||
"test_list": ["re.match(r'^pattern$', str(variable))"]
|
||||
}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"result": [...],
|
||||
"variables": {"param": "value", ...},
|
||||
"assertion_result": true,
|
||||
"logs": [{"command": "...", "duration_ms": 1.2, "success": true}]
|
||||
}
|
||||
```
|
||||
|
||||
**Fallback — `/parse`** (AST-only, when `/api/v1/execute` returns 404):
|
||||
|
||||
```
|
||||
POST {AVAP_PARSER_URL}/parse
|
||||
Content-Type: application/json
|
||||
|
||||
{"code": "<AVAP code>"}
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{"valid": true, "ast": {...}}
|
||||
{"valid": false, "error": "..."}
|
||||
```
|
||||
|
||||
Code is extracted from the LLM response by scanning for the first markdown code block (` ```avap ` or generic ` ``` `). `<think>` content is stripped before extraction. The fence markers are stripped before sending to the parser.
|
||||
|
||||
#### Parser SLA
|
||||
|
||||
Inherited from ADR-0007: ≤2 seconds per call. **Silent fallback is permitted in production** (unlike evaluation, where ADR-0007 mandates abort). The distinction is that evaluation scores must be trustworthy; production responses degrade gracefully.
|
||||
≤2 seconds per call (`AVAP_PARSER_TIMEOUT`). **Silent fallback is permitted in production** (unlike evaluation, where ADR-0007 mandates abort). The distinction is that evaluation scores must be trustworthy; production responses degrade gracefully.
|
||||
|
||||
#### Parser availability — circuit breaker
|
||||
|
||||
|
|
@ -135,8 +237,8 @@ Setting `AVAP_PARSER_TIMEOUT=0` permanently opens the circuit — disables parse
|
|||
#### New environment variables
|
||||
|
||||
```
|
||||
AVAP_PARSER_URL=grpc://... # URL of AVAP Parser gRPC service
|
||||
AVAP_PARSER_TIMEOUT=2 # seconds per call; 0 = disable validation
|
||||
AVAP_PARSER_URL=http://... # URL of AVAP Parser REST service (e.g. http://45.77.193.144:8888)
|
||||
AVAP_PARSER_TIMEOUT=2 # seconds per call; 0 = disable validation entirely
|
||||
PARSER_CB_THRESHOLD=3 # consecutive failures before circuit opens
|
||||
PARSER_CB_COOLDOWN=30 # seconds before circuit attempts half-open probe
|
||||
```
|
||||
|
|
@ -189,16 +291,85 @@ Reformulate this query using broader terms or alternative phrasing.
|
|||
|
||||
---
|
||||
|
||||
---
|
||||
|
||||
### Decision 3 — AskAgentStream: streaming state machine for CODE_GENERATION
|
||||
|
||||
The `AskAgentStream` path streams tokens directly to the client. Post-generation validation (Decision 1) cannot run here because tokens are already yielded before the response is complete.
|
||||
|
||||
**Decision:** implement a **streaming state machine** that operates inline on the token stream. The machine has two states: TEXT and CODE.
|
||||
|
||||
#### Flow
|
||||
|
||||
```
|
||||
LLM token stream
|
||||
│
|
||||
▼
|
||||
STATE: TEXT
|
||||
→ yield token to client immediately
|
||||
→ detect ``` fence in lookahead buffer (2-char safety window for split tokens)
|
||||
│ ``` detected
|
||||
▼
|
||||
STATE: CODE (buffering — nothing yielded to client)
|
||||
→ accumulate tokens in code_buffer
|
||||
→ detect closing ``` after first newline
|
||||
│ closing ``` detected
|
||||
▼
|
||||
_call_parser(complete_block)
|
||||
│
|
||||
├── VALID ──────────────────────────────► yield code block → back to TEXT
|
||||
│
|
||||
├── INVALID
|
||||
│ │
|
||||
│ ▼
|
||||
│ LLM fix call (fix only the code block, not the full response)
|
||||
│ "Fix this AVAP code: {trace}"
|
||||
│ │
|
||||
│ ▼
|
||||
│ _call_parser(fixed_block)
|
||||
│ ├── VALID ──────────────────────► yield fixed block → back to TEXT
|
||||
│ ├── INVALID ────────────────────► yield fixed block + INVALID_UNRESOLVED → back to TEXT
|
||||
│ └── UNAVAILABLE ────────────────► yield fixed block + PARSER_UNAVAILABLE → back to TEXT
|
||||
│
|
||||
└── UNAVAILABLE ────────────────────────► yield block as-is + PARSER_UNAVAILABLE → back to TEXT
|
||||
```
|
||||
|
||||
#### Key properties
|
||||
|
||||
- Text before and after the code block streams to the client without delay.
|
||||
- Only the code block itself introduces latency (one parser call, optionally one LLM fix call + one parser call).
|
||||
- The fix call asks the LLM to correct **only the code block**, not the full response — the text already streamed to the client remains valid.
|
||||
- If the response contains multiple code blocks, each block is processed independently in sequence. `validation_status` in the final `is_final=True` message reflects the last block validated.
|
||||
- If the stream ends while still in CODE mode (malformed response without closing fence), the buffer is flushed as-is.
|
||||
|
||||
#### Difference from AskAgent path
|
||||
|
||||
| Property | AskAgent | AskAgentStream |
|
||||
|---|---|---|
|
||||
| Validation point | Post-generation, pre-delivery | Inline, at code block boundary |
|
||||
| Retry mechanism | Full `generate_code_retry` LangGraph node | Targeted LLM fix call (code block only) |
|
||||
| Text streaming | N/A (non-streaming) | Uninterrupted |
|
||||
| `validation_status` delivery | In `AgentResponse` (only response) | In final `AgentResponse` (`is_final=True`) |
|
||||
|
||||
---
|
||||
|
||||
## Graph changes
|
||||
|
||||
### New nodes
|
||||
|
||||
| Node | Graph | Trigger |
|
||||
|---|---|---|
|
||||
| `validate_code` | `build_graph` | After `generate_code` |
|
||||
| `generate_tests` | `build_graph` | After `generate_code` — generates `test_inputs` + `test_list` |
|
||||
| `validate_code` | `build_graph` | After `generate_tests` |
|
||||
| `generate_code_retry` | `build_graph` | After `validate_code` when INVALID |
|
||||
| `check_context_relevance` | `build_graph` + `build_prepare_graph` | After `retrieve`, before `generate` (RETRIEVAL only) |
|
||||
| `validate_code_after_retry` | `build_graph` | After `generate_code_retry` |
|
||||
| `check_context_relevance` | `build_graph` + `build_prepare_graph` | After `retrieve`, RETRIEVAL only |
|
||||
| `reformulate_with_hint` | `build_graph` + `build_prepare_graph` | After `check_context_relevance` when NO |
|
||||
| `retrieve_retry` | `build_graph` + `build_prepare_graph` | After `reformulate_with_hint` |
|
||||
|
||||
### AskAgentStream
|
||||
|
||||
No new LangGraph nodes. The validation logic runs as a **streaming state machine** inline in `server.py:AskAgentStream`. See Decision 3.
|
||||
|
||||
### Updated flow — `build_graph`
|
||||
|
||||
|
|
@ -221,10 +392,11 @@ flowchart TD
|
|||
RH --> RT2[retrieve_retry]
|
||||
RT2 --> GE
|
||||
|
||||
GC --> VC{validate_code\nParser gRPC}
|
||||
VC -->|VALID| END([end])
|
||||
VC -->|INVALID + trace| GCR[generate_code_retry\ntrace-guided]
|
||||
GCR --> VC2{validate_code\nParser gRPC}
|
||||
GC --> GT[generate_tests\nLLM → test_inputs + test_list]
|
||||
GT --> VC{validate_code\n/api/v1/execute}
|
||||
VC -->|VALID + assertions pass| END([end])
|
||||
VC -->|runtime error or assertion fail| GCR[generate_code_retry\ntrace-guided]
|
||||
GCR --> VC2{validate_code_after_retry\n/api/v1/execute}
|
||||
VC2 -->|VALID| END
|
||||
VC2 -->|INVALID| END
|
||||
|
||||
|
|
@ -241,9 +413,12 @@ flowchart TD
|
|||
class AgentState(TypedDict):
|
||||
...
|
||||
# PTVL fields
|
||||
parser_trace: str # raw parser trace from first validation attempt (empty if valid)
|
||||
parser_trace: str # raw parser error from first validation attempt (empty if valid)
|
||||
validation_status: str # see validation status values below
|
||||
context_relevant: bool # result of CONFIDENCE_PROMPT check (RETRIEVAL only)
|
||||
# Test generation fields (set by generate_tests node)
|
||||
test_inputs: dict # variables injected when executing generated code
|
||||
test_list: list # regex assertions validated against output variables after execution
|
||||
```
|
||||
|
||||
### Validation status values
|
||||
|
|
@ -267,6 +442,8 @@ message AgentResponse {
|
|||
}
|
||||
```
|
||||
|
||||
**Implementation status: complete (2026-04-12).** Field 4 added to `brunix.proto`. Populated in both `AskAgent` (from `final_state`) and `AskAgentStream` (in the `is_final=True` message at end of stream).
|
||||
|
||||
Clients that do not read `validation_status` are unaffected — the field defaults to empty string.
|
||||
|
||||
---
|
||||
|
|
@ -287,7 +464,7 @@ A `CODE_GENERATION` response returned without parser validation due to parser un
|
|||
|
||||
### RC-08 — Retry budget (priority: medium)
|
||||
|
||||
Each request has a maximum of **1 retry** regardless of type. A `CODE_GENERATION` request that fails parser validation twice returns the second attempt with `validation_status=true`. A `RETRIEVAL` request whose context is insufficient reformulates once and generates unconditionally on the second retrieval.
|
||||
Each request has a maximum of **1 retry** regardless of type. A `CODE_GENERATION` request that fails parser validation twice returns the second attempt with `validation_status=INVALID_UNRESOLVED`. A `RETRIEVAL` request whose context is insufficient reformulates once and generates unconditionally on the second retrieval.
|
||||
|
||||
No request may enter more than one retry cycle.
|
||||
|
||||
|
|
@ -298,25 +475,31 @@ No request may enter more than one retry cycle.
|
|||
### Positive
|
||||
|
||||
- Syntactically invalid AVAP code no longer reaches users silently. `validation_status` gives the client a typed signal: `INVALID_UNRESOLVED` (evidence of bad code) vs `PARSER_UNAVAILABLE` (no evidence either way) — clients can respond differently to each.
|
||||
- The parser trace makes retries targeted rather than blind — the LLM corrects specific lines, not the whole response.
|
||||
- Execution validation (`/api/v1/execute`) catches runtime errors invisible to the AST parser — undefined variables, unsupported commands, type mismatches.
|
||||
- Assertion validation verifies the code produces the expected output, not just that it runs.
|
||||
- The parser error trace makes retries targeted rather than blind — the LLM corrects specific runtime failures, not the whole response.
|
||||
- Circuit breaker prevents parser outages from adding latency to every `CODE_GENERATION` request. After 3 consecutive failures the engine stops trying for 30 seconds.
|
||||
- Context relevance check catches retrievals that return topically adjacent but non-answering chunks, reducing fluent-but-ungrounded responses.
|
||||
- `AVAP_PARSER_TIMEOUT=0` allows development without the parser service — no hard dependency at startup.
|
||||
- Automatic fallback from `/api/v1/execute` to `/parse` — new parser endpoint can be deployed without a coordinated engine restart.
|
||||
|
||||
### Negative / Trade-offs
|
||||
|
||||
- **`CODE_GENERATION` latency**: +1 parser gRPC call per request (~50–200ms for valid code). +1 LLM generation call + 1 parser call on invalid code (~1–2s additional).
|
||||
- **`CODE_GENERATION` latency**: +1 `generate_tests` LLM call + 1 parser execution call per request. If `generate_tests` fails (timeout/error), the node returns empty tests and validation continues — no blocking.
|
||||
- **`RETRIEVAL` latency**: +1 LLM call (relevance check) on every request. At `qwen3:1.7b` local inference, this adds ~300–500ms to every RETRIEVAL request — not negligible.
|
||||
- The parser becomes a **soft production dependency** for CODE_GENERATION. Parser outages degrade validation silently; monitoring must alert on sustained `parser unavailable` log volume.
|
||||
- The context relevance check is a **generative model doing a binary classification task** — the same architectural mismatch noted in ADR-0008 for the classifier. It is the correct interim solution while no discriminative relevance model exists.
|
||||
- **`registerEndpoint` suppressed by default** — `CODE_GENERATION_PROMPT` rule 7 omits `registerEndpoint` unless the user explicitly requests endpoint registration. This prevents over-engineering simple code responses but requires the user to be explicit when endpoint scaffolding is needed.
|
||||
|
||||
### Open questions
|
||||
|
||||
1. **`RETRIEVAL` latency budget**: The +300–500ms from the relevance LLM call may be unacceptable for the VS Code extension use case where streaming latency is user-visible. A discriminative relevance model (embedding similarity between query vector and context vector, cosine threshold) would be ~1ms and eliminate this cost entirely. Deferred to a future amendment.
|
||||
1. **`RETRIEVAL` latency budget**: The +300–500ms from the relevance LLM call may be unacceptable for the VS Code extension use case where streaming latency is user-visible. A discriminative relevance model (embedding similarity between query vector and context vector, cosine threshold) would be ~1ms and eliminate this cost entirely. Deferred to a future amendment. **Trigger:** when the RETRIEVAL validation LLM call appears as a measurable contribution in Langfuse traces.
|
||||
|
||||
2. **`validation_status` UX**: The proto field is defined but the client behavior is not specified. What should the VS Code extension or AVS Platform display when `validation_status=true`? Requires a product decision outside this ADR's scope.
|
||||
2. **`validation_status` UX**: Proto field 4 is defined and populated. What the VS Code extension or AVS Platform displays when `validation_status=INVALID_UNRESOLVED` or `PARSER_UNAVAILABLE` is not yet specified. Requires a product decision outside this ADR's scope. **Open.**
|
||||
|
||||
3. **Parser version pinning**: Inherited from ADR-0007 open question 2. Parser upgrades may alter what is considered valid AVAP. A policy for handling parser version changes in the production pipeline has not been defined.
|
||||
3. **Parser version pinning**: Inherited from ADR-0007 open question 2. Parser upgrades may alter what is considered valid AVAP. A policy for handling parser version changes in the production pipeline has not been defined. **Open.**
|
||||
|
||||
4. **`validation_status` across multiple code blocks (AskAgentStream)**: When a response contains more than one code block, the `validation_status` in the final `is_final=True` message reflects only the last block validated. If an earlier block was `INVALID_UNRESOLVED` and the last was valid, the client receives `""`. In practice AVAP responses contain one code block. A future amendment may accumulate the worst-case status across all blocks. **Low priority.**
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
## Context
|
||||
|
||||
ADR-0008 Phase 2 deployed a Layer 2 embedding classifier trained on a **seed dataset of 94 hand-crafted examples**. This model works well for the initial distribution of queries but has two structural limitations:
|
||||
ADR-0008 Phase 2 deployed a Layer 2 embedding classifier trained on a **seed dataset of 204 hand-crafted examples** (initially 95; expanded on 2026-04-13 with 109 bilingual Spanish examples covering all four intent categories and interrogative `CODE_GENERATION` patterns). This model works well for the initial distribution of queries but has two structural limitations:
|
||||
|
||||
1. **The seed dataset does not reflect production traffic.** Hand-crafted examples are idealized. Real users ask questions with typos, mixed languages, ambiguous phrasing, and domain-specific vocabulary that is not in the seed.
|
||||
|
||||
|
|
@ -121,9 +121,9 @@ Each retraining cycle merges the seed dataset with all accumulated production ex
|
|||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
T0["Cycle 0\n94 seed examples\nCV 1.0 on seed"] -->
|
||||
T1["Cycle 1\n94 + ~100 production\nreal query distribution"] -->
|
||||
T2["Cycle 2\n94 + ~200 production\nincreasing coverage"] -->
|
||||
T0["Cycle 0\n204 seed examples\nCV 1.0 on seed"] -->
|
||||
T1["Cycle 1\n204 + ~100 production\nreal query distribution"] -->
|
||||
T2["Cycle 2\n204 + ~200 production\nincreasing coverage"] -->
|
||||
TN["Cycle N\nseed becomes minority\nmodel reflects production traffic"]
|
||||
```
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
# Brunix Assistance Engine — Architecture Reference
|
||||
|
||||
> **Audience:** Engineers contributing to this repository, architects reviewing the system design, and operators responsible for its deployment.
|
||||
> **Last updated:** 2026-04-10
|
||||
> **Version:** 1.9.x
|
||||
> **Last updated:** 2026-04-13
|
||||
> **Version:** 1.7.x
|
||||
> **Architect:** Rafael Ruiz (CTO, 101OBEX Corp)
|
||||
> **Related ADRs:** ADR-0001 · ADR-0002 · ADR-0003 · ADR-0004 · ADR-0005 · ADR-0006 · ADR-0007 · ADR-0008
|
||||
> **Related ADRs:** ADR-0001 · ADR-0002 · ADR-0003 · ADR-0004 · ADR-0005 · ADR-0006 · ADR-0007 · ADR-0008 · ADR-0009 · ADR-0010
|
||||
> **Related PRDs:** PRD-0001 · PRD-0002 · PRD-0003
|
||||
|
||||
---
|
||||
|
|
@ -116,15 +116,16 @@ Langfuse is the exception — it has a public IP (`45.77.119.180`) and is access
|
|||
|---|---|---|
|
||||
| gRPC server | `server.py` | Entry point for all AI requests. Manages session store, model selection, and state initialization |
|
||||
| HTTP proxy | `openai_proxy.py` | OpenAI + Ollama compatible HTTP layer. Translates REST → gRPC |
|
||||
| LangGraph orchestrator | `graph.py` | Builds and executes the agentic routing graph. Hosts L1, L2, and L3 classifier layers |
|
||||
| Prompt definitions | `prompts.py` | All prompt templates in one place: classifier, reformulator, generators, platform |
|
||||
| LangGraph orchestrator | `graph.py` | Builds and executes the agentic routing graph. Hosts L1, L2, and L3 classifier layers. Implements PTVL (ADR-0009) |
|
||||
| Prompt definitions | `prompts.py` | All prompt templates: classifier, reformulator, generators, platform, test generation |
|
||||
| Agent state | `state.py` | `AgentState` TypedDict shared across all graph nodes |
|
||||
| LLM factory | `utils/llm_factory.py` | Provider-agnostic model instantiation (Ollama, OpenAI, Anthropic, Bedrock) |
|
||||
| Embedding factory | `utils/emb_factory.py` | Provider-agnostic embedding model instantiation |
|
||||
| Classifier export | `utils/classifier_export.py` | Exports `classify_history_store` to labeled JSONL when threshold is reached. Data flywheel for Layer 2 retraining |
|
||||
| Evaluation pipeline | `evaluate.py` | RAGAS evaluation with Claude as judge |
|
||||
| Proto contract | `protos/brunix.proto` | Source of truth for the gRPC API |
|
||||
| Classifier training | `scripts/pipelines/classifier/train_classifier.py` | Offline script. Embeds labeled queries with bge-m3, trains LogisticRegression, serializes model |
|
||||
| Classifier training | `scripts/pipelines/classifier/retrain_pipeline.py` | Champion/Challenger retraining. Embeds queries with bge-m3, cross-validates, promotes if challenger ≥ champion |
|
||||
| Classifier seed dataset | `scripts/pipelines/classifier/seed_classifier_dataset.jsonl` | 204 labeled examples across 4 categories. Bilingual (EN + ES). Anchors all retraining runs |
|
||||
|
||||
**Model slots:**
|
||||
|
||||
|
|
@ -255,6 +256,8 @@ The classifier does not receive raw conversation messages. It receives a compact
|
|||
| RC-04 | `PLATFORM` and `CONVERSATIONAL` never touch Elasticsearch | Medium |
|
||||
| RC-05 | `RETRIEVAL`/`CODE_GENERATION` → main model; `CONVERSATIONAL`/`PLATFORM` → conversational model | Medium |
|
||||
| RC-06 | Intent history capped at 6 entries | Low |
|
||||
| RC-07 | Every `CODE_GENERATION` response must be validated by parser before delivery (ADR-0009) | High |
|
||||
| RC-08 | Maximum 1 retry per request regardless of type (ADR-0009) | Medium |
|
||||
|
||||
---
|
||||
|
||||
|
|
@ -272,10 +275,22 @@ flowchart TD
|
|||
CL -->|CONVERSATIONAL| RC[respond_conversational]
|
||||
CL -->|PLATFORM| RP[respond_platform]
|
||||
RF --> RT[retrieve]
|
||||
RT -->|RETRIEVAL| GE[generate]
|
||||
|
||||
RT -->|RETRIEVAL| CR{check_context\nrelevance}
|
||||
CR -->|YES| GE[generate]
|
||||
CR -->|NO| RH[reformulate_with_hint]
|
||||
RH --> RT2[retrieve_retry]
|
||||
RT2 --> GE
|
||||
|
||||
RT -->|CODE_GENERATION| GC[generate_code]
|
||||
GE --> END([end])
|
||||
GC --> END
|
||||
GC --> GT[generate_tests\nLLM → test_inputs + test_list]
|
||||
GT --> VC{validate_code\n/api/v1/execute}
|
||||
VC -->|VALID| END([end])
|
||||
VC -->|INVALID| GCR[generate_code_retry\ntrace-guided]
|
||||
GCR --> VC2{validate_code_after_retry}
|
||||
VC2 --> END
|
||||
|
||||
GE --> END
|
||||
RC --> END
|
||||
RP --> END
|
||||
```
|
||||
|
|
@ -468,6 +483,13 @@ class AgentState(TypedDict):
|
|||
extra_context: str # base64 decoded
|
||||
user_info: str # JSON: {dev_id, project_id, org_id}
|
||||
use_editor_context: bool # set by classifier
|
||||
|
||||
# PTVL — Per-Type Validation Layer (ADR-0009)
|
||||
parser_trace: str # runtime error from first validation (empty if valid)
|
||||
validation_status: str # "" | "INVALID_UNRESOLVED" | "PARSER_UNAVAILABLE"
|
||||
context_relevant: bool # result of CONFIDENCE_PROMPT check (RETRIEVAL only)
|
||||
test_inputs: dict # variables injected when executing generated code
|
||||
test_list: list # regex assertions validated against output variables
|
||||
```
|
||||
|
||||
---
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -93,3 +93,112 @@
|
|||
{"query": "What is my current monthly spend on AVAP Cloud?", "type": "PLATFORM"}
|
||||
{"query": "How do I add more API capacity to my plan?", "type": "PLATFORM"}
|
||||
{"query": "Your project usage percentage is critically high at 98%", "type": "PLATFORM"}
|
||||
{"query": "Write an AVAP script that reads a URL parameter and returns it in the response", "type": "CODE_GENERATION"}
|
||||
{"query": "Generate AVAP code that receives a number and returns its square", "type": "CODE_GENERATION"}
|
||||
{"query": "Create an AVAP endpoint that accepts a name parameter and returns a greeting", "type": "CODE_GENERATION"}
|
||||
{"query": "Write AVAP code that checks if a variable is null and returns an error", "type": "CODE_GENERATION"}
|
||||
{"query": "Generate an AVAP function that concatenates two string parameters", "type": "CODE_GENERATION"}
|
||||
{"query": "como seria una api que reciba un parametro y lo devuelva como respuesta?", "type": "CODE_GENERATION"}
|
||||
{"query": "como haria un script en AVAP que llame a una base de datos?", "type": "CODE_GENERATION"}
|
||||
{"query": "puedes escribir un endpoint que reciba un JSON y devuelva un campo especifico?", "type": "CODE_GENERATION"}
|
||||
{"query": "muéstrame como hacer una api que sume dos numeros", "type": "CODE_GENERATION"}
|
||||
{"query": "necesito un script AVAP que lea un parametro de la URL", "type": "CODE_GENERATION"}
|
||||
{"query": "como se hace un endpoint en AVAP que devuelva un array?", "type": "CODE_GENERATION"}
|
||||
{"query": "dame el codigo para hacer una llamada HTTP externa desde AVAP", "type": "CODE_GENERATION"}
|
||||
{"query": "como implementaria un loop que recorra una lista en AVAP?", "type": "CODE_GENERATION"}
|
||||
{"query": "podrias generar un ejemplo de manejo de errores en AVAP?", "type": "CODE_GENERATION"}
|
||||
{"query": "como seria el codigo para validar que un parametro no este vacio?", "type": "CODE_GENERATION"}
|
||||
{"query": "necesito que me hagas un script que devuelva hola mundo", "type": "CODE_GENERATION"}
|
||||
{"query": "puedes hacer un endpoint POST que guarde datos en la base de datos?", "type": "CODE_GENERATION"}
|
||||
{"query": "como haria para recibir varios parametros y devolver uno procesado?", "type": "CODE_GENERATION"}
|
||||
{"query": "dame un ejemplo de codigo AVAP que use addParam y addResult", "type": "CODE_GENERATION"}
|
||||
{"query": "como se implementa un try catch en AVAP? muéstrame el codigo", "type": "CODE_GENERATION"}
|
||||
{"query": "escríbeme una funcion AVAP que calcule el total de una lista", "type": "CODE_GENERATION"}
|
||||
{"query": "como quedaria una api que reciba nombre y apellido y devuelva el nombre completo?", "type": "CODE_GENERATION"}
|
||||
{"query": "genera un endpoint AVAP que devuelva el status 404 si no encuentra el recurso", "type": "CODE_GENERATION"}
|
||||
{"query": "como haria una api que haga una peticion GET a otro servicio?", "type": "CODE_GENERATION"}
|
||||
{"query": "puedes generarme codigo AVAP que itere sobre un JSON?", "type": "CODE_GENERATION"}
|
||||
{"query": "como seria un script que asigne variables y las devuelva en la respuesta?", "type": "CODE_GENERATION"}
|
||||
{"query": "necesito codigo que reciba un id y devuelva un objeto con ese id", "type": "CODE_GENERATION"}
|
||||
{"query": "como implemento un condicional en AVAP? ponme un ejemplo", "type": "CODE_GENERATION"}
|
||||
{"query": "podrias escribir un endpoint que concatene dos parametros?", "type": "CODE_GENERATION"}
|
||||
{"query": "como haria para devolver un error personalizado desde AVAP?", "type": "CODE_GENERATION"}
|
||||
{"query": "que seria el codigo minimo para crear un endpoint GET en AVAP?", "type": "CODE_GENERATION"}
|
||||
{"query": "muéstrame un script AVAP que use un bucle para procesar una lista", "type": "CODE_GENERATION"}
|
||||
{"query": "como quedaria un API que reciba un token y lo valide?", "type": "CODE_GENERATION"}
|
||||
{"query": "genera el codigo para un endpoint que devuelva la fecha actual", "type": "CODE_GENERATION"}
|
||||
{"query": "como se escribe una funcion reutilizable en AVAP?", "type": "CODE_GENERATION"}
|
||||
{"query": "que es addVar en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "para que sirve addResult en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como funciona el comando addParam?", "type": "RETRIEVAL"}
|
||||
{"query": "cual es la diferencia entre addVar y addObject en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "que hace startLoop en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como funciona el manejo de errores en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "que es un conector en AVAP Cloud?", "type": "RETRIEVAL"}
|
||||
{"query": "cual es el modelo de ejecucion de AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como funciona el enrutamiento en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "para que se usa el nodo de pipeline en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "que tipos de datos soporta AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como funciona el if en AVAP? cuantos modos tiene?", "type": "RETRIEVAL"}
|
||||
{"query": "que diferencia hay entre addRow y addObject?", "type": "RETRIEVAL"}
|
||||
{"query": "como se definen funciones reutilizables en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "que es el catalogo en AVAP Cloud?", "type": "RETRIEVAL"}
|
||||
{"query": "como se gestionan las variables de entorno en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "para que sirve el nodo return en un pipeline?", "type": "RETRIEVAL"}
|
||||
{"query": "como maneja AVAP las transformaciones JSON?", "type": "RETRIEVAL"}
|
||||
{"query": "que opciones de logging tiene AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como se conecta AVAP a una API REST externa?", "type": "RETRIEVAL"}
|
||||
{"query": "que es una API virtual en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como funciona la autenticacion en las llamadas de AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "como se pagina resultados en una API AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "que es endLoop y cuando se usa?", "type": "RETRIEVAL"}
|
||||
{"query": "como funciona el exception en AVAP?", "type": "RETRIEVAL"}
|
||||
{"query": "no entendi lo que dijiste", "type": "CONVERSATIONAL"}
|
||||
{"query": "puedes explicarlo de otra manera?", "type": "CONVERSATIONAL"}
|
||||
{"query": "eso no es lo que pregunte", "type": "CONVERSATIONAL"}
|
||||
{"query": "en pocas palabras, que quisiste decir?", "type": "CONVERSATIONAL"}
|
||||
{"query": "puedes resumirlo en un parrafo?", "type": "CONVERSATIONAL"}
|
||||
{"query": "repite lo que dijiste pero mas claro", "type": "CONVERSATIONAL"}
|
||||
{"query": "no entendi la segunda parte", "type": "CONVERSATIONAL"}
|
||||
{"query": "puedes darme una analogia?", "type": "CONVERSATIONAL"}
|
||||
{"query": "dimelo mas simple", "type": "CONVERSATIONAL"}
|
||||
{"query": "vuelve a lo que dijiste sobre los conectores", "type": "CONVERSATIONAL"}
|
||||
{"query": "dame un resumen en puntos de tu respuesta anterior", "type": "CONVERSATIONAL"}
|
||||
{"query": "estas seguro de eso?", "type": "CONVERSATIONAL"}
|
||||
{"query": "que quisiste decir con eso?", "type": "CONVERSATIONAL"}
|
||||
{"query": "puedes ser mas especifico?", "type": "CONVERSATIONAL"}
|
||||
{"query": "amplia la parte del manejo de errores", "type": "CONVERSATIONAL"}
|
||||
{"query": "dilo en una sola frase", "type": "CONVERSATIONAL"}
|
||||
{"query": "cual era el ejemplo que mencionaste antes?", "type": "CONVERSATIONAL"}
|
||||
{"query": "no lo entiendo, intenta con otra explicacion", "type": "CONVERSATIONAL"}
|
||||
{"query": "y en la practica, como se veria eso?", "type": "CONVERSATIONAL"}
|
||||
{"query": "explicamelo paso a paso", "type": "CONVERSATIONAL"}
|
||||
{"query": "compara las dos opciones que describiste", "type": "CONVERSATIONAL"}
|
||||
{"query": "lo que dijiste al principio sobre pipelines, repitemelo", "type": "CONVERSATIONAL"}
|
||||
{"query": "reformula tu respuesta enfocandote en el rendimiento", "type": "CONVERSATIONAL"}
|
||||
{"query": "tengo dudas sobre lo que explicaste, puedes profundizar?", "type": "CONVERSATIONAL"}
|
||||
{"query": "cuantas llamadas llevo este mes?", "type": "PLATFORM"}
|
||||
{"query": "cual es mi plan actual?", "type": "PLATFORM"}
|
||||
{"query": "cuando se renueva mi suscripcion?", "type": "PLATFORM"}
|
||||
{"query": "cuanto he consumido de mi cuota?", "type": "PLATFORM"}
|
||||
{"query": "cuantas llamadas me quedan disponibles?", "type": "PLATFORM"}
|
||||
{"query": "estoy cerca del limite de mi plan?", "type": "PLATFORM"}
|
||||
{"query": "que pasa si supero mi cuota mensual?", "type": "PLATFORM"}
|
||||
{"query": "puedo ampliar mi plan actual?", "type": "PLATFORM"}
|
||||
{"query": "muéstrame las estadisticas de uso de mi cuenta", "type": "PLATFORM"}
|
||||
{"query": "cuantos proyectos puedo crear con mi plan?", "type": "PLATFORM"}
|
||||
{"query": "tu proyecto tiene un 85% de uso, que recomiendas?", "type": "PLATFORM"}
|
||||
{"query": "llevas 4500 llamadas API este mes de un limite de 5000", "type": "PLATFORM"}
|
||||
{"query": "tu cuenta ha superado el 90% del limite mensual", "type": "PLATFORM"}
|
||||
{"query": "cual es mi gasto mensual en AVAP Cloud?", "type": "PLATFORM"}
|
||||
{"query": "como agrego mas capacidad a mi plan?", "type": "PLATFORM"}
|
||||
{"query": "tu prueba gratuita expira en 2 dias", "type": "PLATFORM"}
|
||||
{"query": "tu consumo esta semana es el doble que la semana pasada", "type": "PLATFORM"}
|
||||
{"query": "estado de mi cuenta en AVAP Cloud", "type": "PLATFORM"}
|
||||
{"query": "cuales son mis detalles de facturacion?", "type": "PLATFORM"}
|
||||
{"query": "me queda poco de mi cuota, que hago?", "type": "PLATFORM"}
|
||||
{"query": "tu proyecto ha usado el 60% de los recursos asignados", "type": "PLATFORM"}
|
||||
{"query": "cuando caduca mi suscripcion actual?", "type": "PLATFORM"}
|
||||
{"query": "cuanto cuesta pasar al plan profesional?", "type": "PLATFORM"}
|
||||
{"query": "hay alguna alerta de uso en mi cuenta?", "type": "PLATFORM"}
|
||||
{"query": "tu limite diario de API calls es de 1000 y ya llevas 950", "type": "PLATFORM"}
|
||||
|
|
|
|||
|
|
@ -552,8 +552,9 @@ def run_map_elites(args, client, lrm, output_path):
|
|||
code = problem["code"]
|
||||
test_list = problem.get("test_list", [])
|
||||
|
||||
print("#######################################")
|
||||
is_valid, ast, error_msg = validator.parse(code)
|
||||
|
||||
print("#######################################")
|
||||
if is_valid is None:
|
||||
is_valid, ast = True, {}
|
||||
if call_count == 1:
|
||||
|
|
|
|||
Loading…
Reference in New Issue