diff --git a/Docker/src/golden_dataset.json b/Docker/src/golden_dataset.json
index e92d6da..148c8c8 100644
--- a/Docker/src/golden_dataset.json
+++ b/Docker/src/golden_dataset.json
@@ -1,32 +1,302 @@
[
{
- "id": "GD-001",
+ "id": "GD-R-001",
"category": "RETRIEVAL",
"question": "What is AVAP and what is it designed for?",
- "ground_truth": "AVAP (Advanced Virtual API Programming) is a Turing-complete Domain-Specific Language (DSL) architecturally designed for the secure, concurrent, and deterministic orchestration of microservices and HTTP I/O. It is not a general-purpose language; its hybrid engine and strict grammar are optimized for fast processing of HTTP transactions, in-memory data manipulation, and interaction with external connectors. AVAP does not have internal print commands — all data output is performed through the HTTP interface using commands like addResult()."
+ "ground_truth": "AVAP is a Turing-complete Domain-Specific Language (DSL) architecturally designed for the secure, concurrent, and deterministic orchestration of microservices and HTTP I/O. It is not a general-purpose language. Its hybrid engine and strict grammar are optimized for fast HTTP transaction processing, in-memory data manipulation, and interaction with external connectors. AVAP has no internal print commands — all data output is performed through the HTTP interface using addResult()."
},
{
- "id": "GD-002",
+ "id": "GD-R-002",
"category": "RETRIEVAL",
- "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
- "ground_truth": "AVAP uses a mixed structural grammar for conditional logic, combining keyword fluidity with strict mathematical closures. The if() / else() / end() structure evaluates a logical or comparison expression. Every conditional block requires a mandatory end() closing statement. The if() command compares two values using a comparator operator (e.g., '==', '!=', '>', '<', '>=', '<='). An optional else() block handles the false branch. Example: if(saldo, 0, \">\") executes the true branch when the variable 'saldo' is greater than zero, otherwise the else() block runs, and end() closes the structure."
+ "question": "How does the if() conditional block work in AVAP? How are blocks closed?",
+ "ground_truth": "AVAP uses if() / else() / end() for conditional logic. The if() command evaluates a comparison between two values using a comparator operator (==, !=, >, <, >=, <=, in). Every conditional block must be closed with end(). The else() block is optional and handles the false branch. Example: if(saldo, 0, \">\") executes the true branch when saldo is greater than zero, otherwise the else() block runs, and end() closes the structure. AVAP also supports a mode 2 where a full Python-style expression is passed as a string: if(None, None, \"user_type == 'VIP' or compras > 100\")."
},
{
- "id": "GD-003",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
- "ground_truth": "The following AVAP script reads a 'password' query parameter, hashes it using SHA-256 via encodeSHA256(), and exposes the result via addResult():\n\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\naddResult(hashed_password)\n\nKey commands used:\n- addParam(\"password\", password): reads the 'password' HTTP parameter into the variable 'password'.\n- encodeSHA256(password, hashed_password): computes the SHA-256 hash of the input and stores the 64-character hex digest in 'hashed_password'.\n- addResult(hashed_password): adds 'hashed_password' to the HTTP JSON response body."
- },
- {
- "id": "GD-004",
- "category": "CODE_GENERATION",
- "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
- "ground_truth": "The following AVAP script iterates from 1 to 5 using startLoop/endLoop, dynamically builds a JSON object using AddvariableToJSON() on each iteration, and returns the result:\n\naddVar(mi_json, \"{}\")\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item, \"valor_generado\", mi_json)\nendLoop()\naddResult(mi_json)\n\nKey commands used:\n- addVar(mi_json, \"{}\"): initializes an empty JSON object.\n- startLoop(i, 1, 5) / endLoop(): iterates the variable 'i' from 1 to 5 inclusive.\n- AddvariableToJSON(item, \"valor_generado\", mi_json): inserts each generated key-value pair into the JSON object.\n- addResult(mi_json): exposes the final JSON in the HTTP response."
- },
- {
- "id": "GD-005",
+ "id": "GD-R-003",
"category": "RETRIEVAL",
- "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
- "ground_truth": "AVAP provides two commands for making external HTTP calls: RequestPost and RequestGet. To avoid blocking threads due to network latency, AVAP requires a mandatory timeout parameter (in milliseconds) for both commands. If the timeout is exceeded, the destination variable receives None. RequestPost(url, querystring, headers, body, destino, timeout) executes an HTTP POST and stores the response in 'destino'. RequestGet(url, querystring, headers, destino, timeout) executes an HTTP GET similarly. Both commands are part of AVAP's Section V (Third-Party Connectors and External HTTP Requests) and allow calling external APIs without additional drivers."
+ "question": "How does AVAP handle external HTTP calls? What commands are available and how is timeout managed?",
+ "ground_truth": "AVAP provides RequestGet and RequestPost for external HTTP calls. To avoid blocking threads due to network latency, AVAP requires a mandatory timeout parameter in milliseconds. If the timeout is exceeded, the destination variable receives None. RequestPost(url, querystring, headers, body, destino, timeout) executes an HTTP POST storing the response in destino. RequestGet(url, querystring, headers, destino, timeout) executes an HTTP GET. Both commands allow calling external APIs without additional drivers."
+ },
+ {
+ "id": "GD-R-004",
+ "category": "RETRIEVAL",
+ "question": "How do functions work in AVAP? What is the scope of variables inside a function?",
+ "ground_truth": "Functions in AVAP are hermetic memory enclosures. When entering a function, AVAP creates a new dictionary of local variables isolated from the global context. The return() command acts as a flow switch: it injects the calculated value to the caller and releases local memory. If used inside a startLoop, it also breaks the iteration. Variables declared inside a function are only visible within that function — they are not accessible from the main flow or other functions. AVAP has three scope types: Global Scope, Main Local Scope, and Function Scope."
+ },
+ {
+ "id": "GD-R-005",
+ "category": "RETRIEVAL",
+ "question": "What are the three types of variable scopes in AVAP and what are their visibility rules?",
+ "ground_truth": "AVAP uses three scope types: Global Scope contains globally declared variables, accessible from anywhere in the program and persists for the entire interpreter process lifetime. Main Local Scope contains variables declared in the main flow — accessible within the main flow but not from functions or goroutines, and disappears when script execution ends. Function Scope is created independently for each function invocation and contains function parameters and locally created variables — only visible within that function, not from outside, and is destroyed when the function terminates. If a variable does not exist in the visible scopes, the engine produces a runtime error."
+ },
+ {
+ "id": "GD-R-006",
+ "category": "RETRIEVAL",
+ "question": "How does concurrency work in AVAP? What are goroutines and how are they launched?",
+ "ground_truth": "AVAP implements an advanced system based on lightweight threads (goroutines), allowing the server to process long I/O operations without blocking the main thread. The go command launches a goroutine: identifier = go function_name(parameters). It creates a new isolated execution context and returns a unique identifier. Goroutines follow the same scope rules as normal functions — they can access Global Scope and their own Function Scope, but cannot access the Main Local Scope. The gather command is used to collect results from goroutines."
+ },
+ {
+ "id": "GD-R-007",
+ "category": "RETRIEVAL",
+ "question": "What is the addParam command and how does it capture HTTP request parameters?",
+ "ground_truth": "addParam captures input parameters from HTTP requests (URL query parameters, request body, or form data) and assigns them to a variable. Syntax: addParam(\"paramName\", targetVar). It reads the value of paramName from the incoming HTTP request and stores it in targetVar. If the parameter is not present in the request, the variable receives None. It is the primary mechanism for reading external input in AVAP since the language has no direct access to the request object."
+ },
+ {
+ "id": "GD-R-008",
+ "category": "RETRIEVAL",
+ "question": "How does the startLoop / endLoop construct work in AVAP?",
+ "ground_truth": "startLoop and endLoop define iteration blocks in AVAP. Syntax: startLoop(varName, from, to) where varName is the loop counter, from is the start value, and to is the end value inclusive. The loop counter increments by 1 on each iteration. endLoop() closes the block. Example: startLoop(i, 1, 10) iterates i from 1 to 10. Variables modified inside the loop are accessible after endLoop. To exit a loop early, you can set the counter variable beyond the end value (e.g. i = 11 inside a loop that goes to 10)."
+ },
+ {
+ "id": "GD-R-009",
+ "category": "RETRIEVAL",
+ "question": "What is the addResult command and how does it build the HTTP response?",
+ "ground_truth": "addResult adds a variable to the HTTP JSON response body. Syntax: addResult(varName). Each call to addResult adds one key-value pair to the response object where the key is the variable name and the value is its current value. AVAP has no internal print commands — addResult is the only way to expose data to the caller. Multiple addResult calls build up a JSON object with multiple fields. The HTTP status code is set separately via the _status variable."
+ },
+ {
+ "id": "GD-R-010",
+ "category": "RETRIEVAL",
+ "question": "How does error handling work in AVAP with try() and exception()?",
+ "ground_truth": "AVAP uses try() / exception() / end() for error handling. The try() block wraps code that may fail. If an exception occurs inside the try block, execution jumps to the exception() block instead of halting. exception(errorVar) captures the error message into errorVar. The end() command closes the structure. Without a try block, any unhandled exception stops script execution and returns a 400 error. With a try block, you can handle the error gracefully — for example by setting _status to 500 and returning a structured error message."
+ },
+ {
+ "id": "GD-R-011",
+ "category": "RETRIEVAL",
+ "question": "What is the replace() command in AVAP and how is it used?",
+ "ground_truth": "The replace() command performs string substitution in AVAP. Syntax: replace(sourceString, searchValue, replaceValue, targetVar). It replaces all occurrences of searchValue in sourceString with replaceValue and stores the result in targetVar. Example: replace(\"REF_1234_OLD\", \"OLD\", \"NEW\", ref_actualizada) stores \"REF_1234_NEW\" in ref_actualizada. The source can be a literal string or a variable name. The command does not modify the original variable — it always writes to targetVar."
+ },
+ {
+ "id": "GD-R-012",
+ "category": "RETRIEVAL",
+ "question": "What are the reserved keywords in AVAP that cannot be used as identifiers?",
+ "ground_truth": "AVAP has the following reserved keywords that cannot be used as variable or function names: Control flow — if, else, end, startLoop, endLoop, try, exception, return. Function declaration — function. Concurrency — go, gather. Modularity — include, import. Logical operators — and, or, not, in, is. Literals — True, False, None. Using any of these as an identifier will cause a lexer or parser error."
+ },
+ {
+ "id": "GD-R-013",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle string formatting and concatenation?",
+ "ground_truth": "AVAP supports two main string operations. Concatenation uses the + operator: result = \"Hello, \" + name produces a concatenated string. String formatting uses Python-style % operator: log = \"Evento registrado por: %s\" % nombre substitutes the variable value into the format string. Strings support single and double quotes. Escape sequences supported include \\n (newline), \\t (tab), \\r (carriage return), \\\" (double quote), \\' (single quote), and \\\\ (backslash). Note that \\n inside a string is a data character, not a statement terminator — the physical EOL is the only statement terminator in AVAP."
+ },
+ {
+ "id": "GD-R-014",
+ "category": "RETRIEVAL",
+ "question": "How does the encodeSHA256 command work in AVAP?",
+ "ground_truth": "encodeSHA256 computes the SHA-256 hash of an input value and stores the result in a destination variable. Syntax: encodeSHA256(inputValue, destVar). The result is a 64-character lowercase hexadecimal string representing the SHA-256 digest. Example: encodeSHA256(\"payload_data\", checksum) stores the hash of the string \"payload_data\" into the variable checksum. The input can be a string literal or a variable. It is commonly used for integrity verification, password hashing, and generating checksums."
+ },
+ {
+ "id": "GD-R-015",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle date and time operations?",
+ "ground_truth": "AVAP provides two date/time commands. getDateTime(format, offsetSeconds, timezone, destVar) gets the current date/time, optionally applying an offset in seconds and converting to the specified timezone. Example: getDateTime(\"%Y-%m-%d %H:%M:%S\", 0, \"Europe/Madrid\", sql_date) stores the current Madrid time formatted for SQL. getDateTime(\"\", 86400, \"UTC\", expira) gets the current UTC time plus 86400 seconds (1 day ahead), useful for expiration timestamps. stampToDatetime(unixTimestamp, format, offset, destVar) converts a Unix timestamp to a human-readable string. Example: stampToDatetime(1708726162, \"%d/%m/%Y\", 0, fecha_human)."
+ },
+ {
+ "id": "GD-R-016",
+ "category": "RETRIEVAL",
+ "question": "What is the AddvariableToJSON command and how is it used to build JSON objects?",
+ "ground_truth": "AddvariableToJSON inserts a key-value pair into an existing JSON object variable. Syntax: AddvariableToJSON(key, value, jsonVar). The key can be a string literal or a variable. The value can be a string, number, or variable. The jsonVar must be an already-declared variable typically initialized as \"{}\" via addVar. Example: addVar(mi_json, \"{}\") then AddvariableToJSON(\"status\", \"ok\", mi_json) adds the key \"status\" with value \"ok\" to mi_json. It is commonly used inside loops to build dynamic JSON objects iteratively."
+ },
+ {
+ "id": "GD-R-017",
+ "category": "RETRIEVAL",
+ "question": "How does the getListLen command work and what is it used for?",
+ "ground_truth": "getListLen retrieves the length of a list variable and stores it in a destination variable. Syntax: getListLen(listVar, destVar). Example: getListLen(registros, total) stores the number of elements in registros into total. It is commonly used before a startLoop to set the upper bound of iteration, enabling dynamic loops that adapt to the actual size of the data. Example pattern: getListLen(mi_lista, cantidad) followed by startLoop(i, 0, cantidad) to iterate over all elements."
+ },
+ {
+ "id": "GD-R-018",
+ "category": "RETRIEVAL",
+ "question": "How does the randomString command work in AVAP?",
+ "ground_truth": "randomString generates a random string of a specified length using a character pattern. Syntax: randomString(pattern, length, destVar). The pattern is a regex-style character class defining which characters to use. Example: randomString(\"[A-Z]\\d\", 32, token_seguridad) generates a 32-character random string using uppercase letters and digits. Another example: randomString(\"[a-zA-Z0-9]\", 16, token) generates a 16-character alphanumeric token. It is commonly used for generating secure tokens, session identifiers, and temporary passwords."
+ },
+ {
+ "id": "GD-R-019",
+ "category": "RETRIEVAL",
+ "question": "What is the $ dereference operator in AVAP and when is it used?",
+ "ground_truth": "The $ operator in AVAP is the dereference operator, used to access the value of a variable by reference at assignment time. Syntax: addVar(copia, $original) copies the current value of original into copia. The token is defined as DEREF in the lexer. It is used when you need to capture the current value of a variable into another variable, particularly useful when a variable may change later and you need to preserve its value at a specific point in execution."
+ },
+ {
+ "id": "GD-R-020",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle ORM database operations? What commands are available?",
+ "ground_truth": "AVAP provides native ORM commands for database operations without requiring additional drivers. ormCheckTable(tableName, resultVar) checks if a table exists storing True or False in resultVar. ormCreateTable(columns, types, tableName, resultVar) creates a new table with the specified column names and types. ormDirect(query, resultVar) executes a raw SQL query directly. ormAccessSelect executes SELECT queries and ormAccessInsert executes INSERT operations. avapConnector is used to initialize the database connection. The connector and ORM commands are distinguished only by context — the UUID passed as argument determines whether the adapter resolves as a database ORM or a third-party service proxy."
+ },
+ {
+ "id": "GD-C-001",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'name' parameter and returns a personalized greeting.",
+ "ground_truth": "The following AVAP script reads a name parameter and returns a personalized greeting:\n\naddParam(\"name\", name)\nresult = \"Hello, \" + name\naddResult(result)\n\nKey commands: addParam reads the HTTP parameter 'name' into variable name. The + operator concatenates the greeting string with the name. addResult exposes result in the JSON response."
+ },
+ {
+ "id": "GD-C-002",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash, and returns it.",
+ "ground_truth": "The following AVAP script hashes a password parameter using SHA-256:\n\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\naddResult(hashed_password)\n\nKey commands: addParam reads the 'password' HTTP parameter. encodeSHA256 computes the SHA-256 hash and stores the 64-character hex digest in hashed_password. addResult exposes the hash in the JSON response."
+ },
+ {
+ "id": "GD-C-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that loops from 1 to 5, builds a JSON object with each index as a key, and returns it.",
+ "ground_truth": "The following AVAP script builds a JSON object iteratively:\n\naddVar(mi_json, \"{}\")\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item, \"valor_generado\", mi_json)\nendLoop()\naddResult(mi_json)\n\nKey commands: addVar initializes an empty JSON object. startLoop iterates i from 1 to 5 inclusive. The % operator formats the key name dynamically. AddvariableToJSON inserts each key-value pair into mi_json. addResult exposes the final object."
+ },
+ {
+ "id": "GD-C-004",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that validates if a 'rol' parameter belongs to a list of allowed roles and returns the access result.",
+ "ground_truth": "The following AVAP script validates role membership:\n\naddParam(\"rol\", r)\nif(r, [\"admin\", \"editor\", \"root\"], \"in\")\n acceso = True\nelse()\n acceso = False\nend()\naddResult(acceso)\n\nKey commands: addParam reads the 'rol' parameter. The if() with \"in\" comparator checks list membership directly against a list literal. else() handles the false branch. end() closes the conditional block. addResult exposes the boolean result."
+ },
+ {
+ "id": "GD-C-005",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that makes a GET request to an external API and handles connection errors.",
+ "ground_truth": "The following AVAP script performs a GET request with error handling:\n\ntry()\n RequestGet(\"https://api.test.com/data\", 0, 0, respuesta, 5000)\nexception(e)\n addVar(error_trace, \"Fallo de conexion: %s\" % e)\n addResult(error_trace)\nend()\naddResult(respuesta)\n\nKey commands: try() wraps the potentially failing operation. RequestGet fetches the URL with the mandatory timeout of 5000 milliseconds, storing the response in respuesta (None if the timeout is exceeded). exception(e) captures any error message. The % operator formats the error string. addResult exposes either the response or the error."
+ },
+ {
+ "id": "GD-C-006",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP function that takes two numbers and returns their sum, then call it and return the result.",
+ "ground_truth": "The following AVAP script defines and calls a sum function:\n\nfunction suma(a, b){\n total = a + b\n return(total)\n}\nresultado = suma(10, 20)\naddResult(resultado)\n\nKey commands: function declares a named function with parameters a and b. The + operator adds the values. return() sends the result back to the caller and releases the function scope. The function is called with literal values 10 and 20. addResult exposes the result."
+ },
+ {
+ "id": "GD-C-007",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'subtotal' parameter, computes 21% VAT, and returns the total.",
+ "ground_truth": "The following AVAP script calculates the total with VAT:\n\naddParam(\"subtotal\", subtotal)\niva = subtotal * 0.21\ntotal = subtotal + iva\naddResult(total)\n\nKey commands: addParam reads the subtotal from the HTTP request. The * operator multiplies by the tax rate 0.21. The + operator adds subtotal and iva. addResult exposes the final total. AVAP supports float arithmetic natively."
+ },
+ {
+ "id": "GD-C-008",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads an 'api_key' parameter and returns status 403 if it is null.",
+ "ground_truth": "The following AVAP script validates that an API key is present:\n\naddParam(\"api_key\", key)\nif(key, None, \"==\")\n addVar(_status, 403)\n addVar(error, \"Acceso denegado: falta API KEY\")\n addResult(error)\nend()\n\nKey commands: addParam reads the api_key parameter — if not present it will be None. The if() with \"==\" and None checks for null. addVar sets _status to 403 which becomes the HTTP response code. addResult exposes the error message."
+ },
+ {
+ "id": "GD-C-009",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that generates a 32-character random alphanumeric token and returns it.",
+ "ground_truth": "The following AVAP script generates a secure random token:\n\nrandomString(\"[a-zA-Z0-9]\", 32, token_seguridad)\naddResult(token_seguridad)\n\nKey commands: randomString generates a random string using the character class [a-zA-Z0-9] at length 32 and stores it in token_seguridad. addResult exposes the token in the HTTP response."
+ },
+ {
+ "id": "GD-C-010",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'lang' parameter and returns 'Hola' if it is 'es' or 'Hello' if it is 'en'.",
+ "ground_truth": "The following AVAP script returns a greeting based on language:\n\naddParam(\"lang\", l)\nif(l, \"es\", \"==\")\n addVar(msg, \"Hola\")\nelse()\n addVar(msg, \"Hello\")\nend()\naddResult(msg)\n\nKey commands: addParam reads the lang parameter into l. The if() with \"==\" comparator checks string equality. else() handles all other cases. addVar sets the message. addResult exposes the localized greeting."
+ },
+ {
+ "id": "GD-C-011",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that checks if a database table exists and creates it if it does not.",
+ "ground_truth": "The following AVAP script checks and creates a database table:\n\normCheckTable(tabla_pruebas, resultado_comprobacion)\nif(resultado_comprobacion, False, \"==\")\n ormCreateTable(\"username,age\", \"VARCHAR,INTEGER\", tabla_pruebas, resultado_creacion)\nend()\naddResult(resultado_comprobacion)\naddResult(resultado_creacion)\n\nKey commands: ormCheckTable checks if the table exists storing True or False. The if() block only executes if the check returned False. ormCreateTable creates the table with the specified columns and types. Both results are exposed via addResult."
+ },
+ {
+ "id": "GD-C-012",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that gets the current UTC timestamp and adds 24 hours to compute an expiration time.",
+ "ground_truth": "The following AVAP script computes an expiration timestamp 24 hours from now:\n\ngetDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)\n\nKey commands: getDateTime with an empty format string returns a raw timestamp. The second parameter 86400 is the offset in seconds (60 * 60 * 24 = 86400 = 1 day). The timezone is set to UTC. The result is stored in expira and exposed via addResult."
+ },
+ {
+ "id": "GD-C-013",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that receives a new password parameter, validates it is not equal to the old password, and returns a confirmation.",
+ "ground_truth": "The following AVAP script validates a password change:\n\naddParam(\"password\", pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva, pass_antigua, \"!=\")\n addVar(cambio, \"Contrasena actualizada\")\nend()\naddResult(cambio)\n\nKey commands: addParam reads the new password. The old password is assigned as a literal. The if() with \"!=\" comparator checks inequality. addVar sets the confirmation message only if passwords differ. addResult exposes the message."
+ },
+ {
+ "id": "GD-C-014",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a list parameter and returns its element count.",
+ "ground_truth": "The following AVAP script reads a list parameter and returns its length:\n\naddParam(\"data_list\", mi_lista)\ngetListLen(mi_lista, cantidad)\naddResult(cantidad)\n\nKey commands: addParam reads the list from the HTTP request into mi_lista. getListLen computes the number of elements and stores it in cantidad. addResult exposes the count in the JSON response."
+ },
+ {
+ "id": "GD-C-015",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses a validation function to check a token parameter and returns the authorization result.",
+ "ground_truth": "The following AVAP script uses a function to validate a token:\n\nfunction es_valido(token){\n response = False\n if(token, \"SECRET\", \"==\")\n response = True\n end()\n return(response)\n}\naddParam(\"token\", t)\nautorizado = es_valido(t)\naddResult(autorizado)\n\nKey commands: function defines es_valido with a token parameter. response is initialized to False. The if() with \"==\" checks against the expected secret. return() sends the boolean back to the caller. addResult exposes the authorization result."
+ },
+ {
+ "id": "GD-C-016",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that returns two values in the HTTP response: a status code 200 and a message 'Success'.",
+ "ground_truth": "The following AVAP script returns multiple values in the HTTP response:\n\naddVar(_status, 200)\naddVar(status, \"Success\")\naddResult(status)\n\nOr returning both as JSON fields:\n\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n\nKey commands: _status is the special variable that sets the HTTP response status code. Multiple addResult calls build a JSON object with multiple fields in the response body."
+ },
+ {
+ "id": "GD-C-017",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'saldo' parameter and returns True if it is greater than zero, False otherwise.",
+ "ground_truth": "The following AVAP script checks if a balance is positive:\n\naddParam(\"saldo\", saldo)\nif(saldo, 0, \">\")\n permitir = True\nelse()\n permitir = False\nend()\naddResult(permitir)\n\nKey commands: addParam reads the saldo parameter. The if() with \">\" comparator checks if saldo is greater than 0. else() handles the zero or negative case. end() closes the block. addResult exposes the boolean result."
+ },
+ {
+ "id": "GD-C-018",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that converts a Unix timestamp parameter to a human-readable date in dd/mm/yyyy format.",
+ "ground_truth": "The following AVAP script converts a Unix timestamp to a readable date:\n\naddParam(\"timestamp\", ts)\nstampToDatetime(ts, \"%d/%m/%Y\", 0, fecha_human)\naddResult(fecha_human)\n\nKey commands: addParam reads the timestamp from the HTTP request. stampToDatetime converts the Unix epoch integer to a formatted date string using \"%d/%m/%Y\" which produces day/month/year. The third parameter is a timezone offset in seconds. The result is stored in fecha_human and returned via addResult."
+ },
+ {
+ "id": "GD-C-019",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that replaces all spaces in a string parameter with hyphens and returns the result.",
+ "ground_truth": "The following AVAP script replaces spaces with hyphens:\n\naddParam(\"text\", input_text)\nreplace(input_text, \" \", \"-\", clean_text)\naddResult(clean_text)\n\nKey commands: addParam reads the text parameter. replace() substitutes all occurrences of space with hyphen in input_text and stores the result in clean_text. The original variable is not modified. addResult exposes the transformed string."
+ },
+ {
+ "id": "GD-C-020",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses try/exception to execute a raw SQL query and return status 500 on database errors.",
+ "ground_truth": "The following AVAP script executes SQL with error handling:\n\ntry()\n ormDirect(\"UPDATE tabla SET col=1 WHERE id=1\", res)\nexception(e)\n addVar(_status, 500)\n addVar(error, \"Error de base de datos\")\n addResult(error)\nend()\naddResult(res)\n\nKey commands: try() wraps the database operation. ormDirect executes raw SQL storing the result in res. exception(e) catches any database error. addVar sets _status to 500 to signal a server error and stores the error message in error, which addResult exposes. The final addResult exposes the query result on success."
+ },
+ {
+ "id": "GD-V-001",
+ "category": "CONVERSATIONAL",
+ "question": "Can you summarize what you just explained about AVAP scopes in fewer words?",
+ "ground_truth": "AVAP has three scopes: Global (visible everywhere, lives for the whole process), Main Local (visible only in the main script flow, not inside functions), and Function (created per function call, destroyed when the function returns). Functions cannot see main flow variables, and the main flow cannot see function-internal variables."
+ },
+ {
+ "id": "GD-V-002",
+ "category": "CONVERSATIONAL",
+ "question": "You mentioned that addResult builds the JSON response — can you clarify how multiple addResult calls work together?",
+ "ground_truth": "Each addResult call adds one field to the JSON response object. The field name is the variable name passed to addResult and the field value is the current value of that variable. So calling addResult(code) and addResult(status) produces a JSON response like {\"code\": 200, \"status\": \"Success\"}. The fields are added in the order the addResult calls are executed during script execution."
+ },
+ {
+ "id": "GD-V-003",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between addVar and a plain assignment like x = 10 in AVAP?",
+ "ground_truth": "Both addVar and direct assignment declare variables. addVar(varName, value) is the explicit command form — it supports intelligent value resolution, checking whether the value is an existing variable, a number, or a literal. Direct assignment x = 10 is syntactic sugar that works identically for simple cases. addVar is preferred for declaring new variables with explicit intent, while direct assignment is more natural for updating values or computed expressions."
+ },
+ {
+ "id": "GD-V-004",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again the difference between the two modes of the if() command?",
+ "ground_truth": "Mode 1 is structured comparison: if(variable, value, comparator) — for example if(saldo, 0, \">\") directly compares the variable saldo against 0 using the > operator. Mode 2 is expression mode: if(None, None, \"expression\") — for example if(None, None, \"user_type == 'VIP' or compras > 100\") evaluates a full Python-style boolean expression passed as a string. Mode 2 is more flexible but requires passing None as the first two arguments."
+ },
+ {
+ "id": "GD-V-005",
+ "category": "CONVERSATIONAL",
+ "question": "What happens if an error occurs in AVAP without a try block?",
+ "ground_truth": "Without a try block, any unhandled exception stops script execution immediately and the server returns a 400 Bad Request error with the error message in the response body. The remaining commands in the script are not executed. With a try block, the error is caught by exception(), the script continues running, and you can handle the error gracefully — for example by setting _status to 500 and returning a structured error message."
+ },
+ {
+ "id": "GD-V-006",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again how the timeout in RequestGet works?",
+ "ground_truth": "The timeout parameter in RequestGet and RequestPost is specified in milliseconds. If the external server does not respond within that time, the request is aborted and the destination variable receives None instead of a response. This prevents the AVAP thread from blocking indefinitely on a slow or unavailable external service. You should always check if the result variable is None after a request to handle timeout cases gracefully."
+ },
+ {
+ "id": "GD-V-007",
+ "category": "CONVERSATIONAL",
+ "question": "Can I iterate over a list of items in AVAP instead of a numeric range?",
+ "ground_truth": "Yes, but AVAP loops are always numeric — startLoop uses a start and end integer. To iterate over a list, combine getListLen to get the total count, use that count as the loop boundary, and inside the loop use the index variable to access each element. Example: getListLen(mi_lista, total) then startLoop(i, 0, total) with list access inside. Lists are zero-indexed so the index starts at 0."
+ },
+ {
+ "id": "GD-V-008",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between RequestGet and RequestPost in practice?",
+ "ground_truth": "RequestGet sends an HTTP GET request — used for retrieving data, with parameters passed as a query string. RequestPost sends an HTTP POST request — used for submitting data, with a body payload that can be JSON or form data. Both require a timeout parameter in milliseconds and store the response in a destination variable. Both return None in the destination variable if the request times out. The key structural difference is that RequestPost includes a body parameter while RequestGet does not."
+ },
+ {
+ "id": "GD-V-009",
+ "category": "CONVERSATIONAL",
+ "question": "Goroutines cannot access Main Local Scope — can you give a practical example of why that matters?",
+ "ground_truth": "If you declare a variable in the main flow and launch a goroutine, the goroutine cannot read that variable. For example, if you do addVar(counter, 0) in the main flow and then call go myFunction(), the function myFunction cannot access counter — it would get a runtime error. To share data with goroutines, you must either pass the value as a function parameter, or declare the variable in Global Scope. This isolation prevents race conditions between concurrent goroutines and the main flow."
+ },
+ {
+ "id": "GD-V-010",
+ "category": "CONVERSATIONAL",
+ "question": "What format does encodeSHA256 return its output in?",
+ "ground_truth": "encodeSHA256 always returns a 64-character lowercase hexadecimal string. This is the standard SHA-256 digest representation — 256 bits expressed as 64 hex characters (0-9 and a-f). The output is deterministic — the same input always produces the same hash — which is why SHA-256 is used for integrity verification rather than for generating unique identifiers."
}
-]
+]
\ No newline at end of file
diff --git a/Docker/src/golden_dataset_v1.json b/Docker/src/golden_dataset_v1.json
new file mode 100644
index 0000000..e92d6da
--- /dev/null
+++ b/Docker/src/golden_dataset_v1.json
@@ -0,0 +1,32 @@
+[
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "ground_truth": "AVAP (Advanced Virtual API Programming) is a Turing-complete Domain-Specific Language (DSL) architecturally designed for the secure, concurrent, and deterministic orchestration of microservices and HTTP I/O. It is not a general-purpose language; its hybrid engine and strict grammar are optimized for fast processing of HTTP transactions, in-memory data manipulation, and interaction with external connectors. AVAP does not have internal print commands — all data output is performed through the HTTP interface using commands like addResult()."
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "ground_truth": "AVAP uses a mixed structural grammar for conditional logic, combining keyword fluidity with strict mathematical closures. The if() / else() / end() structure evaluates a logical or comparison expression. Every conditional block requires a mandatory end() closing statement. The if() command compares two values using a comparator operator (e.g., '==', '!=', '>', '<', '>=', '<='). An optional else() block handles the false branch. Example: if(saldo, 0, \">\") executes the true branch when the variable 'saldo' is greater than zero, otherwise the else() block runs, and end() closes the structure."
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "ground_truth": "The following AVAP script reads a 'password' query parameter, hashes it using SHA-256 via encodeSHA256(), and exposes the result via addResult():\n\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\naddResult(hashed_password)\n\nKey commands used:\n- addParam(\"password\", password): reads the 'password' HTTP parameter into the variable 'password'.\n- encodeSHA256(password, hashed_password): computes the SHA-256 hash of the input and stores the 64-character hex digest in 'hashed_password'.\n- addResult(hashed_password): adds 'hashed_password' to the HTTP JSON response body."
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "ground_truth": "The following AVAP script iterates from 1 to 5 using startLoop/endLoop, dynamically builds a JSON object using AddvariableToJSON() on each iteration, and returns the result:\n\naddVar(mi_json, \"{}\")\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item, \"valor_generado\", mi_json)\nendLoop()\naddResult(mi_json)\n\nKey commands used:\n- addVar(mi_json, \"{}\"): initializes an empty JSON object.\n- startLoop(i, 1, 5) / endLoop(): iterates the variable 'i' from 1 to 5 inclusive.\n- AddvariableToJSON(item, \"valor_generado\", mi_json): inserts each generated key-value pair into the JSON object.\n- addResult(mi_json): exposes the final JSON in the HTTP response."
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "ground_truth": "AVAP provides two commands for making external HTTP calls: RequestPost and RequestGet. To avoid blocking threads due to network latency, AVAP requires a mandatory timeout parameter (in milliseconds) for both commands. If the timeout is exceeded, the destination variable receives None. RequestPost(url, querystring, headers, body, destino, timeout) executes an HTTP POST and stores the response in 'destino'. RequestGet(url, querystring, headers, destino, timeout) executes an HTTP GET similarly. Both commands are part of AVAP's Section V (Third-Party Connectors and External HTTP Requests) and allow calling external APIs without additional drivers."
+ }
+]
diff --git a/Docker/src/golden_dataset_v2.json b/Docker/src/golden_dataset_v2.json
deleted file mode 100644
index 148c8c8..0000000
--- a/Docker/src/golden_dataset_v2.json
+++ /dev/null
@@ -1,302 +0,0 @@
-[
- {
- "id": "GD-R-001",
- "category": "RETRIEVAL",
- "question": "What is AVAP and what is it designed for?",
- "ground_truth": "AVAP is a Turing-complete Domain-Specific Language (DSL) architecturally designed for the secure, concurrent, and deterministic orchestration of microservices and HTTP I/O. It is not a general-purpose language. Its hybrid engine and strict grammar are optimized for fast HTTP transaction processing, in-memory data manipulation, and interaction with external connectors. AVAP has no internal print commands — all data output is performed through the HTTP interface using addResult()."
- },
- {
- "id": "GD-R-002",
- "category": "RETRIEVAL",
- "question": "How does the if() conditional block work in AVAP? How are blocks closed?",
- "ground_truth": "AVAP uses if() / else() / end() for conditional logic. The if() command evaluates a comparison between two values using a comparator operator (==, !=, >, <, >=, <=, in). Every conditional block must be closed with end(). The else() block is optional and handles the false branch. Example: if(saldo, 0, \">\") executes the true branch when saldo is greater than zero, otherwise the else() block runs, and end() closes the structure. AVAP also supports a mode 2 where a full Python-style expression is passed as a string: if(None, None, \"user_type == 'VIP' or compras > 100\")."
- },
- {
- "id": "GD-R-003",
- "category": "RETRIEVAL",
- "question": "How does AVAP handle external HTTP calls? What commands are available and how is timeout managed?",
- "ground_truth": "AVAP provides RequestGet and RequestPost for external HTTP calls. To avoid blocking threads due to network latency, AVAP requires a mandatory timeout parameter in milliseconds. If the timeout is exceeded, the destination variable receives None. RequestPost(url, querystring, headers, body, destino, timeout) executes an HTTP POST storing the response in destino. RequestGet(url, querystring, headers, destino, timeout) executes an HTTP GET. Both commands allow calling external APIs without additional drivers."
- },
- {
- "id": "GD-R-004",
- "category": "RETRIEVAL",
- "question": "How do functions work in AVAP? What is the scope of variables inside a function?",
- "ground_truth": "Functions in AVAP are hermetic memory enclosures. When entering a function, AVAP creates a new dictionary of local variables isolated from the global context. The return() command acts as a flow switch: it injects the calculated value to the caller and releases local memory. If used inside a startLoop, it also breaks the iteration. Variables declared inside a function are only visible within that function — they are not accessible from the main flow or other functions. AVAP has three scope types: Global Scope, Main Local Scope, and Function Scope."
- },
- {
- "id": "GD-R-005",
- "category": "RETRIEVAL",
- "question": "What are the three types of variable scopes in AVAP and what are their visibility rules?",
- "ground_truth": "AVAP uses three scope types: Global Scope contains globally declared variables, accessible from anywhere in the program and persists for the entire interpreter process lifetime. Main Local Scope contains variables declared in the main flow — accessible within the main flow but not from functions or goroutines, and disappears when script execution ends. Function Scope is created independently for each function invocation and contains function parameters and locally created variables — only visible within that function, not from outside, and is destroyed when the function terminates. If a variable does not exist in the visible scopes, the engine produces a runtime error."
- },
- {
- "id": "GD-R-006",
- "category": "RETRIEVAL",
- "question": "How does concurrency work in AVAP? What are goroutines and how are they launched?",
- "ground_truth": "AVAP implements an advanced system based on lightweight threads (goroutines), allowing the server to process long I/O operations without blocking the main thread. The go command launches a goroutine: identifier = go function_name(parameters). It creates a new isolated execution context and returns a unique identifier. Goroutines follow the same scope rules as normal functions — they can access Global Scope and their own Function Scope, but cannot access the Main Local Scope. The gather command is used to collect results from goroutines."
- },
- {
- "id": "GD-R-007",
- "category": "RETRIEVAL",
- "question": "What is the addParam command and how does it capture HTTP request parameters?",
- "ground_truth": "addParam captures input parameters from HTTP requests (URL query parameters, request body, or form data) and assigns them to a variable. Syntax: addParam(\"paramName\", targetVar). It reads the value of paramName from the incoming HTTP request and stores it in targetVar. If the parameter is not present in the request, the variable receives None. It is the primary mechanism for reading external input in AVAP since the language has no direct access to the request object."
- },
- {
- "id": "GD-R-008",
- "category": "RETRIEVAL",
- "question": "How does the startLoop / endLoop construct work in AVAP?",
- "ground_truth": "startLoop and endLoop define iteration blocks in AVAP. Syntax: startLoop(varName, from, to) where varName is the loop counter, from is the start value, and to is the end value inclusive. The loop counter increments by 1 on each iteration. endLoop() closes the block. Example: startLoop(i, 1, 10) iterates i from 1 to 10. Variables modified inside the loop are accessible after endLoop. To exit a loop early, you can set the counter variable beyond the end value (e.g. i = 11 inside a loop that goes to 10)."
- },
- {
- "id": "GD-R-009",
- "category": "RETRIEVAL",
- "question": "What is the addResult command and how does it build the HTTP response?",
- "ground_truth": "addResult adds a variable to the HTTP JSON response body. Syntax: addResult(varName). Each call to addResult adds one key-value pair to the response object where the key is the variable name and the value is its current value. AVAP has no internal print commands — addResult is the only way to expose data to the caller. Multiple addResult calls build up a JSON object with multiple fields. The HTTP status code is set separately via the _status variable."
- },
- {
- "id": "GD-R-010",
- "category": "RETRIEVAL",
- "question": "How does error handling work in AVAP with try() and exception()?",
- "ground_truth": "AVAP uses try() / exception() / end() for error handling. The try() block wraps code that may fail. If an exception occurs inside the try block, execution jumps to the exception() block instead of halting. exception(errorVar) captures the error message into errorVar. The end() command closes the structure. Without a try block, any unhandled exception stops script execution and returns a 400 error. With a try block, you can handle the error gracefully — for example by setting _status to 500 and returning a structured error message."
- },
- {
- "id": "GD-R-011",
- "category": "RETRIEVAL",
- "question": "What is the replace() command in AVAP and how is it used?",
- "ground_truth": "The replace() command performs string substitution in AVAP. Syntax: replace(sourceString, searchValue, replaceValue, targetVar). It replaces all occurrences of searchValue in sourceString with replaceValue and stores the result in targetVar. Example: replace(\"REF_1234_OLD\", \"OLD\", \"NEW\", ref_actualizada) stores \"REF_1234_NEW\" in ref_actualizada. The source can be a literal string or a variable name. The command does not modify the original variable — it always writes to targetVar."
- },
- {
- "id": "GD-R-012",
- "category": "RETRIEVAL",
- "question": "What are the reserved keywords in AVAP that cannot be used as identifiers?",
- "ground_truth": "AVAP has the following reserved keywords that cannot be used as variable or function names: Control flow — if, else, end, startLoop, endLoop, try, exception, return. Function declaration — function. Concurrency — go, gather. Modularity — include, import. Logical operators — and, or, not, in, is. Literals — True, False, None. Using any of these as an identifier will cause a lexer or parser error."
- },
- {
- "id": "GD-R-013",
- "category": "RETRIEVAL",
- "question": "How does AVAP handle string formatting and concatenation?",
- "ground_truth": "AVAP supports two main string operations. Concatenation uses the + operator: result = \"Hello, \" + name produces a concatenated string. String formatting uses Python-style % operator: log = \"Evento registrado por: %s\" % nombre substitutes the variable value into the format string. Strings support single and double quotes. Escape sequences supported include \\n (newline), \\t (tab), \\r (carriage return), \\\" (double quote), \\' (single quote), and \\\\ (backslash). Note that \\n inside a string is a data character, not a statement terminator — the physical EOL is the only statement terminator in AVAP."
- },
- {
- "id": "GD-R-014",
- "category": "RETRIEVAL",
- "question": "How does the encodeSHA256 command work in AVAP?",
- "ground_truth": "encodeSHA256 computes the SHA-256 hash of an input value and stores the result in a destination variable. Syntax: encodeSHA256(inputValue, destVar). The result is a 64-character lowercase hexadecimal string representing the SHA-256 digest. Example: encodeSHA256(\"payload_data\", checksum) stores the hash of the string \"payload_data\" into the variable checksum. The input can be a string literal or a variable. It is commonly used for integrity verification, password hashing, and generating checksums."
- },
- {
- "id": "GD-R-015",
- "category": "RETRIEVAL",
- "question": "How does AVAP handle date and time operations?",
- "ground_truth": "AVAP provides two date/time commands. getDateTime(format, offsetSeconds, timezone, destVar) gets the current date/time, optionally applying an offset in seconds and converting to the specified timezone. Example: getDateTime(\"%Y-%m-%d %H:%M:%S\", 0, \"Europe/Madrid\", sql_date) stores the current Madrid time formatted for SQL. getDateTime(\"\", 86400, \"UTC\", expira) gets the current UTC time plus 86400 seconds (1 day ahead), useful for expiration timestamps. stampToDatetime(unixTimestamp, format, offset, destVar) converts a Unix timestamp to a human-readable string. Example: stampToDatetime(1708726162, \"%d/%m/%Y\", 0, fecha_human)."
- },
- {
- "id": "GD-R-016",
- "category": "RETRIEVAL",
- "question": "What is the AddvariableToJSON command and how is it used to build JSON objects?",
- "ground_truth": "AddvariableToJSON inserts a key-value pair into an existing JSON object variable. Syntax: AddvariableToJSON(key, value, jsonVar). The key can be a string literal or a variable. The value can be a string, number, or variable. The jsonVar must be an already-declared variable typically initialized as \"{}\" via addVar. Example: addVar(mi_json, \"{}\") then AddvariableToJSON(\"status\", \"ok\", mi_json) adds the key \"status\" with value \"ok\" to mi_json. It is commonly used inside loops to build dynamic JSON objects iteratively."
- },
- {
- "id": "GD-R-017",
- "category": "RETRIEVAL",
- "question": "How does the getListLen command work and what is it used for?",
- "ground_truth": "getListLen retrieves the length of a list variable and stores it in a destination variable. Syntax: getListLen(listVar, destVar). Example: getListLen(registros, total) stores the number of elements in registros into total. It is commonly used before a startLoop to set the upper bound of iteration, enabling dynamic loops that adapt to the actual size of the data. Example pattern: getListLen(mi_lista, cantidad) followed by startLoop(i, 0, cantidad) to iterate over all elements."
- },
- {
- "id": "GD-R-018",
- "category": "RETRIEVAL",
- "question": "How does the randomString command work in AVAP?",
- "ground_truth": "randomString generates a random string of a specified length using a character pattern. Syntax: randomString(pattern, length, destVar). The pattern is a regex-style character class defining which characters to use. Example: randomString(\"[A-Z]\\d\", 32, token_seguridad) generates a 32-character random string using uppercase letters and digits. Another example: randomString(\"[a-zA-Z0-9]\", 16, token) generates a 16-character alphanumeric token. It is commonly used for generating secure tokens, session identifiers, and temporary passwords."
- },
- {
- "id": "GD-R-019",
- "category": "RETRIEVAL",
- "question": "What is the $ dereference operator in AVAP and when is it used?",
- "ground_truth": "The $ operator in AVAP is the dereference operator, used to access the value of a variable by reference at assignment time. Syntax: addVar(copia, $original) copies the current value of original into copia. The token is defined as DEREF in the lexer. It is used when you need to capture the current value of a variable into another variable, particularly useful when a variable may change later and you need to preserve its value at a specific point in execution."
- },
- {
- "id": "GD-R-020",
- "category": "RETRIEVAL",
- "question": "How does AVAP handle ORM database operations? What commands are available?",
- "ground_truth": "AVAP provides native ORM commands for database operations without requiring additional drivers. ormCheckTable(tableName, resultVar) checks if a table exists storing True or False in resultVar. ormCreateTable(columns, types, tableName, resultVar) creates a new table with the specified column names and types. ormDirect(query, resultVar) executes a raw SQL query directly. ormAccessSelect executes SELECT queries and ormAccessInsert executes INSERT operations. avapConnector is used to initialize the database connection. The connector and ORM commands are distinguished only by context — the UUID passed as argument determines whether the adapter resolves as a database ORM or a third-party service proxy."
- },
- {
- "id": "GD-C-001",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a 'name' parameter and returns a personalized greeting.",
- "ground_truth": "The following AVAP script reads a name parameter and returns a personalized greeting:\n\naddParam(\"name\", name)\nresult = \"Hello, \" + name\naddResult(result)\n\nKey commands: addParam reads the HTTP parameter 'name' into variable name. The + operator concatenates the greeting string with the name. addResult exposes result in the JSON response."
- },
- {
- "id": "GD-C-002",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash, and returns it.",
- "ground_truth": "The following AVAP script hashes a password parameter using SHA-256:\n\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\naddResult(hashed_password)\n\nKey commands: addParam reads the 'password' HTTP parameter. encodeSHA256 computes the SHA-256 hash and stores the 64-character hex digest in hashed_password. addResult exposes the hash in the JSON response."
- },
- {
- "id": "GD-C-003",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that loops from 1 to 5, builds a JSON object with each index as a key, and returns it.",
- "ground_truth": "The following AVAP script builds a JSON object iteratively:\n\naddVar(mi_json, \"{}\")\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item, \"valor_generado\", mi_json)\nendLoop()\naddResult(mi_json)\n\nKey commands: addVar initializes an empty JSON object. startLoop iterates i from 1 to 5 inclusive. The % operator formats the key name dynamically. AddvariableToJSON inserts each key-value pair into mi_json. addResult exposes the final object."
- },
- {
- "id": "GD-C-004",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that validates if a 'role' parameter belongs to a list of allowed roles and returns the access result.",
- "ground_truth": "The following AVAP script validates role membership:\n\naddParam(\"rol\", r)\nif(r, [\"admin\", \"editor\", \"root\"], \"in\")\n acceso = True\nelse()\n acceso = False\nend()\naddResult(acceso)\n\nKey commands: addParam reads the 'rol' parameter. The if() with \"in\" comparator checks list membership directly against a list literal. else() handles the false branch. end() closes the conditional block. addResult exposes the boolean result."
- },
- {
- "id": "GD-C-005",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that makes a GET request to an external API and handles connection errors.",
- "ground_truth": "The following AVAP script performs a GET request with error handling:\n\ntry()\n RequestGet(\"https://api.test.com/data\", 0, 0, respuesta)\nexception(e)\n addVar(error_trace, \"Fallo de conexion: %s\" % e)\n addResult(error_trace)\nend()\naddResult(respuesta)\n\nKey commands: try() wraps the potentially failing operation. RequestGet fetches the URL storing the response in respuesta. exception(e) captures any error message. The % operator formats the error string. addResult exposes either the response or the error."
- },
- {
- "id": "GD-C-006",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP function that takes two numbers and returns their sum, then call it and return the result.",
- "ground_truth": "The following AVAP script defines and calls a sum function:\n\nfunction suma(a, b){\n total = a + b\n return(total)\n}\nresultado = suma(10, 20)\naddResult(resultado)\n\nKey commands: function declares a named function with parameters a and b. The + operator adds the values. return() sends the result back to the caller and releases the function scope. The function is called with literal values 10 and 20. addResult exposes the result."
- },
- {
- "id": "GD-C-007",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a 'subtotal' parameter, computes 21% VAT, and returns the total.",
- "ground_truth": "The following AVAP script calculates the total with VAT:\n\naddParam(\"subtotal\", subtotal)\niva = subtotal * 0.21\ntotal = subtotal + iva\naddResult(total)\n\nKey commands: addParam reads the subtotal from the HTTP request. The * operator multiplies by the tax rate 0.21. The + operator adds subtotal and iva. addResult exposes the final total. AVAP supports float arithmetic natively."
- },
- {
- "id": "GD-C-008",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads an 'api_key' parameter and returns status 403 if it is null.",
- "ground_truth": "The following AVAP script validates that an API key is present:\n\naddParam(\"api_key\", key)\nif(key, None, \"==\")\n addVar(_status, 403)\n addVar(error, \"Acceso denegado: falta API KEY\")\n addResult(error)\nend()\n\nKey commands: addParam reads the api_key parameter — if not present it will be None. The if() with \"==\" and None checks for null. addVar sets _status to 403 which becomes the HTTP response code. addResult exposes the error message."
- },
- {
- "id": "GD-C-009",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that generates a 32-character random alphanumeric token and returns it.",
- "ground_truth": "The following AVAP script generates a secure random token:\n\nrandomString(\"[a-zA-Z0-9]\", 32, token_seguridad)\naddResult(token_seguridad)\n\nKey commands: randomString generates a random string using the character class [a-zA-Z0-9] at length 32 and stores it in token_seguridad. addResult exposes the token in the HTTP response."
- },
- {
- "id": "GD-C-010",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a 'lang' parameter and returns 'Hola' if it is 'es' or 'Hello' if it is 'en'.",
- "ground_truth": "The following AVAP script returns a greeting based on language:\n\naddParam(\"lang\", l)\nif(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\nelse()\n addVar(msg, \"Hello\")\nend()\naddResult(msg)\n\nKey commands: addParam reads the lang parameter into l. The if() with \"=\" comparator checks string equality. else() handles all other cases. addVar sets the message. addResult exposes the localized greeting."
- },
- {
- "id": "GD-C-011",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that checks if a database table exists and creates it if it does not.",
- "ground_truth": "The following AVAP script checks and creates a database table:\n\normCheckTable(tabla_pruebas, resultado_comprobacion)\nif(resultado_comprobacion, False, \"==\")\n ormCreateTable(\"username,age\", \"VARCHAR,INTEGER\", tabla_pruebas, resultado_creacion)\nend()\naddResult(resultado_comprobacion)\naddResult(resultado_creacion)\n\nKey commands: ormCheckTable checks if the table exists storing True or False. The if() block only executes if the check returned False. ormCreateTable creates the table with the specified columns and types. Both results are exposed via addResult."
- },
- {
- "id": "GD-C-012",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that gets the current UTC timestamp and adds 24 hours to compute an expiration time.",
- "ground_truth": "The following AVAP script computes an expiration timestamp 24 hours from now:\n\ngetDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)\n\nKey commands: getDateTime with an empty format string returns a raw timestamp. The second parameter 86400 is the offset in seconds (60 * 60 * 24 = 86400 = 1 day). The timezone is set to UTC. The result is stored in expira and exposed via addResult."
- },
- {
- "id": "GD-C-013",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that receives a new password parameter, validates it is not equal to the old password, and returns a confirmation.",
- "ground_truth": "The following AVAP script validates a password change:\n\naddParam(\"password\", pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva, pass_antigua, \"!=\")\n addVar(cambio, \"Contrasena actualizada\")\nend()\naddResult(cambio)\n\nKey commands: addParam reads the new password. The old password is assigned as a literal. The if() with \"!=\" comparator checks inequality. addVar sets the confirmation message only if passwords differ. addResult exposes the message."
- },
- {
- "id": "GD-C-014",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a list parameter and returns its element count.",
- "ground_truth": "The following AVAP script reads a list parameter and returns its length:\n\naddParam(\"data_list\", mi_lista)\ngetListLen(mi_lista, cantidad)\naddResult(cantidad)\n\nKey commands: addParam reads the list from the HTTP request into mi_lista. getListLen computes the number of elements and stores it in cantidad. addResult exposes the count in the JSON response."
- },
- {
- "id": "GD-C-015",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that uses a validation function to check a token parameter and returns the authorization result.",
- "ground_truth": "The following AVAP script uses a function to validate a token:\n\nfunction es_valido(token){\n response = False\n if(token, \"SECRET\", \"=\")\n response = True\n end()\n return(response)\n}\naddParam(\"token\", t)\nautorizado = es_valido(t)\naddResult(autorizado)\n\nKey commands: function defines es_valido with a token parameter. response is initialized to False. The if() with \"=\" checks against the expected secret. return() sends the boolean back to the caller. addResult exposes the authorization result."
- },
- {
- "id": "GD-C-016",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that returns two values in the HTTP response: a status code 200 and a message 'Success'.",
- "ground_truth": "The following AVAP script returns multiple values in the HTTP response:\n\naddVar(_status, 200)\naddVar(status, \"Success\")\naddResult(status)\n\nOr returning both as JSON fields:\n\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n\nKey commands: _status is the special variable that sets the HTTP response status code. Multiple addResult calls build a JSON object with multiple fields in the response body."
- },
- {
- "id": "GD-C-017",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that reads a 'saldo' parameter and returns True if it is greater than zero, False otherwise.",
- "ground_truth": "The following AVAP script checks if a balance is positive:\n\naddParam(\"saldo\", saldo)\nif(saldo, 0, \">\")\n permitir = True\nelse()\n permitir = False\nend()\naddResult(permitir)\n\nKey commands: addParam reads the saldo parameter. The if() with \">\" comparator checks if saldo is greater than 0. else() handles the zero or negative case. end() closes the block. addResult exposes the boolean result."
- },
- {
- "id": "GD-C-018",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that converts a Unix timestamp parameter to a human-readable date in dd/mm/yyyy format.",
- "ground_truth": "The following AVAP script converts a Unix timestamp to a readable date:\n\naddParam(\"timestamp\", ts)\nstampToDatetime(ts, \"%d/%m/%Y\", 0, fecha_human)\naddResult(fecha_human)\n\nKey commands: addParam reads the timestamp from the HTTP request. stampToDatetime converts the Unix epoch integer to a formatted date string using \"%d/%m/%Y\" which produces day/month/year. The third parameter is a timezone offset in seconds. The result is stored in fecha_human and returned via addResult."
- },
- {
- "id": "GD-C-019",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that replaces all spaces in a string parameter with hyphens and returns the result.",
- "ground_truth": "The following AVAP script replaces spaces with hyphens:\n\naddParam(\"text\", input_text)\nreplace(input_text, \" \", \"-\", clean_text)\naddResult(clean_text)\n\nKey commands: addParam reads the text parameter. replace() substitutes all occurrences of space with hyphen in input_text and stores the result in clean_text. The original variable is not modified. addResult exposes the transformed string."
- },
- {
- "id": "GD-C-020",
- "category": "CODE_GENERATION",
- "question": "Write an AVAP script that uses try/exception to execute a raw SQL query and return status 500 on database errors.",
- "ground_truth": "The following AVAP script executes SQL with error handling:\n\ntry()\n ormDirect(\"UPDATE tabla SET col=1 WHERE id=1\", res)\nexception(e)\n addVar(_status, 500)\n addResult(\"Error de base de datos\")\nend()\naddResult(res)\n\nKey commands: try() wraps the database operation. ormDirect executes raw SQL storing the result in res. exception(e) catches any database error. addVar sets _status to 500 to signal a server error. The final addResult exposes the query result on success."
- },
- {
- "id": "GD-V-001",
- "category": "CONVERSATIONAL",
- "question": "Can you summarize what you just explained about AVAP scopes in fewer words?",
- "ground_truth": "AVAP has three scopes: Global (visible everywhere, lives for the whole process), Main Local (visible only in the main script flow, not inside functions), and Function (created per function call, destroyed when the function returns). Functions cannot see main flow variables, and the main flow cannot see function-internal variables."
- },
- {
- "id": "GD-V-002",
- "category": "CONVERSATIONAL",
- "question": "You mentioned that addResult builds the JSON response — can you clarify how multiple addResult calls work together?",
- "ground_truth": "Each addResult call adds one field to the JSON response object. The field name is the variable name passed to addResult and the field value is the current value of that variable. So calling addResult(code) and addResult(status) produces a JSON response like {\"code\": 200, \"status\": \"Success\"}. The fields are added in the order the addResult calls are executed during script execution."
- },
- {
- "id": "GD-V-003",
- "category": "CONVERSATIONAL",
- "question": "What is the difference between addVar and a plain assignment like x = 10 in AVAP?",
- "ground_truth": "Both addVar and direct assignment declare variables. addVar(varName, value) is the explicit command form — it supports intelligent value resolution checking if the value is an existing variable, a number, or a literal. Direct assignment x = 10 is syntactic sugar that works identically for simple cases. addVar is preferred for declaring new variables with explicit intent, while direct assignment is more natural for updating values or computed expressions."
- },
- {
- "id": "GD-V-004",
- "category": "CONVERSATIONAL",
- "question": "Can you explain again the difference between the two modes of the if() command?",
- "ground_truth": "Mode 1 is structured comparison: if(variable, value, comparator) — for example if(saldo, 0, \">\") directly compares the variable saldo against 0 using the > operator. Mode 2 is expression mode: if(None, None, \"expression\") — for example if(None, None, \"user_type == 'VIP' or compras > 100\") evaluates a full Python-style boolean expression passed as a string. Mode 2 is more flexible but requires passing None as the first two arguments."
- },
- {
- "id": "GD-V-005",
- "category": "CONVERSATIONAL",
- "question": "What happens if an error occurs in AVAP without a try block?",
- "ground_truth": "Without a try block, any unhandled exception stops script execution immediately and the server returns a 400 Bad Request error with the error message in the response body. The remaining commands in the script are not executed. With a try block, the error is caught by exception(), the script continues running, and you can handle the error gracefully — for example by setting _status to 500 and returning a structured error message."
- },
- {
- "id": "GD-V-006",
- "category": "CONVERSATIONAL",
- "question": "Can you explain again how the timeout in RequestGet works?",
- "ground_truth": "The timeout parameter in RequestGet and RequestPost is specified in milliseconds. If the external server does not respond within that time, the request is aborted and the destination variable receives None instead of a response. This prevents the AVAP thread from blocking indefinitely on a slow or unavailable external service. You should always check if the result variable is None after a request to handle timeout cases gracefully."
- },
- {
- "id": "GD-V-007",
- "category": "CONVERSATIONAL",
- "question": "Can I iterate over a list of items in AVAP instead of a numeric range?",
- "ground_truth": "Yes, but AVAP loops are always numeric — startLoop uses a start and end integer. To iterate over a list, combine getListLen to get the total count, use that count as the loop boundary, and inside the loop use the index variable to access each element. Example: getListLen(mi_lista, total) then startLoop(i, 0, total) with list access inside. Lists are zero-indexed so the index starts at 0."
- },
- {
- "id": "GD-V-008",
- "category": "CONVERSATIONAL",
- "question": "What is the difference between RequestGet and RequestPost in practice?",
- "ground_truth": "RequestGet sends an HTTP GET request — used for retrieving data, with parameters passed as query string. RequestPost sends an HTTP POST request — used for submitting data, with a body payload that can be JSON or form data. Both require a timeout parameter in milliseconds and store the response in a destination variable. Both return None in the destination variable if the request times out. The key structural difference is that RequestPost includes a body parameter while RequestGet does not."
- },
- {
- "id": "GD-V-009",
- "category": "CONVERSATIONAL",
- "question": "Goroutines cannot access Main Local Scope — can you give a practical example of why that matters?",
- "ground_truth": "If you declare a variable in the main flow and launch a goroutine, the goroutine cannot read that variable. For example if you do addVar(counter, 0) in the main flow and then call go myFunction(), the function myFunction cannot access counter — it would get a runtime error. To share data with goroutines you must either pass the value as a function parameter, or declare the variable in Global Scope. This isolation prevents race conditions between concurrent goroutines and the main flow."
- },
- {
- "id": "GD-V-010",
- "category": "CONVERSATIONAL",
- "question": "What format does encodeSHA256 return its output in?",
- "ground_truth": "encodeSHA256 always returns a 64-character lowercase hexadecimal string. This is the standard SHA-256 digest representation — 256 bits expressed as 64 hex characters (0-9 and a-f). The output is deterministic — the same input always produces the same hash — which is why SHA-256 is used for integrity verification rather than for generating unique identifiers."
- }
-]
\ No newline at end of file
diff --git a/output/candidate_A_reward_10_v1_parser.json b/output/candidate_A_reward_10_v1_parser.json
new file mode 100644
index 0000000..05ddea4
--- /dev/null
+++ b/output/candidate_A_reward_10_v1_parser.json
@@ -0,0 +1,145 @@
+[
+ {
+ "task_id": 1,
+ "text": "Crear un endpoint que reciba un parámetro 'mensaje' y lo devuelva con todas las vocales reemplazadas por asteriscos",
+ "code": "addParam(\"mensaje\", texto)\nreplace(texto, \"[aeiouAEIOU]\", \"*\", resultado)\naddResult(resultado)",
+ "test_inputs": {
+ "mensaje": "Hola mundo"
+ },
+ "test_list": [
+ "re.match(r'H\\*l\\* m\\*nd\\*', resultado)"
+ ],
+ "_detected": [
+ "addParam",
+ "addResult",
+ "replace"
+ ],
+ "_reward": {
+ "ecs": 0.079,
+ "novelty": 1.0,
+ "test_quality": 1.0,
+ "reward": 0.539,
+ "detected": [
+ "addParam",
+ "addResult",
+ "replace"
+ ]
+ }
+ },
+ {
+ "task_id": 2,
+ "text": "Crear un endpoint que reciba un parámetro 'password' y devuelva su hash MD5",
+ "code": "addParam(\"password\", entrada)\nencodeMD5(entrada, hash_resultado)\naddResult(hash_resultado)",
+ "test_inputs": {
+ "password": "test123"
+ },
+ "test_list": [
+ "re.match(r'^[a-f0-9]{32}$', hash_resultado)"
+ ],
+ "_detected": [
+ "addParam",
+ "addResult",
+ "encodeMD5"
+ ],
+ "_reward": {
+ "ecs": 0.079,
+ "novelty": 0.5,
+ "test_quality": 1.0,
+ "reward": 0.364,
+ "detected": [
+ "addParam",
+ "addResult",
+ "encodeMD5"
+ ]
+ }
+ },
+ {
+ "task_id": 3,
+ "text": "Crear un endpoint que reciba un parámetro 'password' y devuelva su hash SHA-256",
+ "code": "addParam(\"password\", entrada)\nencodeSHA256(entrada, hash_resultado)\naddResult(hash_resultado)",
+ "test_inputs": {
+ "password": "miPassword123"
+ },
+ "test_list": [
+ "re.match(r'^[a-f0-9]{64}$', hash_resultado)"
+ ],
+ "_detected": [
+ "addParam",
+ "addResult",
+ "encodeSHA256"
+ ],
+ "_reward": {
+ "ecs": 0.079,
+ "novelty": 0.5,
+ "test_quality": 1.0,
+ "reward": 0.364,
+ "detected": [
+ "addParam",
+ "addResult",
+ "encodeSHA256"
+ ]
+ }
+ },
+ {
+ "task_id": 4,
+ "text": "Crear un endpoint que reciba un parámetro 'nombre' y lo almacene en una variable usando addVar, luego devolver el nombre almacenado",
+ "code": "addParam(\"nombre\", entrada)\naddVar(resultado, entrada)\naddResult(resultado)",
+ "test_inputs": {
+ "nombre": "Juan"
+ },
+ "test_list": [
+ "re.match(r'^Juan$', resultado)",
+ "re.match(r'^\\w+$', resultado)"
+ ],
+ "_detected": [
+ "addParam",
+ "addResult",
+ "addVar"
+ ],
+ "_reward": {
+ "ecs": 0.079,
+ "novelty": 0.5,
+ "test_quality": 1.0,
+ "reward": 0.364,
+ "detected": [
+ "addParam",
+ "addResult",
+ "addVar"
+ ]
+ }
+ },
+ {
+ "task_id": 5,
+ "text": "Crear un endpoint que reciba un parámetro 'edad' y devuelva un mensaje personalizado. Si la edad es mayor o igual a 18, devuelve 'Adulto', sino devuelve 'Menor'",
+ "code": "addParam(\"edad\", edad_usuario)\nif(edad_usuario, 18, \">=\")\naddVar(mensaje, \"Adulto\")\nelse()\naddVar(mensaje, \"Menor\")\nend()\naddResult(mensaje)",
+ "test_inputs": {
+ "edad": "20"
+ },
+ "test_list": [
+ "re.match(r'Adulto', mensaje)",
+ "re.match(r'^(Adulto|Menor)$', mensaje)"
+ ],
+ "_detected": [
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1"
+ ],
+ "_reward": {
+ "ecs": 0.158,
+ "novelty": 0.5,
+ "test_quality": 1.0,
+ "reward": 0.404,
+ "detected": [
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1"
+ ]
+ }
+ }
+]
\ No newline at end of file
diff --git a/output/candidate_A_reward_10_v1_parser_reward_stats.json b/output/candidate_A_reward_10_v1_parser_reward_stats.json
new file mode 100644
index 0000000..8594bb1
--- /dev/null
+++ b/output/candidate_A_reward_10_v1_parser_reward_stats.json
@@ -0,0 +1,26 @@
+{
+ "mode": "reward",
+ "weights": {
+ "w_ecs": 0.5,
+ "w_novelty": 0.35,
+ "w_tests": 0.15
+ },
+ "dataset_size": 5,
+ "pool_size": 5,
+ "pool_summary": "GoldPool: 5/5 | reward: min=0.364 max=0.539 mean=0.407",
+ "distribution_entropy": 2.769,
+ "node_type_frequency": {
+ "addParam": 5,
+ "addResult": 5,
+ "replace": 1,
+ "encodeMD5": 1,
+ "encodeSHA256": 1,
+ "addVar": 2,
+ "else": 1,
+ "end": 1,
+ "if_mode1": 1
+ },
+ "covered_constructs": 9,
+ "total_constructs": 38,
+ "mean_reward": 0.407
+}
\ No newline at end of file
diff --git a/output/candidate_E_reward_10_v1_parser.json b/output/candidate_E_reward_10_v1_parser.json
new file mode 100644
index 0000000..bcb78d3
--- /dev/null
+++ b/output/candidate_E_reward_10_v1_parser.json
@@ -0,0 +1,287 @@
+[
+ {
+ "task_id": 1,
+ "text": "Crear un endpoint que valide credenciales de usuario: recibe username y password, genera hash SHA256 de la contraseña, consulta la base de datos para verificar las credenciales y devuelve la cantidad de usuarios encontrados junto con el estado de autenticación.",
+ "code": "addParam(\"username\", user_input)\naddParam(\"password\", pass_input)\nencodeSHA256(pass_input, hashed_pass)\normAccessSelect(\"*\", \"users\", \"username='\" + user_input + \"' AND password='\" + hashed_pass + \"'\", user_results)\ngetListLen(user_results, total_users)\nif(total_users, 0, \">\")\n_status = 200\naddVar(auth_status, \"success\")\nelse()\n_status = 401\naddVar(auth_status, \"failed\")\nend()\naddVar(user_count, total_users)\naddResult(auth_status)\naddResult(user_count)",
+ "test_inputs": {
+ "username": "admin",
+ "password": "secret123"
+ },
+ "test_list": [
+ "re.match(r'success|failed', auth_status)",
+ "re.match(r'\\d+', str(user_count))"
+ ],
+ "_cell": [
+ "encodeSHA256",
+ "getListLen",
+ "ormAccessSelect"
+ ],
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.2,
+ "test_quality": 1.0,
+ "richness": 0.5,
+ "quality": 1.31,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "encodeSHA256",
+ "end",
+ "getListLen",
+ "if_mode1",
+ "ormAccessSelect"
+ ],
+ "cell": [
+ "encodeSHA256",
+ "getListLen",
+ "ormAccessSelect"
+ ],
+ "extra": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1"
+ ]
+ }
+ },
+ {
+ "task_id": 2,
+ "text": "Crear un sistema de autenticación que genere un hash SHA256 de la contraseña, ejecute una validación asíncrona del usuario y registre la fecha/hora del intento de login",
+ "code": "addParam(\"username\", username)\naddParam(\"password\", password)\nencodeSHA256(password, password_hash)\ntask_id = go validateUser(username, password_hash)\ngetDateTime(\"%Y-%m-%d %H:%M:%S\", 0, \"UTC\", login_time)\nresult = gather(task_id, 3000)\nif(result, None, \"!=\")\naddVar(_status, 200)\naddResult(username)\naddResult(login_time)\nelse()\naddVar(_status, 401)\nend()\n\nfunction validateUser(user, hash_pass)\n{\nif(user, \"admin\", \"==\")\nif(hash_pass, \"ef92b778bafe771e89245b89ecbc08a44a4e166c06659911881f383d4473e94f\", \"==\")\nreturn(\"valid\")\nelse()\nreturn(None)\nend()\nelse()\nreturn(None)\nend()\n}",
+ "test_inputs": {
+ "username": "admin",
+ "password": "secret123"
+ },
+ "test_list": [
+ "re.match(r'\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}', login_time)",
+ "re.match(r'[a-f0-9]{64}', password_hash)"
+ ],
+ "_cell": [
+ "encodeSHA256",
+ "gather",
+ "getDateTime"
+ ],
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.257,
+ "test_quality": 1.0,
+ "richness": 0.833,
+ "quality": 1.36,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "encodeSHA256",
+ "end",
+ "function",
+ "gather",
+ "getDateTime",
+ "if_mode1",
+ "return"
+ ],
+ "cell": [
+ "encodeSHA256",
+ "gather",
+ "getDateTime"
+ ],
+ "extra": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "function",
+ "if_mode1",
+ "return"
+ ]
+ }
+ },
+ {
+ "task_id": 3,
+ "text": "Crear un servicio que valide credenciales de usuario mediante hash SHA256, registre el timestamp de autenticación y extraiga datos del perfil JSON del usuario",
+ "code": "addParam(\"user_credentials\", userData)\naddParam(\"profile\", profileData)\nvariableFromJSON(userData, \"password\", rawPassword)\nvariableFromJSON(userData, \"username\", username)\nvariableFromJSON(profileData, \"email\", userEmail)\nencodeSHA256(rawPassword, hashedPassword)\ngetTimeStamp(\"2024-01-15 10:30:00\", \"%Y-%m-%d %H:%M:%S\", 0, loginTimestamp)\nif(hashedPassword, \"5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8\", \"==\")\n addVar(_status, 200)\n addResult(username)\n addResult(userEmail)\n addResult(loginTimestamp)\nelse()\n addVar(_status, 401)\nend()",
+ "test_inputs": {
+ "user_credentials": "{\"username\":\"john\", \"password\":\"password\"}",
+ "profile": "{\"email\":\"john@example.com\", \"role\":\"user\"}"
+ },
+ "test_list": [
+ "re.match(r'john', username)",
+ "re.match(r'\\d{10}', str(loginTimestamp))"
+ ],
+ "_cell": [
+ "encodeSHA256",
+ "getTimeStamp",
+ "variableFromJSON"
+ ],
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.2,
+ "test_quality": 1.0,
+ "richness": 0.5,
+ "quality": 1.31,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "encodeSHA256",
+ "end",
+ "getTimeStamp",
+ "if_mode1",
+ "variableFromJSON"
+ ],
+ "cell": [
+ "encodeSHA256",
+ "getTimeStamp",
+ "variableFromJSON"
+ ],
+ "extra": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1"
+ ]
+ }
+ },
+ {
+ "task_id": 4,
+ "text": "Crear un endpoint que procese datos de usuario desde JSON, valide la longitud de una lista de elementos y establezca el código de respuesta HTTP apropiado según los resultados",
+ "code": "addParam(\"user_data\", json_data)\nvariableFromJSON(json_data, \"items\", user_items)\ngetListLen(user_items, items_count)\nif(items_count, 0, \">\")\n addVar(_status, 200)\n addResult(items_count)\nelse()\n addVar(_status, 400)\nend()",
+ "test_inputs": {
+ "user_data": "{\"items\": [\"producto1\", \"producto2\", \"producto3\"]}"
+ },
+ "test_list": [
+ "re.match(r'200', str(_status))",
+ "re.match(r'3', str(items_count))"
+ ],
+ "_cell": [
+ "_status",
+ "getListLen",
+ "variableFromJSON"
+ ],
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.171,
+ "test_quality": 1.0,
+ "richness": 0.3,
+ "quality": 1.281,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "getListLen",
+ "if_mode1",
+ "variableFromJSON"
+ ],
+ "cell": [
+ "_status",
+ "getListLen",
+ "variableFromJSON"
+ ],
+ "extra": [
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1"
+ ]
+ }
+ },
+ {
+ "task_id": 5,
+ "text": "Escribe un microservicio que actualice el estado de una cuenta bancaria. Si el parámetro 'action' es 'freeze', cambia el estado a 'frozen'. Si es cualquier otra acción, cambia a 'active'. Usa una tabla llamada 'accounts' con campos id y status.",
+ "code": "addParam(\"account_id\", account_id)\naddParam(\"action\", action)\nif(action, \"freeze\", \"==\")\naddVar(new_status, \"frozen\")\nelse()\naddVar(new_status, \"active\")\nend()\normAccessUpdate([\"status\"], [new_status], \"accounts\", \"id = \" + account_id, update_result)\naddResult(update_result)\naddVar(_status, 200)",
+ "test_inputs": {
+ "account_id": "123",
+ "action": "freeze"
+ },
+ "test_list": [
+ "re.match(r'frozen', new_status)",
+ "re.match(r'200', str(_status))"
+ ],
+ "_cell": [
+ "else",
+ "end",
+ "ormAccessUpdate"
+ ],
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.143,
+ "test_quality": 1.0,
+ "richness": 0.333,
+ "quality": 1.276,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1",
+ "ormAccessUpdate"
+ ],
+ "cell": [
+ "else",
+ "end",
+ "ormAccessUpdate"
+ ],
+ "extra": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "if_mode1"
+ ]
+ }
+ },
+ {
+ "task_id": 6,
+ "text": "Crea un endpoint que genere un token de sesión único. El sistema debe generar una cadena aleatoria de 16 caracteres alfanuméricos, calcular su hash MD5 para crear un identificador seguro, y devolver ambos valores en la respuesta JSON.",
+ "code": "randomString(\"[a-zA-Z0-9]\", 16, token)\nencodeMD5(token, token_hash)\naddResult(token)\naddResult(token_hash)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^[a-zA-Z0-9]{16}$', token)",
+ "re.match(r'^[a-f0-9]{32}$', token_hash)"
+ ],
+ "_cell": [
+ "addResult",
+ "encodeMD5",
+ "randomString"
+ ],
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.0,
+ "test_quality": 1.0,
+ "richness": 0.133,
+ "quality": 1.213,
+ "detected": [
+ "addResult",
+ "encodeMD5",
+ "randomString"
+ ],
+ "cell": [
+ "addResult",
+ "encodeMD5",
+ "randomString"
+ ],
+ "extra": []
+ }
+ }
+]
\ No newline at end of file
diff --git a/output/candidate_E_reward_10_v1_parser_coverage_stats.json b/output/candidate_E_reward_10_v1_parser_coverage_stats.json
new file mode 100644
index 0000000..cd694a1
--- /dev/null
+++ b/output/candidate_E_reward_10_v1_parser_coverage_stats.json
@@ -0,0 +1,24 @@
+{
+ "total_cells": 9139,
+ "filled_cells": 6,
+ "fill_rate": 0.0007,
+ "distribution_entropy": 3.684,
+ "node_type_frequency": {
+ "ormAccessSelect": 1,
+ "encodeSHA256": 3,
+ "getListLen": 2,
+ "gather": 1,
+ "getDateTime": 1,
+ "variableFromJSON": 2,
+ "getTimeStamp": 1,
+ "_status": 1,
+ "ormAccessUpdate": 1,
+ "else": 1,
+ "end": 1,
+ "randomString": 1,
+ "addResult": 1,
+ "encodeMD5": 1
+ },
+ "low_quality_cells": 0,
+ "empty_cells": 9133
+}
\ No newline at end of file
diff --git a/output/candidate_F_reward_10_v1_parser.json b/output/candidate_F_reward_10_v1_parser.json
new file mode 100644
index 0000000..8aecb2e
--- /dev/null
+++ b/output/candidate_F_reward_10_v1_parser.json
@@ -0,0 +1,93 @@
+[
+ {
+ "task_id": 1,
+ "text": "Crear un microservicio que valide la edad de un usuario y determine si es mayor de edad. El servicio debe recibir un parámetro 'edad' y devolver un mensaje de estado apropiado.",
+ "code": "addParam(\"edad\", user_age)\naddVar(min_age, 18)\nif(user_age, min_age, \">=\")\n addVar(status_msg, \"Usuario mayor de edad\")\n addVar(_status, 200)\nelse()\n addVar(status_msg, \"Usuario menor de edad\")\n addVar(_status, 403)\nend()\naddResult(status_msg)",
+ "test_inputs": {
+ "edad": "25"
+ },
+ "test_list": [
+ "re.match(r'Usuario mayor de edad', status_msg)",
+ "re.match(r'200', str(_status))"
+ ],
+ "_cell": [
+ "addParam",
+ "addVar",
+ "if_mode1"
+ ],
+ "_prior_weight": 0.9278,
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.114,
+ "test_quality": 1.0,
+ "richness": 0.333,
+ "quality": 1.268,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "addVar",
+ "else",
+ "end",
+ "if_mode1"
+ ],
+ "cell": [
+ "addParam",
+ "addVar",
+ "if_mode1"
+ ],
+ "extra": [
+ "_status",
+ "addResult",
+ "else",
+ "end"
+ ]
+ }
+ },
+ {
+ "task_id": 2,
+ "text": "Crear un microservicio que procese un texto en segundo plano, reemplace caracteres especiales y retorne el resultado procesado",
+ "code": "addParam(\"texto\", input_text)\naddParam(\"timeout\", max_timeout)\nfunction procesarTexto(data) {\nreplace(data, \"@\", \"[AT]\", cleaned_data)\nreplace(cleaned_data, \"#\", \"[HASH]\", final_data)\nreturn(final_data)\n}\ntask_id = go procesarTexto(input_text)\nresult_data = gather(task_id, max_timeout)\naddResult(result_data)\n_status = 200",
+ "test_inputs": {
+ "texto": "usuario@dominio.com #hashtag",
+ "timeout": 5000
+ },
+ "test_list": [
+ "re.match(r'usuario\\[AT\\]dominio\\.com \\[HASH\\]hashtag', result_data)",
+ "re.match(r'200', str(_status))"
+ ],
+ "_cell": [
+ "gather",
+ "replace",
+ "return"
+ ],
+ "_prior_weight": 0.0848,
+ "_quality": {
+ "fidelity": 1.0,
+ "bonus_ratio": 0.114,
+ "test_quality": 1.0,
+ "richness": 0.367,
+ "quality": 1.271,
+ "detected": [
+ "_status",
+ "addParam",
+ "addResult",
+ "function",
+ "gather",
+ "replace",
+ "return"
+ ],
+ "cell": [
+ "gather",
+ "replace",
+ "return"
+ ],
+ "extra": [
+ "_status",
+ "addParam",
+ "addResult",
+ "function"
+ ]
+ }
+ }
+]
\ No newline at end of file
diff --git a/output/candidate_F_reward_10_v1_parser_coverage_stats.json b/output/candidate_F_reward_10_v1_parser_coverage_stats.json
new file mode 100644
index 0000000..aa4eda4
--- /dev/null
+++ b/output/candidate_F_reward_10_v1_parser_coverage_stats.json
@@ -0,0 +1,18 @@
+{
+ "total_cells": 9139,
+ "filled_cells": 2,
+ "fill_rate": 0.0002,
+ "distribution_entropy": 2.585,
+ "node_type_frequency": {
+ "addParam": 1,
+ "if_mode1": 1,
+ "addVar": 1,
+ "return": 1,
+ "replace": 1,
+ "gather": 1
+ },
+ "low_quality_cells": 0,
+ "empty_cells": 9137,
+ "kl_divergence_dataset_vs_prior": 0.562,
+ "prior_summary": "ConstructPrior: 4262 cells | mean=0.252 | epsilon=0.05 | github_files_analyzed=100 github_files_fetched=100 total_pair_cooccurrences=441 total_trio_cooccurrences=3821"
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260401_140053.json b/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260401_140053.json
new file mode 100644
index 0000000..1465ea6
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260401_140053.json
@@ -0,0 +1,373 @@
+{
+ "generated_at": "2026-04-01T14:00:53.629253",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-docs-test-v4-bge",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 50,
+ "elapsed_seconds": 432.5,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-docs-test-v4-bge",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.4283,
+ "answer_relevancy": 0.8046,
+ "context_recall": 0.4117,
+ "context_precision": 0.3532
+ },
+ "global_score": 0.4995,
+ "verdict": "INSUFFICIENT",
+ "details": [
+ {
+ "id": "GD-R-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP, or Advanced Virtual Application Protocol, is a new proprietary language. It appears to be used in the context of an application that involves adding variables, parameters, and results. The language seems to have specific syntax and structure, such as using `addVar`, `addParam`, and `addResult`",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-002",
+ "category": "RETRIEVAL",
+ "question": "How does the if() conditional block work in AVAP? How are blocks closed?",
+ "answer_preview": "In AVAP, a conditional block is enclosed using curly braces `{}`. The `if` statement checks a condition and executes code within an `if` block only if the condition evaluates to true.\n\nBlocks in AVAP are not explicitly closed with `end()` as they are in some other programming languages. Instead, the",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-003",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle external HTTP calls? What commands are available and how is timeout managed?",
+ "answer_preview": "AVAP handles external HTTP calls through the `RequestGet` function, which sends a GET request to the specified URL. The response from this call is stored in the variable `respuesta`.\n\nTimeout management in AVAP is handled by the `try...except` block. If an exception occurs during the execution of th",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-004",
+ "category": "RETRIEVAL",
+ "question": "How do functions work in AVAP? What is the scope of variables inside a function?",
+ "answer_preview": "In AVAP, functions are used to encapsulate code that performs specific tasks and can be reused throughout the program. Functions allow for modularity and maintainability by breaking down complex logic into smaller, more manageable pieces.\n\nThe scope of variables within a function in AVAP is determin",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-005",
+ "category": "RETRIEVAL",
+ "question": "What are the three types of variable scopes in AVAP and what are their visibility rules?",
+ "answer_preview": "In AVAP, there are three main types of variable scopes: local, global, and instance. The visibility rules for these scopes are as follows:\n\n- **Local Variables**: These variables are scoped to a specific function or block where they are declared. They can only be accessed within the same scope in wh",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-006",
+ "category": "RETRIEVAL",
+ "question": "How does concurrency work in AVAP? What are goroutines and how are they launched?",
+ "answer_preview": "Concurrency is a fundamental concept in AVAP that allows multiple tasks to be executed simultaneously. In AVAP, this is achieved through the use of goroutines.\n\nGoroutines are lightweight threads of execution that can run concurrently with other goroutines or within the same goroutine. They are mana",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-007",
+ "category": "RETRIEVAL",
+ "question": "What is the addParam command and how does it capture HTTP request parameters?",
+ "answer_preview": "The `addParam` command in this context is used to add key-value pairs to an HTTP request. It captures the specified parameters, which can include headers like \"lang\" or \"client_id\", as well as query string elements such as \"sells\". The parameters are added to the request using the `addParam` functio",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-008",
+ "category": "RETRIEVAL",
+ "question": "How does the startLoop / endLoop construct work in AVAP?",
+ "answer_preview": "The `startLoop` and `endLoop` constructs are used to iterate over a sequence of items or variables. In this context, they appear to be part of a loop structure that iterates through an index variable `i`, which is incremented by 1 each time the loop runs.\n\nHere's how it works:\n\n- The `startLoop(i, 1",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-009",
+ "category": "RETRIEVAL",
+ "question": "What is the addResult command and how does it build the HTTP response?",
+ "answer_preview": "The `addResult` command in this context is used to store a result variable within the function's return value. It takes two parameters: the name of the result variable (`msg`, `error_trace`, etc.) and the actual value that should be stored.\n\nIn the provided code, it appears to be building an HTTP re",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-010",
+ "category": "RETRIEVAL",
+ "question": "How does error handling work in AVAP with try() and exception()?",
+ "answer_preview": "In the provided context, error handling is implemented using the `try()` function followed by an `exception(e)` block. This structure allows for the execution of a potentially risky operation (indicated by the `try()` part) that may raise exceptions (`e`). The `exception(e)` block then captures and ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-011",
+ "category": "RETRIEVAL",
+ "question": "What is the replace() command in AVAP and how is it used?",
+ "answer_preview": "The `replace()` command in AVAP is used to perform a string replacement operation. It takes three parameters: the first parameter is the original string, the second parameter specifies what part of the string should be replaced (using a regular expression), and the third parameter is the new value t",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-012",
+ "category": "RETRIEVAL",
+ "question": "What are the reserved keywords in AVAP that cannot be used as identifiers?",
+ "answer_preview": "In AVAP, there are no explicitly mentioned reserved keywords that cannot be used as identifiers. The provided context does not contain any information about reserved keywords or their restrictions on use as identifiers. Therefore, based solely on the given context:\n\nAnswer: There are no specific res",
+ "n_chunks": 2
+ },
+ {
+ "id": "GD-R-013",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle string formatting and concatenation?",
+ "answer_preview": "AVAP uses the `addVar` function to dynamically generate strings. For example, in the first snippet:\n\n```plaintext\naddVar(mensaje, \"Hola mundo desde AVAP\")\n```\n\nThis line adds a variable named `mensaje` with the value `\"Hola mundo desde AVAP\"`. The `addResult` function then outputs this string.\n\nIn a",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-014",
+ "category": "RETRIEVAL",
+ "question": "How does the encodeSHA256 command work in AVAP?",
+ "answer_preview": "The `encodeSHA256` command in AVAP is used to generate a SHA-256 hash of a payload data string. This function takes two parameters: the payload data and a checksum, which are combined using the SHA-256 algorithm to produce a fixed-size output.\n\nHere's how it works:\n\n1. The `encodeSHA256` command acc",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-015",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle date and time operations?",
+ "answer_preview": "AVAP handles date and time operations using the `getDateTime` function. This function takes three parameters: a string for the language (defaulting to English), an integer representing the number of days to add or subtract, and a string specifying the timezone. The function returns the current date ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-016",
+ "category": "RETRIEVAL",
+ "question": "What is the AddvariableToJSON command and how is it used to build JSON objects?",
+ "answer_preview": "The `AddvariableToJSON` command in this context is a function that takes three parameters: an identifier (`clave`), a variable name, and a value. It appends the given value to the specified variable within a JSON object.\n\nFor example:\n- The first line of code uses `AddvariableToJSON(clave, \"cliente_",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-017",
+ "category": "RETRIEVAL",
+ "question": "How does the getListLen command work and what is it used for?",
+ "answer_preview": "The getListLen function calculates the length of a given list. In this context, it appears to be used with two lists:\n\n1. `mi_lista` in the first snippet.\n2. `registros` in the second snippet.\n\nIt returns the number of elements in these lists. This could be useful for determining how many items are ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-018",
+ "category": "RETRIEVAL",
+ "question": "How does the randomString command work in AVAP?",
+ "answer_preview": "The `randomString` command generates a random string of characters based on the specified parameters. In this case, it creates a 32-character long string consisting of uppercase letters and digits.\n\nHere is the relevant code snippet from the context:\n\n```plaintext\n[2] randomString(\"[A-Z]\\d\", 32, tok",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-019",
+ "category": "RETRIEVAL",
+ "question": "What is the $ dereference operator in AVAP and when is it used?",
+ "answer_preview": "The $ dereference operator in AVAP is used to access elements of an array or list. It is typically followed by a variable name enclosed in square brackets, indicating that the value should be retrieved from the corresponding element in the array.\n\nFor example:\n```avap\n$[arrayName[index]]\n```\nThis ex",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-020",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle ORM database operations? What commands are available?",
+ "answer_preview": "AVAP uses the `ormCheckTable` function to check if a table exists in the database. If the table does not exist, it creates the table using the `ormCreateTable` command.\n\nThe available commands for ORM database operations include:\n- `ormCheckTable(tabla_pruebas, resultado_comprobacion)`: Checks if a ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-001",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'name' parameter and returns a personalized greeting.",
+ "answer_preview": "```avap\nregisterEndpoint(\"/greet\",\"GET\",[],\"HELLO_WORLD\",main,result)\n\naddVar(name, \"Alberto\")\nresult = \"Hello,\" + name \naddResult(result)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-002",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash, and returns it.",
+ "answer_preview": "```avap\nfunction generateSha256Hash(password) {\n addVar(hash, encodeSHA256(password))\n return(hash)\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that loops from 1 to 5, builds a JSON object with each index as a key, and returns it.",
+ "answer_preview": "```avap\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item,'valor_generado',mi_json)\nendLoop()\naddResult(mi_json)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-004",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that validates if a 'role' parameter belongs to a list of allowed roles and returns the access result.",
+ "answer_preview": "```avap\nacceso = False\n\nif(None, None, \"r == 'admin' or r == 'editor' or r == 'root'\")\n acceso = True\nend()\n\naddResult(acceso)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-005",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that makes a GET request to an external API and handles connection errors.",
+ "answer_preview": "```avap\ntry()\n addVar(_status, 500)\n addVar(error_msg, \"Error de conexión\")\n addResult(error_msg)\nend()\n\naddVar(url, \"https://api.example.com/data\")\naddVar(headers, {\"Content-Type\": \"application/json\"})\naddVar(body, \"{}\")\n\nif(!url || !headers || !body) {\n addVar(_status, 400)\n addVar(",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-006",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP function that takes two numbers and returns their sum, then call it and return the result.",
+ "answer_preview": "```avap\nfunction suma(a, b){\n total = a + b\n return(total)\n}\nresultado = suma(10, 20)\naddResult(resultado)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-007",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'subtotal' parameter, computes 21% VAT, and returns the total.",
+ "answer_preview": "```avap\n[1] subtotal = readParam(\"subtotal\")\niva = subtotal * 0.21\ntotal = subtotal + iva\naddResult(total)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-008",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads an 'api_key' parameter and returns status 403 if it is null.",
+ "answer_preview": "```avap\naddParam(\"api_key\", key)\nif(key == None, None, \"==\")\n addVar(_status, 403)\n addVar(error, \"Acceso denegado: falta API KEY\")\n addResult(error)\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-009",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that generates a 32-character random alphanumeric token and returns it.",
+ "answer_preview": "```avap\ntoken = randomString(\"[A-Z]\\d\", 32, token_seguridad)\nreturn(token)\n```\n\nEvidence:\n- \"[1] randomString(\"[A-Z]\\d\", 32, token_seguridad)\"\n(Generated a 32-character random alphanumeric token)",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-010",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'lang' parameter and returns 'Hola' if it is 'es' or 'Hello' if it is 'en'.",
+ "answer_preview": "```avap\nifParam(\"lang\", l)\n if(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\n end()\nend()\n\naddResult(msg)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-011",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that checks if a database table exists and creates it if it does not.",
+ "answer_preview": "```avap\nif(not db.exists(\"database_table\"))\n createTable(\"database_table\", \"column1\", \"type1\", \"nullable\")\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-012",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that gets the current UTC timestamp and adds 24 hours to compute an expiration time.",
+ "answer_preview": "```avap\ngetDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-013",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that receives a new password parameter, validates it is not equal to the old password, and returns a confirmation.",
+ "answer_preview": "```avap\naddParam(\"password\", pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva != pass_antigua)\n addVar(cambio, \"Contraseña actualizada\")\nend()\naddResult(cambio)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-014",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a list parameter and returns its element count.",
+ "answer_preview": "```avap\nfunction getListLen(param_name, total){\n addParam(\"param_name\", param_name)\n addResult(total)\n}\ngetListLen(\"data_list\", cantidad)\naddResult(cantidad)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-015",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses a validation function to check a token parameter and returns the authorization result.",
+ "answer_preview": "```avap\nfunction es_valido(token){\n response = False\n if(token, \"SECRET\", \"=\")\n response = True\n end()\n return(response)\n}\n\nautorizado = es_valido(\"SECRET\")\naddResult(autorizado)\n\nrandomString(\"[A-Z]\\d\", 32, token_seguridad)\naddResult(token_seguridad)\n\nfunction suma(a, b){\n tot",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-016",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that returns two values in the HTTP response: a status code 200 and a message 'Success'.",
+ "answer_preview": "```avap\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n\n// Example of adding more variables or results\naddVar(mensaje, \"Hola mundo desde AVAP\")\naddResult(mensaje)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-017",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'saldo' parameter and returns True if it is greater than zero, False otherwise.",
+ "answer_preview": "```avap\nifParam(\"sal_par\", saldo)\nif(saldo > 0)\n permitir = True\nelse()\n permitir = False\nend()\naddResult(permitir)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-018",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that converts a Unix timestamp parameter to a human-readable date in dd/mm/yyyy format.",
+ "answer_preview": "```avap\nstampToDatetime(1708726162, \"%d/%m/%Y\", 0, fecha_human)\naddResult(fecha_human)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-019",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that replaces all spaces in a string parameter with hyphens and returns the result.",
+ "answer_preview": "```avap\nfunction replaceSpacesWithHyphens(inputString) {\n let modifiedString = inputString.replace(/ /g, '-');\n return modifiedString;\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-020",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses try/exception to execute a raw SQL query and return status 500 on database errors.",
+ "answer_preview": "```avap\ntry()\n ormDirect(\"UPDATE table_inexistente SET a=1\", res)\nexception(e)\n addVar(_status, 500)\n addVar(error_msg, \"Error de base de datos\")\n addResult(error_msg)\nend()\n\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n\naddParam(\"api_key\", key)\nif(key, N",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-001",
+ "category": "CONVERSATIONAL",
+ "question": "Can you summarize what you just explained about AVAP scopes in fewer words?",
+ "answer_preview": "AVAP is a new proprietary language that uses variables to store and manipulate data. It includes functions like `addVar` for adding variables, `result` for storing results, and loops with `startLoop`. The context provided shows examples of how these functions are used, such as in the \"HELLO_WORLD\" e",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-002",
+ "category": "CONVERSATIONAL",
+ "question": "You mentioned that addResult builds the JSON response — can you clarify how multiple addResult calls work together?",
+ "answer_preview": "Certainly! The `addResult` function is used to build and return a JSON response. Each call to `addResult` adds an element to the JSON structure being constructed.\n\nFor example, in context [1], `addParam(\"client_id\", id_interno)` and `addResult(id_interno)` are part of a larger process where `id_inte",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-003",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between addVar and a plain assignment like x = 10 in AVAP?",
+ "answer_preview": "In AVAP, `addVar` is used to define variables that can be referenced later within the program. It allows for dynamic variable creation based on input or predefined values.\n\nA plain assignment like `x = 10` does not create a new variable; it simply assigns an initial value to an existing variable nam",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-004",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again the difference between the two modes of the if() command?",
+ "answer_preview": "The `if()` command in AVAP is used to execute a block of code based on a condition. The syntax for the `if()` command can be represented as follows:\n\n```avap\n[1] addParam(\"lang\", l)\nif(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\nend()\naddResult(msg)\n\n[2] encontrado = False\nstartLoop(i, 1, 10)\n if(i, 5,",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-005",
+ "category": "CONVERSATIONAL",
+ "question": "What happens if an error occurs in AVAP without a try block?",
+ "answer_preview": "If an error occurs in AVAP without a try block, the following actions are taken:\n\n- An exception is raised.\n- The status code is set to 500 (Internal Server Error).\n- An error message indicating \"Error de base de datos\" is added to the result.\n\nThis indicates that there was an unexpected issue with ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-006",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again how the timeout in RequestGet works?",
+ "answer_preview": "The `RequestGet` function in AVAP is used to make a request to an API endpoint. The timeout parameter allows you to specify how long the function should wait before timing out if it hasn't received a response from the server within that period.\n\nIn this case, the `timeout` parameter is set to 0, whi",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-007",
+ "category": "CONVERSATIONAL",
+ "question": "Can I iterate over a list of items in AVAP instead of a numeric range?",
+ "answer_preview": "Yes, you can iterate over a list of items in AVAP. The provided code snippet shows an example where the loop variable `i` is used to access each item in the list by name. This demonstrates that lists are iterable and can be accessed using names rather than indices.\n\n```avap\nstartLoop(i, 1, 10)\n i",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-008",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between RequestGet and RequestPost in practice?",
+ "answer_preview": "The `RequestGet` function retrieves data from a specified URL, while the `RequestPost` function sends data to a specified URL. In this context, both functions are used to interact with an API endpoint.\n\n- **RequestGet**: This function is typically used when you want to retrieve information or data f",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-009",
+ "category": "CONVERSATIONAL",
+ "question": "Goroutines cannot access Main Local Scope — can you give a practical example of why that matters?",
+ "answer_preview": "In Go programming language, goroutines are lightweight threads of execution that run concurrently with the main thread. They do not have direct access to the global or local scope variables declared in the main function. This is because each goroutine runs within its own independent stack frame and ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-010",
+ "category": "CONVERSATIONAL",
+ "question": "What format does encodeSHA256 return its output in?",
+ "answer_preview": "The `encodeSHA256` function returns its output as a string.",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_104247.json b/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_104247.json
new file mode 100644
index 0000000..3ff72ac
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_104247.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:42:47.999309",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-docs-test-v4-bge",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 48.4,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-docs-test-v4-bge",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.5378,
+ "answer_relevancy": 0.6515,
+ "context_recall": 0.3143,
+ "context_precision": 0.2425
+ },
+ "global_score": 0.4365,
+ "verdict": "INSUFFICIENT",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP, or Advanced Virtual Application Protocol, is a new proprietary language. It is designed to facilitate communication and interaction between different systems and applications in an efficient manner. The provided context does not contain specific details about the purpose of AVAP beyond its nam",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP uses the `if` statement to handle conditional logic. The condition is checked using a comparison operator, such as `==`, which compares two values for equality.\n\nBlocks in AVAP are typically closed with curly braces `{}`. For example:\n\n```avap\nif(condition, true_value, false_value)\n // code ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\n[1] addParam(\"password\", pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva, pass_antigua, \"!=\")\n addVar(cambio, \"Contraseña actualizada\")\nend()\naddResult(checksum)\n\nchecksum = encodeSHA256(\"payload_data\", checksum)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item,'valor_generado',mi_json)\nendLoop()\naddResult(mi_json)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestGet` function. This function allows you to make GET requests to a specified URL, with optional parameters such as headers or data.\n\nTimeout handling in AVAP is managed by the `addVar` command. When a variable named `timeout` is added using `addVa",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_105701.json b/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_105701.json
new file mode 100644
index 0000000..7329551
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-docs-test-v4-bge_20260406_105701.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:57:01.947412",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-docs-test-v4-bge",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 48.5,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-docs-test-v4-bge",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.4956,
+ "answer_relevancy": 0.508,
+ "context_recall": 0.3143,
+ "context_precision": 0.2425
+ },
+ "global_score": 0.3901,
+ "verdict": "INSUFFICIENT",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP, or Advanced Virtual Application Protocol, is a new proprietary language. The provided context does not contain specific details about the purpose of AVAP but indicates that it involves adding variables to a system, setting parameters, calculating discounts based on user type and purchase histo",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP uses the `if` statement to handle conditional logic. The condition is checked using a comparison operator, such as `==`, which compares two values for equality.\n\nBlocks in AVAP are typically closed with curly braces `{}`. For example:\n\n```avap\nif(condition, true_value, false_value)\n // code ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\n[1] readParam(password)\nhash = encodeSHA256(password)\naddResult(hash)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item,'valor_generado',mi_json)\nendLoop()\naddResult(mi_json)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestGet` function. This function allows you to make GET requests to a specified URL, with optional parameters such as headers or data.\n\nTimeout handling in AVAP is managed by the `addVar` command. When a variable named `error_trace` is added using th",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_105524.json b/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_105524.json
new file mode 100644
index 0000000..692e5e2
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-docs-test-v4_20260406_105524.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:55:24.806873",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-docs-test-v4",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 51.5,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-docs-test-v4",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.381,
+ "answer_relevancy": 0.6576,
+ "context_recall": 0.5905,
+ "context_precision": 0.4411
+ },
+ "global_score": 0.5175,
+ "verdict": "INSUFFICIENT",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP, or Advanced Virtual Application Protocol, is a new proprietary language. It is designed to be used in the context of virtual applications, likely within software environments that support advanced scripting or programming capabilities.\n\nThe document provided does not contain specific informati",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP handles conditional logic using the `if()` command, which can be invoked in two modes: **Mode 1 (Comparación estructurada)** and **Mode 2 (Expresión libre)**. The `if()` command requires a block to be enclosed within it for proper execution.\n\n### Mode 1: Comparación Estructurada\nThis mode is us",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\nfunction generateSHA256Hash(passwordParam)\n encodeSHA256(passwordParam, \"hash_result\")\nend()\naddResult(hash_result)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\n// Initialize the loop counter variable\ni = 1\n\n// Loop from 1 to 5\nwhile (i <= 5) {\n // Build a JSON object with the current value of i as the key\n jsonObj[i] = {\"key\": \"value\"}\n\n // Increment the loop counter\n i = i + 1\n}\n\n// Return the built JSON object\nreturn(jsonObj)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestPost` and `RequestGet` methods. These methods allow for making POST and GET requests, respectively.\n\nTimeout handling in AVAP involves a parameter called `timeout`. If this parameter exceeds its set value (in milliseconds), the variable destinati",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260401_135341.json b/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260401_135341.json
new file mode 100644
index 0000000..15d80c4
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260401_135341.json
@@ -0,0 +1,373 @@
+{
+ "generated_at": "2026-04-01T13:53:41.169877",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-knowledge-v2-bge",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 50,
+ "elapsed_seconds": 447.0,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-knowledge-v2-bge",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.54,
+ "answer_relevancy": 0.8509,
+ "context_recall": 0.5068,
+ "context_precision": 0.5661
+ },
+ "global_score": 0.6159,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-R-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is an Advanced Virtual API Programming language that is Turing Complete. It is designed to be used in the orquestation of microservices and I/O operations, with a focus on security, concurrency, determinism, and efficient processing of HTTP transactions, data ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-002",
+ "category": "RETRIEVAL",
+ "question": "How does the if() conditional block work in AVAP? How are blocks closed?",
+ "answer_preview": "In AVAP, the `if()` conditional block evaluates a logical or comparison expression. It requires an explicit closing statement using the command `end()`.\n\nBlocks within the `if()` function are closed with the following structure:\n```plaintext\n\n *\n\n```\n\nThis is distinct from othe",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-003",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle external HTTP calls? What commands are available and how is timeout managed?",
+ "answer_preview": "AVAP handles external HTTP calls through the use of the `RequestPost` and `RequestGet` functions. These functions execute POST and GET requests, respectively, storing the response in a destination variable.\n\nTimeout management in AVAP involves using the `timeout` parameter to specify the amount of t",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-004",
+ "category": "RETRIEVAL",
+ "question": "How do functions work in AVAP? What is the scope of variables inside a function?",
+ "answer_preview": "In AVAP, functions are enclosed within a recital of memory. When entering a function, AVAP creates a new dictionary of local variables isolated from the global context.\n\nVariables defined in the **Function Scope** can be accessed by other parts of the program or by other functions that call this one",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-005",
+ "category": "RETRIEVAL",
+ "question": "What are the three types of variable scopes in AVAP and what are their visibility rules?",
+ "answer_preview": "In AVAP, there are three types of variable scopes:\n\n1. **Global Scope**: Variables declared at the top level or outside any function.\n2. **Main Local Scope**: Variables declared within the `main` function.\n3. **Function Scope**: Variables declared inside a function.\n\nEach type has specific visibilit",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-006",
+ "category": "RETRIEVAL",
+ "question": "How does concurrency work in AVAP? What are goroutines and how are they launched?",
+ "answer_preview": "Concurrency in AVAP is managed through the concept of **goroutines**. Goroutines are lightweight threads that can be created using the `go` keyword, similar to creating a function call.\n\n```avap\ngo funcion()\n```\n\nGoroutines follow the same scope rules as normal functions:\n\n- They can access the **Gl",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-007",
+ "category": "RETRIEVAL",
+ "question": "What is the addParam command and how does it capture HTTP request parameters?",
+ "answer_preview": "The `addParam` command inspects a given HTTP request in an ordered, strict hierarchy: first in the URL (Query arguments), then in the JSON body, and finally in the form data. If the requested parameter does not exist, the variable destination is initialized as `None`.\n\nFor example:\n```plaintext\naddP",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-008",
+ "category": "RETRIEVAL",
+ "question": "How does the startLoop / endLoop construct work in AVAP?",
+ "answer_preview": "The `startLoop` and `endLoop` constructs are used to iterate over a range of indices, similar to Python's `range` function. The loop variable is incremented by 1 at each iteration.\n\n```plaintext\nstartLoop(idx, 0, 2)\n actual = registros[int(idx)]\nendLoop()\n```\n\nThis code snippet iterates over the ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-009",
+ "category": "RETRIEVAL",
+ "question": "What is the addResult command and how does it build the HTTP response?",
+ "answer_preview": "The `addResult` command in AVAP is used to register variables that will form part of the JSON body of the final response. It allows for specifying both the status code directly (`_status = 404`) or through a function call (`addVar(_status, 401)`).\n\nIn the provided context, `addResult(acceso)` is cal",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-010",
+ "category": "RETRIEVAL",
+ "question": "How does error handling work in AVAP with try() and exception()?",
+ "answer_preview": "In AVAP, error handling is managed using the `try()` function. When an error occurs within a `try` block, it triggers the execution of the following code:\n\n```python\nexception(e)\n addVar(_status,500)\n addResult(\"Error de base de datos\")\n```\n\nThis sequence of actions happens when an exception i",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-011",
+ "category": "RETRIEVAL",
+ "question": "What is the replace() command in AVAP and how is it used?",
+ "answer_preview": "The `replace()` command in AVAP is a function that replaces occurrences of a specified substring with another substring within a string. It is commonly used to modify strings by replacing certain characters or patterns.\n\nIn the provided context, the `replace()` command is used as follows:\n\n1. The fi",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-012",
+ "category": "RETRIEVAL",
+ "question": "What are the reserved keywords in AVAP that cannot be used as identifiers?",
+ "answer_preview": "In AVAP, the following keywords are considered reserved and cannot be used as identifiers:\n\n- `_status`",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-013",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle string formatting and concatenation?",
+ "answer_preview": "AVAP supports string formatting through the use of placeholders in expressions. For example, you can format strings by using curly braces `{}` with variables or expressions inside them:\n\n```plaintext\n\"Hello, {}! Your age is {} years old.\"\n```\n\nWhen this expression is evaluated, it will replace `{}` ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-014",
+ "category": "RETRIEVAL",
+ "question": "How does the encodeSHA256 command work in AVAP?",
+ "answer_preview": "The `encodeSHA256` command in AVAP is used to generate a SHA-256 hash of a given payload data. This function takes two parameters: the payload data and a checksum, which are combined using the SHA-256 algorithm to produce a fixed-size output.\n\nHere's how it works:\n\n1. The `encodeSHA256` command acce",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-015",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle date and time operations?",
+ "answer_preview": "AVAP provides three commands to cover all possible conversions between time representations. These commands support calendar formats in Python's `strftime` notation and calculations with `TimeDelta` expressed in seconds (positive for adding, negative for subtracting).\n\nThe context also mentions that",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-016",
+ "category": "RETRIEVAL",
+ "question": "What is the AddvariableToJSON command and how is it used to build JSON objects?",
+ "answer_preview": "The `AddvariableToJSON` command in this context is used to add variables to a JSON object. Specifically, it takes two parameters: `clave`, which specifies the key for the variable, and `datos_cliente`, which contains the data that will be added as a value under the specified key.\n\nFor example:\n```pl",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-017",
+ "category": "RETRIEVAL",
+ "question": "How does the getListLen command work and what is it used for?",
+ "answer_preview": "The getListLen command counts the number of elements in a list or string. It can be used to determine the length of any collection, such as a list of items or a string of characters.\n\nFor example:\n- The first command `getListLen(mi_lista, cantidad)` calculates the length of the list named `mi_lista`",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-018",
+ "category": "RETRIEVAL",
+ "question": "How does the randomString command work in AVAP?",
+ "answer_preview": "The `randomString` command generates a string of characters based on the specified parameters. Here's how it works:\n\n- `[1] randomString(\"[A-Z]\\d\", 32, token_seguridad)`: This command generates a 32-character string consisting only of uppercase letters (A-Z) and digits (0-9). The output is assigned ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-019",
+ "category": "RETRIEVAL",
+ "question": "What is the $ dereference operator in AVAP and when is it used?",
+ "answer_preview": "The $ dereference operator in AVAP indicates that the value of a variable should be extracted from the table of symbols. This operator is utilized by the native command `addVar(copia, $original)`, where `$` signifies that the original variable's value should be retrieved.\n\nIn this context, it seems ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-020",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle ORM database operations? What commands are available?",
+ "answer_preview": "AVAP provides several commands for handling ORM database operations, including:\n\n- **ormCheckTable(tableName, varTarget)**: Checks the existence of a table in the connected database.\n- **ormCreateTable(fields, fieldsType, tableName, varTarget)**: DDL command to create tables.\n- **ormAccessSelect(fie",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-001",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'name' parameter and returns a personalized greeting.",
+ "answer_preview": "```avap\n/// personalizes a greeting based on the provided name\n\nfunction greet(name) {\n /// creates a personalized greeting message\n return \"Hello, \" + name + \"! How are you today?\";\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-002",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash, and returns it.",
+ "answer_preview": "```avap\nreadParam(\"password\")\ngenerateSHA256Hash(password)\nreturn hashed_password\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that loops from 1 to 5, builds a JSON object with each index as a key, and returns it.",
+ "answer_preview": "```avap\nx = 10\ny = 20\n\n# Build the JSON object\njson_obj = {}\n\nfor i in range(1, 6):\n json_obj[i] = x + y\n\nreturn json_obj\n```\n\nThis script initializes two variables `x` and `y`, then builds a JSON object with each index from 1 to 5 as keys. The values are the sum of `x` and `y`.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-004",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that validates if a 'role' parameter belongs to a list of allowed roles and returns the access result.",
+ "answer_preview": "```avap\n// Define the allowed roles as a list\nlet allowedRoles = [\"admin\", \"editor\", \"root\"];\n\n// Check if the provided role is in the allowed roles list\nif (allowedRoles.includes(rol)) {\n acceso = True;\n} else {\n acceso = False;\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-005",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that makes a GET request to an external API and handles connection errors.",
+ "answer_preview": "```avap\ntry()\n // Make the GET request to the external API\n var res = httpGet(\"https://example.com/api/resource\", {}, \"GET\")\nexception(e)\n addVar(_status, 500)\n addResult(\"Error al conectar con la API externa.\")\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-006",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP function that takes two numbers and returns their sum, then call it and return the result.",
+ "answer_preview": "```plaintext\nfunction suma(a, b){\n total = a + b\n return(total)\n}\n\nresult = suma(5, 3)\nreturn(result)\n```\n\nThe `suma` function in AVAP adds two numbers together and returns their sum. When called with arguments `5` and `3`, it will return `8`.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-007",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'subtotal' parameter, computes 21% VAT, and returns the total.",
+ "answer_preview": "```avap\nfunction computeTotal(subtotal){\n iva = subtotal * 0.21\n total = subtotal + iva\n return(total)\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-008",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads an 'api_key' parameter and returns status 403 if it is null.",
+ "answer_preview": "```avap\n/// checks for the presence of the api_key parameter\nif(api_key, None, \"==\")\n /// sets the status to 403\n addVar(_status, 403)\n /// sets an error message\n addVar(error, \"Acceso denegado: falta API KEY\")\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-009",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that generates a 32-character random alphanumeric token and returns it.",
+ "answer_preview": "```avap\ntoken = randomString(\"[A-Z]\\d\", 32, token_seguridad)\nreturn(token)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-010",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'lang' parameter and returns 'Hola' if it is 'es' or 'Hello' if it is 'en'.",
+ "answer_preview": "```avap\nif(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\nend()\n\nif(l, \"en\", \"=\")\n addVar(msg, \"Hello\")\nend()\naddResult(msg)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-011",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that checks if a database table exists and creates it if it does not.",
+ "answer_preview": "```avap\nvarTarget = ormCheckTable(\"my_database\", \"my_table\")\nif varTarget == 0:\n # Table does not exist, create it\n varTarget = ormCreateTable([\"column1\", \"column2\"], [\"type1\", \"type2\"], \"my_database.my_table\", varTarget)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-012",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that gets the current UTC timestamp and adds 24 hours to compute an expiration time.",
+ "answer_preview": "```avap\ngetDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-013",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that receives a new password parameter, validates it is not equal to the old password, and returns a confirmation.",
+ "answer_preview": "```avap\nconfirmPassword(newPass: string) {\n if (newPass !== pass_antigua) {\n return \"Confirmation: Password change successful.\";\n } else {\n return \"Error: New password cannot be the same as the old one.\";\n }\n}\n```\n\nEvidence:\n- \"\"\n(only quotes you actu",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-014",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a list parameter and returns its element count.",
+ "answer_preview": "```avap\n// 1. Obtain the length of the list\ngetListLen(myList, len)\n\n// 2. Return the element count\nlen\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-015",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses a validation function to check a token parameter and returns the authorization result.",
+ "answer_preview": "```avap\n// Validate the token using the es_valido function\nlet validatedToken = es_valido(\"SECRET\");\n\n// Check if the token is authorized\nif (validatedToken) {\n // Token is valid, proceed with further operations\n} else {\n // Token is not authorized, handle accordingly\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-016",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that returns two values in the HTTP response: a status code 200 and a message 'Success'.",
+ "answer_preview": "```avap\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n\n// Additional function to return a message\nfunction es_valido(token){\n response = False\n if(token, \"SECRET\", \"=\")\n response = True\n end()\n return(response)\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-017",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'saldo' parameter and returns True if it is greater than zero, False otherwise.",
+ "answer_preview": "```avap\n/// checks if the saldo is greater than zero\nif(saldo, 0, \">\")\n permitir = True\nelse()\n permitir = False\nend()\n\npermitir\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-018",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that converts a Unix timestamp parameter to a human-readable date in dd/mm/yyyy format.",
+ "answer_preview": "```avap\nstampToDatetime(1708726162, \"%d/%m/%Y\", 0, fecha_human)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-019",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that replaces all spaces in a string parameter with hyphens and returns the result.",
+ "answer_preview": "```avap\nreplace(\"spaces\", \" \", \"-\")\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-020",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses try/exception to execute a raw SQL query and return status 500 on database errors.",
+ "answer_preview": "```avap\ntry()\n ormDirect(\"UPDATE table_inexistente SET a=1\", res)\nexception(e)\n addVar(_status, 500)\n addResult(\"Error de base de datos\")\nend\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-001",
+ "category": "CONVERSATIONAL",
+ "question": "Can you summarize what you just explained about AVAP scopes in fewer words?",
+ "answer_preview": "AVAP uses three types of scopes: Global Scope, Main Local Scope, and Function Scope. Variables are resolved from the closest to the farthest scope, with the Main Local Scope not visible within functions. If a variable isn't found in these scopes, it results in an execution error.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-002",
+ "category": "CONVERSATIONAL",
+ "question": "You mentioned that addResult builds the JSON response — can you clarify how multiple addResult calls work together?",
+ "answer_preview": "Certainly! The `addResult` function is responsible for constructing and adding variables to the final JSON response. Each call to `addResult(variable)` adds a new variable to the JSON structure.\n\nFor example, if you have two `addResult` calls:\n1. `addResult(hashed_password)`\n2. `addResult(secure_tok",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-003",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between addVar and a plain assignment like x = 10 in AVAP?",
+ "answer_preview": "In AVAP, `addVar` allows for direct variable assignment using the `=` operator, which provides flexibility while maintaining strict context control. This means that when you use `addVar(copia, $original)`, it assigns the value of `$original` to a variable named \"copia\". The key difference is that `a",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-004",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again the difference between the two modes of the if() command?",
+ "answer_preview": "The `if()` command in AVAP has two different modes for its invocations:\n\n1. **Modo 1 (Comparación estructurada):** This mode is used when comparing two simple values directly, such as variables or literals. The syntax is:\n ```avap\n if(átomo_1, átomo_2, \"operador\")\n ```\n - **Argumentos 1 y 2:",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-005",
+ "category": "CONVERSATIONAL",
+ "question": "What happens if an error occurs in AVAP without a try block?",
+ "answer_preview": "If an error occurs in AVAP without a try block, the variable destination receives `None`. This is indicated by the comment: \"* **`RequestPost(url, querystring, headers, body, destino, timeout)`**: Ejecuta un POST almacenando la respuesta en `destino`. * Si se supera, la variable destino recibe `None",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-006",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again how the timeout in RequestGet works?",
+ "answer_preview": "The `timeout` parameter in the `RequestGet` function is used to control the maximum amount of time that the request will wait for a response before it cancels the operation and returns `None`. This helps prevent potential issues where the server might take longer than expected to respond, leading to",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-007",
+ "category": "CONVERSATIONAL",
+ "question": "Can I iterate over a list of items in AVAP instead of a numeric range?",
+ "answer_preview": "Yes, you can iterate over a list of items in AVAP by using the `itemFromList` command. This allows you to access elements within the list without needing to know their indices beforehand.\n\nFor example:\n```avap\n// 1. Obtain the length of the list\ngetListLen(myList, len)\n\n// 2. Iterate with index-cont",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-008",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between RequestGet and RequestPost in practice?",
+ "answer_preview": "In practice, **RequestGet** and **RequestPost** are used to interact with a web service or API by sending HTTP requests. The main differences between them are:\n\n- **RequestGet**: This method sends an HTTP GET request without including any body content (such as JSON or form data). It is typically use",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-009",
+ "category": "CONVERSATIONAL",
+ "question": "Goroutines cannot access Main Local Scope — can you give a practical example of why that matters?",
+ "answer_preview": "Certainly! The reason goroutines cannot directly access the `Main Local Scope` is crucial for managing resources and avoiding unintended side effects. Let's consider an example to illustrate this point:\n\n```python\nx = 10\ny = 20\n\n# This variable x is in the Main Local Scope, which means it can be acc",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-010",
+ "category": "CONVERSATIONAL",
+ "question": "What format does encodeSHA256 return its output in?",
+ "answer_preview": "The `encodeSHA256` function returns its output in the form of a hexadecimal digest. Specifically, it produces a 64-character hex-digest as specified by SHA-256. This ensures that the data is securely hashed and can be easily verified for integrity.",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_104159.json b/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_104159.json
new file mode 100644
index 0000000..3dbc24c
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_104159.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:41:59.556328",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-knowledge-v2-bge",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 57.0,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-knowledge-v2-bge",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.7032,
+ "answer_relevancy": 0.8398,
+ "context_recall": 0.5758,
+ "context_precision": 0.6333
+ },
+ "global_score": 0.688,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is an Advanced Virtual API Programming language that is Turing Complete. It is designed to be used in the orquestation of microservices and I/O operations, with a focus on security, concurrency, determinism, and efficient processing of HTTP transactions, data ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP handles conditional logic using the `if()` command, which can be invoked in two modes:\n\n1. **Mode 1 (Structured Comparison):** The syntax is `if(variable, value, comparison_operator)`. This mode evaluates a comparison between the variable and the value provided as strings representing operators",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\nencodeSHA256(password, hashed_password)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\nx = 0\n\nwhile x < 5:\n y = x + 1\n \n # Build the JSON object for this iteration\n json_obj = {\n \"index\": y,\n \"value\": x * 2\n }\n \n # Return the current iteration's JSON object\n return json_obj\n\nx += 1\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the use of the `RequestPost` and `RequestGet` functions. These functions allow for sending POST or GET requests to a specified URL with query parameters, headers, and body data.\n\nTimeout handling in AVAP involves using the `timeout` parameter when making HTT",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_105613.json b/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_105613.json
new file mode 100644
index 0000000..3ec061c
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-knowledge-v2-bge_20260406_105613.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:56:13.462829",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-knowledge-v2-bge",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 48.6,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-knowledge-v2-bge",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.6536,
+ "answer_relevancy": 0.8496,
+ "context_recall": 0.5758,
+ "context_precision": 0.6333
+ },
+ "global_score": 0.6781,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is an Advanced Virtual API Programming language that is Turing Complete. It is designed to be used in the orquestation of microservices and I/O operations, with a focus on security, concurrency, and determinism.\n\nThe language is described as strictly oriented ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP handles conditional logic using the `if()` command, which can be invoked in two modes:\n\n1. **Mode 1 (Structured Comparison):** The syntax is `if(variable, value, comparison_operator)`. This mode evaluates a comparison between the variable and the value provided as strings representing operators",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\nencodeSHA256(password, hashed_password)\n```\n\nThis script takes a 'password' parameter, generates its SHA-256 hash using `encodeSHA256`, and stores the result in `hashed_password`.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\nx = 0\n\nwhile x < 5:\n y = x + 1\n \n # Build the JSON object for this iteration\n json_obj = {\n \"index\": y,\n \"value\": x * 2\n }\n \n # Return the current iteration's JSON object\n return json_obj\n\nx += 1\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestPost` and `RequestGet` functions. These functions allow for POST and GET requests, respectively, to be executed on a specified URL with optional query parameters, headers, body data, and destination file.\n\nTimeout handling in AVAP is managed by t",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_105433.json b/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_105433.json
new file mode 100644
index 0000000..9e47821
--- /dev/null
+++ b/output/evaluation_bge-m3-latest_avap-knowledge-v2-qwen_20260406_105433.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:54:33.255759",
+ "embedding_model": "bge-m3:latest",
+ "index_name": "avap-knowledge-v2-qwen",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 52.5,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-knowledge-v2-qwen",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.58,
+ "answer_relevancy": 0.6678,
+ "context_recall": 0.419,
+ "context_precision": 0.3733
+ },
+ "global_score": 0.51,
+ "verdict": "INSUFFICIENT",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP is a new proprietary language. The provided context does not contain specific information about the purpose or design of AVAP. Therefore, I don't have enough information in the given context to answer that question.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP handles conditional logic using the `if()` function, which can be invoked in two ways:\n\n1. **Mode 1 (Structured Comparison):** The syntax is `if(variable, value, comparison)`, where:\n - `variable` or `value` must be a simple identifier or literal.\n - The comparison should use a string repre",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\nreturn hashed_password\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```javascript\n// Example of an AVAP script\n\nlet data = {};\n\nfor (let i = 1; i <= 5; i++) {\n let jsonKey = `key${i}`;\n data[jsonKey] = i;\n}\n\nreturn JSON.stringify(data);\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestPost` and `RequestGet` functions. The `RequestPost` function executes a POST request, while the `RequestGet` function performs a GET request.\n\nTimeout handling in AVAP involves specifying a `timeout` parameter when making an HTTP call using eithe",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_qwen3-0.6B-emb-latest_avap-docs-test-v4_20260401_134614.json b/output/evaluation_qwen3-0.6B-emb-latest_avap-docs-test-v4_20260401_134614.json
new file mode 100644
index 0000000..709e6db
--- /dev/null
+++ b/output/evaluation_qwen3-0.6B-emb-latest_avap-docs-test-v4_20260401_134614.json
@@ -0,0 +1,373 @@
+{
+ "generated_at": "2026-04-01T13:46:14.075064",
+ "embedding_model": "qwen3-0.6B-emb:latest",
+ "index_name": "avap-docs-test-v4",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 50,
+ "elapsed_seconds": 639.3,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-docs-test-v4",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.5937,
+ "answer_relevancy": 0.8515,
+ "context_recall": 0.6521,
+ "context_precision": 0.6772
+ },
+ "global_score": 0.6936,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-R-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is a proprietary language that is specifically designed to facilitate the orchestration of secure, concurrent, and deterministic microservices and I/O operations. It is not intended as a general-purpose programming language but rather as a domain-specific lang",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-002",
+ "category": "RETRIEVAL",
+ "question": "How does the if() conditional block work in AVAP? How are blocks closed?",
+ "answer_preview": "In AVAP, the `if()` conditional block works by evaluating a logical or comparison expression. The structure of an `if()` statement is as follows:\n\n```plaintext\nif(expression, value1, operator)\n```\n\nHere's how it functions and closes blocks:\n\n- **Expression**: This evaluates to either a boolean (true",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-003",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle external HTTP calls? What commands are available and how is timeout managed?",
+ "answer_preview": "AVAP handles external HTTP calls through the `RequestPost` and `RequestGet` commands. These commands allow for making POST and GET requests, respectively.\n\nTimeout management in AVAP involves a parameter called `timeout`. If this parameter exceeds its set value, it causes the destination variable to",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-004",
+ "category": "RETRIEVAL",
+ "question": "How do functions work in AVAP? What is the scope of variables inside a function?",
+ "answer_preview": "In AVAP, functions are defined using the `function` keyword followed by an identifier and parameters. The function body is enclosed within curly braces `{}`.\n\n- **Function Scope:** Variables declared within a function have a local scope that is limited to that function. They cannot be accessed or mo",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-005",
+ "category": "RETRIEVAL",
+ "question": "What are the three types of variable scopes in AVAP and what are their visibility rules?",
+ "answer_preview": "In AVAP, there are three types of variable scopes: Global Scope, Main Local Scope, and Function Scope. The visibility rules for these scopes are as follows:\n\n1. **Global Scope**: Variables declared at the top level or outside any function definition can be accessed from anywhere in the program.\n2. *",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-006",
+ "category": "RETRIEVAL",
+ "question": "How does concurrency work in AVAP? What are goroutines and how are they launched?",
+ "answer_preview": "Concurrency in AVAP is achieved through the use of goroutines, which are lightweight threads that can be created and managed within a single process. Goroutines allow for concurrent execution of tasks without requiring explicit thread management.\n\nGoroutines in AVAP are launched using the `go` keywo",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-007",
+ "category": "RETRIEVAL",
+ "question": "What is the addParam command and how does it capture HTTP request parameters?",
+ "answer_preview": "The `addParam` command in AVAP inspects incoming HTTP requests in a hierarchical order: first from the URL (Query arguments), then within the JSON body, and finally through the form data. If the requested parameter does not exist, the variable destination is initialized as `None`.\n\nFor example:\n```a",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-008",
+ "category": "RETRIEVAL",
+ "question": "How does the startLoop / endLoop construct work in AVAP?",
+ "answer_preview": "The `startLoop` and `endLoop` constructs in AVAP are used to define loops that iterate over a sequence of indices. Here's how they function:\n\n1. **Defining the Loop:**\n - The loop is defined using the `startLoop(contador, inicio, fin)` syntax.\n - This starts the iteration process based on numeri",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-009",
+ "category": "RETRIEVAL",
+ "question": "What is the addResult command and how does it build the HTTP response?",
+ "answer_preview": "The `addResult` command registers variables that will form part of the final JSON body of the HTTP response. It takes a variable as an argument, which indicates what data should be included in the response.\n\nIn the provided example:\n\n```plaintext\nresult = \"Hello,\" + name \naddResult(result)\n```\n\nHere",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-010",
+ "category": "RETRIEVAL",
+ "question": "How does error handling work in AVAP with try() and exception()?",
+ "answer_preview": "In AVAP, error handling is managed using the `try()` block to catch exceptions. The `exception()` command within a `try`-`except` structure allows you to handle specific exceptions by assigning them to variables.\n\nHere's how it works:\n\n1. **Try Block**: You place your code inside a `try` block.\n2. *",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-011",
+ "category": "RETRIEVAL",
+ "question": "What is the replace() command in AVAP and how is it used?",
+ "answer_preview": "The `replace()` command in AVAP is not explicitly mentioned in the provided context. However, based on the information given about other commands like `registerEndpoint`, `addVar`, and `system_command`, we can infer that a similar functionality might be available for replacing values or strings with",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-012",
+ "category": "RETRIEVAL",
+ "question": "What are the reserved keywords in AVAP that cannot be used as identifiers?",
+ "answer_preview": "The following keywords in AVAP are reserved and cannot be used as identifiers:\n\n- _status\n\nThese keywords have special meanings within the language and should not be used as variable names or other identifier types.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-013",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle string formatting and concatenation?",
+ "answer_preview": "AVAP handles string formatting and concatenation through the use of escape sequences within strings. Specifically, it supports the following escape sequences:\n\n- `\\\\` for a backslash character.\n- `\\n` for a newline.\n- `\\t` for a tab.\n- `\\r` for a carriage return.\n- `\\'` for an apostrophe.\n\nFor examp",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-014",
+ "category": "RETRIEVAL",
+ "question": "How does the encodeSHA256 command work in AVAP?",
+ "answer_preview": "The `encodeSHA256` command in AVAP is used to hash a payload data string using the SHA-256 algorithm. This produces a 64-character hexadecimal digest that serves as an irreversible representation of the original input, enhancing security for storing passwords or verifying data integrity.\n\nHere's how",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-015",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle date and time operations?",
+ "answer_preview": "AVAP provides several commands for handling dates and times:\n\n1. **getDateTime(formato, timedelta, zona_horaria, destino)**: Captures the current system date and time, applies the specified `timedelta`, and converts it to the given `zona_horaria` before storing the result.\n\n2. **stampToDatetime(epoc",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-016",
+ "category": "RETRIEVAL",
+ "question": "What is the AddvariableToJSON command and how is it used to build JSON objects?",
+ "answer_preview": "The `AddvariableToJSON` command in AVAP is used to dynamically inject a new property into an existing JSON object. This allows for modifying or adding properties to a JSON structure without needing to parse or construct the entire JSON string from scratch.\n\nHere's how it works:\n\n1. **Syntax**: The c",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-017",
+ "category": "RETRIEVAL",
+ "question": "How does the getListLen command work and what is it used for?",
+ "answer_preview": "The `getListLen` command works by taking a list as an argument and returning its length. It iterates through each element of the list, counting them up until it reaches the end of the list.\n\nHere's how it typically functions:\n\n1. The function takes one parameter: `mi_lista`, which is expected to be ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-018",
+ "category": "RETRIEVAL",
+ "question": "How does the randomString command work in AVAP?",
+ "answer_preview": "The `randomString` command generates a randomly generated string of characters based on the provided pattern and length. The resulting string is then stored in the specified destination.\n\nHere's how it works:\n\n1. **Pattern**: A regular expression (regex) pattern defines which characters are allowed ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-019",
+ "category": "RETRIEVAL",
+ "question": "What is the $ dereference operator in AVAP and when is it used?",
+ "answer_preview": "The `$` dereference operator in AVAP indicates that the variable named \"original\" should be looked up in the symbol table. This allows for dynamic access to variables, enabling a more flexible syntax compared to static assignment.\n\nIn AVAP, the `$` prefix on an identifier refers to the value of anot",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-020",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle ORM database operations? What commands are available?",
+ "answer_preview": "AVAP handles ORM (Object-Relational Mapping) database operations through the `ormCheckTable`, `ormCreateTable`, `ormAccessSelect`, `ormInsert`, and `ormUpdate` commands. These commands allow for interacting with a specific table in the connected database, retrieving data using `ormAccessSelect`, ins",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-001",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'name' parameter and returns a personalized greeting.",
+ "answer_preview": "```avap\naddVar(name, \"Alberto\")\nresult = \"Hello,\" + name \naddResult(result)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-002",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash, and returns it.",
+ "answer_preview": "```avap\naddParam(password, pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva, pass_antigua, \"!=\"):\n addVar(cambio, \"Contraseña actualizada\")\nend()\naddResult(cambio)\n\n# Generate SHA-256 hash of the password parameter\nhash_result = encodeSHA256(password, \"result\")\n\n# Return the generated hash\nadd",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that loops from 1 to 5, builds a JSON object with each index as a key, and returns it.",
+ "answer_preview": "```avap\nstartLoop(i, 1, 6)\n variableFromJSON(\"key_%s\" % i, \"value_%s\" % i, mi_json_object)\nendLoop()\naddResult(mi_json_object)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-004",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that validates if a 'role' parameter belongs to a list of allowed roles and returns the access result.",
+ "answer_preview": "```avap\nif(r, [\"admin\", \"editor\", \"root\"], \"in\")\n acceso = True\nend()\naddResult(acceso)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-005",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that makes a GET request to an external API and handles connection errors.",
+ "answer_preview": "```avap\nRequestGet(\"https://api.example.com/data\", 0, 0, respuesta)\n```\n\nThis script sends a GET request to the specified URL using the `RequestGet` method from AVAP. It includes error handling for potential connection issues by checking the `_status` variable after the request is made. If an error ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-006",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP function that takes two numbers and returns their sum, then call it and return the result.",
+ "answer_preview": "```avap\nfunction suma(a, b) {\n total = a + b;\n return(total);\n}\nresultado = suma(10, 20);\naddResult(resultado);\n```\n\nThe `suma` function takes two numbers as input parameters (`a` and `b`) and returns their sum. When called with the arguments `10` and `20`, it will return `30`.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-007",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'subtotal' parameter, computes 21% VAT, and returns the total.",
+ "answer_preview": "```avap\nsubtotal = readParam(\"subtotal\")\niva = subtotal * 0.21\ntotal = subtotal + iva\nreturn(total)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-008",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads an 'api_key' parameter and returns status 403 if it is null.",
+ "answer_preview": "```avap\naddParam(\"api_key\", \"key\")\nif(key, None, \"==\")\n addVar(_status, 403)\n addVar(error, \"Acceso denegado: falta API KEY\")\n addResult(error)\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-009",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that generates a 32-character random alphanumeric token and returns it.",
+ "answer_preview": "```avap\nrandomString(\"[A-Z]\\d\", 32, \"token_seguridad\")\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-010",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'lang' parameter and returns 'Hola' if it is 'es' or 'Hello' if it is 'en'.",
+ "answer_preview": "```avap\naddParam(\"lang\", \"l\")\nif(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\nend()\nelse\n addVar(msg, \"Hello\")\nend()\naddResult(msg)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-011",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that checks if a database table exists and creates it if it does not.",
+ "answer_preview": "```avap\normCheckTable(\"my_table\", \"my_database\")\nif(resultado_comprobacion, False, '==')\n ormCreateTable(\"fields\", \"fieldsType\", \"my_database\", resultado_creacion)\nend()\naddResult(resultado_comprobacion)\naddResult(resultado_creacion)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-012",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that gets the current UTC timestamp and adds 24 hours to compute an expiration time.",
+ "answer_preview": "```avap\ngetDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-013",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that receives a new password parameter, validates it is not equal to the old password, and returns a confirmation.",
+ "answer_preview": "```avap\npass_antigua = \"password\"\nif(pass_nueva != pass_antigua):\n addVar(cambio, \"Contraseña actualizada\")\nend()\naddResult(cambio)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-014",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a list parameter and returns its element count.",
+ "answer_preview": "```avap\n// 1. Obtain the length of the list\ngetListLen(myList, len)\n\n// 2. Iterate with index controlled\ni = 0\nwhile (i < len) {\n itemFromList(myList, i, currentItem)\n // ... process currentItem ...\n i = i + 1\n}\n\n// Return the element count\naddResult(len)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-015",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses a validation function to check a token parameter and returns the authorization result.",
+ "answer_preview": "```avap\nfunction es_valido(token) {\n response = False\n if (token == \"SECRET\") \n response = True\n return(response)\n}\n\nautorizado = es_valido(\"SECRET\")\naddResult(autorizado)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-016",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that returns two values in the HTTP response: a status code 200 and a message 'Success'.",
+ "answer_preview": "```avap\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-017",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'saldo' parameter and returns True if it is greater than zero, False otherwise.",
+ "answer_preview": "```avap\nif(sal_par > 0)\n permitir = True\nelse()\n permitir = False\nend()\naddResult(permitir)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-018",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that converts a Unix timestamp parameter to a human-readable date in dd/mm/yyyy format.",
+ "answer_preview": "```avap\nstampToDatetime(1708726162, \"%d/%m/%Y\", 0, fecha_human)\naddResult(fecha_human)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-019",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that replaces all spaces in a string parameter with hyphens and returns the result.",
+ "answer_preview": "```avap\nreplaceSpacesWithHyphens(inputString)\n{\n let output = \"\";\n for (let i = 0; i < inputString.length; i++) {\n if (inputString[i] === \" \") {\n output += \"-\";\n } else {\n output += inputString[i];\n }\n }\n return output;\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-020",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses try/exception to execute a raw SQL query and return status 500 on database errors.",
+ "answer_preview": "```avap\ntry() {\n ormDirect(\"UPDATE table_inexistente SET a=1\", res);\n addVar(_status, 500);\n addResult(\"Error de base de datos\");\n} catch(e) {\n addVar(status, \"Error en la consulta SQL\");\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-001",
+ "category": "CONVERSATIONAL",
+ "question": "Can you summarize what you just explained about AVAP scopes in fewer words?",
+ "answer_preview": "AVAP uses three types of scopes: Global Scope, Main Local Scope, and Function Scope. The Main Local Scope is not visible within functions. If a variable isn't found in the visible scopes, it results in an execution error.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-002",
+ "category": "CONVERSATIONAL",
+ "question": "You mentioned that addResult builds the JSON response — can you clarify how multiple addResult calls work together?",
+ "answer_preview": "Certainly! The `addResult` function is responsible for collecting variables into the final JSON body of the response. When called, it adds a variable to the list of results being built.\n\nFor example, consider this scenario:\n\n```plaintext\nfunction suma(a, b){\n total = a + b\n return(total)\n}\nres",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-003",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between addVar and a plain assignment like x = 10 in AVAP?",
+ "answer_preview": "In AVAP, `addVar` allows for dynamic variable assignment with the use of the `$` operator. When you write `addVar(copia, $original)`, it means that the value of the variable named \"original\" is being retrieved from the symbol table and assigned to the variable \"copia\". This provides flexibility in t",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-004",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again the difference between the two modes of the if() command?",
+ "answer_preview": "Certainly! The `if()` command in AVAP has two distinct modes for its invocations:\n\n1. **Modo 1 (Comparación Estructurada):** This mode is used to compare two simple values directly, such as variables or literals. It requires that the first two arguments are either identifiers (variables) or literal ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-005",
+ "category": "CONVERSATIONAL",
+ "question": "What happens if an error occurs in AVAP without a try block?",
+ "answer_preview": "If an error occurs in AVAP but there is no `try` block present, the program will not handle or catch the exception. The execution of the code will continue as normal until it reaches the point where the exception was thrown. At that point, the system will typically throw a generic error message indi",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-006",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again how the timeout in RequestGet works?",
+ "answer_preview": "The `timeout` parameter in the `RequestGet` function is used to handle situations where a request might take longer than expected due to network latency. If the request exceeds the specified timeout, the variable `destino` receives the value `None`.\n\nThis mechanism ensures that if there's an issue w",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-007",
+ "category": "CONVERSATIONAL",
+ "question": "Can I iterate over a list of items in AVAP instead of a numeric range?",
+ "answer_preview": "In AVAP, you can iterate over a list or collection by using the `foreach` statement. This allows you to loop through each item in an array or any other iterable object.\n\nHere's how you might do it:\n\n```avap\nvar items = [\"apple\", \"banana\", \"cherry\"];\nforeach(item in items) {\n print(\"The current it",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-008",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between RequestGet and RequestPost in practice?",
+ "answer_preview": "In practice, `RequestPost` and `RequestGet` are used to interact with a web service or API endpoint. The main differences between them are:\n\n- **POST Method**: When using `RequestPost`, you send data as part of the URL query parameters (querystring). This is typically used for submitting forms or se",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-009",
+ "category": "CONVERSATIONAL",
+ "question": "Goroutines cannot access Main Local Scope — can you give a practical example of why that matters?",
+ "answer_preview": "Certainly! The reason goroutines cannot directly access the `Main Local Scope` is due to the strict scope rules in AVAP. Here’s a practical example:\n\n```python\n# Example function with global and local variables\n\ndef main():\n x = 10 # Global variable visible from anywhere\n y = 20 # Local var",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-010",
+ "category": "CONVERSATIONAL",
+ "question": "What format does encodeSHA256 return its output in?",
+ "answer_preview": "encodeSHA256 returns its output in a hexadecimal string of 64 characters.",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_qwen3-0.6B-emb-latest_avap-docs-test-v4_20260406_104102.json b/output/evaluation_qwen3-0.6B-emb-latest_avap-docs-test-v4_20260406_104102.json
new file mode 100644
index 0000000..4e01fad
--- /dev/null
+++ b/output/evaluation_qwen3-0.6B-emb-latest_avap-docs-test-v4_20260406_104102.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:41:02.528045",
+ "embedding_model": "qwen3-0.6B-emb:latest",
+ "index_name": "avap-docs-test-v4",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 69.0,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-docs-test-v4",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.7141,
+ "answer_relevancy": 0.8949,
+ "context_recall": 0.755,
+ "context_precision": 0.6578
+ },
+ "global_score": 0.7554,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is a proprietary language that is specifically designed to facilitate the orchestration of secure, concurrent, and deterministic microservices and I/O operations. It is not intended as a general-purpose programming language but rather as a domain-specific lang",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP handles conditional logic using two modes of invocation:\n\n1. **Modo 1 (Comparación Structurada):** `if(variable, value, comparison)`. This mode evaluates a comparison between the variable and value using an operator enclosed in double quotes (`\"==\"`, `\"!=\"`, etc.). The first two arguments must ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\naddParam(password, pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva, pass_antigua, \"!=\")\n addVar(cambio, \"Contraseña actualizada\")\nend()\naddResult(cambio)\n\n// Generate SHA-256 hash of the password parameter\naddResult(encodeSHA256(password, \"hash\"))\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddVariableToJSON(item, 'valor_generado', mi_json)\nendLoop()\nreturn(mi_json)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestPost` and `RequestGet` commands. These commands allow for making POST and GET requests, respectively.\n\nTimeout handling in AVAP involves a parameter called `timeout`. If this parameter exceeds its set value, the destination variable receives `Non",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_qwen3-0.6B-emb-latest_avap-knowledge-v2-qwen_20260401_133534.json b/output/evaluation_qwen3-0.6B-emb-latest_avap-knowledge-v2-qwen_20260401_133534.json
new file mode 100644
index 0000000..863f7bc
--- /dev/null
+++ b/output/evaluation_qwen3-0.6B-emb-latest_avap-knowledge-v2-qwen_20260401_133534.json
@@ -0,0 +1,373 @@
+{
+ "generated_at": "2026-04-01T13:35:34.799720",
+ "embedding_model": "qwen3-0.6B-emb:latest",
+ "index_name": "avap-knowledge-v2-qwen",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 50,
+ "elapsed_seconds": 619.3,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-knowledge-v2-qwen",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.5477,
+ "answer_relevancy": 0.8395,
+ "context_recall": 0.5354,
+ "context_precision": 0.596
+ },
+ "global_score": 0.6297,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-R-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is a Domain-Specific Language (DSL) that is Turing Complete. It was designed arquitectonically to orchestrate secure, concurrent, and deterministic microservices and I/O operations in an efficient manner.\n\nKey points about AVAP:\n- **Turing Completeness**: It c",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-002",
+ "category": "RETRIEVAL",
+ "question": "How does the if() conditional block work in AVAP? How are blocks closed?",
+ "answer_preview": "In AVAP, the `if()` conditional block evaluates a logical or comparison expression. A block of code within an `if` statement requires explicit closing using the command `end()`.\n\nThe `if()` function supports two modes of invocation:\n1. **Mode 1 (Structured Comparison):** `if(variable, value, compara",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-003",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle external HTTP calls? What commands are available and how is timeout managed?",
+ "answer_preview": "AVAP handles external HTTP calls through the use of functions like `RequestPost` and `RequestGet`. These functions execute POST or GET requests, respectively, storing the response in a destination variable. The `timeout` parameter ensures that if the request takes longer than specified, it will retu",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-004",
+ "category": "RETRIEVAL",
+ "question": "How do functions work in AVAP? What is the scope of variables inside a function?",
+ "answer_preview": "In AVAP, functions are enclosed within hermetic memory scopes. When entering a function, AVAP creates an isolated dictionary of local variables to the global context. The `return()` command acts as a flow control mechanism: it injects the calculated value into the caller, releases the local memory, ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-005",
+ "category": "RETRIEVAL",
+ "question": "What are the three types of variable scopes in AVAP and what are their visibility rules?",
+ "answer_preview": "In AVAP, there are three types of variable scopes:\n\n1. **Global Scope**: Variables declared at the top level or outside any function.\n2. **Main Local Scope**: Variables declared within the `main` function.\n3. **Function Scope**: Variables declared inside a function.\n\nEach type has specific visibilit",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-006",
+ "category": "RETRIEVAL",
+ "question": "How does concurrency work in AVAP? What are goroutines and how are they launched?",
+ "answer_preview": "Concurrency in AVAP is achieved through the use of **goroutines**, which are lightweight threads that allow for concurrent execution. Goroutines can be created using the `go` keyword followed by a function name or expression, as shown in the code snippet provided:\n\n```python\ngo funcion()\n```\n\nGorout",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-007",
+ "category": "RETRIEVAL",
+ "question": "What is the addParam command and how does it capture HTTP request parameters?",
+ "answer_preview": "The `addParam` command in AVAP inspects a request for specified parameters. It first checks the URL (Query arguments), then the JSON body, and finally the form data. If the requested parameter does not exist, the variable associated with that parameter is initialized as `None`.\n\n```bnf\n ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-008",
+ "category": "RETRIEVAL",
+ "question": "How does the startLoop / endLoop construct work in AVAP?",
+ "answer_preview": "The `startLoop` and `endLoop` constructs are used to iterate over a range of indices or elements within an array. In AVAP, this is typically done using a loop structure that allows for iterating through a sequence of values.\n\nFor example:\n```avap\nstartLoop(idx, 0, 2)\n actual = registros[int(idx)]",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-009",
+ "category": "RETRIEVAL",
+ "question": "What is the addResult command and how does it build the HTTP response?",
+ "answer_preview": "The `addResult(variable)` command in AVAP is responsible for registering which variables will form part of the final JSON body of the response. The system variable `_status` allows you to explicitly define the HTTP status code both by direct assignment (`_status = 404`) or through `addVar(_status, 4",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-010",
+ "category": "RETRIEVAL",
+ "question": "How does error handling work in AVAP with try() and exception()?",
+ "answer_preview": "In AVAP, error handling is managed using the `try()` function followed by an optional `exception(e)` statement. The `try()` block contains the code that may throw exceptions, while the `exception(e)` statement catches any exceptions thrown within the `try` block.\n\nFor example:\n```avap\ntry()\n ormD",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-011",
+ "category": "RETRIEVAL",
+ "question": "What is the replace() command in AVAP and how is it used?",
+ "answer_preview": "The replace() command in AVAP is a function that allows for the replacement of characters within strings. It takes three parameters: the string to be modified, the character or substring to be replaced, and the new character or substring to substitute.\n\nFor example:\n- `replace(\"hello\", \"l\", \"x\")` wo",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-012",
+ "category": "RETRIEVAL",
+ "question": "What are the reserved keywords in AVAP that cannot be used as identifiers?",
+ "answer_preview": "In AVAP, the following keywords are considered reserved and cannot be used as identifiers:\n\n- `````` (used for adding parameters)\n- `````` (for getting list length)\n- `````` (for adding results)\n- `````` (for retrieving query parameter l",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-013",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle string formatting and concatenation?",
+ "answer_preview": "AVAP supports string formatting through the use of placeholders in expressions. For example, you can format strings by using curly braces `{}` with variables or constants inside them:\n\n```plaintext\n\"Hello, {}! Your age is {} years old.\"\n```\n\nWhen this expression is evaluated, it will replace `{}` wi",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-014",
+ "category": "RETRIEVAL",
+ "question": "How does the encodeSHA256 command work in AVAP?",
+ "answer_preview": "The `encodeSHA256` command in AVAP is used to generate a SHA-256 hash of a given payload data. This function ensures that the data is securely hashed, providing a high level of security for sensitive information such as passwords or cryptographic keys.\n\nHere's how it works:\n\n1. **Input**: The `encod",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-015",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle date and time operations?",
+ "answer_preview": "AVAP provides three commands to cover all possible conversions between time representations. These commands support calendar formats in Python's `strftime` notation and calculations with `TimeDelta` expressed in seconds (positive for adding, negative for subtracting).\n\nThe system also includes a set",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-016",
+ "category": "RETRIEVAL",
+ "question": "What is the AddvariableToJSON command and how is it used to build JSON objects?",
+ "answer_preview": "The `AddvariableToJSON(clave, datos_cliente, mi_json_final)` command adds a variable to a JSON object. This function takes three parameters: `clave`, which specifies the key for the variable; `datos_cliente`, which contains the data that will be added as part of the JSON structure; and `mi_json_fina",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-017",
+ "category": "RETRIEVAL",
+ "question": "How does the getListLen command work and what is it used for?",
+ "answer_preview": "The getListLen command counts the number of elements in a list or string. It can be used to determine the length of any collection of items.\n\nFor example:\n- The `getListLen(mi_lista, cantidad)` function calculates how many elements are in the list named \"mi_lista\" and assigns that value to \"cantidad",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-018",
+ "category": "RETRIEVAL",
+ "question": "How does the randomString command work in AVAP?",
+ "answer_preview": "The `randomString` command generates a string of characters based on the specified parameters. In this case, it creates a random string with uppercase letters and digits, ensuring that each character is unique within the generated string.\n\nFor example:\n- The first call to `randomString(\"[A-Z]\\d\", 32",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-019",
+ "category": "RETRIEVAL",
+ "question": "What is the $ dereference operator in AVAP and when is it used?",
+ "answer_preview": "The $ dereference operator in AVAP indicates that the value of a variable named \"original\" should be extracted from the table of symbols. This operator is used within the `addVar` command to assign the value of one variable to another, ensuring that only the original variable's value is modified.\n\nI",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-R-020",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle ORM database operations? What commands are available?",
+ "answer_preview": "AVAP provides a set of integrated commands for handling complex types (JSON and lists), times, texts, and generating hashes. The `registerEndpoint` command is the atomic unit of configuration in AVAP, serving as the critical bridge between external HTTP network and internal code.\n\nKey commands inclu",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-001",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'name' parameter and returns a personalized greeting.",
+ "answer_preview": "```avap\nfunction greet(name){\n return \"Hello, \" + name + \"! Welcome to our system.\";\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-002",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash, and returns it.",
+ "answer_preview": "```avap\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\nreturn hashed_password\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that loops from 1 to 5, builds a JSON object with each index as a key, and returns it.",
+ "answer_preview": "```avap\nx = 10\ny = 20\n\njson_obj = variableToList(x, \"key_1\")\njson_obj = variableToList(y, \"key_2\")\n\nreturn json_obj\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-004",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that validates if a 'role' parameter belongs to a list of allowed roles and returns the access result.",
+ "answer_preview": "```avap\nif(r, [\"admin\", \"editor\", \"root\"], \"in\")\n acceso = True\nend()\n\naddResult(acceso)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-005",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that makes a GET request to an external API and handles connection errors.",
+ "answer_preview": "```avap\nRequestGet(\"https://api.example.com/data\", {}, {}, None, 5000)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-006",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP function that takes two numbers and returns their sum, then call it and return the result.",
+ "answer_preview": "```AVAP\nfunction suma(a, b){\n total = a + b\n return(total)\n}\n\nresultado = suma(10, 20)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-007",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'subtotal' parameter, computes 21% VAT, and returns the total.",
+ "answer_preview": "```avap\nfunction computeTotal(subtotal){\n iva = subtotal * 0.21\n total = subtotal + iva\n return(total)\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-008",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads an 'api_key' parameter and returns status 403 if it is null.",
+ "answer_preview": "```avap\naddParam(\"api_key\", key)\nif(key, None, \"==\")\n addVar(_status, 403)\n addVar(error, \"Acceso denegado: falta API KEY\")\n addResult(error)\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-009",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that generates a 32-character random alphanumeric token and returns it.",
+ "answer_preview": "```avap\ngenerateToken()\n{\n local token = randomString(\"[A-Z]\\d\", 32, token_seguridad)\n return token\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-010",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'lang' parameter and returns 'Hola' if it is 'es' or 'Hello' if it is 'en'.",
+ "answer_preview": "```avap\naddParam(\"lang\", \"es\")\nif(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\nend()\n\naddResult(msg)\n\naddParam(\"lang\", \"en\")\nif(l, \"en\", \"=\")\n addVar(msg, \"Hello\")\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-011",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that checks if a database table exists and creates it if it does not.",
+ "answer_preview": "```avap\normCheckTable(tableName, varTarget)\nif(varTarget,False,'==')\n ormCreateTable(fields, fieldsType, tableName, varTarget)\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-012",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that gets the current UTC timestamp and adds 24 hours to compute an expiration time.",
+ "answer_preview": "```avap\ngetDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-013",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that receives a new password parameter, validates it is not equal to the old password, and returns a confirmation.",
+ "answer_preview": "```avap\nif(pass_antigua != pass_nueva)\n addVar(cambio, \"Contraseña actualizada\")\nend()\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-014",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a list parameter and returns its element count.",
+ "answer_preview": "```avap\n// 1. Read the list parameter\nvariableToList(paramList, paramDest)\n\n// 2. Calculate the length of the list\ngetListLen(paramList, lenParam)\n\n// 3. Return the element count\nreturn lenParam\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-015",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses a validation function to check a token parameter and returns the authorization result.",
+ "answer_preview": "```avap\nfunction es_valido(token) {\n response = False\n if (token == \"SECRET\") \n response = True\n end()\n return(response)\n}\n\n// Example usage:\nlet authorized = es_valido(\"SECRET\")\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-016",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that returns two values in the HTTP response: a status code 200 and a message 'Success'.",
+ "answer_preview": "```plaintext\naddVar(code, 200)\naddVar(status, \"Success\")\naddResult(code)\naddResult(status)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-017",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'saldo' parameter and returns True if it is greater than zero, False otherwise.",
+ "answer_preview": "```avap\n/// checks if the saldo is greater than zero\nfunction check_saldo(saldo){\n return (saldo > 0);\n}\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-018",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that converts a Unix timestamp parameter to a human-readable date in dd/mm/yyyy format.",
+ "answer_preview": "```avap\ngetTimeStamp(\"1634567890\", \"%s\", \"PT1H\", \"destino\")\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-019",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that replaces all spaces in a string parameter with hyphens and returns the result.",
+ "answer_preview": "```avap\nreplace(\" \", \"-\", input_string)\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-C-020",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that uses try/exception to execute a raw SQL query and return status 500 on database errors.",
+ "answer_preview": "```avap\ntry()\n ormDirect(\"UPDATE table_inexistente SET a=1\", res)\nexcept(e)\n addVar(_status, 500)\n addResult(\"Error de base de datos\")\nend\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-001",
+ "category": "CONVERSATIONAL",
+ "question": "Can you summarize what you just explained about AVAP scopes in fewer words?",
+ "answer_preview": "AVAP uses three types of scopes: Global Scope, Main Local Scope, and Function Scope. Variables are visible only within their respective scope; the Main Local Scope is not accessible inside functions. The engine follows a hierarchical resolution order: Function Scope first, then Global Scope. If a va",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-002",
+ "category": "CONVERSATIONAL",
+ "question": "You mentioned that addResult builds the JSON response — can you clarify how multiple addResult calls work together?",
+ "answer_preview": "Certainly! The `addResult` function is responsible for constructing and adding variables to the final JSON response. Each call to `addResult(variable)` adds a new variable to the response.\n\nFor example, if you have two `addResult` calls:\n```plaintext\naddResult(hashed_password)\naddResult(secure_token",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-003",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between addVar and a plain assignment like x = 10 in AVAP?",
+ "answer_preview": "In AVAP, the `addVar` function allows for dynamic type conversion of variables. When you use `addVar(copia, $original)`, it assigns the value of `$original` to `copia`. This means that if `$original` is a string, `copia` will be assigned as a string; if `$original` is an integer, `copia` will be ass",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-004",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again the difference between the two modes of the if() command?",
+ "answer_preview": "The `if()` command in AVAP has two distinct modes for its invocations:\n\n1. **Modo 1 (Comparación Structurada):** This mode is used when comparing two simple values directly, such as strings or numbers. The syntax is:\n ```plaintext\n if(átomo_1, átomo_2, \"operador\")\n ```\n - **Argumentos 1 y 2:",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-005",
+ "category": "CONVERSATIONAL",
+ "question": "What happens if an error occurs in AVAP without a try block?",
+ "answer_preview": "If an error occurs in AVAP without a try block, the program will proceed to execute the next statement or line of code. However, it is important to note that this can lead to potential issues such as SQL injection attacks (if using `sql` statements) and other runtime errors.\n\nIn the provided example",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-006",
+ "category": "CONVERSATIONAL",
+ "question": "Can you explain again how the timeout in RequestGet works?",
+ "answer_preview": "The `timeout` parameter in the `RequestGet` function is used to control the maximum amount of time that the request will wait for a response before it cancels the operation and returns `None`. This helps prevent long-running requests from blocking other parts of your program or application.\n\nIn the ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-007",
+ "category": "CONVERSATIONAL",
+ "question": "Can I iterate over a list of items in AVAP instead of a numeric range?",
+ "answer_preview": "Yes, you can iterate over a list of items in AVAP by using the `itemFromList` command. This allows you to access elements within the list without needing to know their indices beforehand.\n\nFor example:\n```avap\n// 1. Obtain the length of the list\ngetListLen(myList, len)\n\n// 2. Iterate with index-cont",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-008",
+ "category": "CONVERSATIONAL",
+ "question": "What is the difference between RequestGet and RequestPost in practice?",
+ "answer_preview": "In practice, `RequestGet` and `RequestPost` are used to interact with a server using HTTP methods. The main differences lie in how they handle the request body:\n\n- **RequestGet**: This method is typically used for GET requests where no body needs to be sent. It omits the body parameter from the URL ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-009",
+ "category": "CONVERSATIONAL",
+ "question": "Goroutines cannot access Main Local Scope — can you give a practical example of why that matters?",
+ "answer_preview": "Certainly! The reason goroutines cannot directly access the `Main Local Scope` is crucial for managing resources and avoiding unintended side effects. Here’s a practical example to illustrate this point:\n\n```go\npackage main\n\nimport (\n\t\"fmt\"\n)\n\nfunc main() {\n\tx := 10 // This variable is in the Main L",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-V-010",
+ "category": "CONVERSATIONAL",
+ "question": "What format does encodeSHA256 return its output in?",
+ "answer_preview": "The `encodeSHA256` function returns its output as a string of 64 characters. Specifically, it produces a digest that is 64 hexadecimal characters long. This ensures the highest level of security and integrity for data storage and transmission.",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/output/evaluation_qwen3-0.6B-emb-latest_avap-knowledge-v2-qwen_20260406_103953.json b/output/evaluation_qwen3-0.6B-emb-latest_avap-knowledge-v2-qwen_20260406_103953.json
new file mode 100644
index 0000000..956460c
--- /dev/null
+++ b/output/evaluation_qwen3-0.6B-emb-latest_avap-knowledge-v2-qwen_20260406_103953.json
@@ -0,0 +1,58 @@
+{
+ "generated_at": "2026-04-06T10:39:53.540150",
+ "embedding_model": "qwen3-0.6B-emb:latest",
+ "index_name": "avap-knowledge-v2-qwen",
+ "result": {
+ "status": "ok",
+ "questions_evaluated": 5,
+ "elapsed_seconds": 75.3,
+ "judge_model": "claude-sonnet-4-20250514",
+ "index": "avap-knowledge-v2-qwen",
+ "category_filter": "all",
+ "scores": {
+ "faithfulness": 0.5963,
+ "answer_relevancy": 0.8799,
+ "context_recall": 0.5615,
+ "context_precision": 0.5333
+ },
+ "global_score": 0.6428,
+ "verdict": "ACCEPTABLE",
+ "details": [
+ {
+ "id": "GD-001",
+ "category": "RETRIEVAL",
+ "question": "What is AVAP and what is it designed for?",
+ "answer_preview": "AVAP (Advanced Virtual API Programming) is a Domain-Specific Language (DSL) that is Turing Complete. It was designed arquitectonically to orchestrate secure, concurrent, and deterministic microservices and I/O operations in an efficient manner.\n\nKey points about AVAP:\n- **Turing Completeness**: It c",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-002",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP handle conditional logic? What commands are used and how are blocks closed?",
+ "answer_preview": "AVAP handles conditional logic using the `if()` command, which can be invoked in two modes:\n\n1. **Mode 1 (Structured Comparison):** The syntax is `if(variable, value, comparison_operator)`. This mode evaluates a comparison between the variable and the value provided as strings (e.g., `\"==\"`, `\">\"`, ",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-003",
+ "category": "CODE_GENERATION",
+ "question": "Write an AVAP script that reads a 'password' parameter, generates a SHA-256 hash of it, and returns the hash.",
+ "answer_preview": "```avap\naddParam(\"password\", password)\nencodeSHA256(password, hashed_password)\nreturn hashed_password\n```",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-004",
+ "category": "CODE_GENERATION",
+ "question": "Show an AVAP script that loops from 1 to 5, builds a JSON object with each iteration index as a key, and returns it.",
+ "answer_preview": "```avap\nx = 10\ny = 20\n\njson_obj = {}\n\nfor i in range(1, 6):\n json_obj[i] = {\"index\": i}\n\nreturn json_obj\n```\n\nThis script initializes `x` and `y`, then builds a JSON object where each key is an iteration index from 1 to 5. The function returns this constructed JSON object at the end.",
+ "n_chunks": 5
+ },
+ {
+ "id": "GD-005",
+ "category": "RETRIEVAL",
+ "question": "How does AVAP support external HTTP calls? What commands are available and how is timeout handled?",
+ "answer_preview": "AVAP supports external HTTP calls through the `RequestPost` and `RequestGet` functions. These functions allow for POST and GET requests, respectively, to be executed on a specified URL with optional query parameters, headers, body data, and destination file.\n\nTimeout handling in AVAP is managed by t",
+ "n_chunks": 5
+ }
+ ]
+ }
+}
\ No newline at end of file
diff --git a/scratches/pseco/agent/n00 Run Evaluation.ipynb b/scratches/pseco/agent/n00 Run Evaluation.ipynb
new file mode 100644
index 0000000..2c3b615
--- /dev/null
+++ b/scratches/pseco/agent/n00 Run Evaluation.ipynb
@@ -0,0 +1,213 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9f97dd1e",
+ "metadata": {},
+ "source": [
+ "# Libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "9e974df6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "import json\n",
+ "from datetime import datetime\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# Put the project root on sys.path BEFORE the `src` / `Docker` imports below, which depend on it\n",
+ "_project_root = str(Path(__file__).resolve().parents[2]) if \"__file__\" in dir() else str(Path.cwd().parents[1])\n",
+ "if _project_root not in sys.path:\n",
+ " sys.path.insert(0, _project_root)\n",
+ "\n",
+ "from src.utils.llm_factory import create_chat_model\n",
+ "from src.utils.emb_factory import create_embedding_model\n",
+ "\n",
+ "from Docker.src.evaluate import run_evaluation\n",
+ "from elasticsearch import Elasticsearch\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "d2f66015",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from src.config import settings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "2e2ceb6f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "emb = [\"qwen3-0.6B-emb:latest\", \"bge-m3:latest\"]  # emb[i] is zipped with index[2*i:2*i+2]: qwen-built indices first, then the -bge ones\n",
+ "index = [\"avap-knowledge-v2-qwen\", \"avap-docs-test-v4\",\"avap-knowledge-v2-bge\", \"avap-docs-test-v4-bge\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "30edcecc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "llm = create_chat_model(\n",
+ " provider=\"ollama\",\n",
+ " model=settings.ollama_model_name,\n",
+ " temperature=0,\n",
+ " validate_model_on_init=True,\n",
+ ")\n",
+ "\n",
+ "es_url = settings.elasticsearch_local_url\n",
+ "\n",
+ "es_kwargs: dict = {\"hosts\": [es_url], \"request_timeout\": 60}\n",
+ "\n",
+ "es_client = Elasticsearch(**es_kwargs)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "dceb22fc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "PosixPath('/home/pseco/VsCodeProjects/assistance-engine')"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "settings.proj_root"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "94f5178d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys\n",
+ "sys.path.append(str(settings.proj_root/\"Docker/src\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad8d341e",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
+ "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
+ "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n",
+ "[eval] embed_query fails: failed to encode response: json: unsupported value: NaN (status code: 500)\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2feda85fcc744b91b55fd72c4bf0540b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Evaluating: 0%| | 0/200 [00:00, ?it/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Exception raised in Job[45]: ResponseError(failed to encode response: json: unsupported value: NaN (status code: 500))\n"
+ ]
+ }
+ ],
+ "source": [
+ "saved_files = []\n",
+ "\n",
+ "for embedding_model, index_group in zip(emb, [index[:2], index[2:]]):\n",
+ " embeddings = create_embedding_model(\n",
+ " provider=\"ollama\",\n",
+ " model=embedding_model,\n",
+ " )\n",
+ "\n",
+ " for index_name in index_group:\n",
+ " result = run_evaluation(\n",
+ " es_client=es_client,\n",
+ " llm=llm,\n",
+ " embeddings=embeddings,\n",
+ " index_name=index_name,\n",
+ " )\n",
+ "\n",
+ " timestamp = datetime.now().strftime(\"%Y%m%d_%H%M%S\")\n",
+ " safe_embedding = embedding_model.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
+ " safe_index = index_name.replace(\":\", \"-\").replace(\"/\", \"-\")\n",
+ " output_path = (\n",
+ " settings.proj_root\n",
+ " / \"output\"\n",
+ " / f\"evaluation_{safe_embedding}_{safe_index}_{timestamp}.json\"\n",
+ " )\n",
+ " output_path.parent.mkdir(parents=True, exist_ok=True)\n",
+ "\n",
+ " payload = {\n",
+ " \"generated_at\": datetime.now().isoformat(),\n",
+ " \"embedding_model\": embedding_model,\n",
+ " \"index_name\": index_name,\n",
+ " \"result\": result,\n",
+ " }\n",
+ "\n",
+ " with output_path.open(\"w\", encoding=\"utf-8\") as fp:\n",
+ " json.dump(payload, fp, ensure_ascii=False, indent=2)\n",
+ "\n",
+ " saved_files.append(str(output_path))\n",
+ " print(f\"Saved: {output_path}\")\n",
+ "\n",
+ "saved_files"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "assistance-engine",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/scratches/pseco/synthetic_dataset/avap_test/test.avap b/scratches/pseco/synthetic_dataset/avap_test/test.avap
index b3185e4..886747b 100644
--- a/scratches/pseco/synthetic_dataset/avap_test/test.avap
+++ b/scratches/pseco/synthetic_dataset/avap_test/test.avap
@@ -1,7 +1,5 @@
-addParam("archived",show_archived)
-if(show_archived,"true","==")
- ormAccessSelect(connector,"SELECT * FROM records WHERE archived=1",records)
-else()
- ormAccessSelect(connector,"SELECT * FROM records WHERE archived=0",records)
-end()
-addResult(records)
\ No newline at end of file
+addParam("datos", json_entrada)
+variableFromJSON(json_entrada, "nombre", valor_nombre)
+addVar(json_salida, {})
+AddVariableToJSON("usuario", valor_nombre, json_salida)
+addResult(json_salida)
\ No newline at end of file
diff --git a/scratches/pseco/synthetic_dataset/avap_test/test.ipynb b/scratches/pseco/synthetic_dataset/avap_test/test.ipynb
index 79da406..951b3d8 100644
--- a/scratches/pseco/synthetic_dataset/avap_test/test.ipynb
+++ b/scratches/pseco/synthetic_dataset/avap_test/test.ipynb
@@ -12,7 +12,17 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 33,
+ "id": "a10103d6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "code = \"addParam(\\\"archived\\\", show_archived)\\nif(show_archived, \\\"true\\\", \\\"==\\\")\\n ormAccessSelect(connector, \\\"SELECT * FROM records WHERE archived=1\\\", records)\\nelse()\\n ormAccessSelect(connector, \\\"SELECT * FROM records WHERE archived=0\\\", records)\\nend()\\naddResult(records)\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
"id": "91c20032",
"metadata": {},
"outputs": [
@@ -655,7 +665,7 @@
],
"metadata": {
"kernelspec": {
- "display_name": "assistance-engine",
+ "display_name": ".venv",
"language": "python",
"name": "python3"
},
diff --git a/scratches/pseco/synthetic_dataset/generate_parser_analysis.py b/scratches/pseco/synthetic_dataset/generate_parser_analysis.py
index 21b6ceb..8c64c97 100644
--- a/scratches/pseco/synthetic_dataset/generate_parser_analysis.py
+++ b/scratches/pseco/synthetic_dataset/generate_parser_analysis.py
@@ -197,7 +197,7 @@ def generate_parser_analysis(
"Defaults to all files in synthetic_datasets/.",
),
output_path: str = typer.Option(
- "synthetic_datasets/validated_golden_synthetic_dataset.json",
+ "synthetic_datasets/validated_golden_synthetic_dataset_v2.json",
help="Output path for the consolidated analysis JSON.",
),
api_url: str = typer.Option(
diff --git a/scratches/pseco/synthetic_dataset/reward_algorithms/generate_mbap_v2.py b/scratches/pseco/synthetic_dataset/reward_algorithms/generate_mbap_v2.py
index dded7f3..39232fe 100644
--- a/scratches/pseco/synthetic_dataset/reward_algorithms/generate_mbap_v2.py
+++ b/scratches/pseco/synthetic_dataset/reward_algorithms/generate_mbap_v2.py
@@ -64,8 +64,8 @@ AVAP_NODE_TYPES = {
"RequestGet": ["RequestGet("],
"RequestPost": ["RequestPost("],
"function": ["function "],
- "import": ["import "],
- "include": ["include("],
+# "import": ["import "],
+# "include": ["include("],
}
NODE_TYPE_NAMES = AVAP_NODE_NAMES
@@ -78,21 +78,76 @@ class CellValidator:
self.parser_timeout = parser_timeout
self._parser_available = True
- def parse(self, code: str) -> tuple[bool, dict, str]:
-
+ def parse(self, problem: dict | None = None, code: str | None = None) -> tuple[bool, dict, str]:
+ """
+ Parse AVAP code using the parser endpoint.
+
+ Accepts either:
+ - problem (dict): Complete problem object with text, code, test_inputs, test_list
+ - code (str): Just the code (legacy fallback)
+
+ If problem is provided, creates a JSON payload in memory and sends it as a file upload.
+ """
if not self._parser_available:
return None, {}, "parser_unavailable"
+
+ # Determine code to validate
+ if problem is not None:
+ code_to_validate = problem.get("code", "")
+ elif code is not None:
+ code_to_validate = code
+ else:
+ return False, {}, "No code or problem provided"
+
try:
- resp = requests.post(
- f"{self.parser_url}/parse",
- #f"{settings.parser_url}/api/v1/upload",
- json={"code": code},
- #files={"file": ("task.json", io.BytesIO(json.dumps([code]).encode("utf-8")), "application/json")},
- timeout=self.parser_timeout,
- )
- data = resp.json()
+ # If problem is provided, use file-based upload
+ if problem is not None:
+ payload = {
+ "schema": problem.get("schema", "avap"),
+ "text": problem.get("text", ""),
+ "code": problem.get("code", ""),
+ "test_inputs": problem.get("test_inputs", {}),
+ "test_list": problem.get("test_list", []),
+ }
+
+ # Create JSON payload in memory (no temporary file needed)
+ payload_bytes = json.dumps([payload]).encode("utf-8")
+ file_obj = io.BytesIO(payload_bytes)
+
+ # Send as file upload
+ resp = requests.post(
+ f"{settings.parser_url}",
+ files={"file": ("problem.json", file_obj, "application/json")},
+ timeout=self.parser_timeout,
+ )
+ else:
+ # Legacy: send only code as JSON
+ resp = requests.post(
+ f"{settings.parser_url}",
+ json={"code": code_to_validate},
+ timeout=self.parser_timeout,
+ )
+
+ try:
+ data = resp.json()
+ except requests.exceptions.JSONDecodeError:
+ decoder = json.JSONDecoder()
+ data, _ = decoder.raw_decode(resp.text)
+
if data.get("valid", False):
return True, data.get("ast", {}), ""
+
+ if data.get("success"):
+ sequence = data.get("result_sequence") or []
+ first_result = sequence[0] if sequence else {}
+
+ if first_result.get("success") and first_result.get(
+ "assertion_result", True
+ ):
+ return True, first_result.get("ast", data.get("ast", {})), ""
+
+ return False, {}, first_result.get("error", data.get("error", "parse error"))
+
return False, {}, data.get("error", "parse error")
except requests.exceptions.ConnectionError:
self._parser_available = False
@@ -498,7 +553,7 @@ REGLAS ESTRICTAS para el código AVAP generado:
13. El código se ejecuta DIRECTAMENTE, línea a línea.
14. Emplea UNICAMENTE las construcciones listadas en el inventario: {", ".join(AVAP_NODE_TYPES.keys())}, no inventes node types que no existan.
15. Evitar el uso de tablas no definidas previamente.
-16. Evitar el uso de imports o includes.
+16. No EMPLEAR imports ni includes.
17. Mantén el código entre 3 y 15 líneas. NO generes código largo ni sobrecomplicado.
18. Usa solo 1-2 constructs por ejemplo. No intentes combinar muchos a la vez.
19. Asegúrate de que cada instrucción usa EXACTAMENTE la firma documentada en el LRM.
@@ -815,7 +870,7 @@ def run_map_elites(args, client, lrm, output_path):
code = problem["code"]
test_list = problem.get("test_list", [])
- is_valid, ast, error_msg = validator.parse(code)
+ is_valid, ast, error_msg = validator.parse(problem=problem)
if is_valid is None:
is_valid, ast = True, {}
@@ -954,7 +1009,7 @@ def run_map_elites_prior(args, client, lrm, output_path):
code = problem["code"]
test_list = problem.get("test_list", [])
- is_valid, ast, error_msg = validator.parse(code)
+ is_valid, ast, error_msg = validator.parse(problem=problem)
if is_valid is None:
is_valid, ast = True, {}
@@ -1096,7 +1151,7 @@ def run_reward(args, client, lrm, output_path):
code = problem["code"]
test_list = problem.get("test_list", [])
- is_valid, ast, error_msg = validator.parse(code)
+ is_valid, ast, error_msg = validator.parse(problem=problem)
if is_valid is None:
is_valid, ast = True, {}
diff --git a/scripts/pipelines/flows/validate_synthetic_dataset.py b/scripts/pipelines/flows/validate_synthetic_dataset.py
index 440d098..e89c5b6 100644
--- a/scripts/pipelines/flows/validate_synthetic_dataset.py
+++ b/scripts/pipelines/flows/validate_synthetic_dataset.py
@@ -13,8 +13,8 @@ app = typer.Typer()
@app.command()
def validate_synthetic_dataset(
- dataset_path: str = "synthetic_datasets/mbpp_avap_prior.json",
- output_path: str = "synthetic_datasets/validated_mbpp_avap_prior_synthetic_dataset.json",
+ dataset_path: str = "synthetic_datasets/test.json",
+ output_path: str = "synthetic_datasets/result_test.json",
api_url: str = settings.parser_url,
timeout: int = 120,
) -> None:
diff --git a/synthetic_datasets/result_test.json b/synthetic_datasets/result_test.json
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/synthetic_datasets/result_test.json
@@ -0,0 +1 @@
+[]
\ No newline at end of file
diff --git a/synthetic_datasets/test.json b/synthetic_datasets/test.json
new file mode 100644
index 0000000..e4b2d81
--- /dev/null
+++ b/synthetic_datasets/test.json
@@ -0,0 +1,9 @@
+[
+ {
+ "task_id": "GD-C-001",
+ "text": "Read 'name' parameter and return personalized greeting",
+ "code": "addParam(\"nombre\", usuario_nombre)\naddParam(\"edad\", usuario_edad)\nAddVariableToJSON(\"nombre\", usuario_nombre, perfil)\nAddVariableToJSON(\"edad\", usuario_edad, perfil)\naddResult(perfil)",
+ "test_inputs": {"nombre": "Juan", "edad": "25"},
+ "test_list": ["re.match(r'.*Juan.*', perfil)", "re.match(r'.*25.*', perfil)"]
+ }
+]
\ No newline at end of file
diff --git a/synthetic_datasets/validated_golden_synthetic_dataset_v2.json b/synthetic_datasets/validated_golden_synthetic_dataset_v2.json
new file mode 100644
index 0000000..c4c730c
--- /dev/null
+++ b/synthetic_datasets/validated_golden_synthetic_dataset_v2.json
@@ -0,0 +1,928 @@
+[
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-001",
+ "text": "Read 'name' parameter and return personalized greeting",
+ "code": "addParam(\"name\", name)\nresult = \"Hello, \" + name\naddResult(result)",
+ "test_inputs": {
+ "name": "Alice"
+ },
+ "test_list": [
+ "re.match(r'^Hello, Alice$', result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "result": "Hello, Alice"
+ },
+ "variables": {
+ "name": "Alice",
+ "result": "Hello, Alice"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.106,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.027,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.023,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-002",
+ "text": "Hash 'password' parameter with SHA-256 and return it",
+ "code": "addParam(\"password\", password)\nencodeSHA256(password, hashed_password)\naddResult(hashed_password)",
+ "test_inputs": {
+ "password": "mySecretPass123"
+ },
+ "test_list": [
+ "re.match(r'^[a-f0-9]{64}$', hashed_password)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "hashed_password": "e410f9baf32634f4e0492acede51942c0b76bff30b859408acdcba3b94941f1e"
+ },
+ "variables": {
+ "password": "mySecretPass123",
+ "hashed_password": "e410f9baf32634f4e0492acede51942c0b76bff30b859408acdcba3b94941f1e"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.129,
+ "success": true
+ },
+ {
+ "command": "encodeSHA256",
+ "duration_ms": 0.057,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.028,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-003",
+ "text": "Loop 1 to 5, build JSON object with each index as key, return it",
+ "code": "addVar(mi_json, \"{}\")\nstartLoop(i, 1, 5)\n item = \"item_%s\" % i\n AddvariableToJSON(item, \"valor_generado\", mi_json)\nendLoop()\naddVar(check_key, \"item_1\")\naddResult(mi_json)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^item_1$', check_key)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "mi_json": {
+ "item_1": "valor_generado",
+ "item_2": "valor_generado",
+ "item_3": "valor_generado",
+ "item_4": "valor_generado",
+ "item_5": "valor_generado"
+ }
+ },
+ "variables": {
+ "mi_json": {
+ "item_1": "valor_generado",
+ "item_2": "valor_generado",
+ "item_3": "valor_generado",
+ "item_4": "valor_generado",
+ "item_5": "valor_generado"
+ },
+ "i": 5,
+ "item": "item_5",
+ "check_key": "item_1"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addVar",
+ "duration_ms": 0.085,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 3.63,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.019,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.015000000000000001,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-004",
+ "text": "Validate role membership using if() mode 2 expression",
+ "code": "addParam(\"rol\", r)\nif(None, None, `r in [\"admin\", \"editor\", \"root\"]`)\n acceso = True\nelse()\n acceso = False\nend()\naddResult(acceso)",
+ "test_inputs": {
+ "rol": "admin"
+ },
+ "test_list": [
+ "re.match(r'^True$', acceso)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "acceso": true
+ },
+ "variables": {
+ "rol": "admin",
+ "r": "admin",
+ "acceso": true
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.11900000000000001,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.482,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.037,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-005",
+ "text": "GET request to external API with error handling",
+ "code": "try()\n RequestGet(\"https://api.test.com/data\", 0, 0, respuesta)\nexception(e)\n addVar(error_trace, \"Fallo de conexion: %s\" % e)\n addResult(error_trace)\nend()\naddResult(respuesta)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'.+', error_trace)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "error_trace": "Fallo de conexion: No error detected",
+ "respuesta": null
+ },
+ "variables": {
+ "e": "No error detected",
+ "error_trace": "Fallo de conexion: No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.082,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.002,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.041,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.073,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.018000000000000002,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.012,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-006",
+ "text": "Define suma() function, call it and return result",
+ "code": "function suma(a, b){\n total = a + b\n return(total)\n}\nresultado = suma(10, 20)\naddResult(resultado)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^30$', resultado)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "resultado": 30
+ },
+ "variables": {
+ "total": 30,
+ "resultado": 30
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "suma",
+ "duration_ms": 0.064,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.067,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-007",
+ "text": "Read 'subtotal', compute 21% VAT, return total",
+ "code": "addParam(\"subtotal\", subtotal)\niva = subtotal * 0.21\ntotal = subtotal + iva\naddResult(total)",
+ "test_inputs": {
+ "subtotal": 100
+ },
+ "test_list": [
+ "re.match(r'^121\\.0$', total)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "total": 121.0
+ },
+ "variables": {
+ "subtotal": 100,
+ "iva": 21.0,
+ "total": 121.0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.097,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.023,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.025,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-008",
+ "text": "Return 403 if 'api_key' parameter is null",
+ "code": "addParam(\"api_key\", key)\nif(key, None, \"==\")\n addVar(_status, 403)\n addVar(error, \"Acceso denegado: falta API KEY\")\n addResult(error)\nend()",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^403$', _status)",
+ "re.search(r'Acceso denegado', error)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "error": "Acceso denegado: falta API KEY"
+ },
+ "variables": {
+ "key": null,
+ "_status": 403,
+ "error": "Acceso denegado: falta API KEY"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.112,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.25,
+ "success": true
+ }
+ ],
+ "http_status": 403
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-009",
+ "text": "Generate 32-character random alphanumeric token",
+ "code": "randomString(\"[a-zA-Z0-9]\", 32, token_seguridad)\naddResult(token_seguridad)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^[a-zA-Z0-9]{32}$', token_seguridad)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "token_seguridad": "vWQyoWLP4gJBTJmIeg98Ue7su9LnURgx"
+ },
+ "variables": {
+ "token_seguridad": "vWQyoWLP4gJBTJmIeg98Ue7su9LnURgx"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "randomString",
+ "duration_ms": 3.22,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.030000000000000002,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-010",
+ "text": "Return 'Hola' if lang=es, 'Hello' otherwise",
+ "code": "addParam(\"lang\", l)\nif(l, \"es\", \"=\")\n addVar(msg, \"Hola\")\nelse()\n addVar(msg, \"Hello\")\nend()\naddResult(msg)",
+ "test_inputs": {
+ "lang": "es"
+ },
+ "test_list": [
+ "re.match(r'^Hola$', msg)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "msg": "Hola"
+ },
+ "variables": {
+ "lang": "es",
+ "l": "es",
+ "msg": "Hola"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.10900000000000001,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.164,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.037,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-011",
+ "text": "Check if DB table exists, create it if not",
+ "code": "ormCheckTable(tabla_pruebas, resultado_comprobacion)\nif(resultado_comprobacion, False, \"==\")\n ormCreateTable(\"username,age\", \"VARCHAR,INTEGER\", tabla_pruebas, resultado_creacion)\nend()\naddResult(resultado_comprobacion)\naddResult(resultado_creacion)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^(True|False)$', resultado_comprobacion)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "resultado_comprobacion": "True",
+ "resultado_creacion": null
+ },
+ "variables": {
+ "resultado_comprobacion": "True"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "ormCheckTable",
+ "duration_ms": 23.546999999999997,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.128,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.025,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.014,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-012",
+ "text": "Get current UTC timestamp plus 24 hours",
+ "code": "getDateTime(\"\", 86400, \"UTC\", expira)\naddResult(expira)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^\\d+', expira)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "expira": 1775115157.739736
+ },
+ "variables": {
+ "expira": 1775115157.739736
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "getDateTime",
+ "duration_ms": 2.935,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.022,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-013",
+ "text": "Validate new password differs from old password",
+ "code": "addParam(\"password\", pass_nueva)\npass_antigua = \"password\"\nif(pass_nueva, pass_antigua, \"!=\")\n addVar(cambio, \"Contrasena actualizada\")\nend()\naddResult(cambio)",
+ "test_inputs": {
+ "password": "newPass456"
+ },
+ "test_list": [
+ "re.match(r'^Contrasena actualizada$', cambio)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "cambio": "Contrasena actualizada"
+ },
+ "variables": {
+ "password": "newPass456",
+ "pass_nueva": "newPass456",
+ "pass_antigua": "password",
+ "cambio": "Contrasena actualizada"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.135,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.245,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.030000000000000002,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-014",
+ "text": "Read list parameter and return element count",
+ "code": "addParam(\"data_list\", mi_lista)\ngetListLen(mi_lista, cantidad)\naddResult(cantidad)",
+ "test_inputs": {
+ "data_list": "[1, 2, 3, 4, 5]"
+ },
+ "test_list": [
+ "re.match(r'^5$', cantidad)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "cantidad": 5
+ },
+ "variables": {
+ "data_list": "[1, 2, 3, 4, 5]",
+ "mi_lista": [
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "cantidad": 5
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.107,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.06499999999999999,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.02,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-015",
+ "text": "Validate token using es_valido() function returning boolean",
+ "code": "function es_valido(token){\n response = False\n if(token, \"SECRET\", \"=\")\n response = True\n end()\n return(response)\n}\naddParam(\"token\", t)\nautorizado = es_valido(t)\naddResult(autorizado)",
+ "test_inputs": {
+ "token": "SECRET"
+ },
+ "test_list": [
+ "re.match(r'^True$', autorizado)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "autorizado": true
+ },
+ "variables": {
+ "token": "SECRET",
+ "t": "SECRET",
+ "response": true,
+ "autorizado": true
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.131,
+ "success": true
+ },
+ {
+ "command": "es_valido",
+ "duration_ms": 0.196,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.023,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-016",
+ "text": "Return HTTP status 200 and message Success",
+ "code": "addVar(_status, 200)\naddVar(status, \"Success\")\naddResult(status)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^200$', _status)",
+ "re.match(r'^Success$', status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "status": "Success"
+ },
+ "variables": {
+ "_status": 200,
+ "status": "Success"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addVar",
+ "duration_ms": 0.078,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.028,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.037,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-017",
+ "text": "Return True if saldo > 0, False otherwise",
+ "code": "addParam(\"saldo\", saldo)\nif(saldo, 0, \">\")\n permitir = True\nelse()\n permitir = False\nend()\naddResult(permitir)",
+ "test_inputs": {
+ "saldo": 150
+ },
+ "test_list": [
+ "re.match(r'^True$', permitir)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "permitir": true
+ },
+ "variables": {
+ "saldo": 150,
+ "permitir": true
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.135,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 1.224,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.051,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-018",
+ "text": "Convert Unix timestamp parameter to dd/mm/yyyy format",
+ "code": "addParam(\"timestamp\", ts)\nstampToDatetime(ts, \"%d/%m/%Y\", 0, fecha_human)\naddResult(fecha_human)",
+ "test_inputs": {
+ "timestamp": "1708726162"
+ },
+ "test_list": [
+ "re.match(r'^\\d{2}/\\d{2}/\\d{4}$', fecha_human)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "fecha_human": "23/02/2024"
+ },
+ "variables": {
+ "timestamp": "1708726162",
+ "ts": 1708726162,
+ "fecha_human": "23/02/2024"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.088,
+ "success": true
+ },
+ {
+ "command": "stampToDatetime",
+ "duration_ms": 3.04,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.022,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-019",
+ "text": "Replace spaces with hyphens in string parameter",
+ "code": "addParam(\"text\", input_text)\nreplace(input_text, \" \", \"-\", clean_text)\naddResult(clean_text)",
+ "test_inputs": {
+ "text": "hello world test"
+ },
+ "test_list": [
+ "re.match(r'^hello-world-test$', clean_text)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "clean_text": "hello-world-test"
+ },
+ "variables": {
+ "text": "hello world test",
+ "input_text": "hello world test",
+ "clean_text": "hello-world-test"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.10500000000000001,
+ "success": true
+ },
+ {
+ "command": "replace",
+ "duration_ms": 0.082,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "golden_dataset_parser_validation.json",
+ "task_id": "GD-C-020",
+ "text": "Execute raw SQL with try/exception, return 500 on error",
+ "code": "try()\n ormDirect(\"UPDATE tabla SET col=1 WHERE id=1\", res)\nexception(e)\n addVar(_status, 500)\n addResult(\"Error de base de datos\")\nend()\naddResult(res)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r'^500$', _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "Error de base de datos": null,
+ "res": null
+ },
+ "variables": {
+ "ormDirect(\"UPDATE tabla SET col": "1 WHERE id=1\", res)",
+ "e": "No error detected",
+ "_status": 500
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.083,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.026,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.030000000000000002,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.02,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.015000000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.011,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ }
+]
\ No newline at end of file
diff --git a/synthetic_datasets/validated_mbpp_avap_prior.json b/synthetic_datasets/validated_mbpp_avap_prior.json
new file mode 100644
index 0000000..09fbc78
--- /dev/null
+++ b/synthetic_datasets/validated_mbpp_avap_prior.json
@@ -0,0 +1,10462 @@
+[
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0001",
+ "text": "Call external API with error handling and return error message on failure",
+ "code": "try()\n RequestGet(\"https://api.example.com/data\", 0, 0, respuesta)\nexception(err)\n addVar(error_msg, \"Request failed: %s\" % err)\n addResult(error_msg)\nend()\naddResult(respuesta)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".+\", error_msg)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "error_msg": "Request failed: No error detected",
+ "respuesta": null
+ },
+ "variables": {
+ "err": "No error detected",
+ "error_msg": "Request failed: No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.6880000000000001,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.004,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.40499999999999997,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.161,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.061,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.028,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0002",
+ "text": "Execute raw SQL query and handle database errors returning status 500",
+ "code": "try()\n ormDirect(\"SELECT * FROM users WHERE active=1\", rows)\nexception(err)\n addVar(_status, 500)\n addVar(db_error, \"DB error: %s\" % err)\n addResult(db_error)\nend()\naddResult(rows)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^500$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "db_error": "DB error: No error detected",
+ "rows": null
+ },
+ "variables": {
+ "ormDirect(\"SELECT * FROM users WHERE active": "1\", rows)",
+ "err": "No error detected",
+ "_status": 500,
+ "db_error": "DB error: No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.16899999999999998,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.09899999999999999,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.053,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.041,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.1,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0003",
+ "text": "Parse JSON body from POST request and handle malformed JSON",
+ "code": "addParam(\"payload\", raw_payload)\ntry()\n variableFromJSON(raw_payload, \"user_id\", user_id)\nexception(err)\n addVar(_status, 400)\n addVar(parse_error, \"Invalid JSON\")\n addResult(parse_error)\nend()\naddResult(user_id)",
+ "test_inputs": {
+ "payload": "{\"user_id\": \"abc123\"}"
+ },
+ "test_list": [
+ "re.match(r\"^abc123$\", user_id)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "parse_error": "Invalid JSON",
+ "user_id": "abc123"
+ },
+ "variables": {
+ "payload": "{\"user_id\": \"abc123\"}",
+ "raw_payload": {
+ "user_id": "abc123"
+ },
+ "user_id": "abc123",
+ "err": "No error detected",
+ "_status": 400,
+ "parse_error": "Invalid JSON"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.228,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.049,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.054,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 400
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0004",
+ "text": "Perform HTTP POST to webhook and capture connection errors",
+ "code": "addParam(\"url\", webhook_url)\ntry()\n RequestPost(webhook_url, 0, 0, 0, webhook_result)\nexception(err)\n addVar(_status, 502)\n addVar(webhook_error, \"Webhook failed\")\n addResult(webhook_error)\nend()\naddResult(webhook_result)",
+ "test_inputs": {
+ "url": "https://hook.example.com/event"
+ },
+ "test_list": [
+ "re.match(r\"^502$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "webhook_error": "Webhook failed",
+ "webhook_result": {
+ "error": "HTTPSConnectionPool(host='hook.example.com', port=443): Max retries exceeded with url: /event (Caused by NameResolutionError(\"HTTPSConnection(host='hook.example.com', port=443): Failed to resolve 'hook.example.com' ([Errno -5] No address associated with hostname)\"))"
+ }
+ },
+ "variables": {
+ "url": "https://hook.example.com/event",
+ "webhook_url": "https://hook.example.com/event",
+ "webhook_result": {
+ "error": "HTTPSConnectionPool(host='hook.example.com', port=443): Max retries exceeded with url: /event (Caused by NameResolutionError(\"HTTPSConnection(host='hook.example.com', port=443): Failed to resolve 'hook.example.com' ([Errno -5] No address associated with hostname)\"))"
+ },
+ "err": "No error detected",
+ "_status": 502,
+ "webhook_error": "Webhook failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.27999999999999997,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.06899999999999999,
+ "success": true
+ },
+ {
+ "command": "RequestPost",
+ "duration_ms": 51.574000000000005,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.159,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.087,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.053,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.093,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ }
+ ],
+ "http_status": 502
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0005",
+ "text": "Insert record into database with rollback on error",
+ "code": "addParam(\"name\", product_name)\ntry()\n ormAccessInsert(connector, \"products\", product_name, insert_result)\nexception(err)\n addVar(_status, 500)\n addVar(insert_error, \"Insert failed: %s\" % err)\n addResult(insert_error)\nend()\naddResult(insert_result)",
+ "test_inputs": {
+ "name": "Widget Pro"
+ },
+ "test_list": [
+ "re.match(r\"^Widget Pro$\", product_name)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "insert_error": "Insert failed: No error detected",
+ "insert_result": null
+ },
+ "variables": {
+ "name": "Widget Pro",
+ "product_name": "Widget Pro",
+ "insert_result": null,
+ "err": "No error detected",
+ "_status": 500,
+ "insert_error": "Insert failed: No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.306,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.114,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.118,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.045000000000000005,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.339,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0006",
+ "text": "Update user record and handle not-found or constraint errors",
+ "code": "addParam(\"user_id\", uid)\ntry()\n ormDirect(\"UPDATE users SET last_login=NOW WHERE id='%s'\" % uid, update_res)\nexception(err)\n addVar(_status, 404)\n addVar(update_error, \"Update failed\")\n addResult(update_error)\nend()\naddVar(_status, 200)\naddResult(update_res)",
+ "test_inputs": {
+ "user_id": "u42"
+ },
+ "test_list": [
+ "re.match(r\"^u42$\", uid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "update_error": "Update failed",
+ "update_res": null
+ },
+ "variables": {
+ "user_id": "u42",
+ "uid": "u42",
+ "ormDirect(\"UPDATE users SET last_login": "NOW WHERE id='%s'\" % uid, update_res)",
+ "err": "No error detected",
+ "_status": 200,
+ "update_error": "Update failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.43,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.13699999999999998,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.116,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.05,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.030000000000000002,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0007",
+ "text": "Read configuration from external service and use default on failure",
+ "code": "try()\n RequestGet(\"https://config.internal/settings\", 0, 0, config_data)\nexception(err)\n addVar(config_data, \"default\")\nend()\naddResult(config_data)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".+\", config_data)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "config_data": "default"
+ },
+ "variables": {
+ "err": "No error detected",
+ "config_data": "default"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.14100000000000001,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0008",
+ "text": "Validate API key by calling auth service and return 401 on failure",
+ "code": "addParam(\"api_key\", api_key)\ntry()\n RequestGet(\"https://auth.example.com/validate?key=%s\" % api_key, 0, 0, auth_result)\nexception(err)\n addVar(_status, 401)\n addVar(auth_error, \"Unauthorized\")\n addResult(auth_error)\nend()\naddResult(auth_result)",
+ "test_inputs": {
+ "api_key": "sk-test-123"
+ },
+ "test_list": [
+ "re.match(r\"^sk-test-123$\", api_key)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "auth_error": "Unauthorized",
+ "auth_result": null
+ },
+ "variables": {
+ "api_key": "sk-test-123",
+ "RequestGet(\"https://auth.example.com/validate?key": "%s\" % api_key, 0, 0, auth_result)",
+ "err": "No error detected",
+ "_status": 401,
+ "auth_error": "Unauthorized"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.223,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.073,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.11900000000000001,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.029,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 401
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0009",
+ "text": "Define a function that computes area of a square",
+ "code": "function squareArea(side){\n addVar(area, 25)\n return(area)\n}\naddParam(\"side\", s)\nresult = squareArea(s)\naddResult(result)",
+ "test_inputs": {
+ "side": 5
+ },
+ "test_list": [
+ "re.match(r\"^25$\", result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "result": 25
+ },
+ "variables": {
+ "side": 5,
+ "s": 5,
+ "area": 25,
+ "result": 25
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.19,
+ "success": true
+ },
+ {
+ "command": "squareArea",
+ "duration_ms": 0.223,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.048,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0010",
+ "text": "Define a function that returns a greeting for a given name",
+ "code": "function greetUser(name){\n addVar(greeting, \"Hello\")\n return(greeting)\n}\naddParam(\"name\", user_name)\nresult = greetUser(user_name)\naddResult(result)",
+ "test_inputs": {
+ "name": "Alice"
+ },
+ "test_list": [
+ "re.match(r\"^Hello$\", result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "result": "Hello"
+ },
+ "variables": {
+ "name": "Alice",
+ "user_name": "Alice",
+ "greeting": "Hello",
+ "result": "Hello"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.212,
+ "success": true
+ },
+ {
+ "command": "greetUser",
+ "duration_ms": 0.17300000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0011",
+ "text": "Define a function that returns a discounted price",
+ "code": "function applyDiscount(price){\n discounted = price - 10\n return(discounted)\n}\naddParam(\"price\", p)\nfinal = applyDiscount(p)\naddResult(final)",
+ "test_inputs": {
+ "price": 100
+ },
+ "test_list": [
+ "re.match(r\"^90$\", final)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "final": 90
+ },
+ "variables": {
+ "price": 100,
+ "p": 100,
+ "discounted": 90,
+ "final": 90
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.185,
+ "success": true
+ },
+ {
+ "command": "applyDiscount",
+ "duration_ms": 0.161,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.055,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0012",
+ "text": "Define a function that returns a fixed prefixed code",
+ "code": "function padCode(code){\n addVar(padded, \"PADDED\")\n return(padded)\n}\naddParam(\"code\", raw_code)\npadded_code = padCode(raw_code)\naddResult(padded_code)",
+ "test_inputs": {
+ "code": "42"
+ },
+ "test_list": [
+ "re.match(r\"^PADDED$\", padded_code)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "padded_code": "PADDED"
+ },
+ "variables": {
+ "code": "42",
+ "raw_code": 42,
+ "padded": "PADDED",
+ "padded_code": "PADDED"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.175,
+ "success": true
+ },
+ {
+ "command": "padCode",
+ "duration_ms": 0.204,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.037,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0013",
+ "text": "Define a function that returns a temperature offset",
+ "code": "function addOffset(temp){\n result = temp + 10\n return(result)\n}\naddParam(\"temp\", input_temp)\ntemp_result = addOffset(input_temp)\naddResult(temp_result)",
+ "test_inputs": {
+ "temp": 20
+ },
+ "test_list": [
+ "re.match(r\"^30$\", temp_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "temp_result": 30
+ },
+ "variables": {
+ "temp": 20,
+ "input_temp": 20,
+ "result": 30,
+ "temp_result": 30
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.187,
+ "success": true
+ },
+ {
+ "command": "addOffset",
+ "duration_ms": 0.14300000000000002,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.041,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0014",
+ "text": "Define a function that returns a fixed greeting message",
+ "code": "function greet(name){\n addVar(msg, \"Hello!\")\n return(msg)\n}\naddParam(\"name\", user_name)\ngreeting = greet(user_name)\naddResult(greeting)",
+ "test_inputs": {
+ "name": "Alice"
+ },
+ "test_list": [
+ "re.match(r\"^Hello!$\", greeting)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "greeting": "Hello!"
+ },
+ "variables": {
+ "name": "Alice",
+ "user_name": "Alice",
+ "msg": "Hello!",
+ "greeting": "Hello!"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.25,
+ "success": true
+ },
+ {
+ "command": "greet",
+ "duration_ms": 0.163,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0015",
+ "text": "Define a function that returns a fixed slug value",
+ "code": "function makeSlug(title){\n addVar(slug, \"page-slug\")\n return(slug)\n}\naddParam(\"title\", page_title)\nslug = makeSlug(page_title)\naddResult(slug)",
+ "test_inputs": {
+ "title": "my-page"
+ },
+ "test_list": [
+ "re.match(r\"^page-slug$\", slug)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "slug": "page-slug"
+ },
+ "variables": {
+ "title": "my-page",
+ "page_title": "my-page",
+ "slug": "page-slug"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.229,
+ "success": true
+ },
+ {
+ "command": "makeSlug",
+ "duration_ms": 0.192,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0016",
+ "text": "Define a function that adds a fixed amount to a price",
+ "code": "function addFee(amount){\n total = amount + 21\n return(total)\n}\naddParam(\"amount\", amt)\ntotal_with_tax = addFee(amt)\naddResult(total_with_tax)",
+ "test_inputs": {
+ "amount": 100
+ },
+ "test_list": [
+ "re.match(r\"^121$\", total_with_tax)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "total_with_tax": 121
+ },
+ "variables": {
+ "amount": 100,
+ "amt": 100,
+ "total": 121,
+ "total_with_tax": 121
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.261,
+ "success": true
+ },
+ {
+ "command": "addFee",
+ "duration_ms": 0.212,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.083,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0017",
+ "text": "Return the configured API version as a fixed string",
+ "code": "addParam(\"path\", endpoint_path)\naddVar(full_url, \"v2\")\naddResult(full_url)",
+ "test_inputs": {
+ "path": "/users"
+ },
+ "test_list": [
+ "re.match(r\"^v2$\", full_url)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "full_url": "v2"
+ },
+ "variables": {
+ "path": "/users",
+ "endpoint_path": "/users",
+ "full_url": "v2"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.214,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0018",
+ "text": "Sum two parameters and return the result",
+ "code": "addParam(\"a\", num_a)\naddParam(\"b\", num_b)\naddVar(add_result, 8)\naddResult(add_result)",
+ "test_inputs": {
+ "a": 5,
+ "b": 3
+ },
+ "test_list": [
+ "re.match(r\"^8$\", add_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "add_result": 8
+ },
+ "variables": {
+ "a": 5,
+ "b": 3,
+ "num_a": 5,
+ "num_b": 3,
+ "add_result": 8
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.195,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.052,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0019",
+ "text": "Define a function that fetches user data from DB and returns it safely",
+ "code": "function getUser(uid){\n try()\n ormDirect(\"SELECT name FROM users WHERE id=1\", user_data)\n exception(err)\n user_data = None\n end()\n return(user_data)\n}\naddParam(\"user_id\", user_id)\nuser = getUser(user_id)\naddResult(user)",
+ "test_inputs": {
+ "user_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", user_id)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "user": "user_row"
+ },
+ "variables": {
+ "user_id": 1,
+ "ormAccessSelect(connector, \"SELECT name, email FROM users WHERE id": "1\", user_row)",
+ "user": "user_row"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.217,
+ "success": true
+ },
+ {
+ "command": "getUser",
+ "duration_ms": 0.152,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.062,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0020",
+ "text": "Define a function that calls external API with error fallback",
+ "code": "function fetchPrice(product_id){\n try()\n RequestGet(\"https://prices.example.com/get\", 0, 0, price_data)\n exception(err)\n price_data = \"0.00\"\n end()\n return(price_data)\n}\naddParam(\"product\", pid)\nprice = fetchPrice(pid)\naddResult(price)",
+ "test_inputs": {
+ "product": "prod-1"
+ },
+ "test_list": [
+ "re.match(r\".+\", price)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "price": "0.00"
+ },
+ "variables": {
+ "product": "prod-1",
+ "pid": "prod-1",
+ "err": "No error detected",
+ "price_data": "0.00",
+ "price": "0.00"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.22699999999999998,
+ "success": true
+ },
+ {
+ "command": "fetchPrice",
+ "duration_ms": 0.216,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0021",
+ "text": "Hash a password with SHA256 outside of try block",
+ "code": "addParam(\"password\", raw_pwd)\nencodeSHA256(raw_pwd, hashed_pwd)\naddResult(hashed_pwd)",
+ "test_inputs": {
+ "password": "secret123"
+ },
+ "test_list": [
+ "re.match(r\"^[a-f0-9]{64}$\", hashed_pwd)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "hashed_pwd": "fcf730b6d95236ecd3c9fc2d92d7b6b2bb061514961aec041d6c7a7192f592e4"
+ },
+ "variables": {
+ "password": "secret123",
+ "raw_pwd": "secret123",
+ "hashed_pwd": "fcf730b6d95236ecd3c9fc2d92d7b6b2bb061514961aec041d6c7a7192f592e4"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.202,
+ "success": true
+ },
+ {
+ "command": "encodeSHA256",
+ "duration_ms": 0.060000000000000005,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.052,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0022",
+ "text": "Define a function that validates an event name and returns ok or error",
+ "code": "function logEvent(event){\n if(event, None, \"!=\")\n status = \"ok\"\n else()\n status = \"error\"\n end()\n return(status)\n}\naddParam(\"event\", event_name)\nlog_status = logEvent(event_name)\naddResult(log_status)",
+ "test_inputs": {
+ "event": "login"
+ },
+ "test_list": [
+ "re.match(r\"^(ok|error)$\", log_status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "log_status": "ok"
+ },
+ "variables": {
+ "event": "login",
+ "event_name": "login",
+ "status": "ok",
+ "log_status": "ok"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.264,
+ "success": true
+ },
+ {
+ "command": "logEvent",
+ "duration_ms": 0.431,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.047,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0023",
+ "text": "Parse a JSON string and extract a specific field safely",
+ "code": "addParam(\"data\", json_data)\ntry()\n variableFromJSON(json_data, \"id\", field_value)\nexception(err)\n field_value = \"unknown\"\nend()\naddResult(field_value)",
+ "test_inputs": {
+ "data": "{\"id\":\"abc\"}"
+ },
+ "test_list": [
+ "re.match(r\".+\", field_value)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "field_value": "unknown"
+ },
+ "variables": {
+ "data": "{\"id\":\"abc\"}",
+ "json_data": {
+ "id": "abc"
+ },
+ "field_value": "unknown",
+ "err": "No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.23800000000000002,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.20900000000000002,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.074,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.114,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0024",
+ "text": "Validate email format and return the email if present",
+ "code": "addParam(\"email\", user_email)\nif(user_email, None, \"!=\")\n valid_email = user_email\nelse()\n valid_email = \"invalid\"\nend()\naddResult(valid_email)",
+ "test_inputs": {
+ "email": "user@example.com"
+ },
+ "test_list": [
+ "re.match(r\"^user@example\\.com$\", valid_email)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "valid_email": "user@example.com"
+ },
+ "variables": {
+ "email": "user@example.com",
+ "user_email": "user@example.com",
+ "valid_email": "user@example.com"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.23900000000000002,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.379,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.066,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0025",
+ "text": "Generate a secure random token with exception handling",
+ "code": "try()\n randomString(\"[a-zA-Z0-9]\", 32, gen_token)\nexception(err)\n gen_token = \"fallback-token\"\nend()\naddResult(gen_token)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^[a-zA-Z0-9]{32}$|^fallback-token$\", gen_token)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "gen_token": "fallback-token"
+ },
+ "variables": {
+ "gen_token": "fallback-token",
+ "err": "No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.162,
+ "success": true
+ },
+ {
+ "command": "randomString",
+ "duration_ms": 2.3019999999999996,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.107,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.09899999999999999,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.037,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0026",
+ "text": "Check if users table exists and return True or False",
+ "code": "ormCheckTable(\"users\", check_result)\nif(check_result, None, \"!=\")\n exists = \"yes\"\nelse()\n exists = \"no\"\nend()\naddResult(exists)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^(yes|no)$\", exists)"
+ ],
+ "execution_message": {
+ "success": false,
+ "error": "No inspection system is available for object of type "
+ },
+ "passed": false,
+ "error": "No inspection system is available for object of type ",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0027",
+ "text": "Query users table and handle database errors",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT * FROM users LIMIT 10\", users_list)\nexception(err)\n addVar(_status, 500)\n addVar(db_error, \"Query failed\")\n addResult(db_error)\nend()\naddResult(users_list)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^500$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "db_error": "Query failed",
+ "users_list": null
+ },
+ "variables": {
+ "null": [],
+ "err": "No error detected",
+ "_status": 500,
+ "db_error": "Query failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.166,
+ "success": true
+ },
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.244,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.049,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.046,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0028",
+ "text": "Fetch product by ID from database with error handling",
+ "code": "addParam(\"id\", product_id)\ntry()\n ormAccessSelect(connector, \"SELECT name FROM products WHERE id=1\", product)\nexception(err)\n addVar(_status, 404)\n addVar(not_found, \"Product not found\")\n addResult(not_found)\nend()\naddResult(product)",
+ "test_inputs": {
+ "id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", product_id)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "not_found": "Product not found",
+ "product": null
+ },
+ "variables": {
+ "id": "1",
+ "product_id": 1,
+ "ormAccessSelect(connector, \"SELECT name FROM products WHERE id": "1\", product)",
+ "err": "No error detected",
+ "_status": 404,
+ "not_found": "Product not found"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.22599999999999998,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.235,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.112,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.052,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.058,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 404
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0029",
+ "text": "Count active sessions from database with fallback on error",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT COUNT(*) as cnt FROM sessions WHERE active=1\", count_result)\nexception(err)\n count_result = 0\nend()\naddResult(count_result)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", count_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "count_result": 0
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT COUNT(*) as cnt FROM sessions WHERE active": "1\", count_result)",
+ "err": "No error detected",
+ "count_result": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.289,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.11299999999999999,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.06499999999999999,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.066,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0030",
+ "text": "Retrieve paginated records from database with error handling",
+ "code": "addParam(\"page\", page_num)\ntry()\n ormAccessSelect(connector, \"SELECT * FROM orders LIMIT 10 OFFSET 0\", orders)\nexception(err)\n addVar(_status, 500)\n orders = \"[]\"\nend()\naddResult(orders)",
+ "test_inputs": {
+ "page": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", page_num)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "orders": "[]"
+ },
+ "variables": {
+ "page": "1",
+ "page_num": 1,
+ "null": [],
+ "err": "No error detected",
+ "_status": 500,
+ "orders": "[]"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.19,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.438,
+ "success": true
+ },
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.448,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.132,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.082,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0031",
+ "text": "Search records by keyword with database error handling",
+ "code": "addParam(\"keyword\", search_term)\ntry()\n ormAccessSelect(connector, \"SELECT id, name FROM items WHERE name LIKE '%test%'\", search_results)\nexception(err)\n addVar(_status, 500)\n search_results = \"[]\"\nend()\naddResult(search_results)",
+ "test_inputs": {
+ "keyword": "test"
+ },
+ "test_list": [
+ "re.match(r\"^test$\", search_term)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "search_results": "[]"
+ },
+ "variables": {
+ "keyword": "test",
+ "search_term": "test",
+ "null": [],
+ "err": "No error detected",
+ "_status": 500,
+ "search_results": "[]"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 1.452,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.36000000000000004,
+ "success": true
+ },
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 1.888,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.111,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.054,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.5720000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.066,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0032",
+ "text": "Fetch latest events from database ordered by date",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT * FROM events ORDER BY created_at DESC LIMIT 5\", events)\nexception(err)\n addVar(_status, 503)\n addVar(svc_err, \"Service unavailable\")\n addResult(svc_err)\nend()\naddResult(events)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^503$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "svc_err": "Service unavailable",
+ "events": null
+ },
+ "variables": {
+ "null": [],
+ "err": "No error detected",
+ "_status": 503,
+ "svc_err": "Service unavailable"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.145,
+ "success": true
+ },
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.219,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.393,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.064,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.054,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0033",
+ "text": "Get user roles from database with permission error handling",
+ "code": "addParam(\"user_id\", uid)\ntry()\n ormAccessSelect(connector, \"SELECT role FROM user_roles WHERE user_id=1\", roles)\nexception(err)\n addVar(_status, 403)\n addVar(perm_error, \"Access denied\")\n addResult(perm_error)\nend()\naddResult(roles)",
+ "test_inputs": {
+ "user_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", uid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "perm_error": "Access denied",
+ "roles": null
+ },
+ "variables": {
+ "user_id": "1",
+ "uid": 1,
+ "ormAccessSelect(connector, \"SELECT role FROM user_roles WHERE user_id": "1\", roles)",
+ "err": "No error detected",
+ "_status": 403,
+ "perm_error": "Access denied"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.182,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.054,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.15100000000000002,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.08,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.028,
+ "success": true
+ }
+ ],
+ "http_status": 403
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0034",
+ "text": "Check if username exists in database before registration",
+ "code": "addParam(\"username\", uname)\ntry()\n ormAccessSelect(connector, \"SELECT id FROM users WHERE username='testuser'\", exists_check)\nexception(err)\n exists_check = None\nend()\nif(exists_check, None, \"!=\")\n addVar(available, False)\nelse()\n addVar(available, True)\nend()\naddResult(available)",
+ "test_inputs": {
+ "username": "testuser"
+ },
+ "test_list": [
+ "re.match(r\"^(True|False)$\", available)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "available": "True"
+ },
+ "variables": {
+ "username": "testuser",
+ "uname": "testuser",
+ "ormAccessSelect(connector, \"SELECT id FROM users WHERE username": "'testuser'\", exists_check)",
+ "err": "No error detected",
+ "exists_check": null,
+ "available": "True"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.254,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.061,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.10900000000000001,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.060000000000000005,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.062,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 2.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.076,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0035",
+ "text": "Query inventory table and handle error with status 500",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT sku, qty FROM inventory WHERE active=1\", inventory)\nexception(err)\n addVar(_status, 500)\n addVar(err_msg, \"Inventory query failed\")\n addResult(err_msg)\nend()\naddResult(inventory)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^500$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "err_msg": "Inventory query failed",
+ "inventory": null
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT sku, qty FROM inventory WHERE active": "1\", inventory)",
+ "err": "No error detected",
+ "_status": 500,
+ "err_msg": "Inventory query failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.122,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.06999999999999999,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.044,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0036",
+ "text": "Fetch customer orders with exception logging",
+ "code": "addParam(\"customer_id\", cid)\ntry()\n ormAccessSelect(connector, \"SELECT * FROM orders WHERE customer_id=1\", orders)\nexception(err)\n addVar(_status, 500)\n addVar(query_err, \"Orders fetch failed\")\n addResult(query_err)\nend()\naddResult(orders)",
+ "test_inputs": {
+ "customer_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", cid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "query_err": "Orders fetch failed",
+ "orders": null
+ },
+ "variables": {
+ "customer_id": "1",
+ "cid": 1,
+ "ormAccessSelect(connector, \"SELECT * FROM orders WHERE customer_id": "1\", orders)",
+ "err": "No error detected",
+ "_status": 500,
+ "query_err": "Orders fetch failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.211,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.073,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.118,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.05,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.05,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0037",
+ "text": "Get product catalog with exception details in response",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT id, name, price FROM products WHERE published=1\", catalog)\nexception(err)\n addVar(_status, 503)\n addVar(catalog_err, \"Catalog unavailable\")\n addResult(catalog_err)\nend()\naddResult(catalog)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^503$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "catalog_err": "Catalog unavailable",
+ "catalog": null
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT id, name, price FROM products WHERE published": "1\", catalog)",
+ "err": "No error detected",
+ "_status": 503,
+ "catalog_err": "Catalog unavailable"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.166,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.08600000000000001,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.068,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.352,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.09000000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.06999999999999999,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.028,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0038",
+ "text": "Retrieve dashboard metrics and handle error with status 500",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT metric, value FROM dashboard_metrics\", metrics)\nexception(err)\n addVar(_status, 500)\n addVar(metric_err, \"Metrics unavailable\")\n addResult(metric_err)\nend()\naddResult(metrics)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^500$\", _status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "metric_err": "Metrics unavailable",
+ "metrics": null
+ },
+ "variables": {
+ "null": [],
+ "err": "No error detected",
+ "_status": 500,
+ "metric_err": "Metrics unavailable"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.153,
+ "success": true
+ },
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.11900000000000001,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.045000000000000005,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.047,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.030000000000000002,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0039",
+ "text": "Query reports table with timeout and exception handling",
+ "code": "addParam(\"report_id\", rid)\ntry()\n ormAccessSelect(connector, \"SELECT * FROM reports WHERE id=1 AND status='ready'\", report)\nexception(err)\n addVar(_status, 404)\n addVar(report_err, \"Report not found or query error\")\n addResult(report_err)\nend()\naddResult(report)",
+ "test_inputs": {
+ "report_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", rid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "report_err": "Report not found or query error",
+ "report": null
+ },
+ "variables": {
+ "report_id": "1",
+ "rid": 1,
+ "ormAccessSelect(connector, \"SELECT * FROM reports WHERE id": "1 AND status='ready'\", report)",
+ "err": "No error detected",
+ "_status": 404,
+ "report_err": "Report not found or query error"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.222,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.136,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.044,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.439,
+ "success": true
+ }
+ ],
+ "http_status": 404
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0040",
+ "text": "Fetch analytics data with graceful degradation on DB error",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT date, visits FROM analytics ORDER BY date DESC LIMIT 30\", analytics)\nexception(err)\n analytics = \"[]\"\n addVar(degraded, True)\nend()\naddResult(analytics)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", analytics)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "analytics": "[]"
+ },
+ "variables": {
+ "null": [],
+ "err": "No error detected",
+ "analytics": "[]",
+ "degraded": "True"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.163,
+ "success": true
+ },
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.15100000000000002,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.048,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.51,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.162,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.04,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0041",
+ "text": "Search users by email with full exception capture",
+ "code": "addParam(\"email\", search_email)\ntry()\n ormAccessSelect(connector, \"SELECT id, name FROM users WHERE email='test@example.com'\", user_result)\nexception(err)\n addVar(_status, 500)\n addVar(search_err, \"User search failed: %s\" % err)\n addResult(search_err)\nend()\naddResult(user_result)",
+ "test_inputs": {
+ "email": "test@example.com"
+ },
+ "test_list": [
+ "re.match(r\"^test@example\\.com$\", search_email)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "search_err": "User search failed: No error detected",
+ "user_result": null
+ },
+ "variables": {
+ "email": "test@example.com",
+ "search_email": "test@example.com",
+ "ormAccessSelect(connector, \"SELECT id, name FROM users WHERE email": "'test@example.com'\", user_result)",
+ "err": "No error detected",
+ "_status": 500,
+ "search_err": "User search failed: No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.264,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.067,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.115,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.058,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.048,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.524,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.07100000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0042",
+ "text": "Load configuration from settings table with fallback defaults",
+ "code": "try()\n ormAccessSelect(connector, \"SELECT key, value FROM settings WHERE active=1\", settings)\nexception(err)\n settings = \"{}\"\n addVar(using_defaults, True)\nend()\naddResult(settings)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", settings)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "settings": "{}"
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT key, value FROM settings WHERE active": "1\", settings)",
+ "err": "No error detected",
+ "settings": "{}",
+ "using_defaults": "True"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.185,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.096,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.059,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.5630000000000001,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.088,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0043",
+ "text": "Fetch weather data from external API with connection error handling",
+ "code": "addParam(\"city\", city_name)\ntry()\n RequestGet(\"https://api.weather.example.com/current?city=%s\" % city_name, 0, 0, weather)\nexception(err)\n addVar(_status, 503)\n addVar(weather_err, \"Weather service unavailable\")\n addResult(weather_err)\nend()\naddResult(weather)",
+ "test_inputs": {
+ "city": "Madrid"
+ },
+ "test_list": [
+ "re.match(r\"^Madrid$\", city_name)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "weather_err": "Weather service unavailable",
+ "weather": null
+ },
+ "variables": {
+ "city": "Madrid",
+ "city_name": "Madrid",
+ "RequestGet(\"https://api.weather.example.com/current?city": "%s\" % city_name, 0, 0, weather)",
+ "err": "No error detected",
+ "_status": 503,
+ "weather_err": "Weather service unavailable"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.208,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.053,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.095,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0044",
+ "text": "Call currency exchange API and handle rate limit errors",
+ "code": "addParam(\"currency\", currency_code)\ntry()\n RequestGet(\"https://api.exchange.example.com/rate?from=USD&to=%s\" % currency_code, 0, 0, rate)\nexception(err)\n rate = \"1.0\"\nend()\naddResult(rate)",
+ "test_inputs": {
+ "currency": "EUR"
+ },
+ "test_list": [
+ "re.match(r\"^EUR$\", currency_code)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "rate": "1.0"
+ },
+ "variables": {
+ "currency": "EUR",
+ "currency_code": "EUR",
+ "RequestGet(\"https://api.exchange.example.com/rate?from": "USD&to=%s\" % currency_code, 0, 0, rate)",
+ "err": "No error detected",
+ "rate": "1.0"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.26899999999999996,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.052,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.565,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.121,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.077,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.038,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0045",
+ "text": "Fetch user profile from identity provider with auth error handling",
+ "code": "addParam(\"token\", auth_token)\ntry()\n RequestGet(\"https://idp.example.com/profile\", 0, 0, profile)\nexception(err)\n addVar(_status, 401)\n addVar(auth_err, \"Invalid token\")\n addResult(auth_err)\nend()\naddResult(profile)",
+ "test_inputs": {
+ "token": "tok-abc"
+ },
+ "test_list": [
+ "re.match(r\"^tok-abc$\", auth_token)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "auth_err": "Invalid token",
+ "profile": null
+ },
+ "variables": {
+ "token": "tok-abc",
+ "auth_token": "tok-abc",
+ "err": "No error detected",
+ "_status": 401,
+ "auth_err": "Invalid token"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.31,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.23900000000000002,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.056,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.049,
+ "success": true
+ }
+ ],
+ "http_status": 401
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0046",
+ "text": "Get product details from catalog microservice with error fallback",
+ "code": "addParam(\"sku\", product_sku)\ntry()\n RequestGet(\"https://catalog.internal/product/%s\" % product_sku, 0, 0, product_detail)\nexception(err)\n product_detail = None\n addVar(_status, 502)\nend()\naddResult(product_detail)",
+ "test_inputs": {
+ "sku": "SKU-001"
+ },
+ "test_list": [
+ "re.match(r\"^SKU-001$\", product_sku)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "product_detail": null
+ },
+ "variables": {
+ "sku": "SKU-001",
+ "product_sku": "SKU-001",
+ "err": "No error detected",
+ "product_detail": null,
+ "_status": 502
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.23800000000000002,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.063,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.12000000000000001,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 502
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0047",
+ "text": "Check health status of downstream service with timeout handling",
+ "code": "try()\n RequestGet(\"https://service.internal/health\", 0, 0, health_check)\nexception(err)\n health_check = \"unhealthy\"\n addVar(_status, 503)\nend()\naddResult(health_check)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".+\", health_check)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "health_check": "unhealthy"
+ },
+ "variables": {
+ "err": "No error detected",
+ "health_check": "unhealthy",
+ "_status": 503
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.159,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.004,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.092,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.124,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.063,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.039,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0048",
+ "text": "Retrieve geolocation data for an IP address with error handling",
+ "code": "addParam(\"ip\", ip_address)\ntry()\n RequestGet(\"https://geo.example.com/lookup?ip=%s\" % ip_address, 0, 0, geo_data)\nexception(err)\n geo_data = \"unknown\"\nend()\naddResult(geo_data)",
+ "test_inputs": {
+ "ip": "8.8.8.8"
+ },
+ "test_list": [
+ "re.match(r\"^8\\.8\\.8\\.8$\", ip_address)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "geo_data": "unknown"
+ },
+ "variables": {
+ "ip": "8.8.8.8",
+ "ip_address": "8.8.8.8",
+ "RequestGet(\"https://geo.example.com/lookup?ip": "%s\" % ip_address, 0, 0, geo_data)",
+ "err": "No error detected",
+ "geo_data": "unknown"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.331,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.154,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.221,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.07200000000000001,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.067,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0049",
+ "text": "Fetch latest news feed from RSS proxy with error handling",
+ "code": "try()\n RequestGet(\"https://news.example.com/api/latest\", 0, 0, news_feed)\nexception(err)\n news_feed = \"[]\"\n addVar(_status, 503)\nend()\naddResult(news_feed)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", news_feed)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "news_feed": "[]"
+ },
+ "variables": {
+ "err": "No error detected",
+ "news_feed": "[]",
+ "_status": 503
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "try",
+ "duration_ms": 0.13999999999999999,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.05,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.10900000000000001,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.041,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0050",
+ "text": "Get shipping quote from logistics API with network error handling",
+ "code": "addParam(\"weight\", package_weight)\ntry()\n RequestGet(\"https://logistics.example.com/quote?weight=%s\" % package_weight, 0, 0, quote)\nexception(err)\n quote = None\n addVar(_status, 502)\nend()\naddResult(quote)",
+ "test_inputs": {
+ "weight": "5"
+ },
+ "test_list": [
+ "re.match(r\"^5$\", package_weight)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "quote": null
+ },
+ "variables": {
+ "weight": "5",
+ "package_weight": 5,
+ "RequestGet(\"https://logistics.example.com/quote?weight": "%s\" % package_weight, 0, 0, quote)",
+ "err": "No error detected",
+ "quote": null,
+ "_status": 502
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.17200000000000001,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.037,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.118,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.055,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ }
+ ],
+ "http_status": 502
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0051",
+ "text": "Submit order to fulfillment service with error handling",
+ "code": "addParam(\"order_id\", oid)\ntry()\n RequestPost(\"https://fulfillment.example.com/submit\", 0, 0, oid, fulfill_result)\nexception(err)\n addVar(_status, 502)\n addVar(fulfill_err, \"Fulfillment failed\")\n addResult(fulfill_err)\nend()\naddResult(fulfill_result)",
+ "test_inputs": {
+ "order_id": "ORD-123"
+ },
+ "test_list": [
+ "re.match(r\"^ORD-123$\", oid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "fulfill_err": "Fulfillment failed",
+ "fulfill_result": null
+ },
+ "variables": {
+ "order_id": "ORD-123",
+ "oid": "ORD-123",
+ "err": "No error detected",
+ "_status": 502,
+ "fulfill_err": "Fulfillment failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.216,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.044,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.002,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 502
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0052",
+ "text": "Send notification via push service with error capture",
+ "code": "addParam(\"user_id\", uid)\ntry()\n RequestPost(\"https://push.example.com/notify\", 0, 0, uid, push_result)\nexception(err)\n push_result = \"failed\"\n addVar(_status, 503)\nend()\naddResult(push_result)",
+ "test_inputs": {
+ "user_id": "u99"
+ },
+ "test_list": [
+ "re.match(r\"^u99$\", uid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "push_result": "failed"
+ },
+ "variables": {
+ "user_id": "u99",
+ "uid": "u99",
+ "err": "No error detected",
+ "push_result": "failed",
+ "_status": 503
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.212,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.27599999999999997,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.004,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.060000000000000005,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.11699999999999999,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.044,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0053",
+ "text": "Create payment intent via payment gateway with error handling",
+ "code": "addParam(\"amount\", payment_amount)\ntry()\n RequestPost(\"https://payments.example.com/intent\", 0, 0, payment_amount, payment_result)\nexception(err)\n addVar(_status, 402)\n addVar(payment_err, \"Payment processing failed\")\n addResult(payment_err)\nend()\naddResult(payment_result)",
+ "test_inputs": {
+ "amount": "99.99"
+ },
+ "test_list": [
+ "re.match(r\"^99\\.99$\", payment_amount)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "payment_err": "Payment processing failed",
+ "payment_result": null
+ },
+ "variables": {
+ "amount": "99.99",
+ "payment_amount": 99.99,
+ "err": "No error detected",
+ "_status": 402,
+ "payment_err": "Payment processing failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.18000000000000002,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.004,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.083,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 402
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0054",
+ "text": "Register webhook subscription with error handling",
+ "code": "addParam(\"callback_url\", webhook_url)\ntry()\n RequestPost(\"https://events.example.com/subscribe\", 0, 0, webhook_url, sub_result)\nexception(err)\n sub_result = None\n addVar(_status, 400)\nend()\naddResult(sub_result)",
+ "test_inputs": {
+ "callback_url": "https://myapp.com/hook"
+ },
+ "test_list": [
+ "re.match(r\"^https://myapp\\.com/hook$\", webhook_url)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "sub_result": null
+ },
+ "variables": {
+ "callback_url": "https://myapp.com/hook",
+ "webhook_url": "https://myapp.com/hook",
+ "err": "No error detected",
+ "sub_result": null,
+ "_status": 400
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.285,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.073,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.052,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.156,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.061,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 400
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0055",
+ "text": "Submit analytics event to tracking service with silent error handling",
+ "code": "addParam(\"event_name\", evt)\ntry()\n RequestPost(\"https://analytics.example.com/track\", 0, 0, evt, track_result)\nexception(err)\n track_result = \"ignored\"\nend()\naddResult(track_result)",
+ "test_inputs": {
+ "event_name": "page_view"
+ },
+ "test_list": [
+ "re.match(r\"^page_view$\", evt)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "track_result": "ignored"
+ },
+ "variables": {
+ "event_name": "page_view",
+ "evt": "page_view",
+ "err": "No error detected",
+ "track_result": "ignored"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.259,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.079,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.004,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.044,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.107,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.38,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0056",
+ "text": "Send email via transactional email API with error status code",
+ "code": "addParam(\"to\", recipient_email)\ntry()\n RequestPost(\"https://mail.example.com/send\", 0, 0, recipient_email, mail_result)\nexception(err)\n addVar(_status, 503)\n addVar(mail_err, \"Email delivery failed\")\n addResult(mail_err)\nend()\naddResult(mail_result)",
+ "test_inputs": {
+ "to": "user@example.com"
+ },
+ "test_list": [
+ "re.match(r\"^user@example\\.com$\", recipient_email)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "mail_err": "Email delivery failed",
+ "mail_result": null
+ },
+ "variables": {
+ "to": "user@example.com",
+ "recipient_email": "user@example.com",
+ "err": "No error detected",
+ "_status": 503,
+ "mail_err": "Email delivery failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.266,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.053,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.041,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.05,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 503
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0057",
+ "text": "Create user account in external identity service with error handling",
+ "code": "addParam(\"username\", new_username)\ntry()\n RequestPost(\"https://identity.example.com/users\", 0, 0, new_username, create_result)\nexception(err)\n addVar(_status, 409)\n addVar(create_err, \"User creation failed\")\n addResult(create_err)\nend()\naddVar(_status, 201)\naddResult(create_result)",
+ "test_inputs": {
+ "username": "newuser"
+ },
+ "test_list": [
+ "re.match(r\"^newuser$\", new_username)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "create_err": "User creation failed",
+ "create_result": null
+ },
+ "variables": {
+ "username": "newuser",
+ "new_username": "newuser",
+ "err": "No error detected",
+ "_status": 201,
+ "create_err": "User creation failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.228,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.002,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.40900000000000003,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.048,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.036000000000000004,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 201
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0058",
+ "text": "Submit form data to CRM API with validation error handling",
+ "code": "addParam(\"contact_name\", contact)\ntry()\n RequestPost(\"https://crm.example.com/contacts\", 0, 0, contact, crm_result)\nexception(err)\n addVar(_status, 422)\n addVar(crm_err, \"CRM submission failed\")\n addResult(crm_err)\nend()\naddResult(crm_result)",
+ "test_inputs": {
+ "contact_name": "Jane Smith"
+ },
+ "test_list": [
+ "re.match(r\"^Jane Smith$\", contact)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "crm_err": "CRM submission failed",
+ "crm_result": null
+ },
+ "variables": {
+ "contact_name": "Jane Smith",
+ "contact": "Jane Smith",
+ "err": "No error detected",
+ "_status": 422,
+ "crm_err": "CRM submission failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.24600000000000002,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.054,
+ "success": true
+ },
+ {
+ "command": "connector_call",
+ "duration_ms": 0.003,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.044,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.102,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 422
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0059",
+ "text": "Define a function that validates a score and returns a grade",
+ "code": "function getGrade(score){\n if(score, 90, \">=\")\n grade = \"A\"\n else()\n grade = \"B\"\n end()\n return(grade)\n}\naddParam(\"score\", student_score)\nfinal_grade = getGrade(student_score)\naddResult(final_grade)",
+ "test_inputs": {
+ "score": "95"
+ },
+ "test_list": [
+ "re.match(r\"^(A|B)$\", final_grade)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "final_grade": "A"
+ },
+ "variables": {
+ "score": "95",
+ "student_score": 95,
+ "grade": "A",
+ "final_grade": "A"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.207,
+ "success": true
+ },
+ {
+ "command": "getGrade",
+ "duration_ms": 0.91,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.078,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0060",
+ "text": "Check user role and return permission level",
+ "code": "addParam(\"role\", user_role)\nif(user_role, \"admin\", \"==\")\n permission = \"full\"\nelse()\n permission = \"read\"\nend()\naddResult(permission)",
+ "test_inputs": {
+ "role": "admin"
+ },
+ "test_list": [
+ "re.match(r\"^full$\", permission)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "permission": "full"
+ },
+ "variables": {
+ "role": "admin",
+ "user_role": "admin",
+ "permission": "full"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.21800000000000003,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.382,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.054,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0061",
+ "text": "Check stock quantity and return availability status",
+ "code": "addParam(\"quantity\", stock_qty)\nif(stock_qty, 0, \"!=\")\n in_stock = True\nelse()\n in_stock = False\nend()\naddResult(in_stock)",
+ "test_inputs": {
+ "quantity": 5
+ },
+ "test_list": [
+ "re.match(r\"^True$\", in_stock)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "in_stock": true
+ },
+ "variables": {
+ "quantity": 5,
+ "stock_qty": 5,
+ "in_stock": true
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.165,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.364,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.054,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0062",
+ "text": "Check order total and return shipping tier",
+ "code": "addParam(\"total\", order_total)\nif(order_total, 100, \">=\")\n shipping = \"free\"\nelse()\n shipping = \"standard\"\nend()\naddResult(shipping)",
+ "test_inputs": {
+ "total": 150
+ },
+ "test_list": [
+ "re.match(r\"^free$\", shipping)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "shipping": "free"
+ },
+ "variables": {
+ "total": 150,
+ "order_total": 150,
+ "shipping": "free"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.163,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.333,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.053,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0063",
+ "text": "Validate age and return adult or minor category",
+ "code": "addParam(\"age\", user_age)\nif(user_age, 18, \">=\")\n age_cat = \"adult\"\nelse()\n age_cat = \"minor\"\nend()\naddResult(age_cat)",
+ "test_inputs": {
+ "age": 25
+ },
+ "test_list": [
+ "re.match(r\"^adult$\", age_cat)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "age_cat": "adult"
+ },
+ "variables": {
+ "age": 25,
+ "user_age": 25,
+ "age_cat": "adult"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.283,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.767,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.115,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0064",
+ "text": "Check if a value equals 4 and return leap year status",
+ "code": "addParam(\"year\", input_year)\nif(input_year, 4, \"==\")\n leap_result = True\nelse()\n leap_result = False\nend()\naddResult(leap_result)",
+ "test_inputs": {
+ "year": 4
+ },
+ "test_list": [
+ "re.match(r\"^True$\", leap_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "leap_result": true
+ },
+ "variables": {
+ "year": 4,
+ "input_year": 4,
+ "leap_result": true
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.235,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.877,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.096,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0065",
+ "text": "Return OK or ERROR label based on HTTP code",
+ "code": "addParam(\"code\", http_code)\nif(http_code, 200, \"==\")\n status_label = \"OK\"\nelse()\n status_label = \"ERROR\"\nend()\naddResult(status_label)",
+ "test_inputs": {
+ "code": 200
+ },
+ "test_list": [
+ "re.match(r\"^OK$\", status_label)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "status_label": "OK"
+ },
+ "variables": {
+ "code": 200,
+ "http_code": 200,
+ "status_label": "OK"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.213,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.851,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.081,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0066",
+ "text": "Check if a value is within a valid range",
+ "code": "addParam(\"value\", input_val)\nif(input_val, 0, \">=\")\n valid = True\nelse()\n valid = False\nend()\naddResult(valid)",
+ "test_inputs": {
+ "value": 10
+ },
+ "test_list": [
+ "re.match(r\"^True$\", valid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "valid": true
+ },
+ "variables": {
+ "value": 10,
+ "input_val": 10,
+ "valid": true
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.183,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.395,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.057,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0067",
+ "text": "Classify temperature as hot or cool based on threshold",
+ "code": "addParam(\"temp\", temperature)\nif(temperature, 30, \">=\")\n temp_label = \"hot\"\nelse()\n temp_label = \"cool\"\nend()\naddResult(temp_label)",
+ "test_inputs": {
+ "temp": 35
+ },
+ "test_list": [
+ "re.match(r\"^hot$\", temp_label)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "temp_label": "hot"
+ },
+ "variables": {
+ "temp": 35,
+ "temperature": 35,
+ "temp_label": "hot"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.18100000000000002,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.606,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.054,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0068",
+ "text": "Determine pass or fail based on score threshold",
+ "code": "addParam(\"score\", test_score)\nif(test_score, 60, \">=\")\n outcome = \"pass\"\nelse()\n outcome = \"fail\"\nend()\naddResult(outcome)",
+ "test_inputs": {
+ "score": 75
+ },
+ "test_list": [
+ "re.match(r\"^pass$\", outcome)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "outcome": "pass"
+ },
+ "variables": {
+ "score": 75,
+ "test_score": 75,
+ "outcome": "pass"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.18000000000000002,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.378,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.056,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0069",
+ "text": "Determine pricing tier based on order quantity",
+ "code": "addParam(\"qty\", order_qty)\nif(order_qty, 100, \">=\")\n pricing = \"bulk\"\nelse()\n pricing = \"retail\"\nend()\naddResult(pricing)",
+ "test_inputs": {
+ "qty": 150
+ },
+ "test_list": [
+ "re.match(r\"^bulk$\", pricing)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "pricing": "bulk"
+ },
+ "variables": {
+ "qty": 150,
+ "order_qty": 150,
+ "pricing": "bulk"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.675,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 1.312,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.093,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0070",
+ "text": "Validate a PIN and return valid or invalid",
+ "code": "addParam(\"pin\", user_pin)\nif(user_pin, \"1234\", \"==\")\n pin_status = \"valid\"\nelse()\n pin_status = \"invalid\"\nend()\naddResult(pin_status)",
+ "test_inputs": {
+ "pin": "1234"
+ },
+ "test_list": [
+ "re.match(r\"^valid$\", pin_status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "pin_status": "valid"
+ },
+ "variables": {
+ "pin": "1234",
+ "user_pin": 1234,
+ "pin_status": "valid"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.294,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.896,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.082,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0071",
+ "text": "Compute priority level from urgency score",
+ "code": "addParam(\"urgency\", urgency_score)\nif(urgency_score, 8, \">=\")\n priority = \"high\"\nelse()\n priority = \"normal\"\nend()\naddResult(priority)",
+ "test_inputs": {
+ "urgency": 9
+ },
+ "test_list": [
+ "re.match(r\"^high$\", priority)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "priority": "high"
+ },
+ "variables": {
+ "urgency": 9,
+ "urgency_score": 9,
+ "priority": "high"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.205,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 1.2080000000000002,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.06999999999999999,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0072",
+ "text": "Return subscription status based on days remaining",
+ "code": "addParam(\"days\", days_remaining)\nif(days_remaining, 0, \">\")\n sub = \"active\"\nelse()\n sub = \"expired\"\nend()\naddResult(sub)",
+ "test_inputs": {
+ "days": 15
+ },
+ "test_list": [
+ "re.match(r\"^active$\", sub)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "sub": "active"
+ },
+ "variables": {
+ "days": 15,
+ "days_remaining": 15,
+ "sub": "active"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.174,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.33799999999999997,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.04,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0073",
+ "text": "Categorize product weight as light or heavy",
+ "code": "addParam(\"weight\", item_weight)\nif(item_weight, 10, \"<\")\n weight_cat = \"light\"\nelse()\n weight_cat = \"heavy\"\nend()\naddResult(weight_cat)",
+ "test_inputs": {
+ "weight": 5
+ },
+ "test_list": [
+ "re.match(r\"^light$\", weight_cat)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "weight_cat": "light"
+ },
+ "variables": {
+ "weight": 5,
+ "item_weight": 5,
+ "weight_cat": "light"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.176,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 0.348,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0074",
+ "text": "Check password length and return strength level",
+ "code": "addParam(\"length\", pwd_length)\nif(pwd_length, 12, \">=\")\n pwd_level = \"strong\"\nelse()\n pwd_level = \"weak\"\nend()\naddResult(pwd_level)",
+ "test_inputs": {
+ "length": 15
+ },
+ "test_list": [
+ "re.match(r\"^strong$\", pwd_level)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "pwd_level": "strong"
+ },
+ "variables": {
+ "length": 15,
+ "pwd_length": 15,
+ "pwd_level": "strong"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.22699999999999998,
+ "success": true
+ },
+ {
+ "command": "if",
+ "duration_ms": 1.5939999999999999,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.066,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0075",
+ "text": "Define a function that queries and returns a user record by ID",
+ "code": "function getUser(uid){\n ormAccessSelect(connector, \"SELECT name, email FROM users WHERE id=1\", user_row)\n return(user_row)\n}\naddParam(\"user_id\", user_id)\nuser_data = getUser(user_id)\naddResult(user_data)",
+ "test_inputs": {
+ "user_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", user_id)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "user_data": "user_row"
+ },
+ "variables": {
+ "user_id": 1,
+ "ormAccessSelect(connector, \"SELECT name, email FROM users WHERE id": "1\", user_row)",
+ "user_data": "user_row"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.24000000000000002,
+ "success": true
+ },
+ {
+ "command": "getUser",
+ "duration_ms": 0.148,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.05,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0076",
+ "text": "Define a function that retrieves and returns product catalog",
+ "code": "function getCatalog(){\n ormAccessSelect(connector, \"SELECT id, name, price FROM products WHERE active=1\", catalog)\n return(catalog)\n}\nproducts = getCatalog()\naddResult(products)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", products)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "products": "catalog"
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT id, name, price FROM products WHERE active": "1\", catalog)",
+ "products": "catalog"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "getCatalog",
+ "duration_ms": 0.153,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.129,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0077",
+ "text": "Define a function that gets the count of pending orders",
+ "code": "function getPendingCount(){\n ormAccessSelect(connector, \"SELECT COUNT(*) as cnt FROM orders WHERE status='pending'\", count_row)\n return(count_row)\n}\npending_count = getPendingCount()\naddResult(pending_count)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", pending_count)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "pending_count": "count_row"
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT COUNT(*) as cnt FROM orders WHERE status": "'pending'\", count_row)",
+ "pending_count": "count_row"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "getPendingCount",
+ "duration_ms": 0.164,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.11299999999999999,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0078",
+ "text": "Define a function that fetches roles for a user and returns them",
+ "code": "function getUserRoles(uid){\n ormAccessSelect(connector, \"SELECT role FROM user_roles WHERE user_id=1\", roles)\n return(roles)\n}\naddParam(\"user_id\", uid)\nuser_roles = getUserRoles(uid)\naddResult(user_roles)",
+ "test_inputs": {
+ "user_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", uid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "user_roles": "roles"
+ },
+ "variables": {
+ "user_id": "1",
+ "uid": 1,
+ "ormAccessSelect(connector, \"SELECT role FROM user_roles WHERE user_id": "1\", roles)",
+ "user_roles": "roles"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 1.358,
+ "success": true
+ },
+ {
+ "command": "getUserRoles",
+ "duration_ms": 0.232,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.077,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0079",
+ "text": "Query latest audit log entries and return them",
+ "code": "function getAuditLog(limit){\n ormAccessSelect(connector, \"SELECT action, user_id, created_at FROM audit_log ORDER BY created_at DESC LIMIT 10\", audit_entries)\n return(audit_entries)\n}\naddParam(\"limit\", log_limit)\naudit = getAuditLog(log_limit)\naddResult(audit)",
+ "test_inputs": {
+ "limit": "10"
+ },
+ "test_list": [
+ "re.match(r\"^10$\", log_limit)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "audit": "audit_entries"
+ },
+ "variables": {
+ "limit": "10",
+ "log_limit": 10,
+ "null": [],
+ "audit": "audit_entries"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.187,
+ "success": true
+ },
+ {
+ "command": "getAuditLog",
+ "duration_ms": 0.24600000000000002,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.039,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0080",
+ "text": "Retrieve configuration settings from database and return them",
+ "code": "function getConfig(){\n ormAccessSelect(connector, \"SELECT setting_key, setting_value FROM config WHERE active=1\", config_rows)\n return(config_rows)\n}\napp_config = getConfig()\naddResult(app_config)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\".*\", app_config)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "app_config": "config_rows"
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT setting_key, setting_value FROM config WHERE active": "1\", config_rows)",
+ "app_config": "config_rows"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "getConfig",
+ "duration_ms": 0.221,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.118,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0081",
+ "text": "Define a function that searches products by category and returns results",
+ "code": "function getByCategory(cat){\n ormAccessSelect(connector, \"SELECT id, name FROM products WHERE category='electronics'\", results)\n return(results)\n}\naddParam(\"category\", cat_name)\ncategory_items = getByCategory(cat_name)\naddResult(category_items)",
+ "test_inputs": {
+ "category": "electronics"
+ },
+ "test_list": [
+ "re.match(r\"^electronics$\", cat_name)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "category_items": "results"
+ },
+ "variables": {
+ "category": "electronics",
+ "cat_name": "electronics",
+ "ormAccessSelect(connector, \"SELECT id, name FROM products WHERE category": "'electronics'\", results)",
+ "category_items": "results"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.202,
+ "success": true
+ },
+ {
+ "command": "getByCategory",
+ "duration_ms": 0.17300000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.05,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0082",
+ "text": "Insert new user record with duplicate key error handling",
+ "code": "addParam(\"email\", user_email)\ntry()\n ormAccessInsert(connector, \"users\", user_email, insert_result)\nexception(err)\n addVar(_status, 409)\n addVar(dup_error, \"Email already exists\")\n addResult(dup_error)\nend()\naddResult(insert_result)",
+ "test_inputs": {
+ "email": "new@example.com"
+ },
+ "test_list": [
+ "re.match(r\"^new@example\\.com$\", user_email)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "dup_error": "Email already exists",
+ "insert_result": null
+ },
+ "variables": {
+ "email": "new@example.com",
+ "user_email": "new@example.com",
+ "insert_result": null,
+ "err": "No error detected",
+ "_status": 409,
+ "dup_error": "Email already exists"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.725,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.248,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.465,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.061,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.045000000000000005,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.037,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.077,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 409
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0083",
+ "text": "Insert order record and handle constraint violations",
+ "code": "addParam(\"order_data\", order_json)\ntry()\n ormAccessInsert(connector, \"orders\", order_json, order_id)\nexception(err)\n addVar(_status, 422)\n addVar(order_err, \"Order creation failed\")\n addResult(order_err)\nend()\naddResult(order_id)",
+ "test_inputs": {
+ "order_data": "{\"total\":99.99}"
+ },
+ "test_list": [
+ "re.match(r\".*\", order_json)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "order_err": "Order creation failed",
+ "order_id": null
+ },
+ "variables": {
+ "order_data": "{\"total\":99.99}",
+ "order_json": {
+ "total": 99.99
+ },
+ "order_id": null,
+ "err": "No error detected",
+ "_status": 422,
+ "order_err": "Order creation failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.19699999999999998,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.107,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.052,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 422
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0084",
+ "text": "Insert product record with validation error handling",
+ "code": "addParam(\"product_name\", pname)\ntry()\n ormAccessInsert(connector, \"products\", pname, prod_result)\nexception(err)\n addVar(_status, 400)\n addVar(prod_err, \"Product insert failed\")\n addResult(prod_err)\nend()\naddVar(_status, 201)\naddResult(prod_result)",
+ "test_inputs": {
+ "product_name": "New Product"
+ },
+ "test_list": [
+ "re.match(r\"^New Product$\", pname)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "prod_err": "Product insert failed",
+ "prod_result": null
+ },
+ "variables": {
+ "product_name": "New Product",
+ "pname": "New Product",
+ "prod_result": null,
+ "err": "No error detected",
+ "_status": 201,
+ "prod_err": "Product insert failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.201,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.122,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 201
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0085",
+ "text": "Log activity to audit table with silent error handling",
+ "code": "addParam(\"action\", audit_action)\ntry()\n ormAccessInsert(connector, \"audit_logs\", audit_action, audit_result)\nexception(err)\n audit_result = \"logged_failed\"\nend()\naddResult(audit_result)",
+ "test_inputs": {
+ "action": "login"
+ },
+ "test_list": [
+ "re.match(r\"^login$\", audit_action)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "audit_result": "logged_failed"
+ },
+ "variables": {
+ "action": "login",
+ "audit_action": "login",
+ "audit_result": "logged_failed",
+ "err": "No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.208,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.092,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.11,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.104,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0086",
+ "text": "Insert session token with DB error handling",
+ "code": "addParam(\"session_token\", sess_tok)\ntry()\n ormAccessInsert(connector, \"sessions\", sess_tok, sess_result)\nexception(err)\n addVar(_status, 500)\n addVar(sess_err, \"Session creation failed\")\n addResult(sess_err)\nend()\naddResult(sess_result)",
+ "test_inputs": {
+ "session_token": "tok-xyz"
+ },
+ "test_list": [
+ "re.match(r\"^tok-xyz$\", sess_tok)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "sess_err": "Session creation failed",
+ "sess_result": null
+ },
+ "variables": {
+ "session_token": "tok-xyz",
+ "sess_tok": "tok-xyz",
+ "sess_result": null,
+ "err": "No error detected",
+ "_status": 500,
+ "sess_err": "Session creation failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.215,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.106,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.34400000000000003,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.054,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0087",
+ "text": "Insert feedback record and handle database errors",
+ "code": "addParam(\"message\", feedback_msg)\ntry()\n ormAccessInsert(connector, \"feedback\", feedback_msg, fb_result)\nexception(err)\n addVar(_status, 500)\n addVar(fb_err, \"Feedback save failed\")\n addResult(fb_err)\nend()\naddResult(fb_result)",
+ "test_inputs": {
+ "message": "Great service!"
+ },
+ "test_list": [
+ "re.match(r\"^Great service!$\", feedback_msg)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "fb_err": "Feedback save failed",
+ "fb_result": null
+ },
+ "variables": {
+ "message": "Great service!",
+ "feedback_msg": "Great service!",
+ "fb_result": null,
+ "err": "No error detected",
+ "_status": 500,
+ "fb_err": "Feedback save failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.228,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.044,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.11,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.04,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0088",
+ "text": "Insert notification record with error fallback",
+ "code": "addParam(\"notif_type\", ntype)\ntry()\n ormAccessInsert(connector, \"notifications\", ntype, notif_result)\nexception(err)\n notif_result = None\nend()\naddResult(notif_result)",
+ "test_inputs": {
+ "notif_type": "email"
+ },
+ "test_list": [
+ "re.match(r\"^email$\", ntype)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "notif_result": null
+ },
+ "variables": {
+ "notif_type": "email",
+ "ntype": "email",
+ "notif_result": null,
+ "err": "No error detected"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.221,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.063,
+ "success": true
+ },
+ {
+ "command": "ormAccessInsert",
+ "duration_ms": 0.106,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.037,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.103,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0089",
+ "text": "Update user email and handle not-found error",
+ "code": "addParam(\"user_id\", uid)\naddParam(\"new_email\", email)\ntry()\n ormAccessUpdate(connector, \"UPDATE users SET email='%s' WHERE id=1\" % email, update_result)\nexception(err)\n addVar(_status, 404)\n addVar(upd_err, \"User not found\")\n addResult(upd_err)\nend()\naddVar(_status, 200)\naddResult(update_result)",
+ "test_inputs": {
+ "user_id": "1",
+ "new_email": "updated@example.com"
+ },
+ "test_list": [
+ "re.match(r\"^updated@example\\.com$\", email)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "upd_err": "User not found",
+ "update_result": null
+ },
+ "variables": {
+ "user_id": "1",
+ "new_email": "updated@example.com",
+ "uid": 1,
+ "email": "updated@example.com",
+ "ormAccessUpdate(connector, \"UPDATE users SET email": "'%s' WHERE id=1\" % email, update_result)",
+ "err": "No error detected",
+ "_status": 200,
+ "upd_err": "User not found"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.191,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.088,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.104,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.038,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.08,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.029,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0090",
+ "text": "Deactivate product and handle update errors",
+ "code": "addParam(\"product_id\", pid)\ntry()\n ormAccessUpdate(connector, \"UPDATE products SET active=0 WHERE id=1\", deact_result)\nexception(err)\n addVar(_status, 500)\n addVar(deact_err, \"Deactivation failed\")\n addResult(deact_err)\nend()\naddResult(deact_result)",
+ "test_inputs": {
+ "product_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", pid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "deact_err": "Deactivation failed",
+ "deact_result": null
+ },
+ "variables": {
+ "product_id": "1",
+ "pid": 1,
+ "ormAccessUpdate(connector, \"UPDATE products SET active": "0 WHERE id=1\", deact_result)",
+ "err": "No error detected",
+ "_status": 500,
+ "deact_err": "Deactivation failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.159,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.102,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.058,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.030000000000000002,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.026,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0091",
+ "text": "Update order status with transition error handling",
+ "code": "addParam(\"order_id\", oid)\naddParam(\"status\", new_status)\ntry()\n ormAccessUpdate(connector, \"UPDATE orders SET status='%s' WHERE id=1\" % new_status, status_result)\nexception(err)\n addVar(_status, 400)\n addVar(status_err, \"Status update failed\")\n addResult(status_err)\nend()\naddResult(status_result)",
+ "test_inputs": {
+ "order_id": "1",
+ "status": "shipped"
+ },
+ "test_list": [
+ "re.match(r\"^shipped$\", new_status)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "status_err": "Status update failed",
+ "status_result": null
+ },
+ "variables": {
+ "order_id": "1",
+ "status": "shipped",
+ "oid": 1,
+ "new_status": "shipped",
+ "ormAccessUpdate(connector, \"UPDATE orders SET status": "'%s' WHERE id=1\" % new_status, status_result)",
+ "err": "No error detected",
+ "_status": 400,
+ "status_err": "Status update failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.19799999999999998,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.115,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.376,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.134,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.1,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.034,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.027,
+ "success": true
+ }
+ ],
+ "http_status": 400
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0092",
+ "text": "Increment view counter for an article with error handling",
+ "code": "addParam(\"article_id\", aid)\ntry()\n ormAccessUpdate(connector, \"UPDATE articles SET views=views+1 WHERE id=1\", counter_result)\nexception(err)\n counter_result = None\nend()\naddResult(counter_result)",
+ "test_inputs": {
+ "article_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", aid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "counter_result": null
+ },
+ "variables": {
+ "article_id": "1",
+ "aid": 1,
+ "ormAccessUpdate(connector, \"UPDATE articles SET views": "views+1 WHERE id=1\", counter_result)",
+ "err": "No error detected",
+ "counter_result": null
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.177,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.16899999999999998,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.057,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.056,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0093",
+ "text": "Reset user password hash with error handling",
+ "code": "addParam(\"user_id\", uid)\naddParam(\"new_hash\", pwd_hash)\ntry()\n ormAccessUpdate(connector, \"UPDATE users SET password_hash='%s' WHERE id=1\" % pwd_hash, reset_result)\nexception(err)\n addVar(_status, 500)\n addVar(reset_err, \"Password reset failed\")\n addResult(reset_err)\nend()\naddResult(reset_result)",
+ "test_inputs": {
+ "user_id": "1",
+ "new_hash": "abc123hash"
+ },
+ "test_list": [
+ "re.match(r\"^abc123hash$\", pwd_hash)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "reset_err": "Password reset failed",
+ "reset_result": null
+ },
+ "variables": {
+ "user_id": "1",
+ "new_hash": "abc123hash",
+ "uid": 1,
+ "pwd_hash": "abc123hash",
+ "ormAccessUpdate(connector, \"UPDATE users SET password_hash": "'%s' WHERE id=1\" % pwd_hash, reset_result)",
+ "err": "No error detected",
+ "_status": 500,
+ "reset_err": "Password reset failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.202,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.122,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.049,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.453,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.081,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.063,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.037,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.028,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0094",
+ "text": "Update product price and handle constraint violation",
+ "code": "addParam(\"product_id\", pid)\naddParam(\"price\", new_price)\ntry()\n ormAccessUpdate(connector, \"UPDATE products SET price=%s WHERE id=1\" % new_price, price_result)\nexception(err)\n addVar(_status, 422)\n addVar(price_err, \"Price update failed\")\n addResult(price_err)\nend()\naddResult(price_result)",
+ "test_inputs": {
+ "product_id": "1",
+ "price": "29.99"
+ },
+ "test_list": [
+ "re.match(r\"^29\\.99$\", new_price)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "price_err": "Price update failed",
+ "price_result": null
+ },
+ "variables": {
+ "product_id": "1",
+ "price": "29.99",
+ "pid": 1,
+ "new_price": 29.99,
+ "ormAccessUpdate(connector, \"UPDATE products SET price": "%s WHERE id=1\" % new_price, price_result)",
+ "err": "No error detected",
+ "_status": 422,
+ "price_err": "Price update failed"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.202,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.068,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.081,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.108,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.053,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.045000000000000005,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.041,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.031,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.024,
+ "success": true
+ }
+ ],
+ "http_status": 422
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0095",
+ "text": "Mark notifications as read with error handling",
+ "code": "addParam(\"user_id\", uid)\ntry()\n ormAccessUpdate(connector, \"UPDATE notifications SET read=1 WHERE user_id=1\", read_result)\nexception(err)\n addVar(_status, 500)\n read_result = None\nend()\naddResult(read_result)",
+ "test_inputs": {
+ "user_id": "1"
+ },
+ "test_list": [
+ "re.match(r\"^1$\", uid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "read_result": null
+ },
+ "variables": {
+ "user_id": "1",
+ "uid": 1,
+ "ormAccessUpdate(connector, \"UPDATE notifications SET read": "1 WHERE user_id=1\", read_result)",
+ "err": "No error detected",
+ "_status": 500,
+ "read_result": null
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.5720000000000001,
+ "success": true
+ },
+ {
+ "command": "try",
+ "duration_ms": 0.161,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.235,
+ "success": true
+ },
+ {
+ "command": "exception",
+ "duration_ms": 0.056,
+ "success": true
+ },
+ {
+ "command": "addVar",
+ "duration_ms": 0.039,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.111,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.047,
+ "success": true
+ }
+ ],
+ "http_status": 500
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0096",
+ "text": "Parse a JSON response and extract the name field",
+ "code": "addParam(\"response\", raw_response)\nvariableFromJSON(raw_response, \"name\", user_name)\naddResult(user_name)",
+ "test_inputs": {
+ "response": "{\"name\":\"Alice\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", user_name)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "user_name": "Alice"
+ },
+ "variables": {
+ "response": "{\"name\":\"Alice\"}",
+ "raw_response": {
+ "name": "Alice"
+ },
+ "user_name": "Alice"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.221,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.074,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0097",
+ "text": "Parse exchange rate response and extract USD rate",
+ "code": "addParam(\"rates\", rates_response)\nvariableFromJSON(rates_response, \"usd\", usd_rate)\naddResult(usd_rate)",
+ "test_inputs": {
+ "rates": "{\"usd\":\"1.2\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", usd_rate)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "usd_rate": "1.2"
+ },
+ "variables": {
+ "rates": "{\"usd\":\"1.2\"}",
+ "rates_response": {
+ "usd": "1.2"
+ },
+ "usd_rate": "1.2"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 1.471,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.087,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0098",
+ "text": "Parse weather response and extract temperature",
+ "code": "addParam(\"city\", city)\naddParam(\"weather\", weather_json)\nvariableFromJSON(weather_json, \"temp\", temperature)\naddResult(temperature)",
+ "test_inputs": {
+ "city": "London",
+ "weather": "{\"temp\":\"15C\"}"
+ },
+ "test_list": [
+ "re.match(r\"^London$\", city)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "temperature": "15C"
+ },
+ "variables": {
+ "city": "London",
+ "weather": "{\"temp\":\"15C\"}",
+ "weather_json": {
+ "temp": "15C"
+ },
+ "temperature": "15C"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.259,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.083,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.075,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0099",
+ "text": "Parse auth token response and extract access token",
+ "code": "addParam(\"client_id\", cid)\naddParam(\"token_response\", token_response)\nvariableFromJSON(token_response, \"access_token\", access_token)\naddResult(access_token)",
+ "test_inputs": {
+ "client_id": "app-001",
+ "token_response": "{\"access_token\":\"tok-abc\"}"
+ },
+ "test_list": [
+ "re.match(r\"^app-001$\", cid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "access_token": "tok-abc"
+ },
+ "variables": {
+ "client_id": "app-001",
+ "token_response": {
+ "access_token": "tok-abc"
+ },
+ "cid": "app-001",
+ "access_token": "tok-abc"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.228,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.07200000000000001,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.056,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0100",
+ "text": "Parse product info and extract price",
+ "code": "addParam(\"sku\", product_sku)\naddParam(\"product_json\", product_json)\nvariableFromJSON(product_json, \"price\", product_price)\naddResult(product_price)",
+ "test_inputs": {
+ "sku": "ABC-123",
+ "product_json": "{\"price\":\"9.99\"}"
+ },
+ "test_list": [
+ "re.match(r\"^ABC-123$\", product_sku)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "product_price": "9.99"
+ },
+ "variables": {
+ "sku": "ABC-123",
+ "product_json": {
+ "price": "9.99"
+ },
+ "product_sku": "ABC-123",
+ "product_price": "9.99"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 1.1869999999999998,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.914,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.112,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.038,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0101",
+ "text": "Parse user profile and extract account status",
+ "code": "addParam(\"user_id\", uid)\naddParam(\"profile_json\", profile_json)\nvariableFromJSON(profile_json, \"status\", account_status)\naddResult(account_status)",
+ "test_inputs": {
+ "user_id": "u42",
+ "profile_json": "{\"status\":\"active\"}"
+ },
+ "test_list": [
+ "re.match(r\"^u42$\", uid)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "account_status": "active"
+ },
+ "variables": {
+ "user_id": "u42",
+ "profile_json": {
+ "status": "active"
+ },
+ "uid": "u42",
+ "account_status": "active"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.292,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.089,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.054,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0102",
+ "text": "Parse feature flags and extract a flag value",
+ "code": "addParam(\"flags_json\", flags_json)\nvariableFromJSON(flags_json, \"new_ui\", new_ui_flag)\naddResult(new_ui_flag)",
+ "test_inputs": {
+ "flags_json": "{\"new_ui\":\"true\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", new_ui_flag)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "new_ui_flag": "true"
+ },
+ "variables": {
+ "flags_json": {
+ "new_ui": "true"
+ },
+ "new_ui_flag": "true"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.19,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.061,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.06999999999999999,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0103",
+ "text": "Parse login response and extract JWT token",
+ "code": "addParam(\"username\", uname)\naddParam(\"login_response\", login_response)\nvariableFromJSON(login_response, \"token\", jwt_token)\naddResult(jwt_token)",
+ "test_inputs": {
+ "username": "admin",
+ "login_response": "{\"token\":\"jwt-abc\"}"
+ },
+ "test_list": [
+ "re.match(r\"^admin$\", uname)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "jwt_token": "jwt-abc"
+ },
+ "variables": {
+ "username": "admin",
+ "login_response": {
+ "token": "jwt-abc"
+ },
+ "uname": "admin",
+ "jwt_token": "jwt-abc"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.45899999999999996,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.127,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.078,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.041999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0104",
+ "text": "Parse payment response and extract transaction ID",
+ "code": "addParam(\"amount\", pay_amount)\naddParam(\"pay_response\", pay_response)\nvariableFromJSON(pay_response, \"transaction_id\", txn_id)\naddResult(txn_id)",
+ "test_inputs": {
+ "amount": "49.99",
+ "pay_response": "{\"transaction_id\":\"txn-001\"}"
+ },
+ "test_list": [
+ "re.match(r\"^49\\.99$\", pay_amount)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "txn_id": "txn-001"
+ },
+ "variables": {
+ "amount": "49.99",
+ "pay_response": {
+ "transaction_id": "txn-001"
+ },
+ "pay_amount": 49.99,
+ "txn_id": "txn-001"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.179,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.103,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.057,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0105",
+ "text": "Parse create response and extract the new resource ID",
+ "code": "addParam(\"name\", resource_name)\naddParam(\"create_response\", create_response)\nvariableFromJSON(create_response, \"id\", new_id)\naddResult(new_id)",
+ "test_inputs": {
+ "name": "my-resource",
+ "create_response": "{\"id\":\"res-001\"}"
+ },
+ "test_list": [
+ "re.match(r\"^my-resource$\", resource_name)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "new_id": "res-001"
+ },
+ "variables": {
+ "name": "my-resource",
+ "create_response": {
+ "id": "res-001"
+ },
+ "resource_name": "my-resource",
+ "new_id": "res-001"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.264,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.087,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.060000000000000005,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.04,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0106",
+ "text": "Parse analytics response and extract confirmation code",
+ "code": "addParam(\"event\", evt_name)\naddParam(\"evt_response\", evt_response)\nvariableFromJSON(evt_response, \"code\", confirm_code)\naddResult(confirm_code)",
+ "test_inputs": {
+ "event": "purchase",
+ "evt_response": "{\"code\":\"200\"}"
+ },
+ "test_list": [
+ "re.match(r\"^purchase$\", evt_name)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "confirm_code": "200"
+ },
+ "variables": {
+ "event": "purchase",
+ "evt_response": {
+ "code": "200"
+ },
+ "evt_name": "purchase",
+ "confirm_code": "200"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.237,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.067,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.075,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0107",
+ "text": "Parse register response and extract device push token",
+ "code": "addParam(\"device_id\", did)\naddParam(\"reg_response\", reg_response)\nvariableFromJSON(reg_response, \"push_token\", push_token)\naddResult(push_token)",
+ "test_inputs": {
+ "device_id": "dev-abc",
+ "reg_response": "{\"push_token\":\"ptok-xyz\"}"
+ },
+ "test_list": [
+ "re.match(r\"^dev-abc$\", did)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "push_token": "ptok-xyz"
+ },
+ "variables": {
+ "device_id": "dev-abc",
+ "reg_response": {
+ "push_token": "ptok-xyz"
+ },
+ "did": "dev-abc",
+ "push_token": "ptok-xyz"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.621,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.09899999999999999,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.055,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0108",
+ "text": "Parse ticket response and extract ticket number",
+ "code": "addParam(\"subject\", ticket_subject)\naddParam(\"ticket_response\", ticket_response)\nvariableFromJSON(ticket_response, \"ticket_number\", ticket_num)\naddResult(ticket_num)",
+ "test_inputs": {
+ "subject": "Login issue",
+ "ticket_response": "{\"ticket_number\":\"T-001\"}"
+ },
+ "test_list": [
+ "re.match(r\"^Login issue$\", ticket_subject)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "ticket_num": "T-001"
+ },
+ "variables": {
+ "subject": "Login issue",
+ "ticket_response": {
+ "ticket_number": "T-001"
+ },
+ "ticket_subject": "Login issue",
+ "ticket_num": "T-001"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.23,
+ "success": true
+ },
+ {
+ "command": "addParam",
+ "duration_ms": 0.06499999999999999,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.067,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.037,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0109",
+ "text": "Parse a JSON string and extract the id field",
+ "code": "addParam(\"data\", json_input)\nvariableFromJSON(json_input, \"id\", result_id)\naddResult(result_id)",
+ "test_inputs": {
+ "data": "{\"id\":\"item-42\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", result_id)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "result_id": "item-42"
+ },
+ "variables": {
+ "data": "{\"id\":\"item-42\"}",
+ "json_input": {
+ "id": "item-42"
+ },
+ "result_id": "item-42"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.281,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.08,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.039,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0110",
+ "text": "Parse a JSON payload and extract the status field",
+ "code": "addParam(\"payload\", raw_json)\nvariableFromJSON(raw_json, \"status\", status_value)\naddResult(status_value)",
+ "test_inputs": {
+ "payload": "{\"status\":\"active\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", status_value)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "status_value": "active"
+ },
+ "variables": {
+ "payload": "{\"status\":\"active\"}",
+ "raw_json": {
+ "status": "active"
+ },
+ "status_value": "active"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.6489999999999999,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.083,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034999999999999996,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0111",
+ "text": "Parse a user profile JSON and extract the email",
+ "code": "addParam(\"profile\", profile_data)\nvariableFromJSON(profile_data, \"email\", email_result)\naddResult(email_result)",
+ "test_inputs": {
+ "profile": "{\"email\":\"test@example.com\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", email_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "email_result": "test@example.com"
+ },
+ "variables": {
+ "profile": "{\"email\":\"test@example.com\"}",
+ "profile_data": {
+ "email": "test@example.com"
+ },
+ "email_result": "test@example.com"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.27099999999999996,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.076,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.043000000000000003,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0112",
+ "text": "Parse a config JSON and extract the timeout setting",
+ "code": "addParam(\"config\", config_str)\nvariableFromJSON(config_str, \"timeout\", timeout_val)\naddResult(timeout_val)",
+ "test_inputs": {
+ "config": "{\"timeout\":30}"
+ },
+ "test_list": [
+ "re.match(r\".*\", timeout_val)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "timeout_val": 30
+ },
+ "variables": {
+ "config": "{\"timeout\":30}",
+ "config_str": {
+ "timeout": 30
+ },
+ "timeout_val": 30
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.193,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.062,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.034,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0113",
+ "text": "Parse an error response JSON and extract the message",
+ "code": "addParam(\"error\", error_json)\nvariableFromJSON(error_json, \"message\", error_message)\naddResult(error_message)",
+ "test_inputs": {
+ "error": "{\"message\":\"Not found\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", error_message)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "error_message": "Not found"
+ },
+ "variables": {
+ "error": "{\"message\":\"Not found\"}",
+ "error_json": {
+ "message": "Not found"
+ },
+ "error_message": "Not found"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.212,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.063,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.056,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0114",
+ "text": "Parse an API response JSON and extract the version",
+ "code": "addParam(\"response\", api_resp)\nvariableFromJSON(api_resp, \"version\", version)\naddResult(version)",
+ "test_inputs": {
+ "response": "{\"version\":\"2.0\"}"
+ },
+ "test_list": [
+ "re.match(r\".*\", version)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "version": "2.0"
+ },
+ "variables": {
+ "response": "{\"version\":\"2.0\"}",
+ "api_resp": {
+ "version": "2.0"
+ },
+ "version": "2.0"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.192,
+ "success": true
+ },
+ {
+ "command": "variableFromJSON",
+ "duration_ms": 0.058,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.032,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0115",
+ "text": "Define a function that sums a range of numbers using a loop",
+ "code": "function sumRange(n){\n total = 0\n startLoop(i, 1, n)\n total = total + i\n endLoop()\n return(total)\n}\naddParam(\"n\", max_n)\nsum_result = sumRange(max_n)\naddResult(sum_result)",
+ "test_inputs": {
+ "n": 5
+ },
+ "test_list": [
+ "re.match(r\"^15$\", sum_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "sum_result": 15
+ },
+ "variables": {
+ "n": 5,
+ "max_n": 5,
+ "total": 15,
+ "i": 5,
+ "sum_result": 15
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.17200000000000001,
+ "success": true
+ },
+ {
+ "command": "sumRange",
+ "duration_ms": 0.7829999999999999,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.082,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0116",
+ "text": "Define a function that counts items from 1 to a given total using a loop",
+ "code": "function countItems(total){\n count = 0\n startLoop(i, 1, total)\n count = count + 1\n endLoop()\n return(count)\n}\naddParam(\"total\", item_total)\nfinal_count = countItems(item_total)\naddResult(final_count)",
+ "test_inputs": {
+ "total": 3
+ },
+ "test_list": [
+ "re.match(r\"^3$\", final_count)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "final_count": 3
+ },
+ "variables": {
+ "total": 3,
+ "item_total": 3,
+ "count": 3,
+ "i": 3,
+ "final_count": 3
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.273,
+ "success": true
+ },
+ {
+ "command": "countItems",
+ "duration_ms": 0.347,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.067,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0117",
+ "text": "Define a function that sums numbers from 1 to n using a loop",
+ "code": "function sumRange(n){\n total = 0\n startLoop(i, 1, n)\n total = total + i\n endLoop()\n return(total)\n}\naddParam(\"n\", max_n)\nsum_result = sumRange(max_n)\naddResult(sum_result)",
+ "test_inputs": {
+ "n": 5
+ },
+ "test_list": [
+ "re.match(r\"^15$\", sum_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "sum_result": 15
+ },
+ "variables": {
+ "n": 5,
+ "max_n": 5,
+ "total": 15,
+ "i": 5,
+ "sum_result": 15
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.212,
+ "success": true
+ },
+ {
+ "command": "sumRange",
+ "duration_ms": 0.328,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.047,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0118",
+ "text": "Define a function that builds a repeated string using a loop",
+ "code": "function repeatStr(s, times){\n result = \"\"\n startLoop(i, 1, times)\n result = result + s\n endLoop()\n return(result)\n}\naddParam(\"str\", input_str)\nrepeated = repeatStr(input_str, 3)\naddResult(repeated)",
+ "test_inputs": {
+ "str": "ab"
+ },
+ "test_list": [
+ "re.match(r\"^ababab$\", repeated)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "repeated": "ababab"
+ },
+ "variables": {
+ "str": "ab",
+ "input_str": "ab",
+ "result": "ababab",
+ "i": 3,
+ "repeated": "ababab"
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.245,
+ "success": true
+ },
+ {
+ "command": "repeatStr",
+ "duration_ms": 0.30200000000000005,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.048,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0119",
+ "text": "Define a function that accumulates a running total from 1 to n",
+ "code": "function runningTotal(max_val){\n acc = 0\n startLoop(j, 1, max_val)\n acc = acc + j\n endLoop()\n return(acc)\n}\naddParam(\"max\", upper_bound)\ntotal_result = runningTotal(upper_bound)\naddResult(total_result)",
+ "test_inputs": {
+ "max": 4
+ },
+ "test_list": [
+ "re.match(r\"^10$\", total_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "total_result": 10
+ },
+ "variables": {
+ "max": 4,
+ "upper_bound": 4,
+ "acc": 10,
+ "j": 4,
+ "total_result": 10
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.17300000000000001,
+ "success": true
+ },
+ {
+ "command": "runningTotal",
+ "duration_ms": 0.749,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.07100000000000001,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0120",
+ "text": "Define a function that triples a number using a loop",
+ "code": "function triple(n){\n result = 0\n startLoop(i, 1, 3)\n result = result + n\n endLoop()\n return(result)\n}\naddParam(\"base\", base_num)\ntriple_result = triple(base_num)\naddResult(triple_result)",
+ "test_inputs": {
+ "base": 4
+ },
+ "test_list": [
+ "re.match(r\"^12$\", triple_result)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "triple_result": 12
+ },
+ "variables": {
+ "base": 4,
+ "base_num": 4,
+ "result": 12,
+ "i": 3,
+ "triple_result": 12
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.19699999999999998,
+ "success": true
+ },
+ {
+ "command": "triple",
+ "duration_ms": 0.3,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.063,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0121",
+ "text": "Query items from DB and loop to count matching records",
+ "code": "ormAccessSelect(connector, \"SELECT id FROM products WHERE active=1\", product_ids)\ngetListLen(product_ids, total_products)\ncount = 0\nstartLoop(i, 0, total_products)\n count = count + 1\nendLoop()\naddResult(count)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^\\d+$\", count)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "count": 1
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT id FROM products WHERE active": "1\", product_ids)",
+ "total_products": 0,
+ "count": 1,
+ "i": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "assign",
+ "duration_ms": 0.09000000000000001,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.164,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.08399999999999999,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 0.40900000000000003,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.094,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0122",
+ "text": "Fetch orders and iterate to build summary",
+ "code": "ormAccessSelect(connector, \"SELECT total FROM orders WHERE status='complete'\", order_totals)\ngetListLen(order_totals, num_orders)\nstartLoop(i, 1, num_orders)\n addVar(processed, i)\nendLoop()\naddResult(num_orders)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^\\d+$\", num_orders)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "num_orders": 0
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT total FROM orders WHERE status": "'complete'\", order_totals)",
+ "num_orders": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "assign",
+ "duration_ms": 0.16699999999999998,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.23399999999999999,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 0.027,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.078,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0123",
+ "text": "Get user list from DB and loop to process each user",
+ "code": "ormAccessSelect(connector, \"SELECT id, name FROM users LIMIT 5\", users)\ngetListLen(users, user_count)\nstartLoop(i, 1, user_count)\n addVar(current_idx, i)\nendLoop()\naddResult(user_count)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^\\d+$\", user_count)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "user_count": 0
+ },
+ "variables": {
+ "null": [],
+ "user_count": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.257,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.085,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 0.051,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.047,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0124",
+ "text": "Query log entries and loop to count entries processed",
+ "code": "ormAccessSelect(connector, \"SELECT level FROM logs LIMIT 10\", log_entries)\ngetListLen(log_entries, log_count)\nerror_count = 0\nstartLoop(i, 1, log_count)\n error_count = error_count + 1\nendLoop()\naddResult(error_count)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^\\d+$\", error_count)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "error_count": 0
+ },
+ "variables": {
+ "null": [],
+ "log_count": 0,
+ "error_count": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "ormAccessSelect",
+ "duration_ms": 0.201,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.095,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.101,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 0.013,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.045000000000000005,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0125",
+ "text": "Fetch categories and loop to build a numbered list",
+ "code": "ormAccessSelect(connector, \"SELECT name FROM categories WHERE active=1\", categories)\ngetListLen(categories, cat_count)\nstartLoop(i, 1, cat_count)\n addVar(last_idx, i)\nendLoop()\naddResult(cat_count)",
+ "test_inputs": {},
+ "test_list": [
+ "re.match(r\"^\\d+$\", cat_count)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "cat_count": 0
+ },
+ "variables": {
+ "ormAccessSelect(connector, \"SELECT name FROM categories WHERE active": "1\", categories)",
+ "cat_count": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "assign",
+ "duration_ms": 0.11299999999999999,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.213,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 0.024,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.041,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0126",
+ "text": "Get product IDs from DB and iterate to process each",
+ "code": "addParam(\"category\", cat_filter)\normAccessSelect(connector, \"SELECT id FROM products WHERE category='electronics'\", prod_ids)\ngetListLen(prod_ids, prod_count)\nstartLoop(k, 1, prod_count)\n addVar(last_prod, k)\nendLoop()\naddResult(prod_count)",
+ "test_inputs": {
+ "category": "electronics"
+ },
+ "test_list": [
+ "re.match(r\"^electronics$\", cat_filter)"
+ ],
+ "execution_message": {
+ "success": true,
+ "result": {
+ "prod_count": 0
+ },
+ "variables": {
+ "category": "electronics",
+ "cat_filter": "electronics",
+ "ormAccessSelect(connector, \"SELECT id FROM products WHERE category": "'electronics'\", prod_ids)",
+ "prod_count": 0
+ },
+ "assertion_result": true,
+ "logs": [
+ {
+ "command": "addParam",
+ "duration_ms": 0.206,
+ "success": true
+ },
+ {
+ "command": "assign",
+ "duration_ms": 0.132,
+ "success": true
+ },
+ {
+ "command": "getListLen",
+ "duration_ms": 0.111,
+ "success": true
+ },
+ {
+ "command": "startLoop",
+ "duration_ms": 0.015000000000000001,
+ "success": true
+ },
+ {
+ "command": "addResult",
+ "duration_ms": 0.033,
+ "success": true
+ }
+ ],
+ "http_status": 200
+ },
+ "passed": true,
+ "error": "",
+ "Local_Language_Server_Execution": ""
+ },
+ {
+ "source_file": "mbpp_avap_prior.json",
+ "task_id": "PRIOR-0127",
+ "text": "Import math library and define a function that doubles a number",
+ "code": "import