From 907967411421e0ce260d99214a85e2955d0faadf Mon Sep 17 00:00:00 2001 From: pseco Date: Wed, 4 Mar 2026 13:58:38 +0100 Subject: [PATCH] working on retrieve from ES --- .../retrieval/n00 Retrieve from ES.ipynb | 370 ++++++++++++++++++ 1 file changed, 370 insertions(+) create mode 100644 scratches/pseco/evaluation/retrieval/n00 Retrieve from ES.ipynb diff --git a/scratches/pseco/evaluation/retrieval/n00 Retrieve from ES.ipynb b/scratches/pseco/evaluation/retrieval/n00 Retrieve from ES.ipynb new file mode 100644 index 0000000..98e7e8e --- /dev/null +++ b/scratches/pseco/evaluation/retrieval/n00 Retrieve from ES.ipynb @@ -0,0 +1,370 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "d751ffb9", + "metadata": {}, + "source": [ + "# Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e9ab075", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from langchain_ollama import ChatOllama, OllamaEmbeddings\n", + "from langchain_elasticsearch import ElasticsearchStore\n", + "from elasticsearch import Elasticsearch\n", + "import nltk\n", + "from pprint import pprint\n", + "from typing import Any\n", + "\n", + "from src.config import PROJ_ROOT, DATA_DIR\n", + "nltk.download(\"punkt\", quiet=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cb0c0d0c", + "metadata": {}, + "outputs": [], + "source": [ + "ES_URL = os.getenv(\"ELASTICSEARCH_LOCAL_URL\")\n", + "INDEX_NAME = os.getenv(\"ELASTICSEARCH_INDEX\")\n", + "CODE_INDEX = os.getenv(\"ELASTICSEARCH_CODE_INDEX\")\n", + "BASE_URL = os.getenv(\"OLLAMA_LOCAL_URL\")\n", + "MODEL_NAME = os.getenv(\"OLLAMA_MODEL_NAME\")\n", + "EMB_MODEL_NAME = os.getenv(\"OLLAMA_EMB_MODEL_NAME\")\n", + "LANGFUSE_PUBLIC_KEY = os.getenv(\"LANGFUSE_PUBLIC_KEY\")\n", + "LANGFUSE_SECRET_KEY = os.getenv(\"LANGFUSE_SECRET_KEY\")\n", + "LANGFUSE_HOST = os.getenv(\"LANGFUSE_HOST\")\n", + "ELASTICSEARCH_URL = os.getenv(\"ELASTICSEARCH_URL\")\n", + "\n", + "\n", + "embeddings = OllamaEmbeddings(base_url=BASE_URL, model=EMB_MODEL_NAME)\n", + "llm = ChatOllama(base_url=BASE_URL, model=MODEL_NAME, temperature=0)\n", + "\n", + "vector_store = ElasticsearchStore(\n", + " es_url=ES_URL,\n", + " index_name=INDEX_NAME,\n", + " embedding=embeddings,\n", + " query_field=\"text\",\n", + " vector_query_field=\"vector\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "94ae911b", + "metadata": {}, + "outputs": [], + "source": [ + "grammar = (DATA_DIR / \"raw\" / \"code\" / \"BNF_v3.txt\").read_text(\n", + " encoding=\"utf-8\"\n", + ")\n", + "code = (DATA_DIR / \"raw\" / \"code\" / \"Code_Snippets_v1.txt\").read_text(\n", + " encoding=\"utf-8\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "002a0436", + "metadata": {}, + "source": [ + "# Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d7da1fdf", + "metadata": {}, + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], + "source": [ + "def pretty_print_results(items: list[Any]) -> None:\n", + " \"\"\"Pretty-print retrieval results from a list.\"\"\"\n", + " for index, item in enumerate(items, start=1):\n", + " print(f\"\\n--- Result {index} ---\")\n", + "\n", + " if isinstance(item, tuple) and len(item) == 2:\n", + " document, score = item\n", + " metadata = getattr(document, \"metadata\", {})\n", + " title = metadata.get(\"title\", \"N/A\")\n", + " content = getattr(document, \"page_content\", \"\")\n", + "\n", + " print(f\"Title: {title}\")\n", + " print(f\"Score: {score:.6f}\" if isinstance(score, float) else score)\n", + " print(\"Content:\")\n", + " print(content)\n", + " continue\n", + "\n", + " pprint(item)" + ] + }, + { + "cell_type": "markdown", + "id": "dd0c0ce9", + "metadata": {}, + "source": [ + "# Test" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "41720f2c", + "metadata": {}, + "outputs": [], + "source": [ + "es = Elasticsearch(\n", + " ELASTICSEARCH_URL,\n", + " request_timeout=120,\n", + " max_retries=5,\n", + " retry_on_timeout=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "d2644e84", + "metadata": {}, + "outputs": [], + "source": [ + "query = \"Give an example of a concatenation of string in AVAP?\"" + ] + }, + { + "cell_type": "markdown", + "id": "729c7a9d", + "metadata": {}, + "source": [ + "## Documentation" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "2e596181", + "metadata": {}, + "outputs": [], + "source": [ + "doc_vector_store = ElasticsearchStore(\n", + " es_url=ES_URL,\n", + " index_name=INDEX_NAME,\n", + " embedding=embeddings,\n", + " query_field=\"text\",\n", + " vector_query_field=\"vector\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "67909bc6", + "metadata": {}, + "outputs": [], + "source": [ + "base_retriever = doc_vector_store.as_retriever(\n", + " search_type=\"similarity\",\n", + " search_kwargs={\"k\": 5}\n", + " ) " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "aa3c1712", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[(Document(metadata={'source': '10.1_Expressions.txt'}, page_content='6. Expressions in AVAP This chapter explains the meaning of expression elements in AVAP. 6.1. Arithmetic Conversions When describing an arithmetic operator in AVAP and using the phrase \"numeric arguments are converted to a common type,\" it means that the operator\\'s implementation for built-in types works as follows: If either of the arguments is a complex number, the other is converted to complex. Otherwise, if either of the arguments is a floating-point number, the other is converted to floating-point. Otherwise, both must be integers, and no conversion is needed. Additional rules may apply for certain operators. 6.2. Atoms Atoms are the most basic elements of expressions in AVAP. The simplest atoms are identifiers or literals. Forms enclosed in parentheses, brackets, or braces are also syntactically categorized as atoms. The syntax for atoms is: atom ::= identifier | literal | enclosure enclosure ::= parenth_form | list_display | dict_display | set_display | generator_expression 6.2.1. Identifiers (Names) An identifier that appears as an atom is a name. When the name is bound to an object, evaluating the atom yields that object. When a name is not bound, an attempt to evaluate it raises a NameError exception. Private Name Mangling When an identifier that occurs literally in a class definition begins with two or more underscores and does not end with two or more underscores, it is considered a private name of that class. Private names are transformed into a longer form before code is generated for them. The transformation inserts the class name, with the initial underscores removed and a single underscore inserted, in front of the name. 6.2.2. Literals AVAP supports string and bytes literals, as well as various numeric literals: literal ::= stringliteral | bytesliteral | integer | floatnumber | imagnumber Evaluating a literal produces an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. All literals correspond to immutable data types. 6.2.3. Parenthesized Forms A parenthesized form is an optional list of expressions enclosed in parentheses: parenth_form ::= \"(\" [starred_expression] \")\" A parenthesized expression produces whatever the expression list produces: if the list contains at least one comma, it produces a tuple; otherwise, it produces the single expression that makes up the list of expressions. 6.2.4. Comprehensions for Lists, Sets and Dictionaries To construct a list, set, or dictionary, AVAP provides special syntax called \"comprehension,\" each in two flavors: The contents of the container are listed explicitly. They are computed using a set of loop and filtering instructions, called a \"comprehension.\" Common syntax elements for comprehensions are: comprehension ::= assignment_expression comp_for comp_for ::= \"for\" target_list \"in\" or_test [comp_iter] comp_iter ::= comp_for | comp_if comp_if ::= \"if\" or_test [comp_iter] A comprehension consists of a single expression followed by at least one for clause and zero or more for or if clauses. In this case, the elements of the new container are those produced by considering each for or if clause as a block, nested from left to right, and evaluating the expression to produce an element each time the innermost block is reached. 6.2.5. List Displays In AVAP, lists are generated and handled differently. To construct a list, the command variableToList(variable, list) is used, and an item from the list is retrieved with itemFromList(list, index, variable_to_store_item). To get the number of elements in the list, getListLen(list, var_to_store_list_length) is used. The syntax for list displays is: list_display ::= \"[\" [starred_list | comprehension] \"]\" A list display produces a new list object, whose content is specified by a list of expressions or a comprehension. When a list of expressions is provided, its elements are evaluated from left to right and placed in the list object in that order. 6.2.6. Set Displays A set display is denoted by curly braces and is distinguished from dictionary displays by the absence of colon characters separating keys and values: set_display ::= \"{\" (starred_list | comprehension) \"}\" A set display produces a new mutable set object, whose content is specified by a sequence of expressions or a comprehension. 6.2.7. Dictionary Displays In AVAP, objects are created and managed using specific commands. An object is created with AddvariableToJSON(key, value, object_variable), and a key from the object is retrieved with variableFromJSON(object_variable, key, var_to_store_key_value). The syntax for dictionary displays is: dict_display ::= \"{\" [dict_item_list | dict_comprehension] \"}\" dict_item_list ::= dict_item (\",\" dict_item)* [\",\"] dict_item ::= expression \":\" expression | \"**\" or_expr dict_comprehension ::= expression \":\" expression comp_for A dictionary display produces a new dictionary object. If a comma-separated sequence of dictionary items is provided, they are evaluated from left to right to define the dictionary entries. Slices A slice selects a range of elements in a sequence object (e.g., a string, tuple, or list). Slices can be used as expressions or as targets in assignments or statements. The syntax for a slice is as follows: slicing ::= primary \"[\" slice_list \"]\" slice_list ::= slice_item (\",\" slice_item)* [\",\"] slice_item ::= expression | proper_slice proper_slice ::= [lower_bound] \":\" [upper_bound] [ \":\" [stride] ] lower_bound ::= expression upper_bound ::= expression stride ::= expression There is ambiguity in the formal syntax here: anything that looks like a list expression also looks like a list slice, so any subscription might be interpreted as a slice. Instead of complicating the syntax further, this is disambiguated by defining that in this case, the interpretation as a subscription takes precedence over the interpretation as a slice (this is the case if the list slice does not contain a proper slice). The semantics for a slice are as follows. The primary is indexed (using the same __getitem__() method as in a normal subscription) with a key constructed from the slice list, as follows. If the slice list contains at least one comma, the key is a tuple that contains the conversion of the slice elements; otherwise, the conversion of the single slice element is the key. The conversion of a slice element that is an expression is that expression. The conversion of a proper slice is a slice object whose start, stop, and step attributes are the values of the expressions given as the lower bound, upper bound, and step, respectively, substituting None for missing expressions. Calls A call invokes a callable object (e.g., a function) with a possibly empty series of arguments: call ::= primary \"(\" [argument_list [\",\"] | comprehension] \")\" argument_list ::= positional_arguments [\",\" starred_and_keywords] [\",\" keywords_arguments] | starred_and_keywords [\",\" keywords_arguments] | keywords_arguments positional_arguments ::= positional_item (\",\" positional_item)* positional_item ::= assignment_expression | \"*\" expression starred_and_keywords ::= (\"*\" expression | keyword_item) (\",\" \"*\" expression | \",\" keyword_item)* keywords_arguments ::= (keyword_item | \"**\" expression) (\",\" keyword_item | \",\" \"**\" expression)* keyword_item ::= identifier \"=\" expression An optional trailing comma may be present after positional and keyword arguments but does not affect the semantics. The primary must evaluate to a callable object (user-defined functions, built-in functions, built-in object methods, class objects, class instance methods, and any object with a __call__() method are callable). All argument expressions are evaluated before attempting the call. Please refer to the Function Definitions section for the syntax of formal parameter lists. If keyword arguments are present, they are first converted into positional arguments as follows. First, a list of unfilled slots is created for the formal parameters. If there are N positional arguments, they are placed in the first N slots. Then, for each keyword argument, the identifier is used to determine the corresponding slot. If the slot is already filled, a TypeError exception is raised. Otherwise, the argument is placed in the slot, filling it (even if the expression is None, it fills the slot). When all arguments have been processed, any slots that are still empty are filled with the default value from the function definition. If there are unfilled slots for which no default value is specified, a TypeError exception is raised. Otherwise, the list of filled slots is used as the argument list for the call. Implementation Details in AVAP In AVAP, variables are stored as strings, and lists and objects are managed using specific commands: Lists: To generate a list, use variableToList(variable, list). To retrieve an item from the list, use itemFromList(list, index, variable_to_store_item). To get the number of items in the list, use getListLen(list, var_to_store_list_length). Objects (dictionaries): An object is created with AddvariableToJSON(key, value, object_variable). To retrieve a key from the object, use variableFromJSON(object_variable, key, var_to_store_key_value). Usage Example Creation and management of lists: // Creating a list variableToList(\"item1\", \"myList\") variableToList(\"item2\", \"myList\") variableToList(\"item3\", \"myList\") // Retrieving an item from the list itemFromList(\"myList\", 1, \"myVariable\") // Getting the length of the list getListLen(\"myList\", \"listLength\") Creation and management of objects (dictionaries): // Creating an object AddvariableToJSON(\"key1\", \"value1\", \"myObject\") AddvariableToJSON(\"key2\", \"value2\", \"myObject\") // Retrieving a value by key from the object variableFromJSON(\"myObject\", \"key1\", \"myVariable\") In this way, lists and objects in AVAP can be manipulated using the specific functions provided for working with variables stored as strings.'),\n", + " 0.8155183),\n", + " (Document(metadata={'source': '3_Notation.txt'}, page_content='Chapter 2: Notation in AVAP™ Introduction Notation in AVAP™ refers to the conventions and rules used to write and format code in the AVAP™ programming language. Notation is essential to ensure code readability and comprehension, as well as to establish a coherent and consistent syntax across all projects. General Conventions In AVAP™, several general notation conventions are followed, similar to those used in other programming languages like Python. Some of these conventions include: Indentation: Code is structured through indentation, using white spaces or tabs to indicate the hierarchy and structure of the code. It is recommended to use four spaces for each level of indentation. Case Sensitivity: AVAP™ is case-sensitive, meaning that identifiers, variable names, and keywords must be consistently written using the same capitalization format throughout the code. Comments: Comments are used to document the code and explain its functionality. Single-line comments begin with the // symbol, while multi-line comments start with /* and end with */. Specific Notation Rules In addition to general conventions, AVAP™ follows specific notation rules for different elements of the language, including: Variables: Variable names should be descriptive and meaningful, using lowercase letters and underscores to separate words if necessary for readability (e.g., variable_name). Functions: Function names should follow the same conventions as variables, with the addition of parentheses to indicate function parameters (e.g., function_name(parameter1, parameter2)). Constants: Constants are typically written in uppercase letters with underscores separating words (e.g., EXAMPLE_CONSTANT). The descriptions of lexical analysis and syntax use a modified Backus–Naur form (BNF) grammar notation. This uses the following style of definition: ::= ::= | ::= | | ::= \"addVar(\" \",\" \")\" ::= \"=\" ::= \"\"\" \"\"\" ::= | ::= | ::= | ::= \" \" ::= | ::= | | ::= | ::= | ::= \"+\" | \"-\" | \"*\" | \"/\" ::= ::= any character except `\" ` and `\\\\` ::= \"a\" | \"b\" | \"c\" | \"d\" | \"e\" | \"f\" | \"g\" | \"h\" | \"i\" | \"j\" | \"k\" | \"l\" | \"m\" | \"n\" | \"o\" | \"p\" | \"q\" | \"r\" | \"s\" | \"t\" | \"u\" | \"v\" | \"w\" | \"x\" | \"y\" | \"z\" | \"A\" | \"B\" | \"C\" | \"D\" | \"E\" | \"F\" | \"G\" | \"H\" | \"I\" | \"J\" | \"K\" | \"L\" | \"M\" | \"N\" | \"O\" | \"P\" | \"Q\" | \"R\" | \"S\" | \"T\" | \"U\" | \"V\" | \"W\" | \"X\" | \"Y\" | \"Z\" | \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" | \"_\" ::= \"0\" | \"1\" | \"2\" | \"3\" | \"4\" | \"5\" | \"6\" | \"7\" | \"8\" | \"9\" Explanation: : A program is a list of statements. : A list of statements can be a single statement or a statement followed by another list of statements. : A statement can be a global assignment, a local assignment, or a command. : A global assignment follows the format addVar(\\'value\\', variable_name). : A local assignment follows the Python syntax variable_name = value. : A string value is enclosed in double quotes and contains string content. : The content of a string can be a string part or a string part followed by more string content. : A string part can be literal text or a variable reference. : Text is a series of characters. : A variable reference follows the format $ variable . : A variable name can be a letter or a combination of letters. : A value can be a string value, a number, or an expression. : A number can be a digit or a series of digits. : An expression can be a value or a combination of two values with an operator. : An operator can be +, -, *, or /. : A command can be any valid command syntax. : A character can be any character except double quotes and the backslash. : A letter can be an alphabetical character, a digit, or an underscore. : A digit is a number from 0 to 9. This BNF notation covers the assignment of global and local variables, as well as variable substitution in strings. Practical Example // Definition of a variable example_variable = 10 // Definition of a function example_function(parameter): // Function body result = parameter * 2 return result // Function call result = example_function(example_variable) In this example, notation conventions are used to define a variable, a function, and to call the function with a parameter. Conclusions Notation in AVAP™ is a fundamental part of software development in the language. By following clear and consistent notation conventions, developers can write and maintain code more effectively, contributing to the readability, understanding, and maintainability of the code in projects of any size and complexity. With this understanding of notation in AVAP™, developers can write clean and structured code that is easy to understand and maintain over time.'),\n", + " 0.8048278),\n", + " (Document(metadata={'source': '9_Expressions_in_avap.txt'}, page_content=\"Expressions in AVAP™ Introduction Expressions in AVAP™ are combinations of values, variables, operators, and function calls that can be evaluated to produce a result. Just like in Python, expressions in AVAP™ can be simple or complex, and they can contain a variety of elements that manipulate and process data. Types of Expressions In AVAP™, as in Python, there are several types of expressions that can be used to perform different operations and calculations. Some of the most common types of expressions include: Arithmetic: Perform mathematical operations such as addition, subtraction, multiplication, and division. Logical: Evaluate logical conditions and return boolean values, such as True or False. Comparative: Compare two values and return a result based on their relationship, such as equality, inequality, greater than, less than, etc. Assignment: Assign a value to a variable. Function Calls: Invoke functions and methods to perform specific tasks. Operators In AVAP™, as in Python, expressions can include a variety of operators that perform specific operations on data. Some of the most common operators include: Arithmetic: +, -, *, /, %, etc. Logical: and, or, not. Comparative: ==, !=, >, <, >=, <=, etc. Assignment: =, +=, -=, *=, /=, etc. Working with Lists Lists are a very versatile data structure in AVAP™ that allows you to store collections of elements of different types. Expressions in AVAP™ can involve operations and manipulations of lists, such as accessing individual elements, concatenation, searching, deletion, and more. // Definition of a list my_list = [1, 2, 3, 4, 5] // Accessing individual elements first_element = my_list[0] // Output: 1 // Concatenation of lists another_list = [6, 7, 8] combined_list = my_list + another_list // Output: [1, 2, 3, 4, 5, 6, 7, 8] // Length of a list length = len(my_list) // Output: 5 // Searching in a list is_present = 5 in my_list // Output: True // Removing elements my_list.remove(3) // Removes the element 3 from the list Practical Example Below is a practical example that illustrates the use of expressions in AVAP™ with lists: // Definition of a list of numbers numbers = [1, 2, 3, 4, 5] // Calculation of the sum of the elements total = sum(numbers) // Output: 15 // Checking if a number is present in the list is_present = 6 in numbers // Output: False Conclusions Expressions in AVAP™ are a fundamental part of programming, allowing for a wide variety of data operations and manipulations. By understanding the different types of expressions and operators, as well as working with data structures such as lists, developers can write clear and effective code that meets the program's requirements.\"),\n", + " 0.7978314),\n", + " (Document(metadata={'source': '10.3_Statements.txt'}, page_content='Simple Statements In AVAP, a simple statement consists of a single logical line. Multiple simple statements can be placed on a single line, separated by semicolons. The syntax for simple statements is: simple_stmt ::= expression_stmt | assert_stmt | assignment_stmt | augmented_assignment_stmt | annotated_assignment_stmt | pass_stmt | del_stmt | return_stmt | yield_stmt | raise_stmt | break_stmt | continue_stmt | import_stmt | future_stmt | global_stmt | nonlocal_stmt | type_stmt Here’s a brief overview of each type of simple statement: Expression Statement (expression_stmt): Executes an expression, which can be used for operations or calling functions. Assert Statement (assert_stmt): Used for debugging purposes to test conditions. Assignment Statement (assignment_stmt): Assigns values to variables or data structures. Augmented Assignment Statement (augmented_assignment_stmt): Performs an operation on a variable and assigns the result back to the variable (e.g., x += 1). Annotated Assignment Statement (annotated_assignment_stmt): Used for assigning values with annotations (e.g., type hints). Pass Statement (pass_stmt): A placeholder that does nothing; used for syntactic requirements. Del Statement (del_stmt): Deletes variables, items, or attributes. Return Statement (return_stmt): Exits a function and optionally returns a value. Yield Statement (yield_stmt): Produces a value from a generator function. Raise Statement (raise_stmt): Raises exceptions for error handling. Break Statement (break_stmt): Exits the closest enclosing loop. Continue Statement (continue_stmt): Skips the current iteration of the closest enclosing loop. Import Statement (import_stmt): Imports modules or specific components from modules. Future Statement (future_stmt): Enables features from future versions of Python. Global Statement (global_stmt): Declares variables as global within a function. Nonlocal Statement (nonlocal_stmt): Declares variables as non-local, affecting scope in nested functions. Type Statement (type_stmt): Declares or checks types (e.g., type hints). Each simple statement performs a specific task and contributes to the overall functionality of the AVAP program. Expression Statements Expression statements are used (mostly interactively) to compute and write a value, or (usually) to call a method (a function that does not return a meaningful result; in Python, methods return the value None). Other uses of expression statements are allowed and occasionally useful. The syntax for an expression statement is: expression_stmt ::= starred_expression An expression statement evaluates the list of expressions (which can be a single expression). In interactive mode, if the value is not None, it is converted to a string using the built-in function repr(), and the resulting string is written to the standard output on a line by itself (except if the result is None, in which case the called procedure produces no output). Assignment Statements Assignment statements in AVAP are used to (re)assign names to values and to modify attributes or elements of mutable objects. Here is the syntax: assignment_stmt ::= (target_list \"=\")+ (starred_expression | yield_expression) target_list ::= target (\",\" target)* [\",\"] target ::= identifier | \"(\" [target_list] \")\" | \"[\" [target_list] \"]\" | attributeref | subscription | slicing | \"*\" target Here\\'s a breakdown of how assignment statements work: Assignment Operation: An assignment statement evaluates the list of expressions and assigns the single resulting object to each of the target lists, from left to right. Recursive Definition: The assignment operation is defined recursively depending on the form of the target list. Target List: If the target list is a single object without ending in a comma, the object is assigned to that target. If the list contains a target prefixed with an asterisk, the object must be iterable with at least as many elements as targets, minus one. Elements before the starred target are assigned to the respective targets, and the remaining elements are assigned to the starred target. Single Target: If the target is an identifier (name), it is bound to the object in the current local namespace. For other targets, names are bound in the global or enclosing namespace, depending on `nonlocal`. Attribute Reference: If the target is an attribute reference, the primary expression is evaluated. It must produce an object with assignable attributes. Subscription: If the target is a subscription, the primary expression is evaluated to produce a mutable sequence or mapping object, which is then used to assign the value. Slice: If the target is a slice, the primary expression is evaluated, and the sequence object is requested to replace the slice with the assigned sequence elements. In summary, assignment statements in AVAP are crucial for assigning values to variables and modifying data structures effectively. Return Statement The return statement in AVAP is used to return the value of a desired variable from a function. Here is the syntax: return(variable_to_return): Here is an overview of how the return statement works: Function Context: The return statement can only occur within a function definition, not inside a nested class definition. Variable Evaluation: If a variable is provided, it is evaluated. If no variable is specified, None is used by default. Function Exit: The return statement exits the current function call and returns the specified value. Interaction with try-finally: When the return statement is executed within a try statement that has a finally clause, the finally clause is executed before the function exits. Generator Functions: In generator functions, the return statement indicates the end of the generator. It causes a StopIteration exception to be raised, with the returned value (if any) used to construct the StopIteration exception and set as the StopIteration.value attribute. The return statement is a fundamental part of functions and generators, allowing for the output of values and proper function termination. Raise Statement In AVAP, the raise statement is used to throw an exception. The syntax for the raise statement is as follows: raise [expression [\"from\" expression]] If no expressions are present, raise re-raises the currently handled exception, also known as the active exception. If there is no active exception, a RuntimeError is raised indicating that it is an error. Otherwise, raise evaluates the first expression as the exception object. It must be a subclass or an instance of BaseException. If it is a class, the exception instance is obtained when needed by creating an instance of the class without arguments. The type of the exception is the instance of the exception class, and the value is the instance itself. The from clause is used for exception chaining: if provided, the second expression must be another class or instance of exception. If the second expression is an exception instance, it will be attached to the raised exception as the __cause__ attribute (which is modifiable). If the expression is an exception class, the class will be instantiated and the resulting exception instance will be attached to the raised exception as the __cause__ attribute. If the raised exception is not handled, both exceptions will be printed. startLoop() try: print(1 / 0) except Exception as exc: raise RuntimeError(\"Something went wrong\") from exc endLoop() A mechanism works implicitly if a new exception is raised while an exception is already being handled. An exception may be handled by an except or finally clause, or a with statement. The previous exception is then attached as the new exception’s __context__ attribute: startLoop() try: print(1 / 0) except: raise RuntimeError(\"Something went wrong\") from None endLoop() Exception chaining can be explicitly suppressed by specifying None in the from clause: startLoop() try: print(1 / 0) except: raise RuntimeError(\"Something went wrong\") from None endLoop() Break Statement In AVAP, the break statement is used to terminate the closest enclosing loop. The syntax for the break statement is as follows: break When a break statement is encountered, it causes the loop to exit immediately, regardless of the loop\\'s condition or any remaining iterations. This effectively transfers control to the statement following the loop. The break statement is typically used within for or while loops to provide a way to exit the loop prematurely based on a certain condition. for i in range(10): if i == 5: break print(i) print(\"Loop ended\") In this example, the loop will terminate when i equals 5, and \"Loop ended\" will be printed. The numbers 0 through 4 will be printed before the loop is exited. Break Statement The break statement in AVAP is used to terminate the closest enclosing loop. Here is an overview of its behavior: Usage Context: The break statement can only occur within a for or while loop. It cannot be nested within a function or class definition inside that loop. Loop Termination: It terminates the closest enclosing loop and skips the optional else clause if the loop has one. Loop Control Target: If a for loop is terminated by break, the loop control target retains its current value. Interaction with try-finally: When break is executed within a try statement with a finally clause, the finally clause is executed before actually exiting the loop. The break statement is essential for controlling loop execution, allowing for early exit from loops and proper handling of loop cleanup. Continue Statement In AVAP, the continue statement is used to proceed with the next iteration of the closest enclosing loop. The syntax for the continue statement is as follows: continue The continue statement can only syntactically occur nested within a for or while loop, but not within a function or class definition inside that loop. When continue is used within a loop that is also handling exceptions with a try statement containing a finally clause, the finally clause is executed before the next iteration of the loop begins. for i in range(10): try: if i % 2 == 0: continue print(i) finally: print(\"In finally clause\") print(\"Loop ended\") In this example, the continue statement will skip the current iteration when i is even, but before moving to the next iteration, the finally clause will print \"In finally clause.\" For odd numbers, the loop will print the number and then \"In finally clause.\" After the loop finishes, \"Loop ended\" will be printed. Import Statement In AVAP, the import statement is used to import an entire code file and define names in the local namespace. The syntax for the import statement is as follows: import file.avap The import statement in AVAP imports an entire code file and makes it available in the local namespace. No alias is assigned to the imported file; the file is simply referred to by its name. For example: # In the \\'module.avap\\' file example_variable = 10 # In the main file import module.avap print(module.avap.example_variable) # Will print 10 In this example, the main file imports the module.avap file and can access the example_variable defined in that file using the module.avap syntax. Compound Statements In AVAP, compound statements contain (groups of) other statements; these affect or control the execution of those other statements in some way. In general, compound statements span multiple lines, though in simpler representations a complete compound statement might be contained within a single line. if statements implement traditional flow control constructs. match specifies matching patterns for variable values. Function and class definitions are also syntactically compound statements. A compound statement consists of one or more \"clauses.\" A clause consists of a header and a \"suite.\" The clause headers of a particular compound statement are all at the same level of indentation. Each clause header begins with a uniquely identifying keyword and ends with a colon. A suite is a group of statements controlled by a clause. A suite can be one or more simple statements separated by semicolons on the same line as the header, following the colon of the header, or it can be one or more statements indented on subsequent lines. Only the latter form of a suite can contain nested compound statements. Control Flow Structures in AVAP In AVAP, control flow structures include conditional statements and loops, which allow you to control the flow of execution based on conditions and iterate over a range of values. If Statements The syntax for an if statement in AVAP is: if (variable, variableValue, comparator, expression): code to execute This structure checks if the condition (variable compared to variableValue with the given comparator) is true, and if so, executes the block of code. Loops The syntax for a loop in AVAP is: startLoop(variable, from, to) code to execute endLoop() This structure initiates a loop where the variable iterates from the \\'from\\' value to the \\'to\\' value, executing the code block for each iteration. The if Statement The if statement in AVAP is used for conditional execution. The syntax is as follows: if (variable, variableValue, comparator, expression): code to execute This statement evaluates the condition specified by the variable, variableValue, comparator, and expression. It selects exactly one of the suites (blocks of code) by evaluating the expressions one by one until a true condition is found. The corresponding suite is then executed. If all conditions are false, no suites are executed. The try Statement The try statement in AVAP specifies exception handlers and/or cleanup code for a block of statements. The syntax is as follows: try(): code to execute except(): code to execute The try block contains code that might raise an exception. The except block contains code to handle exceptions raised by the try block. If an exception occurs, control is transferred to the except block. If no exception occurs, the except block is skipped. Additional information about exceptions can be found in the section Exceptions, and information about using the raise statement to throw exceptions can be found in the section The raise Statement.'),\n", + " 0.7883588),\n", + " (Document(metadata={'source': '10.4_Patterns.txt'}, page_content='Patterns in AVAP In AVAP, patterns provide a powerful way to match and destructure values. Patterns can be used in match statements to perform complex value comparisons and deconstructions. Here is a description of the available patterns and how they are used: Literal Patterns: Match specific literal values such as numbers, strings, or booleans. For example: match value: case 10: # Code to execute if value is 10 case \"hello\": # Code to execute if value is \"hello\" Variable Patterns: Capture the value of a variable. This allows you to use the matched value in the corresponding case block: match value: case x: # Code to execute, x will be assigned the value Sequence Patterns: Match sequences like lists or tuples. You can also use the * operator to capture remaining elements: match value: case [1, 2, *rest]: # Code to execute, rest will capture any additional elements Mapping Patterns: Match dictionaries or similar mappings by specifying keys and their corresponding patterns: match value: case \"key\": 42: # Code to execute if the dictionary has \"key\" with value 42 Class Patterns: Match instances of classes. You can also match specific attributes within the instance: match value: case MyClass(attr1=42): # Code to execute if value is an instance of MyClass with attr1 equal to 42 Patterns in AVAP offer a flexible approach for handling different kinds of data structures and values, making it easier to write expressive and maintainable code. OR Patterns An OR pattern in AVAP allows you to specify multiple patterns separated by vertical bars (|). The OR pattern attempts to match each of its subpatterns with the subject value in order. If any of the subpatterns match, the OR pattern is considered successful. If none of the subpatterns match, the OR pattern fails. or_pattern ::= \"|\".closed_pattern+ Here\\'s how you can use OR patterns in practice: match value: case 1 | 2 | 3: # Code to execute if value is 1, 2, or 3 case \"hello\" | \"world\": # Code to execute if value is \"hello\" or \"world\" case _: # Code to execute if value does not match any of the above In this example: The first case will match if value is either 1, 2, or 3. The second case will match if value is either \"hello\" or \"world\". The last case is a catch-all pattern that will execute if none of the previous patterns match. OR patterns provide a concise way to handle multiple possible values or types, simplifying pattern matching and making your code more readable. AS Patterns An AS pattern in AVAP is used to bind an OR pattern to a name. This allows you to match a value with an OR pattern and simultaneously capture it under a specified name for further use. The syntax for an AS pattern is: as_pattern ::= or_pattern \"as\" capture_pattern When an AS pattern is used, if the OR pattern succeeds, the subject is bound to the name specified by the capture pattern, and the AS pattern itself succeeds. Here\\'s an example of how to use AS patterns: match value: case 1 | 2 | 3 as x: print(f\"Matched a number: x\") case \"hello\" | \"world\" as greeting: print(f\"Matched a greeting: greeting\") case _: print(\"No match\") In this example: The first case matches if value is 1, 2, or 3. The matched value is bound to the name x, which is then used in the print statement. The second case matches if value is \"hello\" or \"world\". The matched value is bound to the name greeting, which is then used in the print statement. The last case is a catch-all pattern that executes if none of the previous patterns match. AS patterns are useful for capturing matched values under a name while using OR patterns, allowing for more flexible and readable pattern matching in your code. Literal Patterns In AVAP, literal patterns are used to match specific literal values, such as numbers, strings, or boolean values. The syntax for a literal pattern is: literal_pattern ::= signed_number | strings | \"None\" | \"True\" | \"False\" A literal pattern only succeeds if the value of the subject is equal to the specified literal value. Here are examples of literal patterns and their usage: match value: case 42: print(\"Matched the number 42\") case \"hello\": print(\"Matched the string \\'hello\\'\") case None: print(\"Matched None\") case True: print(\"Matched True\") case False: print(\"Matched False\") case _: print(\"No match\") In this example: case 42: matches if value is exactly 42. case \"hello\": matches if value is the string \"hello\". case None: matches if value is None. case True: matches if value is True. case False: matches if value is False. case _: is a catch-all pattern that executes if none of the previous patterns match. Literal patterns are useful for matching specific, known values and are a fundamental part of pattern matching in AVAP. Capture Patterns In AVAP, capture patterns are used to bind the subject\\'s value to a name. The syntax for a capture pattern is: capture_pattern ::= NAME Capture patterns always succeed and bind the value of the subject to the specified name. Here’s how you might use capture patterns in AVAP: match value: case x: print(f\"Captured value: x\") In this example: case x: captures whatever value is in value and binds it to the name x. The pattern always succeeds. Capture patterns are useful when you want to extract and use the value of the subject within your code, regardless of what that value is. Wildcard Patterns In AVAP, wildcard patterns are used to match any value without binding it to a name. The syntax for a wildcard pattern is: wildcard_pattern ::= \\'_\\' Wildcard patterns always succeed and do not create any bindings. They are useful when you want to ignore the value of the subject and only care about whether it matches a certain pattern. Here’s how you might use wildcard patterns in AVAP: match value: case _: print(\"Matched any value\") In this example: case _: matches any value and does not bind it to a name. The pattern always succeeds, and the code within this case will be executed regardless of the value. Wildcard patterns are particularly useful when you need to handle a broad range of possibilities and are only interested in whether a value fits a general condition, not in the value itself. Value Patterns In AVAP, value patterns are used to match specific values. The syntax for a value pattern is: value_pattern ::= attr Value patterns only succeed if the subject\\'s value matches the specified value. They are useful when you want to perform actions based on an exact value. Here’s how you might use value patterns in AVAP: match value: case 42: print(\"Matched the value 42\") case \"hello\": print(\"Matched the string \\'hello\\'\") case _: print(\"Matched something else\") In this example: case 42: matches the value 42 specifically. case \"hello\": matches the string \"hello\" specifically. case _: matches any other value not covered by the previous cases. Value patterns are ideal for scenarios where you need to check for specific values and respond accordingly. They provide precise control over the matching process. Group Patterns In AVAP, group patterns are used to group multiple patterns together. The syntax for a group pattern is: group_pattern ::= \"(\" pattern \")\" Group patterns are useful when you want to combine patterns or when patterns need to be evaluated together. They have the same effect as the pattern they contain but allow for more complex pattern structures. Here’s an example of how to use group patterns in AVAP: match value: case (42 | 43): print(\"Matched either 42 or 43\") case (name, age) if age > 18: print(f\" is an adult\") case _: print(\"Matched something else\") In this example: case (42 | 43): uses a group pattern to match either the value 42 or 43. case (name, age) if age > 18: uses a group pattern to match a tuple and includes an additional condition on the age. case _: matches any other value not covered by the previous cases. Group patterns are ideal for creating more complex matching scenarios where patterns need to be combined or grouped together. Sequence Patterns In AVAP, sequence patterns are used to match elements within sequences like lists or tuples. The syntax for sequence patterns is: sequence_pattern ::= \"[\" [maybe_sequence_pattern] \"]\" | \"(\" [open_sequence_pattern] \")\" Sequence patterns can match elements of sequences based on specific rules. Here’s how they work: List Patterns: Use square brackets [ ] to match lists. You can include patterns for the elements within the list. case [a, b, c]: print(\"Matched a list with three elements\") Tuple Patterns: Use parentheses ( ) to match tuples. Similarly, you can specify patterns for the tuple elements. case (x, y): print(\"Matched a tuple with two elements\") Sequence patterns allow for flexible and powerful matching of sequence types. They can match sequences of various lengths and structures by defining the pattern for each element. Here’s an example of using sequence patterns in a match statement: match value: case [1, 2, 3]: print(\"Matched a list with elements 1, 2, 3\") case (a, b, c) if a + b == c: print(\"Matched a tuple where a + b equals c\") case _: print(\"Matched something else\") In this example: case [1, 2, 3]: matches a list with exactly the elements 1, 2, and 3. case (a, b, c) if a + b == c: matches a tuple and includes a condition to check if a + b equals c. case _: matches any other value not covered by the previous cases. Mapping Patterns In AVAP, mapping patterns are used to match mapping elements, such as dictionaries. Here is the syntax and behavior of mapping patterns: mapping_pattern ::= { [items_pattern] } Mapping Patterns are designed to match elements within mappings, such as dictionaries. They use specific rules to determine if a pattern matches the given mapping. Syntax: Mapping patterns are enclosed in curly braces { ... }. The items_pattern specifies the pattern for the mapping items. Matching Rules: The rules for matching mapping patterns include checking for key-value pairs in the mapping and ensuring they align with the specified pattern. Usage: Mapping patterns are useful for destructuring dictionaries and other mapping types in a concise manner. Mapping patterns enhance pattern matching capabilities by allowing for specific and flexible matching of dictionary elements. Class Patterns In AVAP, class patterns are used to match instances of specific classes. Here is a detailed overview: class_pattern ::= name \"(\" [pattern_arguments \",\"?] \")\" Pattern Syntax: A class pattern specifies the class name followed by a parenthesized list of pattern_arguments. The pattern matches instances of the specified class. Matching Instances: The pattern will match if the subject is an instance of the specified class and the pattern_arguments (if any) match according to the rules defined for the pattern. Usage: Class patterns are useful for deconstructing objects based on their class and extracting values from them, enabling more precise pattern matching. These patterns provide a way to work with objects based on their class type and structure, facilitating more sophisticated pattern matching and value extraction.'),\n", + " 0.78715813)]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = doc_vector_store.similarity_search_with_score(\n", + " query=query,\n", + " k=5\n", + ")\n", + "\n", + "results" + ] + }, + { + "cell_type": "markdown", + "id": "9bf8e75a", + "metadata": {}, + "source": [ + "## Code" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "eeac86ce", + "metadata": {}, + "outputs": [], + "source": [ + "code_vector_store = ElasticsearchStore(\n", + " es_url=ES_URL,\n", + " index_name=CODE_INDEX,\n", + " embedding=embeddings,\n", + " query_field=\"text\",\n", + " vector_query_field=\"vector\",\n", + ")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "9297af72", + "metadata": {}, + "outputs": [], + "source": [ + "base_retriever = code_vector_store.as_retriever(\n", + " search_type=\"similarity\",\n", + " search_kwargs={\"k\": 5}\n", + " ) " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c588ea2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Result 1 ---\n", + "Title: Conector a Terceros (Ej. Slack)\n", + "Score: 0.835321\n", + "Content:\n", + "slack_api = avapConnector(\"U0xBQ0tfQVBJX1RPS0VO\")\n", + "\n", + "--- Result 2 ---\n", + "Title: Instanciación de Conector\n", + "Score: 0.811696\n", + "Content:\n", + "mi_db = avapConnector(\"VE9LRU5fREVCX0RFU0FSUk9MTE8=\")\n", + "\n", + "--- Result 3 ---\n", + "Title: Hola Mundo\n", + "Score: 0.765275\n", + "Content:\n", + "addVar(mensaje, \"Hola mundo desde AVAP\")\n", + "addResult(mensaje)\n", + "\n", + "--- Result 4 ---\n", + "Title: Salida de Bucle Correcta (Uso de Variable de Control)\n", + "Score: 0.743226\n", + "Content:\n", + "encontrado = False\n", + "startLoop(i, 1, 10)\n", + "if(i, 5, \"==\")\n", + "encontrado = True\n", + "// En AVAP para salir puedes forzar el índice al final\n", + "i = 11\n", + "end()\n", + "endLoop()\n", + "addResult(encontrado)\n", + "\n", + "--- Result 5 ---\n", + "Title: Referencia por Valor ($)\n", + "Score: 0.730708\n", + "Content:\n", + "addVar(base, 1000)\n", + "addVar(copia, $base) // copia toma el valor 1000, no la cadena \"$base\"\n", + "addResult(copia)\n" + ] + } + ], + "source": [ + "results = code_vector_store.similarity_search_with_score(\n", + " query=query,\n", + " k=5\n", + ")\n", + "\n", + "pretty_print_results(results)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cb8ba6a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "assistance-engine", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}