diff --git a/scripts/pipelines/tasks/chunk.py b/scripts/pipelines/tasks/chunk.py
index 06af90a..eaef395 100644
--- a/scripts/pipelines/tasks/chunk.py
+++ b/scripts/pipelines/tasks/chunk.py
@@ -4,7 +4,7 @@
 from dataclasses import replace
 from pathlib import Path
 from typing import Any, Union
-from lark import Lark
+from lark import Lark, Tree
 from chonkie import (
     Chunk,
     ElasticHandshake,
@@ -21,6 +21,70 @@
 from transformers import AutoTokenizer
 from scripts.pipelines.tasks.embeddings import OllamaEmbeddings
 from src.config import settings
+COMMAND_METADATA_NAMES = {
+    # system
+    "register_cmd": "registerEndpoint",
+    "addvar_cmd": "addVar",
+    "addparam_cmd": "addParam",
+    "getlistlen_cmd": "getListLen",
+    "getparamlist_cmd": "getQueryParamList",
+    "addresult_cmd": "addResult",
+
+    # async
+    "go_stmt": "go",
+    "gather_stmt": "gather",
+
+    # connector
+    "connector_instantiation": "avapConnector",
+
+    # http
+    "req_post_cmd": "RequestPost",
+    "req_get_cmd": "RequestGet",
+
+    # db
+    "orm_direct": "ormDirect",
+    "orm_check": "ormCheckTable",
+    "orm_create": "ormCreateTable",
+    "orm_select": "ormAccessSelect",
+    "orm_insert": "ormAccessInsert",
+    "orm_update": "ormAccessUpdate",
+
+    # util
+    "json_list_cmd": "json_list_ops",
+    "crypto_cmd": "crypto_ops",
+    "regex_cmd": "getRegex",
+    "datetime_cmd": "getDateTime",
+    "stamp_cmd": "timestamp_ops",
+    "string_cmd": "randomString",
+    "replace_cmd": "replace",
+
+    # modularity
+    "include_stmt": "include",
+    "import_stmt": "import",
+
+    # generic statements
+    "assignment": "assignment",
+    "call_stmt": "call",
+    "return_stmt": "return",
+    "if_stmt": "if",
+    "loop_stmt": "startLoop",
+    "try_stmt": "try",
+    "function_decl": "function",
+}
+
+
+def _extract_command_metadata(ast: Tree | None) -> dict[str, bool]:
+    if ast is None:
+        return {}
+
+    used_commands: set[str] = set()
+
+    for subtree in ast.iter_subtrees():
+        if subtree.data in COMMAND_METADATA_NAMES:
+            used_commands.add(COMMAND_METADATA_NAMES[subtree.data])
+
+    return {command_name: True for command_name in sorted(used_commands)}
+
 
 def _get_text(element) -> str:
     for attr in ("text", "content", "markdown"):
@@ -168,52 +232,66 @@ def fetch_documents(docs_folder_path: str, docs_extension: list[str]) -> list[Pa
     return docs_path
 
 
-def process_documents(docs_path: list[Path]) -> list[dict[str, Chunk | dict[str, Any]]]:
+def process_documents(docs_path: list[Path]) -> list[dict[str, Any]]:
     """
     Process documents by applying appropriate chefs and chunking strategies based on file type.
 
     Args:
-        docs_path (list[Path]): List of Paths to the documents to be processed
+        docs_path: List of Paths to the documents to be processed.
 
     Returns:
-        List of dicts with "chunk" (Chunk object) and "metadata" (dict with file info)
+        List of dicts with "chunk" (Chunk object) and "extra_metadata" (dict with file info).
     """
     processed_docs = []
-    specific_metadata = {}
     custom_tokenizer = AutoTokenizer.from_pretrained(settings.hf_emb_model_name)
 
     chef_md = MarkdownChef(tokenizer=custom_tokenizer)
     chef_txt = TextChef()
     chunker = TokenChunker(tokenizer=custom_tokenizer)
-    with open(settings.proj_root / "docs/BNF/avap.lark") as grammar:
-        lark_parser = Lark(grammar=grammar, parser="lalr", propagate_positions=True, start="program")
+
+    with open(settings.proj_root / "research/code_indexing/BNF/avap.lark", encoding="utf-8") as grammar:
+        lark_parser = Lark(
+            grammar.read(),
+            parser="lalr",
+            propagate_positions=True,
+            start="program",
+        )
 
     for doc_path in docs_path:
         doc_extension = doc_path.suffix.lower()
 
         if doc_extension == ".md":
             processed_doc = chef_md.process(doc_path)
             fused_doc = _merge_markdown_document(processed_doc)
             chunked_doc = fused_doc.chunks
             specific_metadata = {
                 "file_type": "avap_docs",
-                "filename": doc_path.name
+                "filename": doc_path.name,
             }
         elif doc_extension == ".avap":
             processed_doc = chef_txt.process(doc_path)
-            chunked_doc = chunker.chunk(processed_doc.content)
+            try:
+                ast = lark_parser.parse(processed_doc.content)
+            except Exception as e:
+                logger.error(f"Error parsing AVAP code in {doc_path.name}: {e}")
+                ast = None
+
+            chunked_doc = chunker.chunk(processed_doc.content)
+
             specific_metadata = {
                 "file_type": "avap_code",
                 "filename": doc_path.name,
-                "AST": str(ast)
+                **_extract_command_metadata(ast),
            }
         else:
            continue
 
-        for chunk in chunked_doc:
-            processed_docs.append({
-                "chunk": chunk,
-                "extra_metadata": {**specific_metadata}
-            })
+        for chunk in chunked_doc:
+            processed_docs.append(
+                {
+                    "chunk": chunk,
+                    "extra_metadata": {**specific_metadata},
+                }
+            )
 
     return processed_docs