{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "9d524159",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pprint\n",
    "\n",
    "from llama_cpp.llama import Llama, LlamaGrammar\n",
    "\n",
    "from src.config import settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "330f1975",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Concatenate every *.txt BNF fragment under ingestion/code/BNF into one string.\n",
    "# The files are sorted because Path.glob yields entries in arbitrary,\n",
    "# filesystem-dependent order, which would make `bnf` differ between machines.\n",
    "bnf_dir = settings.proj_root / \"ingestion/code/BNF/\"\n",
    "bnf_parts = []\n",
    "\n",
    "for file_path in sorted(bnf_dir.glob(\"*.txt\")):\n",
    "    with file_path.open(\"r\", encoding=\"utf-8\") as file:\n",
    "        bnf_parts.append(file.read())\n",
    "\n",
    "# `bnf` is only ever bound to the final string (never to the intermediate list).\n",
    "bnf = \"\\n\".join(bnf_parts)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "68887173",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand-written grammar compiled with LlamaGrammar.from_string and passed as\n",
    "# `grammar=` to the completion call to constrain the generated text.\n",
    "# NOTE(review): outside comments, string literals and the literal \" \" in\n",
    "# include/import, no rule admits spaces or tabs, so constrained statements\n",
    "# cannot contain whitespace between tokens (only eol). Confirm this is intended.\n",
    "grammar_string = r'''\n",
    "root ::= program\n",
    "\n",
    "program ::= (line | block-comment)*\n",
    "\n",
    "line ::= statement-with-comment eol | trailing-comment eol | eol\n",
    "statement-with-comment ::= statement trailing-comment? | statement?\n",
    "trailing-comment ::= line-comment | doc-comment\n",
    "\n",
    "eol ::= \"\\r\\n\" | \"\\n\"\n",
    "\n",
    "doc-comment ::= \"///\" any-text\n",
    "line-comment ::= \"//\" any-text\n",
    "block-comment ::= \"/*\" block-comment-body \"*/\"\n",
    "\n",
    "any-text ::= any-text-char*\n",
    "any-text-char ::= [^\\r\\n]\n",
    "\n",
    "block-comment-body ::= block-comment-part*\n",
    "block-comment-part ::= [^*] | \"*\" [^/]\n",
    "\n",
    "statement ::= assignment | method-call-stmt | function-call-stmt | function-decl | return-stmt | system-command | io-command | control-flow | async-command | connector-cmd | db-command | http-command | util-command | modularity-cmd\n",
    "\n",
    "assignment ::= identifier \"=\" expression\n",
    "\n",
    "function-call-stmt ::= identifier \"(\" argument-list? \")\"\n",
    "method-call-stmt ::= identifier \"=\" identifier \".\" identifier \"(\" argument-list? \")\"\n",
    "\n",
    "system-command ::= register-cmd | addvar-cmd\n",
    "register-cmd ::= \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" list-display \",\" stringliteral \",\" identifier \",\" identifier \")\"\n",
    "addvar-cmd ::= \"addVar(\" addvar-arg \",\" addvar-arg \")\"\n",
    "addvar-arg ::= identifier | literal | \"$\" identifier\n",
    "\n",
    "system-variable ::= \"_status\"\n",
    "\n",
    "io-command ::= addparam-cmd | getlistlen-cmd | addresult-cmd | getparamlist-cmd\n",
    "addparam-cmd ::= \"addParam(\" stringliteral \",\" identifier \")\"\n",
    "getlistlen-cmd ::= \"getListLen(\" identifier \",\" identifier \")\"\n",
    "getparamlist-cmd ::= \"getQueryParamList(\" stringliteral \",\" identifier \")\"\n",
    "addresult-cmd ::= \"addResult(\" identifier \")\"\n",
    "\n",
    "control-flow ::= if-stmt | loop-stmt | try-stmt\n",
    "\n",
    "if-stmt ::= \"if(\" if-condition \")\" eol block else-clause? \"end()\" eol\n",
    "else-clause ::= \"else()\" eol block\n",
    "if-condition ::= if-atom \",\" if-atom \",\" stringliteral | \"None\" \",\" \"None\" \",\" stringliteral\n",
    "if-atom ::= identifier | literal\n",
    "\n",
    "loop-stmt ::= \"startLoop(\" identifier \",\" expression \",\" expression \")\" eol block \"endLoop()\" eol\n",
    "\n",
    "try-stmt ::= \"try()\" eol block \"exception(\" identifier \")\" eol block \"end()\" eol\n",
    "\n",
    "block ::= line*\n",
    "\n",
    "async-command ::= go-stmt | gather-stmt\n",
    "go-stmt ::= identifier \"=\" \"go\" identifier \"(\" argument-list? \")\"\n",
    "gather-stmt ::= identifier \"=\" \"gather(\" identifier gather-tail? \")\"\n",
    "gather-tail ::= \",\" expression\n",
    "\n",
    "connector-cmd ::= connector-instantiation | connector-method-call\n",
    "connector-instantiation ::= identifier \"=\" \"avapConnector(\" stringliteral \")\"\n",
    "connector-method-call ::= connector-method-assignment? identifier \".\" identifier \"(\" argument-list? \")\"\n",
    "connector-method-assignment ::= identifier \"=\"\n",
    "\n",
    "http-command ::= req-post-cmd | req-get-cmd\n",
    "req-post-cmd ::= \"RequestPost(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
    "req-get-cmd ::= \"RequestGet(\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
    "\n",
    "db-command ::= orm-direct | orm-check | orm-create | orm-select | orm-insert | orm-update\n",
    "orm-direct ::= \"ormDirect(\" expression \",\" identifier \")\"\n",
    "orm-check ::= \"ormCheckTable(\" expression \",\" identifier \")\"\n",
    "orm-create ::= \"ormCreateTable(\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
    "orm-select ::= \"ormAccessSelect(\" orm-fields \",\" expression orm-select-tail? \",\" identifier \")\"\n",
    "orm-select-tail ::= \",\" expression\n",
    "orm-fields ::= \"*\" | expression\n",
    "orm-insert ::= \"ormAccessInsert(\" expression \",\" expression \",\" identifier \")\"\n",
    "orm-update ::= \"ormAccessUpdate(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
    "\n",
    "util-command ::= json-list-cmd | crypto-cmd | regex-cmd | datetime-cmd | stamp-cmd | string-cmd | replace-cmd\n",
    "\n",
    "json-list-cmd ::= \"variableToList(\" expression \",\" identifier \")\" | \"itemFromList(\" identifier \",\" expression \",\" identifier \")\" | \"variableFromJSON(\" identifier \",\" expression \",\" identifier \")\" | \"AddVariableToJSON(\" expression \",\" expression \",\" identifier \")\"\n",
    "\n",
    "crypto-cmd ::= \"encodeSHA256(\" identifier-or-string \",\" identifier \")\" | \"encodeMD5(\" identifier-or-string \",\" identifier \")\"\n",
    "\n",
    "regex-cmd ::= \"getRegex(\" identifier \",\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "datetime-cmd ::= \"getDateTime(\" stringliteral \",\" expression \",\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "stamp-cmd ::= \"stampToDatetime(\" expression \",\" stringliteral \",\" expression \",\" identifier \")\" | \"getTimeStamp(\" stringliteral \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
    "\n",
    "string-cmd ::= \"randomString(\" expression \",\" identifier \")\"\n",
    "\n",
    "replace-cmd ::= \"replace(\" identifier-or-string \",\" stringliteral \",\" stringliteral \",\" identifier \")\"\n",
    "\n",
    "function-decl ::= \"function\" identifier \"(\" param-list? \")\" \"{\" eol block \"}\" eol\n",
    "param-list ::= identifier (\",\" identifier)*\n",
    "\n",
    "return-stmt ::= \"return(\" expression? \")\"\n",
    "\n",
    "modularity-cmd ::= include-stmt | import-stmt\n",
    "include-stmt ::= \"include\" \" \" stringliteral\n",
    "import-stmt ::= \"import\" \" \" (\"<\" identifier \">\" | stringliteral)\n",
    "\n",
    "expression ::= logical-or\n",
    "\n",
    "logical-or ::= logical-and (\"or\" logical-and)*\n",
    "logical-and ::= logical-not (\"and\" logical-not)*\n",
    "logical-not ::= \"not\" logical-not | comparison\n",
    "\n",
    "comparison ::= arithmetic (comp-op arithmetic)*\n",
    "comp-op ::= \"==\" | \"!=\" | \"<=\" | \">=\" | \"<\" | \">\" | \"in\" | \"is\"\n",
    "\n",
    "arithmetic ::= term (add-op term)*\n",
    "add-op ::= \"+\" | \"-\"\n",
    "\n",
    "term ::= factor (mul-op factor)*\n",
    "mul-op ::= \"*\" | \"/\" | \"%\"\n",
    "\n",
    "factor ::= unary-op factor | power\n",
    "unary-op ::= \"+\" | \"-\"\n",
    "\n",
    "power ::= primary power-tail?\n",
    "power-tail ::= \"**\" factor\n",
    "\n",
    "primary ::= atom postfix-part*\n",
    "postfix-part ::= \".\" identifier | \"[\" subscript \"]\" | \"(\" argument-list? \")\"\n",
    "\n",
    "subscript ::= expression | slice-expr\n",
    "slice-expr ::= expression? \":\" expression? slice-step?\n",
    "slice-step ::= \":\" expression?\n",
    "\n",
    "atom ::= identifier | \"$\" identifier | literal | \"(\" expression \")\" | list-display | dict-display\n",
    "\n",
    "list-display ::= \"[\" list-display-body? \"]\"\n",
    "list-display-body ::= list-comprehension | argument-list\n",
    "list-comprehension ::= expression \"for\" identifier \"in\" expression if-clause?\n",
    "if-clause ::= \"if\" expression\n",
    "\n",
    "dict-display ::= \"{\" key-datum-list? \"}\"\n",
    "key-datum-list ::= key-datum (\",\" key-datum)*\n",
    "key-datum ::= expression \":\" expression\n",
    "\n",
    "argument-list ::= expression (\",\" expression)*\n",
    "\n",
    "literal ::= stringliteral | number | boolean | \"None\"\n",
    "boolean ::= \"True\" | \"False\"\n",
    "\n",
    "number ::= floatnumber | integer\n",
    "integer ::= digit+\n",
    "floatnumber ::= digit+ \".\" digit* | \".\" digit+\n",
    "digit ::= [0-9]\n",
    "\n",
    "stringliteral ::= \"\\\"\" text-double \"\\\"\" | \"'\" text-single \"'\"\n",
    "escape-sequence ::= \"\\\\\" escape-char\n",
    "escape-char ::= \"\\\"\" | \"'\" | \"\\\\\" | \"n\" | \"t\" | \"r\" | \"0\"\n",
    "\n",
    "text-double ::= (text-double-char | escape-sequence)*\n",
    "text-single ::= (text-single-char | escape-sequence)*\n",
    "\n",
    "text-double-char ::= [^\"\\\\]\n",
    "text-single-char ::= [^'\\\\]\n",
    "\n",
    "identifier-or-string ::= identifier | stringliteral\n",
    "\n",
    "identifier ::= ident-start ident-rest*\n",
    "ident-start ::= [A-Za-z_]\n",
    "ident-rest ::= [A-Za-z0-9_]\n",
    "'''\n",
    "grammar = LlamaGrammar.from_string(grammar_string)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "e693a3fa",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "llama_model_loader: loaded meta data with 33 key-value pairs and 338 tensors from /home/acano/PycharmProjects/assistance-engine/data/models/qwen2.5-coder-1.5b-q8_0.gguf (version GGUF V3 (latest))\n",
      "llama_model_loader: Dumping metadata keys/values. 
Note: KV overrides do not apply in this output.\n", "llama_model_loader: - kv 0: general.architecture str = qwen2\n", "llama_model_loader: - kv 1: general.type str = model\n", "llama_model_loader: - kv 2: general.name str = Qwen2.5 Coder 1.5B\n", "llama_model_loader: - kv 3: general.basename str = Qwen2.5-Coder\n", "llama_model_loader: - kv 4: general.size_label str = 1.5B\n", "llama_model_loader: - kv 5: general.license str = apache-2.0\n", "llama_model_loader: - kv 6: general.license.link str = https://huggingface.co/Qwen/Qwen2.5-C...\n", "llama_model_loader: - kv 7: general.base_model.count u32 = 1\n", "llama_model_loader: - kv 8: general.base_model.0.name str = Qwen2.5 1.5B\n", "llama_model_loader: - kv 9: general.base_model.0.organization str = Qwen\n", "llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/Qwen/Qwen2.5-1.5B\n", "llama_model_loader: - kv 11: general.tags arr[str,5] = [\"code\", \"qwen\", \"qwen-coder\", \"codeq...\n", "llama_model_loader: - kv 12: general.languages arr[str,1] = [\"en\"]\n", "llama_model_loader: - kv 13: qwen2.block_count u32 = 28\n", "llama_model_loader: - kv 14: qwen2.context_length u32 = 32768\n", "llama_model_loader: - kv 15: qwen2.embedding_length u32 = 1536\n", "llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 8960\n", "llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 12\n", "llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2\n", "llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000\n", "llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001\n", "llama_model_loader: - kv 21: general.file_type u32 = 7\n", "llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2\n", "llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", 
\"&\", \"'\", ...\n", "llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n", "llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = [\"Ġ Ġ\", \"ĠĠ ĠĠ\", \"i n\", \"Ġ t\",...\n", "llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151643\n", "llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643\n", "llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643\n", "llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false\n", "llama_model_loader: - kv 31: tokenizer.chat_template str = {%- if tools %}\\n {{- '<|im_start|>...\n", "llama_model_loader: - kv 32: general.quantization_version u32 = 2\n", "llama_model_loader: - type f32: 141 tensors\n", "llama_model_loader: - type q8_0: 197 tensors\n", "print_info: file format = GGUF V3 (latest)\n", "print_info: file type = Q8_0\n", "print_info: file size = 1.53 GiB (8.50 BPW) \n", "init_tokenizer: initializing tokenizer for type 2\n", "load: control token: 151660 '<|fim_middle|>' is not marked as EOG\n", "load: control token: 151659 '<|fim_prefix|>' is not marked as EOG\n", "load: control token: 151653 '<|vision_end|>' is not marked as EOG\n", "load: control token: 151648 '<|box_start|>' is not marked as EOG\n", "load: control token: 151646 '<|object_ref_start|>' is not marked as EOG\n", "load: control token: 151649 '<|box_end|>' is not marked as EOG\n", "load: control token: 151655 '<|image_pad|>' is not marked as EOG\n", "load: control token: 151651 '<|quad_end|>' is not marked as EOG\n", "load: control token: 151647 '<|object_ref_end|>' is not marked as EOG\n", "load: control token: 151652 '<|vision_start|>' is not marked as EOG\n", "load: control token: 151654 '<|vision_pad|>' is not marked as EOG\n", "load: control token: 151656 '<|video_pad|>' is not marked as EOG\n", "load: control token: 151644 '<|im_start|>' is not marked as EOG\n", "load: control token: 151661 '<|fim_suffix|>' is 
not marked as EOG\n", "load: control token: 151650 '<|quad_start|>' is not marked as EOG\n", "load: printing all EOG tokens:\n", "load: - 151643 ('<|endoftext|>')\n", "load: - 151645 ('<|im_end|>')\n", "load: - 151662 ('<|fim_pad|>')\n", "load: - 151663 ('<|repo_name|>')\n", "load: - 151664 ('<|file_sep|>')\n", "load: special tokens cache size = 22\n", "load: token to piece cache size = 0.9310 MB\n", "print_info: arch = qwen2\n", "print_info: vocab_only = 0\n", "print_info: n_ctx_train = 32768\n", "print_info: n_embd = 1536\n", "print_info: n_layer = 28\n", "print_info: n_head = 12\n", "print_info: n_head_kv = 2\n", "print_info: n_rot = 128\n", "print_info: n_swa = 0\n", "print_info: is_swa_any = 0\n", "print_info: n_embd_head_k = 128\n", "print_info: n_embd_head_v = 128\n", "print_info: n_gqa = 6\n", "print_info: n_embd_k_gqa = 256\n", "print_info: n_embd_v_gqa = 256\n", "print_info: f_norm_eps = 0.0e+00\n", "print_info: f_norm_rms_eps = 1.0e-06\n", "print_info: f_clamp_kqv = 0.0e+00\n", "print_info: f_max_alibi_bias = 0.0e+00\n", "print_info: f_logit_scale = 0.0e+00\n", "print_info: f_attn_scale = 0.0e+00\n", "print_info: n_ff = 8960\n", "print_info: n_expert = 0\n", "print_info: n_expert_used = 0\n", "print_info: causal attn = 1\n", "print_info: pooling type = -1\n", "print_info: rope type = 2\n", "print_info: rope scaling = linear\n", "print_info: freq_base_train = 1000000.0\n", "print_info: freq_scale_train = 1\n", "print_info: n_ctx_orig_yarn = 32768\n", "print_info: rope_finetuned = unknown\n", "print_info: model type = 1.5B\n", "print_info: model params = 1.54 B\n", "print_info: general.name = Qwen2.5 Coder 1.5B\n", "print_info: vocab type = BPE\n", "print_info: n_vocab = 151936\n", "print_info: n_merges = 151387\n", "print_info: BOS token = 151643 '<|endoftext|>'\n", "print_info: EOS token = 151643 '<|endoftext|>'\n", "print_info: EOT token = 151645 '<|im_end|>'\n", "print_info: PAD token = 151643 '<|endoftext|>'\n", "print_info: LF token = 198 'Ċ'\n", 
"print_info: FIM PRE token = 151659 '<|fim_prefix|>'\n", "print_info: FIM SUF token = 151661 '<|fim_suffix|>'\n", "print_info: FIM MID token = 151660 '<|fim_middle|>'\n", "print_info: FIM PAD token = 151662 '<|fim_pad|>'\n", "print_info: FIM REP token = 151663 '<|repo_name|>'\n", "print_info: FIM SEP token = 151664 '<|file_sep|>'\n", "print_info: EOG token = 151643 '<|endoftext|>'\n", "print_info: EOG token = 151645 '<|im_end|>'\n", "print_info: EOG token = 151662 '<|fim_pad|>'\n", "print_info: EOG token = 151663 '<|repo_name|>'\n", "print_info: EOG token = 151664 '<|file_sep|>'\n", "print_info: max token length = 256\n", "load_tensors: loading model tensors, this can take a while... (mmap = true)\n", "load_tensors: layer 0 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 1 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 2 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 3 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 4 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 5 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 6 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 7 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 8 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 9 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 10 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 11 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 12 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 13 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 14 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 15 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 16 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 17 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 18 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 19 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 20 assigned to 
device CPU, is_swa = 0\n", "load_tensors: layer 21 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 22 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 23 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 24 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 25 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 26 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 27 assigned to device CPU, is_swa = 0\n", "load_tensors: layer 28 assigned to device CPU, is_swa = 0\n", "load_tensors: tensor 'token_embd.weight' (q8_0) (and 338 others) cannot be used with preferred buffer type CPU_REPACK, using CPU instead\n", "load_tensors: CPU_Mapped model buffer size = 1564.62 MiB\n", "......................................................................................\n", "llama_context: constructing llama_context\n", "llama_context: n_seq_max = 1\n", "llama_context: n_ctx = 512\n", "llama_context: n_ctx_per_seq = 512\n", "llama_context: n_batch = 512\n", "llama_context: n_ubatch = 512\n", "llama_context: causal_attn = 1\n", "llama_context: flash_attn = 0\n", "llama_context: kv_unified = false\n", "llama_context: freq_base = 1000000.0\n", "llama_context: freq_scale = 1\n", "llama_context: n_ctx_per_seq (512) < n_ctx_train (32768) -- the full capacity of the model will not be utilized\n", "set_abort_callback: call\n", "llama_context: CPU output buffer size = 0.58 MiB\n", "create_memory: n_ctx = 512 (padded)\n", "llama_kv_cache_unified: layer 0: dev = CPU\n", "llama_kv_cache_unified: layer 1: dev = CPU\n", "llama_kv_cache_unified: layer 2: dev = CPU\n", "llama_kv_cache_unified: layer 3: dev = CPU\n", "llama_kv_cache_unified: layer 4: dev = CPU\n", "llama_kv_cache_unified: layer 5: dev = CPU\n", "llama_kv_cache_unified: layer 6: dev = CPU\n", "llama_kv_cache_unified: layer 7: dev = CPU\n", "llama_kv_cache_unified: layer 8: dev = CPU\n", "llama_kv_cache_unified: layer 9: dev = CPU\n", "llama_kv_cache_unified: layer 10: dev = 
CPU\n", "llama_kv_cache_unified: layer 11: dev = CPU\n", "llama_kv_cache_unified: layer 12: dev = CPU\n", "llama_kv_cache_unified: layer 13: dev = CPU\n", "llama_kv_cache_unified: layer 14: dev = CPU\n", "llama_kv_cache_unified: layer 15: dev = CPU\n", "llama_kv_cache_unified: layer 16: dev = CPU\n", "llama_kv_cache_unified: layer 17: dev = CPU\n", "llama_kv_cache_unified: layer 18: dev = CPU\n", "llama_kv_cache_unified: layer 19: dev = CPU\n", "llama_kv_cache_unified: layer 20: dev = CPU\n", "llama_kv_cache_unified: layer 21: dev = CPU\n", "llama_kv_cache_unified: layer 22: dev = CPU\n", "llama_kv_cache_unified: layer 23: dev = CPU\n", "llama_kv_cache_unified: layer 24: dev = CPU\n", "llama_kv_cache_unified: layer 25: dev = CPU\n", "llama_kv_cache_unified: layer 26: dev = CPU\n", "llama_kv_cache_unified: layer 27: dev = CPU\n", "llama_kv_cache_unified: CPU KV buffer size = 14.00 MiB\n", "llama_kv_cache_unified: size = 14.00 MiB ( 512 cells, 28 layers, 1/1 seqs), K (f16): 7.00 MiB, V (f16): 7.00 MiB\n", "llama_context: enumerating backends\n", "llama_context: backend_ptrs.size() = 1\n", "llama_context: max_nodes = 2704\n", "llama_context: worst-case: n_tokens = 512, n_seqs = 1, n_outputs = 0\n", "graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512\n", "graph_reserve: reserving a graph for ubatch with n_tokens = 1, n_seqs = 1, n_outputs = 1\n", "graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512\n", "llama_context: CPU compute buffer size = 299.75 MiB\n", "llama_context: graph nodes = 1070\n", "llama_context: graph splits = 1\n", "CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 | \n", "Model metadata: {'tokenizer.ggml.bos_token_id': '151643', 'general.file_type': '7', 'qwen2.attention.layer_norm_rms_epsilon': '0.000001', 'tokenizer.ggml.eos_token_id': '151643', 'qwen2.rope.freq_base': '1000000.000000', 
'qwen2.attention.head_count': '12', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'gpt2', 'qwen2.feed_forward_length': '8960', 'general.architecture': 'qwen2', 'tokenizer.ggml.padding_token_id': '151643', 'qwen2.embedding_length': '1536', 'general.basename': 'Qwen2.5-Coder', 'tokenizer.ggml.add_bos_token': 'false', 'general.base_model.0.organization': 'Qwen', 'tokenizer.ggml.pre': 'qwen2', 'general.name': 'Qwen2.5 Coder 1.5B', 'general.base_model.0.name': 'Qwen2.5 1.5B', 'qwen2.block_count': '28', 'general.type': 'model', 'general.size_label': '1.5B', 'tokenizer.chat_template': '{%- if tools %}\\n {{- \\'<|im_start|>system\\\\n\\' }}\\n {%- if messages[0][\\'role\\'] == \\'system\\' %}\\n {{- messages[0][\\'content\\'] }}\\n {%- else %}\\n {{- \\'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\\' }}\\n {%- endif %}\\n {{- \"\\\\n\\\\n# Tools\\\\n\\\\nYou may call one or more functions to assist with the user query.\\\\n\\\\nYou are provided with function signatures within XML tags:\\\\n\" }}\\n {%- for tool in tools %}\\n {{- \"\\\\n\" }}\\n {{- tool | tojson }}\\n {%- endfor %}\\n {{- \"\\\\n\\\\n\\\\nFor each function call, return a json object with function name and arguments within XML tags:\\\\n\\\\n{\\\\\"name\\\\\": , \\\\\"arguments\\\\\": }\\\\n<|im_end|>\\\\n\" }}\\n{%- else %}\\n {%- if messages[0][\\'role\\'] == \\'system\\' %}\\n {{- \\'<|im_start|>system\\\\n\\' + messages[0][\\'content\\'] + \\'<|im_end|>\\\\n\\' }}\\n {%- else %}\\n {{- \\'<|im_start|>system\\\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\\\n\\' }}\\n {%- endif %}\\n{%- endif %}\\n{%- for message in messages %}\\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\\n {{- \\'<|im_start|>\\' + message.role + \\'\\\\n\\' + message.content + \\'<|im_end|>\\' + \\'\\\\n\\' }}\\n {%- elif message.role == \"assistant\" %}\\n {{- \\'<|im_start|>\\' + message.role }}\\n {%- if message.content %}\\n {{- \\'\\\\n\\' + message.content }}\\n {%- endif %}\\n {%- for tool_call in message.tool_calls %}\\n {%- if tool_call.function is defined %}\\n {%- set tool_call = tool_call.function %}\\n {%- endif %}\\n {{- \\'\\\\n\\\\n{\"name\": \"\\' }}\\n {{- tool_call.name }}\\n {{- \\'\", \"arguments\": \\' }}\\n {{- tool_call.arguments | tojson }}\\n {{- \\'}\\\\n\\' }}\\n {%- endfor %}\\n {{- \\'<|im_end|>\\\\n\\' }}\\n {%- elif message.role == \"tool\" %}\\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\\n {{- \\'<|im_start|>user\\' }}\\n {%- endif %}\\n {{- \\'\\\\n\\\\n\\' }}\\n {{- message.content }}\\n {{- \\'\\\\n\\' }}\\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\\n {{- \\'<|im_end|>\\\\n\\' }}\\n {%- endif %}\\n {%- endif %}\\n{%- endfor %}\\n{%- if add_generation_prompt %}\\n {{- \\'<|im_start|>assistant\\\\n\\' }}\\n{%- endif %}\\n', 'qwen2.attention.head_count_kv': '2', 'general.license.link': 'https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B/blob/main/LICENSE', 'general.base_model.count': '1', 'general.license': 'apache-2.0', 'general.base_model.0.repo_url': 'https://huggingface.co/Qwen/Qwen2.5-1.5B', 'qwen2.context_length': '32768'}\n", "Available chat formats from metadata: chat_template.default\n", "Using gguf chat template: {%- if tools %}\n", " {{- '<|im_start|>system\\n' }}\n", " {%- if messages[0]['role'] == 'system' %}\n", " {{- messages[0]['content'] }}\n", " {%- else %}\n", " {{- 'You are Qwen, created by 
Alibaba Cloud. You are a helpful assistant.' }}\n", " {%- endif %}\n", " {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n", " {%- for tool in tools %}\n", " {{- \"\\n\" }}\n", " {{- tool | tojson }}\n", " {%- endfor %}\n", " {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n", "{%- else %}\n", " {%- if messages[0]['role'] == 'system' %}\n", " {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n", " {%- else %}\n", " {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n", " {%- endif %}\n", "{%- endif %}\n", "{%- for message in messages %}\n", " {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n", " {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n", " {%- elif message.role == \"assistant\" %}\n", " {{- '<|im_start|>' + message.role }}\n", " {%- if message.content %}\n", " {{- '\\n' + message.content }}\n", " {%- endif %}\n", " {%- for tool_call in message.tool_calls %}\n", " {%- if tool_call.function is defined %}\n", " {%- set tool_call = tool_call.function %}\n", " {%- endif %}\n", " {{- '\\n\\n{\"name\": \"' }}\n", " {{- tool_call.name }}\n", " {{- '\", \"arguments\": ' }}\n", " {{- tool_call.arguments | tojson }}\n", " {{- '}\\n' }}\n", " {%- endfor %}\n", " {{- '<|im_end|>\\n' }}\n", " {%- elif message.role == \"tool\" %}\n", " {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n", " {{- '<|im_start|>user' }}\n", " {%- endif %}\n", " {{- '\\n\\n' }}\n", " {{- message.content }}\n", " {{- '\\n' }}\n", " {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n", " {{- 
'<|im_end|>\\n' }}\n",
      " {%- endif %}\n",
      " {%- endif %}\n",
      "{%- endfor %}\n",
      "{%- if add_generation_prompt %}\n",
      " {{- '<|im_start|>assistant\\n' }}\n",
      "{%- endif %}\n",
      "\n",
      "Using chat eos_token: <|endoftext|>\n",
      "Using chat bos_token: <|endoftext|>\n"
     ]
    }
   ],
   "source": [
    "# Load the local GGUF model with an explicit context window.\n",
    "# The stored run used the library default (the loader log reports n_ctx = 512),\n",
    "# which a single completion filled entirely (total_tokens == 512).\n",
    "# verbose=False suppresses the llama.cpp loader log that otherwise floods the output.\n",
    "N_CTX = 2048\n",
    "\n",
    "llm_model = Llama(\n",
    "    model_path=str(settings.models_path / \"qwen2.5-coder-1.5b-q8_0.gguf\"),\n",
    "    n_ctx=N_CTX,\n",
    "    verbose=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "aa66f897",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Llama.generate: 9 prefix-match hit, remaining 1 prompt tokens to eval\n",
      "llama_perf_context_print: load time = 1762.25 ms\n",
      "llama_perf_context_print: prompt eval time = 0.00 ms / 1 tokens ( 0.00 ms per token, inf tokens per second)\n",
      "llama_perf_context_print: eval time = 64123.14 ms / 502 runs ( 127.74 ms per token, 7.83 tokens per second)\n",
      "llama_perf_context_print: total time = 133698.82 ms / 503 tokens\n",
      "llama_perf_context_print: graphs reused = 486\n"
     ]
    }
   ],
   "source": [
    "# Grammar-constrained completion, greedy decoding (temperature=0).\n",
    "# max_tokens <= 0 lets llama-cpp-python generate until the context is full, and\n",
    "# the grammar's root rule may repeat indefinitely, so the completion is capped.\n",
    "MAX_NEW_TOKENS = 256\n",
    "\n",
    "response = llm_model(\n",
    "    \"Create a simple hello world function in AVAP language\",\n",
    "    grammar=grammar,\n",
    "    max_tokens=MAX_NEW_TOKENS,\n",
    "    temperature=0,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "317b96ae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'choices': [{'finish_reason': 'length',\n",
      " 'index': 0,\n",
      " 'logprobs': None,\n",
      " 'text': '\\n'\n",
      " 'Sure.HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
      " '\\n'\n",
      " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
      " '\\n'\n",
      " 
'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n", " '\\n'\n", " 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltint'}],\n", " 'created': 
1773656986,\n",
      " 'id': 'cmpl-3c382cd0-7254-4bbc-8e71-84f97f06006a',\n",
      " 'model': '/home/acano/PycharmProjects/assistance-engine/data/models/qwen2.5-coder-1.5b-q8_0.gguf',\n",
      " 'object': 'text_completion',\n",
      " 'usage': {'completion_tokens': 502, 'prompt_tokens': 10, 'total_tokens': 512}}\n"
     ]
    }
   ],
   "source": [
    "# Pretty-print the raw completion payload (choices/text, finish_reason, usage).\n",
    "print(pprint.pformat(response))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6aa106d",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "assistance-engine",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}