610 lines
37 KiB
Plaintext
610 lines
37 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "9d524159",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pprint\n",
|
|
"\n",
|
|
"from llama_cpp.llama import Llama, LlamaGrammar\n",
|
|
"\n",
|
|
"from src.config import settings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "330f1975",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Concatenate every BNF grammar fragment shipped with the project into one\n",
"# string. sorted() makes the file order deterministic across platforms --\n",
"# Path.glob() yields files in arbitrary, OS-dependent order, which would make\n",
"# the combined grammar text differ from run to run.\n",
"bnf_dir = settings.proj_root / \"ingestion/code/BNF/\"\n",
"bnf = \"\\n\".join(\n",
"    file_path.read_text(encoding=\"utf-8\")\n",
"    for file_path in sorted(bnf_dir.glob(\"*.txt\"))\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "68887173",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Hand-written GBNF (llama.cpp grammar) for the AVAP DSL, used to constrain\n",
"# token sampling. Kept as a raw string so backslash escapes reach the GBNF\n",
"# parser untouched.\n",
"# NOTE(review): the grammar defines no whitespace rule between tokens, so\n",
"# constrained generation concatenates words with no spaces (see the sample\n",
"# completion below) -- consider threading an optional ws rule through the\n",
"# statement/expression productions.\n",
"grammar_string = r'''\n",
"root ::= program\n",
"\n",
"program ::= (line | block-comment)*\n",
"\n",
"line ::= statement-with-comment eol | trailing-comment eol | eol\n",
"statement-with-comment ::= statement trailing-comment? | statement?\n",
"trailing-comment ::= line-comment | doc-comment\n",
"\n",
"eol ::= \"\\r\\n\" | \"\\n\"\n",
"\n",
"doc-comment ::= \"///\" any-text\n",
"line-comment ::= \"//\" any-text\n",
"block-comment ::= \"/*\" block-comment-body \"*/\"\n",
"\n",
"any-text ::= any-text-char*\n",
"any-text-char ::= [^\\r\\n]\n",
"\n",
"block-comment-body ::= block-comment-part*\n",
"block-comment-part ::= [^*] | \"*\" [^/]\n",
"\n",
"statement ::= assignment | method-call-stmt | function-call-stmt | function-decl | return-stmt | system-command | io-command | control-flow | async-command | connector-cmd | db-command | http-command | util-command | modularity-cmd\n",
"\n",
"assignment ::= identifier \"=\" expression\n",
"\n",
"function-call-stmt ::= identifier \"(\" argument-list? \")\"\n",
"method-call-stmt ::= identifier \"=\" identifier \".\" identifier \"(\" argument-list? \")\"\n",
"\n",
"system-command ::= register-cmd | addvar-cmd\n",
"register-cmd ::= \"registerEndpoint(\" stringliteral \",\" stringliteral \",\" list-display \",\" stringliteral \",\" identifier \",\" identifier \")\"\n",
"addvar-cmd ::= \"addVar(\" addvar-arg \",\" addvar-arg \")\"\n",
"addvar-arg ::= identifier | literal | \"$\" identifier\n",
"\n",
"system-variable ::= \"_status\"\n",
"\n",
"io-command ::= addparam-cmd | getlistlen-cmd | addresult-cmd | getparamlist-cmd\n",
"addparam-cmd ::= \"addParam(\" stringliteral \",\" identifier \")\"\n",
"getlistlen-cmd ::= \"getListLen(\" identifier \",\" identifier \")\"\n",
"getparamlist-cmd ::= \"getQueryParamList(\" stringliteral \",\" identifier \")\"\n",
"addresult-cmd ::= \"addResult(\" identifier \")\"\n",
"\n",
"control-flow ::= if-stmt | loop-stmt | try-stmt\n",
"\n",
"if-stmt ::= \"if(\" if-condition \")\" eol block else-clause? \"end()\" eol\n",
"else-clause ::= \"else()\" eol block\n",
"if-condition ::= if-atom \",\" if-atom \",\" stringliteral | \"None\" \",\" \"None\" \",\" stringliteral\n",
"if-atom ::= identifier | literal\n",
"\n",
"loop-stmt ::= \"startLoop(\" identifier \",\" expression \",\" expression \")\" eol block \"endLoop()\" eol\n",
"\n",
"try-stmt ::= \"try()\" eol block \"exception(\" identifier \")\" eol block \"end()\" eol\n",
"\n",
"block ::= line*\n",
"\n",
"async-command ::= go-stmt | gather-stmt\n",
"go-stmt ::= identifier \"=\" \"go\" identifier \"(\" argument-list? \")\"\n",
"gather-stmt ::= identifier \"=\" \"gather(\" identifier gather-tail? \")\"\n",
"gather-tail ::= \",\" expression\n",
"\n",
"connector-cmd ::= connector-instantiation | connector-method-call\n",
"connector-instantiation ::= identifier \"=\" \"avapConnector(\" stringliteral \")\"\n",
"connector-method-call ::= connector-method-assignment? identifier \".\" identifier \"(\" argument-list? \")\"\n",
"connector-method-assignment ::= identifier \"=\"\n",
"\n",
"http-command ::= req-post-cmd | req-get-cmd\n",
"req-post-cmd ::= \"RequestPost(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
"req-get-cmd ::= \"RequestGet(\" expression \",\" expression \",\" expression \",\" identifier \",\" expression \")\"\n",
"\n",
"db-command ::= orm-direct | orm-check | orm-create | orm-select | orm-insert | orm-update\n",
"orm-direct ::= \"ormDirect(\" expression \",\" identifier \")\"\n",
"orm-check ::= \"ormCheckTable(\" expression \",\" identifier \")\"\n",
"orm-create ::= \"ormCreateTable(\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
"orm-select ::= \"ormAccessSelect(\" orm-fields \",\" expression orm-select-tail? \",\" identifier \")\"\n",
"orm-select-tail ::= \",\" expression\n",
"orm-fields ::= \"*\" | expression\n",
"orm-insert ::= \"ormAccessInsert(\" expression \",\" expression \",\" identifier \")\"\n",
"orm-update ::= \"ormAccessUpdate(\" expression \",\" expression \",\" expression \",\" expression \",\" identifier \")\"\n",
"\n",
"util-command ::= json-list-cmd | crypto-cmd | regex-cmd | datetime-cmd | stamp-cmd | string-cmd | replace-cmd\n",
"\n",
"json-list-cmd ::= \"variableToList(\" expression \",\" identifier \")\" | \"itemFromList(\" identifier \",\" expression \",\" identifier \")\" | \"variableFromJSON(\" identifier \",\" expression \",\" identifier \")\" | \"AddVariableToJSON(\" expression \",\" expression \",\" identifier \")\"\n",
"\n",
"crypto-cmd ::= \"encodeSHA256(\" identifier-or-string \",\" identifier \")\" | \"encodeMD5(\" identifier-or-string \",\" identifier \")\"\n",
"\n",
"regex-cmd ::= \"getRegex(\" identifier \",\" stringliteral \",\" identifier \")\"\n",
"\n",
"datetime-cmd ::= \"getDateTime(\" stringliteral \",\" expression \",\" stringliteral \",\" identifier \")\"\n",
"\n",
"stamp-cmd ::= \"stampToDatetime(\" expression \",\" stringliteral \",\" expression \",\" identifier \")\" | \"getTimeStamp(\" stringliteral \",\" stringliteral \",\" expression \",\" identifier \")\"\n",
"\n",
"string-cmd ::= \"randomString(\" expression \",\" identifier \")\"\n",
"\n",
"replace-cmd ::= \"replace(\" identifier-or-string \",\" stringliteral \",\" stringliteral \",\" identifier \")\"\n",
"\n",
"function-decl ::= \"function\" identifier \"(\" param-list? \")\" \"{\" eol block \"}\" eol\n",
"param-list ::= identifier (\",\" identifier)*\n",
"\n",
"return-stmt ::= \"return(\" expression? \")\"\n",
"\n",
"modularity-cmd ::= include-stmt | import-stmt\n",
"include-stmt ::= \"include\" \" \" stringliteral\n",
"import-stmt ::= \"import\" \" \" (\"<\" identifier \">\" | stringliteral)\n",
"\n",
"expression ::= logical-or\n",
"\n",
"logical-or ::= logical-and (\"or\" logical-and)*\n",
"logical-and ::= logical-not (\"and\" logical-not)*\n",
"logical-not ::= \"not\" logical-not | comparison\n",
"\n",
"comparison ::= arithmetic (comp-op arithmetic)*\n",
"comp-op ::= \"==\" | \"!=\" | \"<=\" | \">=\" | \"<\" | \">\" | \"in\" | \"is\"\n",
"\n",
"arithmetic ::= term (add-op term)*\n",
"add-op ::= \"+\" | \"-\"\n",
"\n",
"term ::= factor (mul-op factor)*\n",
"mul-op ::= \"*\" | \"/\" | \"%\"\n",
"\n",
"factor ::= unary-op factor | power\n",
"unary-op ::= \"+\" | \"-\"\n",
"\n",
"power ::= primary power-tail?\n",
"power-tail ::= \"**\" factor\n",
"\n",
"primary ::= atom postfix-part*\n",
"postfix-part ::= \".\" identifier | \"[\" subscript \"]\" | \"(\" argument-list? \")\"\n",
"\n",
"subscript ::= expression | slice-expr\n",
"slice-expr ::= expression? \":\" expression? slice-step?\n",
"slice-step ::= \":\" expression?\n",
"\n",
"atom ::= identifier | \"$\" identifier | literal | \"(\" expression \")\" | list-display | dict-display\n",
"\n",
"list-display ::= \"[\" list-display-body? \"]\"\n",
"list-display-body ::= list-comprehension | argument-list\n",
"list-comprehension ::= expression \"for\" identifier \"in\" expression if-clause?\n",
"if-clause ::= \"if\" expression\n",
"\n",
"dict-display ::= \"{\" key-datum-list? \"}\"\n",
"key-datum-list ::= key-datum (\",\" key-datum)*\n",
"key-datum ::= expression \":\" expression\n",
"\n",
"argument-list ::= expression (\",\" expression)*\n",
"\n",
"literal ::= stringliteral | number | boolean | \"None\"\n",
"boolean ::= \"True\" | \"False\"\n",
"\n",
"number ::= floatnumber | integer\n",
"integer ::= digit+\n",
"floatnumber ::= digit+ \".\" digit* | \".\" digit+\n",
"digit ::= [0-9]\n",
"\n",
"stringliteral ::= \"\\\"\" text-double \"\\\"\" | \"'\" text-single \"'\"\n",
"escape-sequence ::= \"\\\\\" escape-char\n",
"escape-char ::= \"\\\"\" | \"'\" | \"\\\\\" | \"n\" | \"t\" | \"r\" | \"0\"\n",
"\n",
"text-double ::= (text-double-char | escape-sequence)*\n",
"text-single ::= (text-single-char | escape-sequence)*\n",
"\n",
"text-double-char ::= [^\"\\\\]\n",
"text-single-char ::= [^'\\\\]\n",
"\n",
"identifier-or-string ::= identifier | stringliteral\n",
"\n",
"identifier ::= ident-start ident-rest*\n",
"ident-start ::= [A-Za-z_]\n",
"ident-rest ::= [A-Za-z0-9_]\n",
"'''\n",
"# Parse the GBNF text into a grammar object that can be passed to the model\n",
"# call below to constrain sampling.\n",
"grammar = LlamaGrammar.from_string(grammar_string)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "e693a3fa",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"llama_model_loader: loaded meta data with 33 key-value pairs and 338 tensors from /home/acano/PycharmProjects/assistance-engine/data/models/qwen2.5-coder-1.5b-q8_0.gguf (version GGUF V3 (latest))\n",
|
|
"llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
|
|
"llama_model_loader: - kv 0: general.architecture str = qwen2\n",
|
|
"llama_model_loader: - kv 1: general.type str = model\n",
|
|
"llama_model_loader: - kv 2: general.name str = Qwen2.5 Coder 1.5B\n",
|
|
"llama_model_loader: - kv 3: general.basename str = Qwen2.5-Coder\n",
|
|
"llama_model_loader: - kv 4: general.size_label str = 1.5B\n",
|
|
"llama_model_loader: - kv 5: general.license str = apache-2.0\n",
|
|
"llama_model_loader: - kv 6: general.license.link str = https://huggingface.co/Qwen/Qwen2.5-C...\n",
|
|
"llama_model_loader: - kv 7: general.base_model.count u32 = 1\n",
|
|
"llama_model_loader: - kv 8: general.base_model.0.name str = Qwen2.5 1.5B\n",
|
|
"llama_model_loader: - kv 9: general.base_model.0.organization str = Qwen\n",
|
|
"llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/Qwen/Qwen2.5-1.5B\n",
|
|
"llama_model_loader: - kv 11: general.tags arr[str,5] = [\"code\", \"qwen\", \"qwen-coder\", \"codeq...\n",
|
|
"llama_model_loader: - kv 12: general.languages arr[str,1] = [\"en\"]\n",
|
|
"llama_model_loader: - kv 13: qwen2.block_count u32 = 28\n",
|
|
"llama_model_loader: - kv 14: qwen2.context_length u32 = 32768\n",
|
|
"llama_model_loader: - kv 15: qwen2.embedding_length u32 = 1536\n",
|
|
"llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 8960\n",
|
|
"llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 12\n",
|
|
"llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2\n",
|
|
"llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000\n",
|
|
"llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001\n",
|
|
"llama_model_loader: - kv 21: general.file_type u32 = 7\n",
|
|
"llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2\n",
|
|
"llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = [\"!\", \"\\\"\", \"#\", \"$\", \"%\", \"&\", \"'\", ...\n",
|
|
"llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...\n",
|
|
"llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = [\"Ġ Ġ\", \"ĠĠ ĠĠ\", \"i n\", \"Ġ t\",...\n",
|
|
"llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151643\n",
|
|
"llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643\n",
|
|
"llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643\n",
|
|
"llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false\n",
|
|
"llama_model_loader: - kv 31: tokenizer.chat_template str = {%- if tools %}\\n {{- '<|im_start|>...\n",
|
|
"llama_model_loader: - kv 32: general.quantization_version u32 = 2\n",
|
|
"llama_model_loader: - type f32: 141 tensors\n",
|
|
"llama_model_loader: - type q8_0: 197 tensors\n",
|
|
"print_info: file format = GGUF V3 (latest)\n",
|
|
"print_info: file type = Q8_0\n",
|
|
"print_info: file size = 1.53 GiB (8.50 BPW) \n",
|
|
"init_tokenizer: initializing tokenizer for type 2\n",
|
|
"load: control token: 151660 '<|fim_middle|>' is not marked as EOG\n",
|
|
"load: control token: 151659 '<|fim_prefix|>' is not marked as EOG\n",
|
|
"load: control token: 151653 '<|vision_end|>' is not marked as EOG\n",
|
|
"load: control token: 151648 '<|box_start|>' is not marked as EOG\n",
|
|
"load: control token: 151646 '<|object_ref_start|>' is not marked as EOG\n",
|
|
"load: control token: 151649 '<|box_end|>' is not marked as EOG\n",
|
|
"load: control token: 151655 '<|image_pad|>' is not marked as EOG\n",
|
|
"load: control token: 151651 '<|quad_end|>' is not marked as EOG\n",
|
|
"load: control token: 151647 '<|object_ref_end|>' is not marked as EOG\n",
|
|
"load: control token: 151652 '<|vision_start|>' is not marked as EOG\n",
|
|
"load: control token: 151654 '<|vision_pad|>' is not marked as EOG\n",
|
|
"load: control token: 151656 '<|video_pad|>' is not marked as EOG\n",
|
|
"load: control token: 151644 '<|im_start|>' is not marked as EOG\n",
|
|
"load: control token: 151661 '<|fim_suffix|>' is not marked as EOG\n",
|
|
"load: control token: 151650 '<|quad_start|>' is not marked as EOG\n",
|
|
"load: printing all EOG tokens:\n",
|
|
"load: - 151643 ('<|endoftext|>')\n",
|
|
"load: - 151645 ('<|im_end|>')\n",
|
|
"load: - 151662 ('<|fim_pad|>')\n",
|
|
"load: - 151663 ('<|repo_name|>')\n",
|
|
"load: - 151664 ('<|file_sep|>')\n",
|
|
"load: special tokens cache size = 22\n",
|
|
"load: token to piece cache size = 0.9310 MB\n",
|
|
"print_info: arch = qwen2\n",
|
|
"print_info: vocab_only = 0\n",
|
|
"print_info: n_ctx_train = 32768\n",
|
|
"print_info: n_embd = 1536\n",
|
|
"print_info: n_layer = 28\n",
|
|
"print_info: n_head = 12\n",
|
|
"print_info: n_head_kv = 2\n",
|
|
"print_info: n_rot = 128\n",
|
|
"print_info: n_swa = 0\n",
|
|
"print_info: is_swa_any = 0\n",
|
|
"print_info: n_embd_head_k = 128\n",
|
|
"print_info: n_embd_head_v = 128\n",
|
|
"print_info: n_gqa = 6\n",
|
|
"print_info: n_embd_k_gqa = 256\n",
|
|
"print_info: n_embd_v_gqa = 256\n",
|
|
"print_info: f_norm_eps = 0.0e+00\n",
|
|
"print_info: f_norm_rms_eps = 1.0e-06\n",
|
|
"print_info: f_clamp_kqv = 0.0e+00\n",
|
|
"print_info: f_max_alibi_bias = 0.0e+00\n",
|
|
"print_info: f_logit_scale = 0.0e+00\n",
|
|
"print_info: f_attn_scale = 0.0e+00\n",
|
|
"print_info: n_ff = 8960\n",
|
|
"print_info: n_expert = 0\n",
|
|
"print_info: n_expert_used = 0\n",
|
|
"print_info: causal attn = 1\n",
|
|
"print_info: pooling type = -1\n",
|
|
"print_info: rope type = 2\n",
|
|
"print_info: rope scaling = linear\n",
|
|
"print_info: freq_base_train = 1000000.0\n",
|
|
"print_info: freq_scale_train = 1\n",
|
|
"print_info: n_ctx_orig_yarn = 32768\n",
|
|
"print_info: rope_finetuned = unknown\n",
|
|
"print_info: model type = 1.5B\n",
|
|
"print_info: model params = 1.54 B\n",
|
|
"print_info: general.name = Qwen2.5 Coder 1.5B\n",
|
|
"print_info: vocab type = BPE\n",
|
|
"print_info: n_vocab = 151936\n",
|
|
"print_info: n_merges = 151387\n",
|
|
"print_info: BOS token = 151643 '<|endoftext|>'\n",
|
|
"print_info: EOS token = 151643 '<|endoftext|>'\n",
|
|
"print_info: EOT token = 151645 '<|im_end|>'\n",
|
|
"print_info: PAD token = 151643 '<|endoftext|>'\n",
|
|
"print_info: LF token = 198 'Ċ'\n",
|
|
"print_info: FIM PRE token = 151659 '<|fim_prefix|>'\n",
|
|
"print_info: FIM SUF token = 151661 '<|fim_suffix|>'\n",
|
|
"print_info: FIM MID token = 151660 '<|fim_middle|>'\n",
|
|
"print_info: FIM PAD token = 151662 '<|fim_pad|>'\n",
|
|
"print_info: FIM REP token = 151663 '<|repo_name|>'\n",
|
|
"print_info: FIM SEP token = 151664 '<|file_sep|>'\n",
|
|
"print_info: EOG token = 151643 '<|endoftext|>'\n",
|
|
"print_info: EOG token = 151645 '<|im_end|>'\n",
|
|
"print_info: EOG token = 151662 '<|fim_pad|>'\n",
|
|
"print_info: EOG token = 151663 '<|repo_name|>'\n",
|
|
"print_info: EOG token = 151664 '<|file_sep|>'\n",
|
|
"print_info: max token length = 256\n",
|
|
"load_tensors: loading model tensors, this can take a while... (mmap = true)\n",
|
|
"load_tensors: layer 0 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 1 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 2 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 3 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 4 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 5 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 6 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 7 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 8 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 9 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 10 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 11 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 12 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 13 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 14 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 15 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 16 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 17 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 18 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 19 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 20 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 21 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 22 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 23 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 24 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 25 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 26 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 27 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: layer 28 assigned to device CPU, is_swa = 0\n",
|
|
"load_tensors: tensor 'token_embd.weight' (q8_0) (and 338 others) cannot be used with preferred buffer type CPU_REPACK, using CPU instead\n",
|
|
"load_tensors: CPU_Mapped model buffer size = 1564.62 MiB\n",
|
|
"......................................................................................\n",
|
|
"llama_context: constructing llama_context\n",
|
|
"llama_context: n_seq_max = 1\n",
|
|
"llama_context: n_ctx = 512\n",
|
|
"llama_context: n_ctx_per_seq = 512\n",
|
|
"llama_context: n_batch = 512\n",
|
|
"llama_context: n_ubatch = 512\n",
|
|
"llama_context: causal_attn = 1\n",
|
|
"llama_context: flash_attn = 0\n",
|
|
"llama_context: kv_unified = false\n",
|
|
"llama_context: freq_base = 1000000.0\n",
|
|
"llama_context: freq_scale = 1\n",
|
|
"llama_context: n_ctx_per_seq (512) < n_ctx_train (32768) -- the full capacity of the model will not be utilized\n",
|
|
"set_abort_callback: call\n",
|
|
"llama_context: CPU output buffer size = 0.58 MiB\n",
|
|
"create_memory: n_ctx = 512 (padded)\n",
|
|
"llama_kv_cache_unified: layer 0: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 1: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 2: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 3: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 4: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 5: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 6: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 7: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 8: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 9: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 10: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 11: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 12: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 13: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 14: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 15: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 16: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 17: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 18: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 19: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 20: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 21: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 22: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 23: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 24: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 25: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 26: dev = CPU\n",
|
|
"llama_kv_cache_unified: layer 27: dev = CPU\n",
|
|
"llama_kv_cache_unified: CPU KV buffer size = 14.00 MiB\n",
|
|
"llama_kv_cache_unified: size = 14.00 MiB ( 512 cells, 28 layers, 1/1 seqs), K (f16): 7.00 MiB, V (f16): 7.00 MiB\n",
|
|
"llama_context: enumerating backends\n",
|
|
"llama_context: backend_ptrs.size() = 1\n",
|
|
"llama_context: max_nodes = 2704\n",
|
|
"llama_context: worst-case: n_tokens = 512, n_seqs = 1, n_outputs = 0\n",
|
|
"graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512\n",
|
|
"graph_reserve: reserving a graph for ubatch with n_tokens = 1, n_seqs = 1, n_outputs = 1\n",
|
|
"graph_reserve: reserving a graph for ubatch with n_tokens = 512, n_seqs = 1, n_outputs = 512\n",
|
|
"llama_context: CPU compute buffer size = 299.75 MiB\n",
|
|
"llama_context: graph nodes = 1070\n",
|
|
"llama_context: graph splits = 1\n",
|
|
"CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 | \n",
|
|
"Model metadata: {'tokenizer.ggml.bos_token_id': '151643', 'general.file_type': '7', 'qwen2.attention.layer_norm_rms_epsilon': '0.000001', 'tokenizer.ggml.eos_token_id': '151643', 'qwen2.rope.freq_base': '1000000.000000', 'qwen2.attention.head_count': '12', 'general.quantization_version': '2', 'tokenizer.ggml.model': 'gpt2', 'qwen2.feed_forward_length': '8960', 'general.architecture': 'qwen2', 'tokenizer.ggml.padding_token_id': '151643', 'qwen2.embedding_length': '1536', 'general.basename': 'Qwen2.5-Coder', 'tokenizer.ggml.add_bos_token': 'false', 'general.base_model.0.organization': 'Qwen', 'tokenizer.ggml.pre': 'qwen2', 'general.name': 'Qwen2.5 Coder 1.5B', 'general.base_model.0.name': 'Qwen2.5 1.5B', 'qwen2.block_count': '28', 'general.type': 'model', 'general.size_label': '1.5B', 'tokenizer.chat_template': '{%- if tools %}\\n {{- \\'<|im_start|>system\\\\n\\' }}\\n {%- if messages[0][\\'role\\'] == \\'system\\' %}\\n {{- messages[0][\\'content\\'] }}\\n {%- else %}\\n {{- \\'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\\' }}\\n {%- endif %}\\n {{- \"\\\\n\\\\n# Tools\\\\n\\\\nYou may call one or more functions to assist with the user query.\\\\n\\\\nYou are provided with function signatures within <tools></tools> XML tags:\\\\n<tools>\" }}\\n {%- for tool in tools %}\\n {{- \"\\\\n\" }}\\n {{- tool | tojson }}\\n {%- endfor %}\\n {{- \"\\\\n</tools>\\\\n\\\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\\\n<tool_call>\\\\n{\\\\\"name\\\\\": <function-name>, \\\\\"arguments\\\\\": <args-json-object>}\\\\n</tool_call><|im_end|>\\\\n\" }}\\n{%- else %}\\n {%- if messages[0][\\'role\\'] == \\'system\\' %}\\n {{- \\'<|im_start|>system\\\\n\\' + messages[0][\\'content\\'] + \\'<|im_end|>\\\\n\\' }}\\n {%- else %}\\n {{- \\'<|im_start|>system\\\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\\\n\\' }}\\n {%- endif %}\\n{%- endif %}\\n{%- for message in messages %}\\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\\n {{- \\'<|im_start|>\\' + message.role + \\'\\\\n\\' + message.content + \\'<|im_end|>\\' + \\'\\\\n\\' }}\\n {%- elif message.role == \"assistant\" %}\\n {{- \\'<|im_start|>\\' + message.role }}\\n {%- if message.content %}\\n {{- \\'\\\\n\\' + message.content }}\\n {%- endif %}\\n {%- for tool_call in message.tool_calls %}\\n {%- if tool_call.function is defined %}\\n {%- set tool_call = tool_call.function %}\\n {%- endif %}\\n {{- \\'\\\\n<tool_call>\\\\n{\"name\": \"\\' }}\\n {{- tool_call.name }}\\n {{- \\'\", \"arguments\": \\' }}\\n {{- tool_call.arguments | tojson }}\\n {{- \\'}\\\\n</tool_call>\\' }}\\n {%- endfor %}\\n {{- \\'<|im_end|>\\\\n\\' }}\\n {%- elif message.role == \"tool\" %}\\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\\n {{- \\'<|im_start|>user\\' }}\\n {%- endif %}\\n {{- \\'\\\\n<tool_response>\\\\n\\' }}\\n {{- message.content }}\\n {{- \\'\\\\n</tool_response>\\' }}\\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\\n {{- \\'<|im_end|>\\\\n\\' }}\\n {%- endif %}\\n {%- endif %}\\n{%- endfor %}\\n{%- if add_generation_prompt %}\\n {{- \\'<|im_start|>assistant\\\\n\\' }}\\n{%- endif %}\\n', 'qwen2.attention.head_count_kv': '2', 'general.license.link': 'https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B/blob/main/LICENSE', 'general.base_model.count': '1', 'general.license': 'apache-2.0', 'general.base_model.0.repo_url': 'https://huggingface.co/Qwen/Qwen2.5-1.5B', 'qwen2.context_length': '32768'}\n",
|
|
"Available chat formats from metadata: chat_template.default\n",
|
|
"Using gguf chat template: {%- if tools %}\n",
|
|
" {{- '<|im_start|>system\\n' }}\n",
|
|
" {%- if messages[0]['role'] == 'system' %}\n",
|
|
" {{- messages[0]['content'] }}\n",
|
|
" {%- else %}\n",
|
|
" {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n",
|
|
" {%- endif %}\n",
|
|
" {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n",
|
|
" {%- for tool in tools %}\n",
|
|
" {{- \"\\n\" }}\n",
|
|
" {{- tool | tojson }}\n",
|
|
" {%- endfor %}\n",
|
|
" {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n",
|
|
"{%- else %}\n",
|
|
" {%- if messages[0]['role'] == 'system' %}\n",
|
|
" {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n",
|
|
" {%- else %}\n",
|
|
" {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n",
|
|
" {%- endif %}\n",
|
|
"{%- endif %}\n",
|
|
"{%- for message in messages %}\n",
|
|
" {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n",
|
|
" {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n",
|
|
" {%- elif message.role == \"assistant\" %}\n",
|
|
" {{- '<|im_start|>' + message.role }}\n",
|
|
" {%- if message.content %}\n",
|
|
" {{- '\\n' + message.content }}\n",
|
|
" {%- endif %}\n",
|
|
" {%- for tool_call in message.tool_calls %}\n",
|
|
" {%- if tool_call.function is defined %}\n",
|
|
" {%- set tool_call = tool_call.function %}\n",
|
|
" {%- endif %}\n",
|
|
" {{- '\\n<tool_call>\\n{\"name\": \"' }}\n",
|
|
" {{- tool_call.name }}\n",
|
|
" {{- '\", \"arguments\": ' }}\n",
|
|
" {{- tool_call.arguments | tojson }}\n",
|
|
" {{- '}\\n</tool_call>' }}\n",
|
|
" {%- endfor %}\n",
|
|
" {{- '<|im_end|>\\n' }}\n",
|
|
" {%- elif message.role == \"tool\" %}\n",
|
|
" {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n",
|
|
" {{- '<|im_start|>user' }}\n",
|
|
" {%- endif %}\n",
|
|
" {{- '\\n<tool_response>\\n' }}\n",
|
|
" {{- message.content }}\n",
|
|
" {{- '\\n</tool_response>' }}\n",
|
|
" {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n",
|
|
" {{- '<|im_end|>\\n' }}\n",
|
|
" {%- endif %}\n",
|
|
" {%- endif %}\n",
|
|
"{%- endfor %}\n",
|
|
"{%- if add_generation_prompt %}\n",
|
|
" {{- '<|im_start|>assistant\\n' }}\n",
|
|
"{%- endif %}\n",
|
|
"\n",
|
|
"Using chat eos_token: <|endoftext|>\n",
|
|
"Using chat bos_token: <|endoftext|>\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Load the local GGUF model. llama-cpp's default context window of 512 tokens\n",
"# is what truncated the earlier run (the logs show n_ctx = 512 and the\n",
"# completion stopped with finish_reason='length' at exactly 512 total tokens),\n",
"# so request a larger window; the model was trained with n_ctx = 32768.\n",
"llm_model = Llama(\n",
"    str(settings.models_path / \"qwen2.5-coder-1.5b-q8_0.gguf\"),\n",
"    n_ctx=4096,\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "aa66f897",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Llama.generate: 9 prefix-match hit, remaining 1 prompt tokens to eval\n",
|
|
"llama_perf_context_print: load time = 1762.25 ms\n",
|
|
"llama_perf_context_print: prompt eval time = 0.00 ms / 1 tokens ( 0.00 ms per token, inf tokens per second)\n",
|
|
"llama_perf_context_print: eval time = 64123.14 ms / 502 runs ( 127.74 ms per token, 7.83 tokens per second)\n",
|
|
"llama_perf_context_print: total time = 133698.82 ms / 503 tokens\n",
|
|
"llama_perf_context_print: graphs reused = 486\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Greedy (temperature=0), grammar-constrained completion; max_tokens=-1 lets\n",
"# generation run until EOS or the context window is exhausted.\n",
"prompt = \"Create a simple hello world function in AVAP language\"\n",
"response = llm_model(\n",
"    prompt,\n",
"    grammar=grammar,\n",
"    max_tokens=-1,\n",
"    temperature=0,\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "317b96ae",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"{'choices': [{'finish_reason': 'length',\n",
|
|
" 'index': 0,\n",
|
|
" 'logprobs': None,\n",
|
|
" 'text': '\\n'\n",
|
|
" 'Sure.HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltintextoutputfunctionalityinAVAPlanguage(assumingyouhaveAVAPinstalledandconfiguredonyoursystem)\\n'\n",
|
|
" '\\n'\n",
|
|
" 'HereisaprogramthatprintsouthestringhelloworldinAVAPlanguageusingthebuiltint'}],\n",
|
|
" 'created': 1773656986,\n",
|
|
" 'id': 'cmpl-3c382cd0-7254-4bbc-8e71-84f97f06006a',\n",
|
|
" 'model': '/home/acano/PycharmProjects/assistance-engine/data/models/qwen2.5-coder-1.5b-q8_0.gguf',\n",
|
|
" 'object': 'text_completion',\n",
|
|
" 'usage': {'completion_tokens': 502, 'prompt_tokens': 10, 'total_tokens': 512}}\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"pprint.pprint(response)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b6aa106d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "assistance-engine",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.13"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|