diff --git a/scratches/acano/generate_synthethic_data.ipynb b/scratches/acano/generate_synthethic_data.ipynb index 2dfc40f..721fd09 100644 --- a/scratches/acano/generate_synthethic_data.ipynb +++ b/scratches/acano/generate_synthethic_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "b657efd2", "metadata": {}, "outputs": [], @@ -15,12 +15,51 @@ "from langchain_core.messages import SystemMessage, HumanMessage\n", "\n", "from src.utils.llm_factory import create_chat_model\n", - "from src.config import DATA_DIR" + "from src.config import RAW_DIR, INTERIM_DIR" + ] + }, + { + "cell_type": "markdown", + "id": "e6e90339", + "metadata": {}, + "source": [ + "### Create llm instance" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, + "id": "20eecc53", + "metadata": {}, + "outputs": [], + "source": [ + "config = Config(\n", + " region_name=\"us-east-1\",\n", + " connect_timeout=10, \n", + " read_timeout=600, \n", + ")\n", + "\n", + "client = boto3.client(\"bedrock-runtime\", config=config)\n", + "\n", + "llm = create_chat_model(\n", + " provider=\"bedrock\",\n", + " client=client,\n", + " model=\"global.anthropic.claude-sonnet-4-6\",\n", + " temperature=0,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "96f12a22", + "metadata": {}, + "source": [ + "### Load mbpp data " + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "78e29dc2", "metadata": {}, "outputs": [ @@ -53,39 +92,35 @@ } ], "source": [ - "with open(DATA_DIR / \"avap.txt\", \"r\") as f:\n", - " avap_docs = f.read()\n", "dataset_full = load_dataset(\"mbpp\")\n", "dataset_full" ] }, { - "cell_type": "code", - "execution_count": 3, - "id": "f81203fc", + "cell_type": "markdown", + "id": "3e7544bb", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'task_id': 11,\n", - " 'text': 'Write a python function to remove first and last occurrence of a given character from the 
string.',\n", - " 'code': 'def remove_Occ(s,ch): \\r\\n for i in range(len(s)): \\r\\n if (s[i] == ch): \\r\\n s = s[0 : i] + s[i + 1:] \\r\\n break\\r\\n for i in range(len(s) - 1,-1,-1): \\r\\n if (s[i] == ch): \\r\\n s = s[0 : i] + s[i + 1:] \\r\\n break\\r\\n return s ',\n", - " 'test_list': ['assert remove_Occ(\"hello\",\"l\") == \"heo\"',\n", - " 'assert remove_Occ(\"abcda\",\"a\") == \"bcd\"',\n", - " 'assert remove_Occ(\"PHP\",\"P\") == \"H\"'],\n", - " 'test_setup_code': '',\n", - " 'challenge_test_list': ['assert remove_Occ(\"hellolloll\",\"l\") == \"helollol\"',\n", - " 'assert remove_Occ(\"\",\"l\") == \"\"']}" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "dataset_full[\"test\"][0]" + "### Load AVAP data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e88b2d3", + "metadata": {}, + "outputs": [], + "source": [ + "with open(RAW_DIR / \"avap.txt\", \"r\") as f:\n", + " avap_docs = f.read()" + ] + }, + { + "cell_type": "markdown", + "id": "c039d79f", + "metadata": {}, + "source": [ + "### Random sample mbpp" ] }, { @@ -109,36 +144,21 @@ ] }, { - "cell_type": "code", - "execution_count": 5, - "id": "446e109e", + "cell_type": "markdown", + "id": "21a0422d", "metadata": {}, - "outputs": [], "source": [ - "config = Config(\n", - " region_name=\"us-east-1\",\n", - " connect_timeout=10, # seconds\n", - " read_timeout=600, \n", - ")\n", - "\n", - "client = boto3.client(\"bedrock-runtime\", config=config)\n", - "\n", - "llm = create_chat_model(\n", - " provider=\"bedrock\",\n", - " client=client,\n", - " model=\"global.anthropic.claude-sonnet-4-6\",\n", - " temperature=0,\n", - ")" + "### Prompts" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "2c075e29", "metadata": {}, "outputs": [], "source": [ - "CONTEXT_PROMPT = SystemMessage(\n", + "PROMPT_MBPP = SystemMessage(\n", " content=f\"\"\"\n", " You are an AI agent responsible for generating 
a synthetic dataset for a programming language called AVAP (Advanced Virtual API Programming).\n", "\n", @@ -169,223 +189,38 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, + "id": "cbb459f8", + "metadata": {}, + "outputs": [], + "source": [ + "PROMPT_NO_MBPP = HumanMessage(\n", + " content=\"\"\"\n", + " I gave you all the context for a new programming language called AVAP (Advanced Virtual API Programming). \n", + " With that context, I'd like you to generate a \"golden dataset\" of 100 question and answer where the user query is asking code snippets and the response is the full code. \n", + " I want the questions and answers to cover the entire context provided, and I want there to be questions and answers at all possible levels, from questions with simple code snippets to more complex questions with more complex code. \n", + " \n", + " You must return a .json file with user_query and response for each question and answer\n", + " \"\"\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ea1e824e", + "metadata": {}, + "source": [ + "### Generate dataset using mbpp dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "8b7dcf2f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "```json\n", - "[\n", - " {\n", - " \"text\": \"Write a function in AVAP to remove the matching tuples from the given two tuples (lists).\",\n", - " \"code\": \"function remove_matching_tuple(test_list1, test_list2){\\n result = []\\n getListLen(test_list1, len1)\\n getListLen(test_list2, len2)\\n startLoop(i, 0, len1)\\n itemFromList(test_list1, i, current)\\n found = False\\n startLoop(j, 0, len2)\\n itemFromList(test_list2, j, item2)\\n if(current, item2, \\\"=\\\")\\n found = True\\n end()\\n endLoop()\\n if(found, False, \\\"=\\\")\\n variableToList(current, result)\\n end()\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP 
to find the number of lists present in the given tuple (list).\",\n", - " \"code\": \"function find_lists(Input){\\n getListLen(Input, length)\\n return(length)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the first natural number whose factorial is divisible by x.\",\n", - " \"code\": \"function first_Factorial_Divisible_Number(x){\\n i = 1\\n fact = 1\\n startLoop(i, 1, x)\\n fact = fact * i\\n mod = fact % x\\n if(mod, 0, \\\"=\\\")\\n result = i\\n i = x\\n end()\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the largest number that can be formed with the given digits.\",\n", - " \"code\": \"function find_Max_Num(arr, n){\\n // Sort array in descending order using bubble sort\\n startLoop(i, 0, n)\\n startLoop(j, 0, n)\\n itemFromList(arr, j, valJ)\\n next = j + 1\\n itemFromList(arr, next, valNext)\\n if(valJ, valNext, \\\"<\\\")\\n arr[j] = valNext\\n arr[next] = valJ\\n end()\\n endLoop()\\n endLoop()\\n itemFromList(arr, 0, num)\\n startLoop(i, 1, n)\\n num = num * 10\\n itemFromList(arr, i, digit)\\n num = num + digit\\n endLoop()\\n return(num)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check if a triangle is equilateral or not.\",\n", - " \"code\": \"function check_equilateral(x, y, z){\\n if(x, y, \\\"=\\\")\\n if(y, z, \\\"=\\\")\\n return(True)\\n end()\\n end()\\n return(False)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to sort a given list of tuples based on the occurrence of the first element.\",\n", - " \"code\": \"function sort_on_occurence(lst){\\n // Build a count dictionary for first elements\\n counts = {}\\n getListLen(lst, length)\\n startLoop(i, 0, length)\\n itemFromList(lst, i, pair)\\n itemFromList(pair, 0, key)\\n variableFromJSON(counts, key, current_count)\\n if(current_count, None, \\\"=\\\")\\n AddvariableToJSON(key, 1, counts)\\n else()\\n new_count = current_count + 1\\n 
AddvariableToJSON(key, new_count, counts)\\n end()\\n endLoop()\\n addResult(counts)\\n return(counts)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check if a given number is one less than twice its reverse.\",\n", - " \"code\": \"function rev(num){\\n rev_num = 0\\n startLoop(i, 1, num)\\n if(num, 0, \\\">\\\")\\n mod = num % 10\\n rev_num = rev_num * 10 + mod\\n num = num // 10\\n end()\\n endLoop()\\n return(rev_num)\\n}\\n\\nfunction check(n){\\n reversed_n = rev(n)\\n double_rev = 2 * reversed_n\\n n_plus_1 = n + 1\\n if(double_rev, n_plus_1, \\\"=\\\")\\n return(True)\\n end()\\n return(False)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to convert a list of multiple integers into a single integer.\",\n", - " \"code\": \"function multiple_to_single(L){\\n getListLen(L, length)\\n result = \\\"\\\"\\n startLoop(i, 0, length)\\n itemFromList(L, i, digit)\\n result = \\\"%s%s\\\" % (result, digit)\\n endLoop()\\n final = int(result)\\n return(final)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP that matches a word at the end of a string, with optional punctuation.\",\n", - " \"code\": \"function text_match_word(text){\\n pattern = \\\"\\\\\\\\w+\\\\\\\\S*$\\\"\\n getRegex(text, pattern, match_result)\\n if(match_result, None, \\\"!=\\\")\\n return(\\\"Found a match!\\\")\\n end()\\n return(\\\"Not matched!\\\")\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the sum of numbers in a list between the indices of a specified range.\",\n", - " \"code\": \"function sum_range_list(list1, m, n){\\n sum_range = 0\\n startLoop(i, m, n)\\n itemFromList(list1, i, val)\\n sum_range = sum_range + val\\n endLoop()\\n return(sum_range)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the most significant bit number which is also a set bit.\",\n", - " \"code\": \"function set_Bit_Number(n){\\n if(n, 0, \\\"=\\\")\\n return(0)\\n 
end()\\n msb = 0\\n n = n // 2\\n startLoop(i, 1, n)\\n if(n, 0, \\\">\\\")\\n n = n // 2\\n msb = msb + 1\\n end()\\n endLoop()\\n result = 1 << msb\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP for computing square roots using the Babylonian method.\",\n", - " \"code\": \"function babylonian_squareroot(number){\\n if(number, 0, \\\"=\\\")\\n return(0)\\n end()\\n g = number / 2.0\\n g2 = g + 1\\n startLoop(i, 1, 10000)\\n if(g, g2, \\\"!=\\\")\\n n = number / g\\n g2 = g\\n g = (g + n) / 2\\n end()\\n endLoop()\\n return(g)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check if all values are the same in a dictionary (JSON object).\",\n", - " \"code\": \"function check_value(dict, n){\\n keys = []\\n getQueryParamList(dict, keys)\\n getListLen(keys, length)\\n result = True\\n startLoop(i, 0, length)\\n itemFromList(keys, i, key)\\n variableFromJSON(dict, key, val)\\n if(val, n, \\\"!=\\\")\\n result = False\\n end()\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to convert a string to a list of words.\",\n", - " \"code\": \"function string_to_list(string){\\n result = []\\n word = \\\"\\\"\\n getListLen(string, length)\\n startLoop(i, 0, length)\\n char = string[i]\\n if(char, \\\" \\\", \\\"=\\\")\\n variableToList(word, result)\\n word = \\\"\\\"\\n else()\\n word = \\\"%s%s\\\" % (word, char)\\n end()\\n endLoop()\\n if(word, \\\"\\\", \\\"!=\\\")\\n variableToList(word, result)\\n end()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to shift the last element to the first position in a given list.\",\n", - " \"code\": \"function move_first(test_list){\\n getListLen(test_list, length)\\n last_index = length - 1\\n itemFromList(test_list, last_index, last_elem)\\n new_list = []\\n variableToList(last_elem, new_list)\\n startLoop(i, 0, last_index)\\n itemFromList(test_list, i, elem)\\n 
variableToList(elem, new_list)\\n endLoop()\\n return(new_list)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the sum of common divisors of two given numbers.\",\n", - " \"code\": \"function sum_common_divisors(a, b){\\n total = 0\\n if(a, b, \\\"<\\\")\\n limit = a\\n else()\\n limit = b\\n end()\\n startLoop(i, 1, limit)\\n mod_a = a % i\\n mod_b = b % i\\n if(mod_a, 0, \\\"=\\\")\\n if(mod_b, 0, \\\"=\\\")\\n total = total + i\\n end()\\n end()\\n endLoop()\\n return(total)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the number of ways of painting a fence with n posts and k colors such that at most 2 adjacent posts have the same color.\",\n", - " \"code\": \"function count_no_of_ways(n, k){\\n mod = 1000000007\\n dp = []\\n startLoop(i, 0, n)\\n variableToList(0, dp)\\n endLoop()\\n dp[1] = k\\n dp[2] = k * k\\n startLoop(i, 3, n)\\n prev1 = dp[i - 1]\\n prev2 = dp[i - 2]\\n dp[i] = ((k - 1) * (prev1 + prev2)) % mod\\n endLoop()\\n itemFromList(dp, n, result)\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to repeat a given tuple (list) n times.\",\n", - " \"code\": \"function repeat_tuples(test_tup, N){\\n result = []\\n startLoop(i, 1, N)\\n variableToList(test_tup, result)\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to remove all tuples with length k from a list.\",\n", - " \"code\": \"function remove_tuples(test_list, K){\\n result = []\\n getListLen(test_list, length)\\n startLoop(i, 0, length)\\n itemFromList(test_list, i, elem)\\n getListLen(elem, elem_len)\\n if(elem_len, K, \\\"!=\\\")\\n variableToList(elem, result)\\n end()\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to count the number of substrings with the same first and last characters.\",\n", - " \"code\": \"function count_Substring_With_Equal_Ends(s){\\n result = 0\\n n 
= len(s)\\n startLoop(i, 0, n)\\n startLoop(j, 1, n)\\n end_idx = i + j\\n if(end_idx, n, \\\"<=\\\")\\n sub = s[i:end_idx]\\n first_char = sub[0]\\n sub_len = len(sub)\\n last_idx = sub_len - 1\\n last_char = sub[last_idx]\\n if(first_char, last_char, \\\"=\\\")\\n result = result + 1\\n end()\\n end()\\n endLoop()\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to sort a list of tuples by the second element using a comparator.\",\n", - " \"code\": \"function subject_marks(subjectmarks){\\n // Bubble sort by second element of each tuple\\n getListLen(subjectmarks, length)\\n startLoop(i, 0, length)\\n startLoop(j, 0, length)\\n next_j = j + 1\\n if(next_j, length, \\\"<\\\")\\n itemFromList(subjectmarks, j, tupleA)\\n itemFromList(subjectmarks, next_j, tupleB)\\n itemFromList(tupleA, 1, valA)\\n itemFromList(tupleB, 1, valB)\\n if(valA, valB, \\\">\\\")\\n subjectmarks[j] = tupleB\\n subjectmarks[next_j] = tupleA\\n end()\\n end()\\n endLoop()\\n endLoop()\\n return(subjectmarks)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check whether all the bits are unset in the given range or not.\",\n", - " \"code\": \"function all_Bits_Set_In_The_Given_Range(n, l, r){\\n shift_r = 1 << r\\n shift_l = 1 << (l - 1)\\n num = (shift_r - 1) ^ (shift_l - 1)\\n new_num = n & num\\n if(new_num, 0, \\\"=\\\")\\n return(True)\\n end()\\n return(False)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the surface area of a cone.\",\n", - " \"code\": \"function surfacearea_cone(r, h){\\n pi = 3.141592653589793\\n l = (r * r + h * h) ** 0.5\\n SA = pi * r * (r + l)\\n return(SA)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the smallest number in a list.\",\n", - " \"code\": \"function smallest_num(xs){\\n getListLen(xs, length)\\n itemFromList(xs, 0, min_val)\\n startLoop(i, 1, length)\\n itemFromList(xs, i, current)\\n if(current, 
min_val, \\\"<\\\")\\n min_val = current\\n end()\\n endLoop()\\n return(min_val)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the first element occurring k times in a given array.\",\n", - " \"code\": \"function first_Element(arr, n, k){\\n counts = {}\\n startLoop(i, 0, n)\\n itemFromList(arr, i, elem)\\n variableFromJSON(counts, elem, cnt)\\n if(cnt, None, \\\"=\\\")\\n AddvariableToJSON(elem, 1, counts)\\n else()\\n new_cnt = cnt + 1\\n AddvariableToJSON(elem, new_cnt, counts)\\n end()\\n endLoop()\\n startLoop(i, 0, n)\\n itemFromList(arr, i, elem)\\n variableFromJSON(counts, elem, cnt)\\n if(cnt, k, \\\"=\\\")\\n return(elem)\\n end()\\n endLoop()\\n return(-1)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to count hexadecimal numbers for a given range.\",\n", - " \"code\": \"function count_Hexadecimal(L, R){\\n count = 0\\n startLoop(i, L, R)\\n if(i, 10, \\\">=\\\")\\n if(i, 15, \\\"<=\\\")\\n count = count + 1\\n end()\\n end()\\n if(i, 15, \\\">\\\")\\n k = i\\n startLoop(iter, 1, i)\\n if(k, 0, \\\"!=\\\")\\n mod = k % 16\\n if(mod, 10, \\\">=\\\")\\n count = count + 1\\n end()\\n k = k // 16\\n end()\\n endLoop()\\n end()\\n endLoop()\\n return(count)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check whether a hexadecimal number (as string) is even or odd.\",\n", - " \"code\": \"function even_or_odd(N){\\n l = len(N)\\n last_idx = l - 1\\n last_char = N[last_idx]\\n even_chars = [\\\"0\\\", \\\"2\\\", \\\"4\\\", \\\"6\\\", \\\"8\\\", \\\"A\\\", \\\"C\\\", \\\"E\\\"]\\n if(last_char, even_chars, \\\"in\\\")\\n return(\\\"Even\\\")\\n end()\\n return(\\\"Odd\\\")\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the sublist having minimum length.\",\n", - " \"code\": \"function Find_Min(lst){\\n getListLen(lst, length)\\n itemFromList(lst, 0, min_list)\\n getListLen(min_list, min_len)\\n startLoop(i, 1, length)\\n itemFromList(lst, i, 
current_list)\\n getListLen(current_list, current_len)\\n if(current_len, min_len, \\\"<\\\")\\n min_list = current_list\\n min_len = current_len\\n end()\\n endLoop()\\n return(min_list)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check a given decimal with a precision of 2 using regex.\",\n", - " \"code\": \"function is_decimal(num){\\n pattern = \\\"^[0-9]+(\\\\\\\\.[0-9]{1,2})?$\\\"\\n getRegex(num, pattern, result)\\n if(result, None, \\\"!=\\\")\\n return(True)\\n end()\\n return(False)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to count the number of prime numbers less than a given non-negative number.\",\n", - " \"code\": \"function count_Primes_nums(n){\\n ctr = 0\\n startLoop(num, 2, n)\\n is_prime = True\\n startLoop(i, 2, num)\\n if(i, num, \\\"<\\\")\\n mod = num % i\\n if(mod, 0, \\\"=\\\")\\n is_prime = False\\n end()\\n end()\\n endLoop()\\n if(is_prime, True, \\\"=\\\")\\n ctr = ctr + 1\\n end()\\n endLoop()\\n return(ctr)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to count the maximum number of equilateral triangles that can be formed within a given equilateral triangle.\",\n", - " \"code\": \"function No_of_Triangle(N, K){\\n if(N, K, \\\"<\\\")\\n return(-1)\\n end()\\n diff = N - K + 1\\n Tri_up = (diff * (diff + 1)) // 2\\n diff2 = N - 2 * K + 1\\n if(diff2, 0, \\\">\\\")\\n Tri_down = (diff2 * (diff2 + 1)) // 2\\n else()\\n Tri_down = 0\\n end()\\n result = Tri_up + Tri_down\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to remove the first and last occurrence of a given character from a string.\",\n", - " \"code\": \"function remove_Occ(s, ch){\\n n = len(s)\\n // Remove first occurrence\\n first_found = False\\n new_s = \\\"\\\"\\n startLoop(i, 0, n)\\n char = s[i]\\n if(char, ch, \\\"=\\\")\\n if(first_found, False, \\\"=\\\")\\n first_found = True\\n else()\\n new_s = \\\"%s%s\\\" % (new_s, char)\\n end()\\n 
else()\\n new_s = \\\"%s%s\\\" % (new_s, char)\\n end()\\n endLoop()\\n // Remove last occurrence\\n n2 = len(new_s)\\n last_idx = -1\\n startLoop(i, 0, n2)\\n char = new_s[i]\\n if(char, ch, \\\"=\\\")\\n last_idx = i\\n end()\\n endLoop()\\n if(last_idx, -1, \\\"!=\\\")\\n result = \\\"%s%s\\\" % (new_s[0:last_idx], new_s[last_idx + 1:n2])\\n else()\\n result = new_s\\n end()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the nth Newman-Shanks-Williams prime number.\",\n", - " \"code\": \"function newman_prime(n){\\n if(n, 0, \\\"=\\\")\\n return(1)\\n end()\\n if(n, 1, \\\"=\\\")\\n return(1)\\n end()\\n a = 1\\n b = 1\\n startLoop(i, 2, n)\\n c = 2 * b + a\\n a = b\\n b = c\\n endLoop()\\n return(b)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to perform index-wise addition of tuple elements in two nested tuples (lists of lists).\",\n", - " \"code\": \"function add_nested_tuples(test_tup1, test_tup2){\\n result = []\\n getListLen(test_tup1, outer_len)\\n startLoop(i, 0, outer_len)\\n itemFromList(test_tup1, i, tup1)\\n itemFromList(test_tup2, i, tup2)\\n inner_result = []\\n getListLen(tup1, inner_len)\\n startLoop(j, 0, inner_len)\\n itemFromList(tup1, j, a)\\n itemFromList(tup2, j, b)\\n sum_val = a + b\\n variableToList(sum_val, inner_result)\\n endLoop()\\n variableToList(inner_result, result)\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to remove even-indexed characters from a string.\",\n", - " \"code\": \"function remove_even(str1){\\n str2 = \\\"\\\"\\n n = len(str1)\\n startLoop(i, 1, n)\\n mod = i % 2\\n if(mod, 0, \\\"!=\\\")\\n char = str1[i - 1]\\n str2 = \\\"%s%s\\\" % (str2, char)\\n end()\\n endLoop()\\n return(str2)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to sort a counter (dictionary) by value in descending order.\",\n", - " \"code\": \"function sort_counter(dict1){\\n // Build 
list of (key, value) pairs sorted by value descending\\n keys = []\\n getQueryParamList(dict1, keys)\\n getListLen(keys, length)\\n pairs = []\\n startLoop(i, 0, length)\\n itemFromList(keys, i, key)\\n variableFromJSON(dict1, key, val)\\n pair = [key, val]\\n variableToList(pair, pairs)\\n endLoop()\\n // Bubble sort descending by value\\n startLoop(i, 0, length)\\n startLoop(j, 0, length)\\n next_j = j + 1\\n if(next_j, length, \\\"<\\\")\\n itemFromList(pairs, j, pA)\\n itemFromList(pairs, next_j, pB)\\n itemFromList(pA, 1, vA)\\n itemFromList(pB, 1, vB)\\n if(vA, vB, \\\"<\\\")\\n pairs[j] = pB\\n pairs[next_j] = pA\\n end()\\n end()\\n endLoop()\\n endLoop()\\n return(pairs)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to check if a substring is present in a given list of string values.\",\n", - " \"code\": \"function find_substring(str1, sub_str){\\n getListLen(str1, length)\\n startLoop(i, 0, length)\\n itemFromList(str1, i, s)\\n getRegex(s, sub_str, match)\\n if(match, None, \\\"!=\\\")\\n return(True)\\n end()\\n endLoop()\\n return(False)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to count the occurrences of an element in a tuple (list).\",\n", - " \"code\": \"function count_X(tup, x){\\n count = 0\\n getListLen(tup, length)\\n startLoop(i, 0, length)\\n itemFromList(tup, i, elem)\\n if(elem, x, \\\"=\\\")\\n count = count + 1\\n end()\\n endLoop()\\n return(count)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to count positive numbers in a list.\",\n", - " \"code\": \"function pos_count(list){\\n count = 0\\n getListLen(list, length)\\n startLoop(i, 0, length)\\n itemFromList(list, i, num)\\n if(num, 0, \\\">=\\\")\\n count = count + 1\\n end()\\n endLoop()\\n return(count)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to perform index-wise multiplication of tuple elements in two given tuples (lists of lists).\",\n", - " \"code\": \"function 
index_multiplication(test_tup1, test_tup2){\\n result = []\\n getListLen(test_tup1, outer_len)\\n startLoop(i, 0, outer_len)\\n itemFromList(test_tup1, i, tup1)\\n itemFromList(test_tup2, i, tup2)\\n inner_result = []\\n getListLen(tup1, inner_len)\\n startLoop(j, 0, inner_len)\\n itemFromList(tup1, j, a)\\n itemFromList(tup2, j, b)\\n prod = a * b\\n variableToList(prod, inner_result)\\n endLoop()\\n variableToList(inner_result, result)\\n endLoop()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the maximum occurring character in a given string.\",\n", - " \"code\": \"function get_max_occuring_char(str1){\\n counts = {}\\n n = len(str1)\\n startLoop(i, 0, n)\\n char = str1[i]\\n variableFromJSON(counts, char, cnt)\\n if(cnt, None, \\\"=\\\")\\n AddvariableToJSON(char, 1, counts)\\n else()\\n new_cnt = cnt + 1\\n AddvariableToJSON(char, new_cnt, counts)\\n end()\\n endLoop()\\n max_count = 0\\n max_char = \\\"\\\"\\n startLoop(i, 0, n)\\n char = str1[i]\\n variableFromJSON(counts, char, cnt)\\n if(cnt, max_count, \\\">\\\")\\n max_count = cnt\\n max_char = char\\n end()\\n endLoop()\\n return(max_char)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the n-th Lucas number.\",\n", - " \"code\": \"function find_lucas(n){\\n if(n, 0, \\\"=\\\")\\n return(2)\\n end()\\n if(n, 1, \\\"=\\\")\\n return(1)\\n end()\\n a = 2\\n b = 1\\n startLoop(i, 2, n)\\n c = a + b\\n a = b\\n b = c\\n endLoop()\\n return(b)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the maximum sum of increasing subsequence from prefix till ith index also including a given kth element which is after i.\",\n", - " \"code\": \"function max_sum_increasing_subseq(a, n, index, k){\\n // Initialize dp as n x n matrix of zeros\\n dp = []\\n startLoop(i, 0, n)\\n row = []\\n startLoop(j, 0, n)\\n variableToList(0, row)\\n endLoop()\\n variableToList(row, dp)\\n endLoop()\\n // Fill first row\\n 
itemFromList(a, 0, a0)\\n startLoop(i, 0, n)\\n itemFromList(a, i, ai)\\n if(ai, a0, \\\">\\\")\\n dp[0][i] = ai + a0\\n else()\\n dp[0][i] = ai\\n end()\\n endLoop()\\n // Fill rest\\n startLoop(i, 1, n)\\n startLoop(j, 0, n)\\n itemFromList(a, j, aj)\\n itemFromList(a, i, ai)\\n if(aj, ai, \\\">\\\")\\n if(j, i, \\\">\\\")\\n prev_ii = dp[i - 1][i]\\n prev_ij = dp[i - 1][j]\\n candidate = prev_ii + aj\\n if(candidate, prev_ij, \\\">\\\")\\n dp[i][j] = candidate\\n else()\\n dp[i][j] = prev_ij\\n end()\\n else()\\n dp[i][j] = dp[i - 1][j]\\n end()\\n else()\\n dp[i][j] = dp[i - 1][j]\\n end()\\n endLoop()\\n endLoop()\\n result = dp[index][k]\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find uppercase, lowercase, special characters, and numeric values using regex.\",\n", - " \"code\": \"function find_character(string){\\n upper_pattern = \\\"[A-Z]\\\"\\n lower_pattern = \\\"[a-z]\\\"\\n num_pattern = \\\"[0-9]\\\"\\n special_pattern = \\\"[, .!?]\\\"\\n getRegex(string, upper_pattern, uppercase_characters)\\n getRegex(string, lower_pattern, lowercase_characters)\\n getRegex(string, num_pattern, numerical_characters)\\n getRegex(string, special_pattern, special_characters)\\n result = {}\\n AddvariableToJSON(\\\"uppercase\\\", uppercase_characters, result)\\n AddvariableToJSON(\\\"lowercase\\\", lowercase_characters, result)\\n AddvariableToJSON(\\\"numerical\\\", numerical_characters, result)\\n AddvariableToJSON(\\\"special\\\", special_characters, result)\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find the top k integers that occur most frequently from given lists of sorted and distinct integers.\",\n", - " \"code\": \"function func(nums, k){\\n counts = {}\\n getListLen(nums, outer_len)\\n startLoop(i, 0, outer_len)\\n itemFromList(nums, i, row)\\n getListLen(row, row_len)\\n startLoop(j, 0, row_len)\\n itemFromList(row, j, elem)\\n variableFromJSON(counts, elem, 
cnt)\\n if(cnt, None, \\\"=\\\")\\n AddvariableToJSON(elem, 1, counts)\\n else()\\n new_cnt = cnt + 1\\n AddvariableToJSON(elem, new_cnt, counts)\\n end()\\n endLoop()\\n endLoop()\\n // Return top k keys by count (simplified: return counts object)\\n addResult(counts)\\n return(counts)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to convert a given string to lower case.\",\n", - " \"code\": \"function is_lower(string){\\n result = string.lower()\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find k number of pairs consisting of one element from the first array and one element from the second array with the smallest sums.\",\n", - " \"code\": \"function k_smallest_pairs(nums1, nums2, k){\\n pairs = []\\n getListLen(nums1, len1)\\n getListLen(nums2, len2)\\n startLoop(i, 0, len1)\\n startLoop(j, 0, len2)\\n itemFromList(nums1, i, v1)\\n itemFromList(nums2, j, v2)\\n pair = [v1, v2]\\n variableToList(pair, pairs)\\n endLoop()\\n endLoop()\\n // Sort pairs by sum (bubble sort)\\n getListLen(pairs, total_pairs)\\n startLoop(i, 0, total_pairs)\\n startLoop(j, 0, total_pairs)\\n next_j = j + 1\\n if(next_j, total_pairs, \\\"<\\\")\\n itemFromList(pairs, j, pA)\\n itemFromList(pairs, next_j, pB)\\n itemFromList(pA, 0, a0)\\n itemFromList(pA, 1, a1)\\n itemFromList(pB, 0, b0)\\n itemFromList(pB, 1, b1)\\n sumA = a0 + a1\\n sumB = b0 + b1\\n if(sumA, sumB, \\\">\\\")\\n pairs[j] = pB\\n pairs[next_j] = pA\\n end()\\n end()\\n endLoop()\\n endLoop()\\n result = pairs[0:k]\\n return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to remove specific words from a given list.\",\n", - " \"code\": \"function remove_words(list1, removewords){\\n result = []\\n getListLen(list1, length)\\n startLoop(i, 0, length)\\n itemFromList(list1, i, word)\\n if(word, removewords, \\\"in\\\")\\n // skip this word\\n else()\\n variableToList(word, result)\\n end()\\n endLoop()\\n 
return(result)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to round a given number to the nearest multiple of a specific number.\",\n", - " \"code\": \"function round_num(n, m){\\n a = (n // m) * m\\n b = a + m\\n diff_a = n - a\\n diff_b = b - n\\n if(diff_a, diff_b, \\\">\\\")\\n return(b)\\n end()\\n return(a)\\n}\"\n", - " },\n", - " {\n", - " \"text\": \"Write a function in AVAP to find two distinct numbers such that their LCM lies within the given range.\",\n", - " \"code\": \"function answer(L, R){\\n double_L = 2 * L\\n if(double_L, R, \\\"<=\\\")\\n result = [L, double_L]\\n return(result)\\n end()\\n return(-1)\\n}\"\n", - " }\n", - "]\n", - "```\n" - ] - } - ], + "outputs": [], "source": [ - "llm_response = llm.invoke([CONTEXT_PROMPT, HumanMessage(content=str(test_samples_dict))])\n", + "llm_response = llm.invoke([PROMPT_MBPP, HumanMessage(content=str(test_samples_dict))])\n", "print(llm_response.content)" ] }, @@ -402,154 +237,54 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "eaa21047", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'text': 'Write a function in AVAP to remove the matching tuples from the given two tuples (lists).',\n", - " 'code': 'function remove_matching_tuple(test_list1, test_list2){\\n result = []\\n getListLen(test_list1, len1)\\n getListLen(test_list2, len2)\\n startLoop(i, 0, len1)\\n itemFromList(test_list1, i, current)\\n found = False\\n startLoop(j, 0, len2)\\n itemFromList(test_list2, j, item2)\\n if(current, item2, \"=\")\\n found = True\\n end()\\n endLoop()\\n if(found, False, \"=\")\\n variableToList(current, result)\\n end()\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the number of lists present in the given tuple (list).',\n", - " 'code': 'function find_lists(Input){\\n getListLen(Input, length)\\n return(length)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the first natural number whose 
factorial is divisible by x.',\n", - " 'code': 'function first_Factorial_Divisible_Number(x){\\n i = 1\\n fact = 1\\n startLoop(i, 1, x)\\n fact = fact * i\\n mod = fact % x\\n if(mod, 0, \"=\")\\n result = i\\n i = x\\n end()\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the largest number that can be formed with the given digits.',\n", - " 'code': 'function find_Max_Num(arr, n){\\n // Sort array in descending order using bubble sort\\n startLoop(i, 0, n)\\n startLoop(j, 0, n)\\n itemFromList(arr, j, valJ)\\n next = j + 1\\n itemFromList(arr, next, valNext)\\n if(valJ, valNext, \"<\")\\n arr[j] = valNext\\n arr[next] = valJ\\n end()\\n endLoop()\\n endLoop()\\n itemFromList(arr, 0, num)\\n startLoop(i, 1, n)\\n num = num * 10\\n itemFromList(arr, i, digit)\\n num = num + digit\\n endLoop()\\n return(num)\\n}'},\n", - " {'text': 'Write a function in AVAP to check if a triangle is equilateral or not.',\n", - " 'code': 'function check_equilateral(x, y, z){\\n if(x, y, \"=\")\\n if(y, z, \"=\")\\n return(True)\\n end()\\n end()\\n return(False)\\n}'},\n", - " {'text': 'Write a function in AVAP to sort a given list of tuples based on the occurrence of the first element.',\n", - " 'code': 'function sort_on_occurence(lst){\\n // Build a count dictionary for first elements\\n counts = {}\\n getListLen(lst, length)\\n startLoop(i, 0, length)\\n itemFromList(lst, i, pair)\\n itemFromList(pair, 0, key)\\n variableFromJSON(counts, key, current_count)\\n if(current_count, None, \"=\")\\n AddvariableToJSON(key, 1, counts)\\n else()\\n new_count = current_count + 1\\n AddvariableToJSON(key, new_count, counts)\\n end()\\n endLoop()\\n addResult(counts)\\n return(counts)\\n}'},\n", - " {'text': 'Write a function in AVAP to check if a given number is one less than twice its reverse.',\n", - " 'code': 'function rev(num){\\n rev_num = 0\\n startLoop(i, 1, num)\\n if(num, 0, \">\")\\n mod = num % 10\\n rev_num = rev_num * 10 + mod\\n num = 
num // 10\\n end()\\n endLoop()\\n return(rev_num)\\n}\\n\\nfunction check(n){\\n reversed_n = rev(n)\\n double_rev = 2 * reversed_n\\n n_plus_1 = n + 1\\n if(double_rev, n_plus_1, \"=\")\\n return(True)\\n end()\\n return(False)\\n}'},\n", - " {'text': 'Write a function in AVAP to convert a list of multiple integers into a single integer.',\n", - " 'code': 'function multiple_to_single(L){\\n getListLen(L, length)\\n result = \"\"\\n startLoop(i, 0, length)\\n itemFromList(L, i, digit)\\n result = \"%s%s\" % (result, digit)\\n endLoop()\\n final = int(result)\\n return(final)\\n}'},\n", - " {'text': 'Write a function in AVAP that matches a word at the end of a string, with optional punctuation.',\n", - " 'code': 'function text_match_word(text){\\n pattern = \"\\\\\\\\w+\\\\\\\\S*$\"\\n getRegex(text, pattern, match_result)\\n if(match_result, None, \"!=\")\\n return(\"Found a match!\")\\n end()\\n return(\"Not matched!\")\\n}'},\n", - " {'text': 'Write a function in AVAP to find the sum of numbers in a list between the indices of a specified range.',\n", - " 'code': 'function sum_range_list(list1, m, n){\\n sum_range = 0\\n startLoop(i, m, n)\\n itemFromList(list1, i, val)\\n sum_range = sum_range + val\\n endLoop()\\n return(sum_range)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the most significant bit number which is also a set bit.',\n", - " 'code': 'function set_Bit_Number(n){\\n if(n, 0, \"=\")\\n return(0)\\n end()\\n msb = 0\\n n = n // 2\\n startLoop(i, 1, n)\\n if(n, 0, \">\")\\n n = n // 2\\n msb = msb + 1\\n end()\\n endLoop()\\n result = 1 << msb\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP for computing square roots using the Babylonian method.',\n", - " 'code': 'function babylonian_squareroot(number){\\n if(number, 0, \"=\")\\n return(0)\\n end()\\n g = number / 2.0\\n g2 = g + 1\\n startLoop(i, 1, 10000)\\n if(g, g2, \"!=\")\\n n = number / g\\n g2 = g\\n g = (g + n) / 2\\n end()\\n endLoop()\\n 
return(g)\\n}'},\n", - " {'text': 'Write a function in AVAP to check if all values are the same in a dictionary (JSON object).',\n", - " 'code': 'function check_value(dict, n){\\n keys = []\\n getQueryParamList(dict, keys)\\n getListLen(keys, length)\\n result = True\\n startLoop(i, 0, length)\\n itemFromList(keys, i, key)\\n variableFromJSON(dict, key, val)\\n if(val, n, \"!=\")\\n result = False\\n end()\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to convert a string to a list of words.',\n", - " 'code': 'function string_to_list(string){\\n result = []\\n word = \"\"\\n getListLen(string, length)\\n startLoop(i, 0, length)\\n char = string[i]\\n if(char, \" \", \"=\")\\n variableToList(word, result)\\n word = \"\"\\n else()\\n word = \"%s%s\" % (word, char)\\n end()\\n endLoop()\\n if(word, \"\", \"!=\")\\n variableToList(word, result)\\n end()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to shift the last element to the first position in a given list.',\n", - " 'code': 'function move_first(test_list){\\n getListLen(test_list, length)\\n last_index = length - 1\\n itemFromList(test_list, last_index, last_elem)\\n new_list = []\\n variableToList(last_elem, new_list)\\n startLoop(i, 0, last_index)\\n itemFromList(test_list, i, elem)\\n variableToList(elem, new_list)\\n endLoop()\\n return(new_list)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the sum of common divisors of two given numbers.',\n", - " 'code': 'function sum_common_divisors(a, b){\\n total = 0\\n if(a, b, \"<\")\\n limit = a\\n else()\\n limit = b\\n end()\\n startLoop(i, 1, limit)\\n mod_a = a % i\\n mod_b = b % i\\n if(mod_a, 0, \"=\")\\n if(mod_b, 0, \"=\")\\n total = total + i\\n end()\\n end()\\n endLoop()\\n return(total)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the number of ways of painting a fence with n posts and k colors such that at most 2 adjacent posts have the same color.',\n", - " 'code': 
'function count_no_of_ways(n, k){\\n mod = 1000000007\\n dp = []\\n startLoop(i, 0, n)\\n variableToList(0, dp)\\n endLoop()\\n dp[1] = k\\n dp[2] = k * k\\n startLoop(i, 3, n)\\n prev1 = dp[i - 1]\\n prev2 = dp[i - 2]\\n dp[i] = ((k - 1) * (prev1 + prev2)) % mod\\n endLoop()\\n itemFromList(dp, n, result)\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to repeat a given tuple (list) n times.',\n", - " 'code': 'function repeat_tuples(test_tup, N){\\n result = []\\n startLoop(i, 1, N)\\n variableToList(test_tup, result)\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to remove all tuples with length k from a list.',\n", - " 'code': 'function remove_tuples(test_list, K){\\n result = []\\n getListLen(test_list, length)\\n startLoop(i, 0, length)\\n itemFromList(test_list, i, elem)\\n getListLen(elem, elem_len)\\n if(elem_len, K, \"!=\")\\n variableToList(elem, result)\\n end()\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to count the number of substrings with the same first and last characters.',\n", - " 'code': 'function count_Substring_With_Equal_Ends(s){\\n result = 0\\n n = len(s)\\n startLoop(i, 0, n)\\n startLoop(j, 1, n)\\n end_idx = i + j\\n if(end_idx, n, \"<=\")\\n sub = s[i:end_idx]\\n first_char = sub[0]\\n sub_len = len(sub)\\n last_idx = sub_len - 1\\n last_char = sub[last_idx]\\n if(first_char, last_char, \"=\")\\n result = result + 1\\n end()\\n end()\\n endLoop()\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to sort a list of tuples by the second element using a comparator.',\n", - " 'code': 'function subject_marks(subjectmarks){\\n // Bubble sort by second element of each tuple\\n getListLen(subjectmarks, length)\\n startLoop(i, 0, length)\\n startLoop(j, 0, length)\\n next_j = j + 1\\n if(next_j, length, \"<\")\\n itemFromList(subjectmarks, j, tupleA)\\n itemFromList(subjectmarks, next_j, tupleB)\\n itemFromList(tupleA, 1, 
valA)\\n itemFromList(tupleB, 1, valB)\\n if(valA, valB, \">\")\\n subjectmarks[j] = tupleB\\n subjectmarks[next_j] = tupleA\\n end()\\n end()\\n endLoop()\\n endLoop()\\n return(subjectmarks)\\n}'},\n", - " {'text': 'Write a function in AVAP to check whether all the bits are unset in the given range or not.',\n", - " 'code': 'function all_Bits_Set_In_The_Given_Range(n, l, r){\\n shift_r = 1 << r\\n shift_l = 1 << (l - 1)\\n num = (shift_r - 1) ^ (shift_l - 1)\\n new_num = n & num\\n if(new_num, 0, \"=\")\\n return(True)\\n end()\\n return(False)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the surface area of a cone.',\n", - " 'code': 'function surfacearea_cone(r, h){\\n pi = 3.141592653589793\\n l = (r * r + h * h) ** 0.5\\n SA = pi * r * (r + l)\\n return(SA)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the smallest number in a list.',\n", - " 'code': 'function smallest_num(xs){\\n getListLen(xs, length)\\n itemFromList(xs, 0, min_val)\\n startLoop(i, 1, length)\\n itemFromList(xs, i, current)\\n if(current, min_val, \"<\")\\n min_val = current\\n end()\\n endLoop()\\n return(min_val)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the first element occurring k times in a given array.',\n", - " 'code': 'function first_Element(arr, n, k){\\n counts = {}\\n startLoop(i, 0, n)\\n itemFromList(arr, i, elem)\\n variableFromJSON(counts, elem, cnt)\\n if(cnt, None, \"=\")\\n AddvariableToJSON(elem, 1, counts)\\n else()\\n new_cnt = cnt + 1\\n AddvariableToJSON(elem, new_cnt, counts)\\n end()\\n endLoop()\\n startLoop(i, 0, n)\\n itemFromList(arr, i, elem)\\n variableFromJSON(counts, elem, cnt)\\n if(cnt, k, \"=\")\\n return(elem)\\n end()\\n endLoop()\\n return(-1)\\n}'},\n", - " {'text': 'Write a function in AVAP to count hexadecimal numbers for a given range.',\n", - " 'code': 'function count_Hexadecimal(L, R){\\n count = 0\\n startLoop(i, L, R)\\n if(i, 10, \">=\")\\n if(i, 15, \"<=\")\\n count = count + 1\\n end()\\n end()\\n 
if(i, 15, \">\")\\n k = i\\n startLoop(iter, 1, i)\\n if(k, 0, \"!=\")\\n mod = k % 16\\n if(mod, 10, \">=\")\\n count = count + 1\\n end()\\n k = k // 16\\n end()\\n endLoop()\\n end()\\n endLoop()\\n return(count)\\n}'},\n", - " {'text': 'Write a function in AVAP to check whether a hexadecimal number (as string) is even or odd.',\n", - " 'code': 'function even_or_odd(N){\\n l = len(N)\\n last_idx = l - 1\\n last_char = N[last_idx]\\n even_chars = [\"0\", \"2\", \"4\", \"6\", \"8\", \"A\", \"C\", \"E\"]\\n if(last_char, even_chars, \"in\")\\n return(\"Even\")\\n end()\\n return(\"Odd\")\\n}'},\n", - " {'text': 'Write a function in AVAP to find the sublist having minimum length.',\n", - " 'code': 'function Find_Min(lst){\\n getListLen(lst, length)\\n itemFromList(lst, 0, min_list)\\n getListLen(min_list, min_len)\\n startLoop(i, 1, length)\\n itemFromList(lst, i, current_list)\\n getListLen(current_list, current_len)\\n if(current_len, min_len, \"<\")\\n min_list = current_list\\n min_len = current_len\\n end()\\n endLoop()\\n return(min_list)\\n}'},\n", - " {'text': 'Write a function in AVAP to check a given decimal with a precision of 2 using regex.',\n", - " 'code': 'function is_decimal(num){\\n pattern = \"^[0-9]+(\\\\\\\\.[0-9]{1,2})?$\"\\n getRegex(num, pattern, result)\\n if(result, None, \"!=\")\\n return(True)\\n end()\\n return(False)\\n}'},\n", - " {'text': 'Write a function in AVAP to count the number of prime numbers less than a given non-negative number.',\n", - " 'code': 'function count_Primes_nums(n){\\n ctr = 0\\n startLoop(num, 2, n)\\n is_prime = True\\n startLoop(i, 2, num)\\n if(i, num, \"<\")\\n mod = num % i\\n if(mod, 0, \"=\")\\n is_prime = False\\n end()\\n end()\\n endLoop()\\n if(is_prime, True, \"=\")\\n ctr = ctr + 1\\n end()\\n endLoop()\\n return(ctr)\\n}'},\n", - " {'text': 'Write a function in AVAP to count the maximum number of equilateral triangles that can be formed within a given equilateral triangle.',\n", - " 'code': 
'function No_of_Triangle(N, K){\\n if(N, K, \"<\")\\n return(-1)\\n end()\\n diff = N - K + 1\\n Tri_up = (diff * (diff + 1)) // 2\\n diff2 = N - 2 * K + 1\\n if(diff2, 0, \">\")\\n Tri_down = (diff2 * (diff2 + 1)) // 2\\n else()\\n Tri_down = 0\\n end()\\n result = Tri_up + Tri_down\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to remove the first and last occurrence of a given character from a string.',\n", - " 'code': 'function remove_Occ(s, ch){\\n n = len(s)\\n // Remove first occurrence\\n first_found = False\\n new_s = \"\"\\n startLoop(i, 0, n)\\n char = s[i]\\n if(char, ch, \"=\")\\n if(first_found, False, \"=\")\\n first_found = True\\n else()\\n new_s = \"%s%s\" % (new_s, char)\\n end()\\n else()\\n new_s = \"%s%s\" % (new_s, char)\\n end()\\n endLoop()\\n // Remove last occurrence\\n n2 = len(new_s)\\n last_idx = -1\\n startLoop(i, 0, n2)\\n char = new_s[i]\\n if(char, ch, \"=\")\\n last_idx = i\\n end()\\n endLoop()\\n if(last_idx, -1, \"!=\")\\n result = \"%s%s\" % (new_s[0:last_idx], new_s[last_idx + 1:n2])\\n else()\\n result = new_s\\n end()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the nth Newman-Shanks-Williams prime number.',\n", - " 'code': 'function newman_prime(n){\\n if(n, 0, \"=\")\\n return(1)\\n end()\\n if(n, 1, \"=\")\\n return(1)\\n end()\\n a = 1\\n b = 1\\n startLoop(i, 2, n)\\n c = 2 * b + a\\n a = b\\n b = c\\n endLoop()\\n return(b)\\n}'},\n", - " {'text': 'Write a function in AVAP to perform index-wise addition of tuple elements in two nested tuples (lists of lists).',\n", - " 'code': 'function add_nested_tuples(test_tup1, test_tup2){\\n result = []\\n getListLen(test_tup1, outer_len)\\n startLoop(i, 0, outer_len)\\n itemFromList(test_tup1, i, tup1)\\n itemFromList(test_tup2, i, tup2)\\n inner_result = []\\n getListLen(tup1, inner_len)\\n startLoop(j, 0, inner_len)\\n itemFromList(tup1, j, a)\\n itemFromList(tup2, j, b)\\n sum_val = a + b\\n variableToList(sum_val, 
inner_result)\\n endLoop()\\n variableToList(inner_result, result)\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to remove even-indexed characters from a string.',\n", - " 'code': 'function remove_even(str1){\\n str2 = \"\"\\n n = len(str1)\\n startLoop(i, 1, n)\\n mod = i % 2\\n if(mod, 0, \"!=\")\\n char = str1[i - 1]\\n str2 = \"%s%s\" % (str2, char)\\n end()\\n endLoop()\\n return(str2)\\n}'},\n", - " {'text': 'Write a function in AVAP to sort a counter (dictionary) by value in descending order.',\n", - " 'code': 'function sort_counter(dict1){\\n // Build list of (key, value) pairs sorted by value descending\\n keys = []\\n getQueryParamList(dict1, keys)\\n getListLen(keys, length)\\n pairs = []\\n startLoop(i, 0, length)\\n itemFromList(keys, i, key)\\n variableFromJSON(dict1, key, val)\\n pair = [key, val]\\n variableToList(pair, pairs)\\n endLoop()\\n // Bubble sort descending by value\\n startLoop(i, 0, length)\\n startLoop(j, 0, length)\\n next_j = j + 1\\n if(next_j, length, \"<\")\\n itemFromList(pairs, j, pA)\\n itemFromList(pairs, next_j, pB)\\n itemFromList(pA, 1, vA)\\n itemFromList(pB, 1, vB)\\n if(vA, vB, \"<\")\\n pairs[j] = pB\\n pairs[next_j] = pA\\n end()\\n end()\\n endLoop()\\n endLoop()\\n return(pairs)\\n}'},\n", - " {'text': 'Write a function in AVAP to check if a substring is present in a given list of string values.',\n", - " 'code': 'function find_substring(str1, sub_str){\\n getListLen(str1, length)\\n startLoop(i, 0, length)\\n itemFromList(str1, i, s)\\n getRegex(s, sub_str, match)\\n if(match, None, \"!=\")\\n return(True)\\n end()\\n endLoop()\\n return(False)\\n}'},\n", - " {'text': 'Write a function in AVAP to count the occurrences of an element in a tuple (list).',\n", - " 'code': 'function count_X(tup, x){\\n count = 0\\n getListLen(tup, length)\\n startLoop(i, 0, length)\\n itemFromList(tup, i, elem)\\n if(elem, x, \"=\")\\n count = count + 1\\n end()\\n endLoop()\\n return(count)\\n}'},\n", 
- " {'text': 'Write a function in AVAP to count positive numbers in a list.',\n", - " 'code': 'function pos_count(list){\\n count = 0\\n getListLen(list, length)\\n startLoop(i, 0, length)\\n itemFromList(list, i, num)\\n if(num, 0, \">=\")\\n count = count + 1\\n end()\\n endLoop()\\n return(count)\\n}'},\n", - " {'text': 'Write a function in AVAP to perform index-wise multiplication of tuple elements in two given tuples (lists of lists).',\n", - " 'code': 'function index_multiplication(test_tup1, test_tup2){\\n result = []\\n getListLen(test_tup1, outer_len)\\n startLoop(i, 0, outer_len)\\n itemFromList(test_tup1, i, tup1)\\n itemFromList(test_tup2, i, tup2)\\n inner_result = []\\n getListLen(tup1, inner_len)\\n startLoop(j, 0, inner_len)\\n itemFromList(tup1, j, a)\\n itemFromList(tup2, j, b)\\n prod = a * b\\n variableToList(prod, inner_result)\\n endLoop()\\n variableToList(inner_result, result)\\n endLoop()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the maximum occurring character in a given string.',\n", - " 'code': 'function get_max_occuring_char(str1){\\n counts = {}\\n n = len(str1)\\n startLoop(i, 0, n)\\n char = str1[i]\\n variableFromJSON(counts, char, cnt)\\n if(cnt, None, \"=\")\\n AddvariableToJSON(char, 1, counts)\\n else()\\n new_cnt = cnt + 1\\n AddvariableToJSON(char, new_cnt, counts)\\n end()\\n endLoop()\\n max_count = 0\\n max_char = \"\"\\n startLoop(i, 0, n)\\n char = str1[i]\\n variableFromJSON(counts, char, cnt)\\n if(cnt, max_count, \">\")\\n max_count = cnt\\n max_char = char\\n end()\\n endLoop()\\n return(max_char)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the n-th Lucas number.',\n", - " 'code': 'function find_lucas(n){\\n if(n, 0, \"=\")\\n return(2)\\n end()\\n if(n, 1, \"=\")\\n return(1)\\n end()\\n a = 2\\n b = 1\\n startLoop(i, 2, n)\\n c = a + b\\n a = b\\n b = c\\n endLoop()\\n return(b)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the maximum sum of increasing 
subsequence from prefix till ith index also including a given kth element which is after i.',\n", - " 'code': 'function max_sum_increasing_subseq(a, n, index, k){\\n // Initialize dp as n x n matrix of zeros\\n dp = []\\n startLoop(i, 0, n)\\n row = []\\n startLoop(j, 0, n)\\n variableToList(0, row)\\n endLoop()\\n variableToList(row, dp)\\n endLoop()\\n // Fill first row\\n itemFromList(a, 0, a0)\\n startLoop(i, 0, n)\\n itemFromList(a, i, ai)\\n if(ai, a0, \">\")\\n dp[0][i] = ai + a0\\n else()\\n dp[0][i] = ai\\n end()\\n endLoop()\\n // Fill rest\\n startLoop(i, 1, n)\\n startLoop(j, 0, n)\\n itemFromList(a, j, aj)\\n itemFromList(a, i, ai)\\n if(aj, ai, \">\")\\n if(j, i, \">\")\\n prev_ii = dp[i - 1][i]\\n prev_ij = dp[i - 1][j]\\n candidate = prev_ii + aj\\n if(candidate, prev_ij, \">\")\\n dp[i][j] = candidate\\n else()\\n dp[i][j] = prev_ij\\n end()\\n else()\\n dp[i][j] = dp[i - 1][j]\\n end()\\n else()\\n dp[i][j] = dp[i - 1][j]\\n end()\\n endLoop()\\n endLoop()\\n result = dp[index][k]\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find uppercase, lowercase, special characters, and numeric values using regex.',\n", - " 'code': 'function find_character(string){\\n upper_pattern = \"[A-Z]\"\\n lower_pattern = \"[a-z]\"\\n num_pattern = \"[0-9]\"\\n special_pattern = \"[, .!?]\"\\n getRegex(string, upper_pattern, uppercase_characters)\\n getRegex(string, lower_pattern, lowercase_characters)\\n getRegex(string, num_pattern, numerical_characters)\\n getRegex(string, special_pattern, special_characters)\\n result = {}\\n AddvariableToJSON(\"uppercase\", uppercase_characters, result)\\n AddvariableToJSON(\"lowercase\", lowercase_characters, result)\\n AddvariableToJSON(\"numerical\", numerical_characters, result)\\n AddvariableToJSON(\"special\", special_characters, result)\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find the top k integers that occur most frequently from given lists of sorted and distinct 
integers.',\n", - " 'code': 'function func(nums, k){\\n counts = {}\\n getListLen(nums, outer_len)\\n startLoop(i, 0, outer_len)\\n itemFromList(nums, i, row)\\n getListLen(row, row_len)\\n startLoop(j, 0, row_len)\\n itemFromList(row, j, elem)\\n variableFromJSON(counts, elem, cnt)\\n if(cnt, None, \"=\")\\n AddvariableToJSON(elem, 1, counts)\\n else()\\n new_cnt = cnt + 1\\n AddvariableToJSON(elem, new_cnt, counts)\\n end()\\n endLoop()\\n endLoop()\\n // Return top k keys by count (simplified: return counts object)\\n addResult(counts)\\n return(counts)\\n}'},\n", - " {'text': 'Write a function in AVAP to convert a given string to lower case.',\n", - " 'code': 'function is_lower(string){\\n result = string.lower()\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to find k number of pairs consisting of one element from the first array and one element from the second array with the smallest sums.',\n", - " 'code': 'function k_smallest_pairs(nums1, nums2, k){\\n pairs = []\\n getListLen(nums1, len1)\\n getListLen(nums2, len2)\\n startLoop(i, 0, len1)\\n startLoop(j, 0, len2)\\n itemFromList(nums1, i, v1)\\n itemFromList(nums2, j, v2)\\n pair = [v1, v2]\\n variableToList(pair, pairs)\\n endLoop()\\n endLoop()\\n // Sort pairs by sum (bubble sort)\\n getListLen(pairs, total_pairs)\\n startLoop(i, 0, total_pairs)\\n startLoop(j, 0, total_pairs)\\n next_j = j + 1\\n if(next_j, total_pairs, \"<\")\\n itemFromList(pairs, j, pA)\\n itemFromList(pairs, next_j, pB)\\n itemFromList(pA, 0, a0)\\n itemFromList(pA, 1, a1)\\n itemFromList(pB, 0, b0)\\n itemFromList(pB, 1, b1)\\n sumA = a0 + a1\\n sumB = b0 + b1\\n if(sumA, sumB, \">\")\\n pairs[j] = pB\\n pairs[next_j] = pA\\n end()\\n end()\\n endLoop()\\n endLoop()\\n result = pairs[0:k]\\n return(result)\\n}'},\n", - " {'text': 'Write a function in AVAP to remove specific words from a given list.',\n", - " 'code': 'function remove_words(list1, removewords){\\n result = []\\n getListLen(list1, length)\\n 
def _strip_code_fence(text: str) -> str:
    """Return ``text`` without a surrounding Markdown code fence.

    Surrounding whitespace is removed *before* looking for the fence, so a
    reply with a leading or trailing newline is still unwrapped. Both
    "```json" and a bare "```" opening fence are accepted; text that is not
    fenced is returned stripped.
    """
    body = text.strip()
    body = body.removeprefix("```json").removeprefix("```")
    body = body.removesuffix("```")
    return body.strip()


def _save_json(records, path) -> None:
    """Serialize ``records`` with ``json.dump`` to ``path``.

    The parent directory is created first, so the write does not fail with
    FileNotFoundError when the output folder does not exist yet.
    """
    from pathlib import Path  # local import: keeps this cell self-contained

    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as f:
        json.dump(records, f)


# --- Save the MBPP-based synthetic dataset ---------------------------------
# `synthetic_data` here is whatever an earlier cell assigned to it.
_save_json(synthetic_data, INTERIM_DIR / 'synthetic_datasets/synthetic_data.json')

# --- Generate dataset without mbpp dataset ---------------------------------
# The AVAP documentation is passed as the system message, followed by
# PROMPT_NO_MBPP (defined in another cell).
llm_response = llm.invoke([SystemMessage(content=avap_docs), PROMPT_NO_MBPP])
print(llm_response.content)

# Unwrap the fenced reply and parse it as JSON.
# NOTE: this rebinds `llm_response` / `synthetic_data` from the MBPP section
# above; run the cells top to bottom so each save writes the intended data.
json_str = _strip_code_fence(llm_response.content)
synthetic_data = json.loads(json_str)

# --- Save the dataset generated without MBPP -------------------------------
_save_json(synthetic_data, INTERIM_DIR / 'synthetic_datasets/synthetic_data_no_mbpp.json')