{
"cells": [
{
"cell_type": "markdown",
"id": "925b048c",
"metadata": {},
"source": [
"# Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "c3215835",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import pandas as pd\n",
"import plotly.graph_objects as go\n",
"import plotly.subplots as sp\n",
"from pathlib import Path\n",
"import numpy as np\n",
"\n",
"\n",
"base_path = Path(\"/home/pseco/VsCodeProjects/assistance-engine/output\")"
]
},
{
"cell_type": "markdown",
"id": "c8024618",
"metadata": {},
"source": [
"# Functions"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e4b843f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "d8b63d88",
"metadata": {},
"source": [
"# Read and Prepare Data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d052f1bc",
"metadata": {},
"outputs": [],
"source": [
"candidates = {}\n",
"\n",
"with open(base_path / \"candidate_E_reward_10_coverage_stats.json\") as f:\n",
" candidates[\"E\"] = json.load(f)\n",
"\n",
"with open(base_path / \"candidate_F_reward_10_coverage_stats.json\") as f:\n",
" candidates[\"F\"] = json.load(f)\n",
"\n",
"with open(base_path / \"mbpp_avap_v2_reward_stats_A.json\") as f:\n",
" candidates[\"A\"] = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "8d6a48f4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Candidate E: 21 unique node types\n",
"Top 5 node types:\n",
" node_type frequency\n",
"11 addResult 3\n",
"4 ormAccessUpdate 2\n",
"3 gather 2\n",
"5 getListLen 2\n",
"0 startLoop 2\n",
"\n",
"\n",
"Candidate F: 18 unique node types\n",
"Top 5 node types:\n",
" node_type frequency\n",
"8 ormAccessSelect 3\n",
"0 startLoop 2\n",
"1 randomString 2\n",
"2 import 2\n",
"4 RequestGet 2\n",
"\n",
"\n",
"Candidate A: 34 unique node types\n",
"Top 5 node types:\n",
" node_type frequency\n",
"0 AddVariableToJSON 10\n",
"2 _status 10\n",
"16 return 10\n",
"3 addParam 10\n",
"4 addResult 10\n",
"\n"
]
}
],
"source": [
"data_for_viz = {}\n",
"\n",
"for candidate_name, stats in candidates.items():\n",
" node_freq = stats.get(\"node_type_frequency\", {})\n",
" \n",
" if node_freq:\n",
" df = pd.DataFrame({\n",
" \"node_type\": list(node_freq.keys()),\n",
" \"frequency\": list(node_freq.values())\n",
" }).sort_values(\"frequency\", ascending=False)\n",
" else:\n",
" df = pd.DataFrame({\n",
" \"node_type\": [],\n",
" \"frequency\": []\n",
" })\n",
" \n",
" data_for_viz[candidate_name] = {\n",
" \"dataframe\": df,\n",
" \"entropy\": stats.get(\"distribution_entropy\", 0),\n",
" \"total_nodes\": len(node_freq)\n",
" }\n",
" \n",
" print(f\"\\nCandidate {candidate_name}: {len(df)} unique node types\")\n",
" if len(df) > 0:\n",
" print(f\"Top 5 node types:\\n{df.head()}\\n\")"
]
},
{
"cell_type": "markdown",
"id": "399a5931",
"metadata": {},
"source": [
"# Analysis"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "aa3a031e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"==================================================\n",
"Candidate E\n",
"==================================================\n",
"Distribution Entropy: 4.2800\n",
"Total Node Types: 21\n",
"Total Cells: 9139\n",
"Filled Cells: 10\n",
"Fill Rate: 0.1100%\n",
"\n",
"==================================================\n",
"Candidate F\n",
"==================================================\n",
"Distribution Entropy: 4.0600\n",
"Total Node Types: 18\n",
"Total Cells: 9139\n",
"Filled Cells: 10\n",
"Fill Rate: 0.1100%\n",
"\n",
"==================================================\n",
"Candidate A\n",
"==================================================\n",
"Distribution Entropy: 4.9590\n",
"Total Node Types: 34\n",
"Total Cells: 0\n",
"Filled Cells: 0\n",
"Fill Rate: 0.0000%\n"
]
}
],
"source": [
"for candidate_name, data in candidates.items():\n",
" entropy = data.get(\"distribution_entropy\", 0)\n",
" node_count = len(data.get(\"node_type_frequency\", {}))\n",
" total_cells = data.get(\"total_cells\", 0)\n",
" filled_cells = data.get(\"filled_cells\", 0)\n",
" fill_rate = data.get(\"fill_rate\", 0)\n",
" \n",
" print(f\"\\n{'='*50}\")\n",
" print(f\"Candidate {candidate_name}\")\n",
" print(f\"{'='*50}\")\n",
" print(f\"Distribution Entropy: {entropy:.4f}\")\n",
" print(f\"Total Node Types: {node_count}\")\n",
" print(f\"Total Cells: {total_cells}\")\n",
" print(f\"Filled Cells: {filled_cells}\")\n",
" print(f\"Fill Rate: {fill_rate:.4%}\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "725d9cd2",
"metadata": {},
"outputs": [],
"source": [
"colors = {\"A\": \"#1f77b4\", \"E\": \"#ff7f0e\", \"F\": \"#2ca02c\"}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "fbdc11d2",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"marker": {
"color": "#1f77b4"
},
"text": [
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
9,
9,
9,
9,
9,
7,
4,
4,
4,
4,
4,
4,
4,
4,
4,
3,
3,
3,
3
],
"textposition": "auto",
"type": "bar",
"x": [
"AddVariableToJSON",
"_status",
"return",
"addParam",
"addResult",
"addVar",
"else",
"end",
"function",
"gather",
"variableFromJSON",
"getListLen",
"if_mode2",
"itemFromList",
"try",
"ormAccessInsert",
"endLoop",
"startLoop",
"avapConnector",
"getDateTime",
"import",
"RequestGet",
"replace",
"getQueryParamList",
"encodeSHA256",
"encodeMD5",
"getTimeStamp",
"ormAccessSelect",
"ormCheckTable",
"randomString",
"ormAccessUpdate",
"RequestPost",
"ormDirect",
"stampToDatetime"
],
"y": [
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
10,
9,
9,
9,
9,
9,
7,
4,
4,
4,
4,
4,
4,
4,
4,
4,
3,
3,
3,
3
]
}
],
"layout": {
"height": 400,
"hovermode": "x unified",
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"fillpattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Candidate A - Node Type Distribution
(Distribution Entropy: 4.9590, Total Node Types: 34)"
},
"xaxis": {
"tickangle": 45,
"title": {
"text": "Node Types"
}
},
"yaxis": {
"title": {
"text": "Frequency"
}
}
}
},
"text/html": [
"