assistance-engine/scripts/pipelines/samples_generator/generate_mbap_v2.py

885 lines
30 KiB
Python

#!/usr/bin/env python3
"""
AVAP Dataset Generator v2 — MAP-Elites Quality-Diversity Pipeline
==================================================================
View reference
"""
import argparse
import json
import math
import os
import random
import sys
import time
from collections import defaultdict
from itertools import combinations
from pathlib import Path

import anthropic
import requests

from construct_prior import ConstructPrior, AVAP_NODE_NAMES
# Maps every AVAP construct name to the source-text pattern(s) that signal its
# presence.  Used by CellValidator._from_source as a keyword fallback when no
# parser AST is available.  The two if() modes share a prefix and are
# disambiguated separately in _from_source.
AVAP_NODE_TYPES = {
    "addParam": ["addParam("],
    "addResult": ["addResult("],
    "_status": ["_status"],
    "addVar": ["addVar("],
    "getListLen": ["getListLen("],
    "getQueryParamList": ["getQueryParamList("],
    "itemFromList": ["itemFromList("],
    "replace": ["replace("],
    "randomString": ["randomString("],
    "if_mode1": ["if("],
    "if_mode2": ["if(None, None,"],
    "else": ["else()"],
    "end": ["end()"],
    "startLoop": ["startLoop("],
    "endLoop": ["endLoop()"],
    "try": ["try()"],
    "exception": ["exception()"],
    "return": ["return("],
    "go": ["go("],
    "gather": ["gather("],
    "avapConnector": ["avapConnector("],
    "ormCheckTable": ["ormCheckTable("],
    "ormDirect": ["ormDirect("],
    "ormAccessSelect": ["ormAccessSelect("],
    "ormAccessInsert": ["ormAccessInsert("],
    "ormAccessUpdate": ["ormAccessUpdate("],
    "variableFromJSON": ["variableFromJSON("],
    "AddVariableToJSON": ["AddVariableToJSON("],
    "encodeSHA256": ["encodeSHA256("],
    "encodeMD5": ["encodeMD5("],
    "getTimeStamp": ["getTimeStamp("],
    "getDateTime": ["getDateTime("],
    "stampToDatetime": ["stampToDatetime("],
    "RequestGet": ["RequestGet("],
    "RequestPost": ["RequestPost("],
    "function": ["function "],
    "import": ["import "],
    "include": ["include("],
}
# Canonical construct-name list, shared with construct_prior so both modules
# agree on the behavior-space axes.
NODE_TYPE_NAMES = AVAP_NODE_NAMES
# Default minimum prior weight assigned to tail (rarely-seen) cells.
_PRIOR_EPSILON = 0.05
class CellValidator:
def __init__(self, parser_url: str, parser_timeout: int = 5):
self.parser_url = parser_url.rstrip("/")
self.parser_timeout = parser_timeout
self._parser_available = True
def parse(self, code: str) -> tuple[bool, dict, str]:
if not self._parser_available:
return None, {}, "parser_unavailable"
try:
resp = requests.post(
f"{self.parser_url}/parse",
json={"code": code},
timeout=self.parser_timeout,
)
data = resp.json()
if data.get("valid", False):
return True, data.get("ast", {}), ""
return False, {}, data.get("error", "parse error")
except requests.exceptions.ConnectionError:
self._parser_available = False
return None, {}, "parser_unavailable"
except Exception as e:
return False, {}, str(e)
def detect_constructs(self, code: str, ast: dict) -> set:
if ast:
return self._from_ast(ast)
return self._from_source(code)
def _from_ast(self, ast: dict) -> set:
found = set()
if isinstance(ast, dict):
if "type" in ast:
found.add(ast["type"])
for v in ast.values():
found |= self._from_ast(v)
elif isinstance(ast, list):
for item in ast:
found |= self._from_ast(item)
return found
def _from_source(self, code: str) -> set:
found = set()
if "if(None, None," in code:
found.add("if_mode2")
elif "if(" in code:
found.add("if_mode1")
for name, patterns in AVAP_NODE_TYPES.items():
if name in ("if_mode1", "if_mode2"):
continue # already handled
for pat in patterns:
if pat in code:
found.add(name)
break
return found
def cell_quality(
self,
code: str,
ast: dict,
test_list: list,
cell: frozenset,
alpha: float = 0.3,
beta: float = 0.2,
gamma: float = 0.1,
) -> tuple[float, dict]:
detected = self.detect_constructs(code, ast)
all_types = set(NODE_TYPE_NAMES)
cell_constructs = set(cell)
present_required = cell_constructs & detected
fidelity = len(present_required) / max(len(cell_constructs), 1)
extra = detected - cell_constructs
bonus_ratio = len(extra) / max(len(all_types) - len(cell_constructs), 1)
tq = sum(
1 for t in test_list
if isinstance(t, str) and "re.match(" in t and len(t.strip()) > 10
) / max(len(test_list), 1)
lines = [l.strip() for l in code.split("\n") if l.strip()]
richness = min(len(lines) / 30.0, 1.0) # cap at 30 lines = 1.0
quality = fidelity + alpha * bonus_ratio + beta * tq + gamma * richness
return quality, {
"fidelity": round(fidelity, 3),
"bonus_ratio": round(bonus_ratio, 3),
"test_quality": round(tq, 3),
"richness": round(richness, 3),
"quality": round(quality, 3),
"detected": sorted(detected),
"cell": sorted(cell),
"extra": sorted(extra),
}
class CoverageMap:
    """MAP-Elites archive: one elite example per construct-combination cell."""

    def __init__(self, cell_size: int = 3):
        self.cell_size = cell_size
        # cell -> (example, quality, components)
        self._map: dict[frozenset, tuple[dict, float, dict]] = {}
        self._attempts: dict[frozenset, int] = defaultdict(int)
        self._all_cells = self._build_cells()

    def _build_cells(self) -> list[frozenset]:
        """Enumerate every construct pair (and trio, if cell_size >= 3)."""
        return [
            frozenset(combo)
            for size in range(2, self.cell_size + 1)
            for combo in combinations(NODE_TYPE_NAMES, size)
        ]

    @property
    def total_cells(self) -> int:
        return len(self._all_cells)

    @property
    def filled_cells(self) -> int:
        return len(self._map)

    @property
    def fill_rate(self) -> float:
        return self.filled_cells / max(self.total_cells, 1)

    def update(
        self,
        cell: frozenset,
        example: dict,
        quality: float,
        components: dict,
    ) -> bool:
        """Record an attempt; keep `example` if it beats the incumbent elite."""
        self._attempts[cell] += 1
        incumbent = self._map.get(cell)
        if incumbent is not None and quality <= incumbent[1]:
            return False
        self._map[cell] = (example, quality, components)
        return True

    def get_empty_cells(self) -> list[frozenset]:
        """Cells with no elite yet."""
        filled = self._map
        return [cell for cell in self._all_cells if cell not in filled]

    def get_low_quality_cells(self, threshold: float = 0.7) -> list[frozenset]:
        """Filled cells whose elite quality is below `threshold`."""
        return [cell for cell, entry in self._map.items() if entry[1] < threshold]

    def get_example(self, cell: frozenset) -> dict | None:
        """Elite example for `cell`, or None if the cell is empty."""
        entry = self._map.get(cell)
        if entry is None:
            return None
        return entry[0]

    def all_examples(self) -> list[dict]:
        """All current elites, in map order."""
        return [entry[0] for entry in self._map.values()]

    def node_type_frequency(self) -> dict[str, int]:
        """How many filled cells each construct participates in."""
        counts: dict[str, int] = defaultdict(int)
        for cell in self._map:
            for construct in cell:
                counts[construct] += 1
        return dict(counts)

    def distribution_entropy(self) -> float:
        """Shannon entropy (bits) of the construct-frequency distribution."""
        freq = self.node_type_frequency()
        total = sum(freq.values())
        if not total:
            return 0.0
        h = -sum(
            (count / total) * math.log2(count / total)
            for count in freq.values()
            if count > 0
        )
        return round(h, 3)

    def fill_summary(self) -> str:
        """One-line human-readable snapshot of map state."""
        empty = len(self.get_empty_cells())
        low = len(self.get_low_quality_cells())
        entropy = self.distribution_entropy()
        return (
            f"Cells: {self.filled_cells}/{self.total_cells} filled "
            f"({100*self.fill_rate:.1f}%) | "
            f"Low quality: {low} | "
            f"Empty: {empty} | "
            f"Entropy: {entropy:.2f} bits"
        )
class CellSelector:
    """Choose the next target cell: empty first, then low-quality, then UCB."""

    def __init__(
        self,
        coverage_map: CoverageMap,
        quality_threshold: float = 0.80,
        ucb_c: float = 1.0,
    ):
        self.map = coverage_map
        self.quality_threshold = quality_threshold
        self.ucb_c = ucb_c
        self._total_calls = 0
        # Fix: `import random` used to execute inside __init__ on every
        # instantiation; it now lives at module level.  Seeded RNG keeps
        # cell selection reproducible across runs.
        self._rng = random.Random(42)

    def select(self) -> frozenset:
        """Return the next cell to attempt.

        Priority: a random empty cell, else a random below-threshold cell,
        else the UCB-best cell.
        """
        self._total_calls += 1
        empty = self.map.get_empty_cells()
        if empty:
            return self._rng.choice(empty)
        low = self.map.get_low_quality_cells(self.quality_threshold)
        if low:
            return self._rng.choice(low)
        return self._ucb_select()

    def _ucb_select(self) -> frozenset:
        """UCB1 over all cells: quality + c * sqrt(ln(total) / attempts)."""
        best_cell = None
        best_score = -float("inf")
        total = max(self._total_calls, 1)
        for cell in self.map._all_cells:
            attempts = max(self.map._attempts.get(cell, 0), 1)
            entry = self.map._map.get(cell)
            quality = entry[1] if entry else 0.0
            score = quality + self.ucb_c * math.sqrt(math.log(total) / attempts)
            if score > best_score:
                best_score = score
                best_cell = cell
        return best_cell
class CellSelectorPrior(CellSelector):
    """Cell selector that biases exploration by a production-code prior."""

    def __init__(
        self,
        coverage_map: CoverageMap,
        prior: ConstructPrior,
        quality_threshold: float = 0.80,
        ucb_c: float = 1.0,
        phase3_threshold: float = 0.70,
    ):
        super().__init__(coverage_map, quality_threshold, ucb_c)
        self.prior = prior
        self.phase3_threshold = phase3_threshold
        # Phase-3 (tail-focus) scaffolding; stored but not consulted in the
        # selection logic below.
        self._tail_cells: set[frozenset] = set()
        self._phase3_active = False

    def select(self) -> frozenset:
        """Pick the next cell, weighting empty cells by the prior and using
        prior-scaled UCB once the map is full."""
        self._total_calls += 1
        empty = self.map.get_empty_cells()
        if empty:
            # Prefer empty cells whose prior weight is clearly above the floor.
            cutoff = self.prior.epsilon * 1.5
            preferred = [c for c in empty if self.prior.cell_weight(c) > cutoff]
            return self._weighted_sample(preferred or empty)
        low = self.map.get_low_quality_cells(self.quality_threshold)
        if low:
            return self._ucb_prior_select(low)
        return self._ucb_prior_select(self.map._all_cells)

    def _weighted_sample(self, cells: list[frozenset]) -> frozenset:
        """Sample one cell proportionally to its prior weight."""
        weights = [self.prior.cell_weight(c) for c in cells]
        mass = sum(weights)
        if mass == 0:
            return self._rng.choice(cells)
        target = self._rng.random() * mass
        running = 0.0
        for cell, weight in zip(cells, weights):
            running += weight
            if target <= running:
                return cell
        # Floating-point slack: fall back to the final cell.
        return cells[-1]

    def _ucb_prior_select(self, cells) -> frozenset:
        """UCB1 scaled by prior weight: prior * (quality + exploration)."""
        total = max(self._total_calls, 1)
        best_cell = None
        best_score = -float("inf")
        for cell in cells:
            attempts = max(self.map._attempts.get(cell, 0), 1)
            entry = self.map._map.get(cell)
            quality = 0.0 if entry is None else entry[1]
            exploration = self.ucb_c * math.sqrt(math.log(total) / attempts)
            score = self.prior.cell_weight(cell) * (quality + exploration)
            if score > best_score:
                best_score = score
                best_cell = cell
        return best_cell
# System prompt sent with every generation request (Spanish — the dataset
# targets Spanish problem statements).  The JSON skeleton below is part of
# the runtime prompt text, not documentation: do not reformat it.
SYSTEM_PROMPT = """Eres un experto en el lenguaje AVAP.
Se te proporciona el Language Reference Manual (LRM) completo de AVAP.
Tu tarea es generar UN problema de benchmark estilo MBPP para evaluar
modelos de lenguaje en su capacidad de generar código AVAP correcto.
REGLAS ESTRICTAS para el código AVAP generado:
1. Una instrucción por línea. EOL es el terminador absoluto.
2. Sin indentación significativa (es solo decorativa).
3. Bloques: if()...else()...end(), startLoop()...endLoop(), try()...exception()...end()
4. Funciones: function name(args) { ... return(val) }
5. if() Modo 1: if(var_o_literal, var_o_literal, "operador")
6. if() Modo 2: if(None, None, `expresion_completa_como_string`)
7. _status se asigna con: addVar(_status, 404)
8. ormAccessSelect firma: ormAccessSelect(campos, "tabla", selector, varTarget)
9. ormCheckTable firma: ormCheckTable(nombre_tabla, varTarget)
10. ormDirect firma: ormDirect("SELECT ... %s" % var, varTarget)
11. getQueryParamList firma: getQueryParamList(param_name, varTarget)
12. NUNCA uses registerEndpoint(), NUNCA uses mainHandler().
13. El código se ejecuta DIRECTAMENTE, línea a línea.
FORMATO DE SALIDA: responde ÚNICAMENTE con UN objeto JSON válido (no array).
Sin texto adicional, sin bloques de código markdown.
{
"task_id": 1,
"text": "<enunciado del problema en español>",
"code": "<código AVAP con saltos de línea como \\n>",
"test_inputs": { "<param1>": <valor1> },
"test_list": ["re.match(r'<patrón>', <variable>)", ...]
}
test_list: USA ÚNICAMENTE re.match(). NUNCA comparaciones directas (==, !=).
"""
def build_cell_prompt(
lrm: str,
cell: frozenset,
existing_example: dict | None,
map_summary: str,
) -> str:
constructs_list = ", ".join(f"`{c}`" for c in sorted(cell))
improvement_note = ""
if existing_example:
improvement_note = f"""
El siguiente ejemplo YA existe para esta combinación con calidad mejorable.
Genera algo DISTINTO y MÁS COMPLEJO que lo supere:
```
{existing_example.get('code', '')}
```
"""
return f"""# LRM AVAP — Language Reference Manual
{lrm}
---
# ESTADO DEL MAPA DE COBERTURA
{map_summary}
---
# TAREA — ESPECIFICACIÓN OBLIGATORIA
Genera UN ejemplo AVAP que use OBLIGATORIAMENTE TODOS estos constructs:
**{constructs_list}**
El ejemplo DEBE contener todos los constructs listados arriba.
Si tu código no los usa todos, la tarea fracasa.
Adicionalmente:
- Combina los constructs requeridos en un escenario realista de microservicio HTTP
- Añade constructs adicionales donde sea natural (aumenta la puntuación)
- Código complejo y rico — no ejemplos triviales de 3 líneas
- 2-3 aserciones re.match() en test_list
{improvement_note}
Responde ÚNICAMENTE con el objeto JSON. Sin texto antes ni después.
"""
def call_api(
    client: anthropic.Anthropic,
    lrm: str,
    cell: frozenset,
    task_id: int,
    existing_example: dict | None,
    map_summary: str,
    retries: int = 3,
    model: str = "claude-sonnet-4-20250514",
) -> dict | None:
    """Generate one benchmark problem for `cell` via the Anthropic API.

    Retries on JSON/shape failures (exponential backoff), rate limits
    (linear backoff), and API errors (fixed backoff).  Returns the problem
    dict with `task_id` injected and `test_inputs` defaulted, or None after
    all retries fail.

    Generalisation: the model id is now a keyword parameter (default
    unchanged) instead of a hard-coded literal, so callers can target other
    models without editing this function.
    """
    for attempt in range(1, retries + 1):
        try:
            message = client.messages.create(
                model=model,
                max_tokens=4000,
                system=SYSTEM_PROMPT,
                messages=[{
                    "role": "user",
                    "content": build_cell_prompt(lrm, cell, existing_example, map_summary),
                }],
            )
            raw = message.content[0].text.strip()
            # Strip a markdown code fence if the model wrapped the JSON anyway.
            if raw.startswith("```"):
                lines = raw.splitlines()
                raw = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
            problem = json.loads(raw)
            if not isinstance(problem, dict):
                raise ValueError("Response is not a JSON object")
            for field in ("text", "code", "test_list"):
                if field not in problem:
                    raise ValueError(f"Missing field '{field}'")
            if "test_inputs" not in problem:
                problem["test_inputs"] = {}
            problem["task_id"] = task_id
            return problem
        except (json.JSONDecodeError, ValueError) as e:
            print(f"\n Attempt {attempt}/{retries} — parse error: {e}")
            if attempt < retries:
                time.sleep(2 ** attempt)
        except anthropic.RateLimitError:
            wait = 30 * attempt
            print(f"\n Rate limit — waiting {wait}s...")
            time.sleep(wait)
        except anthropic.APIError as e:
            print(f"\n API error at attempt {attempt}: {e}")
            if attempt < retries:
                time.sleep(5)
    return None
def run_map_elites(args, client, lrm, output_path):
    """Candidate E: fill a uniform MAP-Elites coverage map via LLM generation.

    Loops until `args.problems` examples are collected or the API-call budget
    (4x the target) is exhausted.  Returns
    (dataset, coverage_map, valid_count, call_count).

    Fixes vs. previous revision: the separator printed `"" * 65` (a blank
    line); the checkpoint fired on every iteration while len(dataset) was 0
    or stuck at a multiple of 50; unused `cell_updates`/`entropy` locals
    removed.
    """
    validator = CellValidator(parser_url=args.parser)
    cmap = CoverageMap(cell_size=args.cell_size)
    selector = CellSelector(cmap, quality_threshold=args.quality_threshold)
    dataset = []
    task_id = 1
    call_count = 0
    valid_count = 0
    print(f"\n MAP-Elites mode | cells: {cmap.total_cells} | target: {args.problems} examples")
    print(f" Cell size: {args.cell_size} | Quality threshold: {args.quality_threshold}")
    print("─" * 65)
    # Hard budget so a stuck map cannot loop forever.
    max_calls = args.problems * 4
    while len(dataset) < args.problems and call_count < max_calls:
        cell = selector.select()
        existing = cmap.get_example(cell)
        call_count += 1
        print(
            f" [{call_count:04d}] Cell {sorted(cell)} "
            f"| filled={cmap.filled_cells}/{cmap.total_cells} "
            f"| dataset={len(dataset)} ... ",
            end="", flush=True,
        )
        problem = call_api(
            client, lrm, cell, task_id,
            existing_example=existing,
            map_summary=cmap.fill_summary(),
        )
        if problem is None:
            print("SKIP (generation failed)")
            continue
        code = problem["code"]
        test_list = problem.get("test_list", [])
        is_valid, ast, error_msg = validator.parse(code)
        if is_valid is None:
            # Parser unreachable: accept the code and rely on keyword scan.
            is_valid, ast = True, {}
            if call_count == 1:
                print("\n Parser unavailable — using keyword fallback", flush=True)
        if is_valid is False:
            print(f"INVALID ({error_msg[:40]})")
            problem["_validation"] = {"valid": False, "error": error_msg}
            continue
        valid_count += 1
        # Compute cell quality
        quality, components = validator.cell_quality(
            code, ast, test_list, cell,
            alpha=args.alpha, beta=args.beta, gamma=args.gamma,
        )
        problem["_cell"] = sorted(cell)
        problem["_quality"] = components
        if components["fidelity"] < 1.0:
            # Reject: the example does not use every required construct.
            missing = set(cell) - set(components["detected"])
            print(f"MISSING constructs: {sorted(missing)}")
            continue
        updated = cmap.update(cell, problem, quality, components)
        if updated:
            dataset.append(problem)
            task_id += 1
        print(
            f"OK quality={quality:.3f} "
            f"fidelity={components['fidelity']:.2f} "
            f"extra={len(components['extra'])}"
        )
        # Checkpoint every 50 accepted examples (only when the dataset grew,
        # otherwise 0 % 50 == 0 would fire on every early iteration).
        if updated and len(dataset) % 50 == 0:
            _save(dataset, output_path, cmap)
            freq = cmap.node_type_frequency()
            print("\n ── Checkpoint ──────────────────────────────────")
            print(f" Dataset: {len(dataset)} | Valid: {valid_count}/{call_count}")
            print(f" {cmap.fill_summary()}")
            print(f" Top-5 most frequent: {sorted(freq, key=freq.get, reverse=True)[:5]}")
            print(f" Top-5 least frequent: {sorted(freq, key=freq.get)[:5]}")
            print(" ────────────────────────────────────────────────\n")
        time.sleep(0.5)
    _save(dataset, output_path, cmap)
    return dataset, cmap, valid_count, call_count
def run_map_elites_prior(args, client, lrm, output_path):
    """Candidate F: MAP-Elites with cell selection biased by a ConstructPrior.

    Loads the prior from `args.prior_map` (falling back to the static prior
    with a warning), then runs the same generate/validate/insert loop as
    run_map_elites, tracking prior weight and KL divergence.  Returns
    (dataset, coverage_map, valid_count, call_count, prior).

    Fixes vs. previous revision: the separator printed `"" * 65` (a blank
    line); the checkpoint fired on every iteration while len(dataset) was 0
    or stuck at a multiple of 50; unused `cell_updates`/`entropy` locals and
    placeholder-free f-prefixes removed.
    """
    print("\n Loading ConstructPrior...", flush=True)
    prior_map = getattr(args, "prior_map", "construct_map.yaml")
    epsilon = getattr(args, "prior_epsilon", _PRIOR_EPSILON)
    yaml_path = Path(prior_map)
    if yaml_path.exists():
        prior = ConstructPrior.from_yaml(yaml_path, epsilon=epsilon)
    else:
        # Fallback: yaml not found — use static prior and warn
        print(f" [WARN] construct_map.yaml not found at '{yaml_path}'.")
        print(" [WARN] Using static fallback prior. Generate the real prior with:")
        print(" [WARN] python construct_prior.py --generate-map --github-token TOKEN")
        prior = ConstructPrior.from_static_fallback(epsilon=epsilon)
    print(f" {prior.coverage_summary()}")
    validator = CellValidator(parser_url=args.parser)
    cmap = CoverageMap(cell_size=args.cell_size)
    selector = CellSelectorPrior(
        cmap, prior,
        quality_threshold=args.quality_threshold,
        phase3_threshold=getattr(args, "prior_phase3_threshold", 0.70),
    )
    dataset = []
    task_id = 1
    call_count = 0
    valid_count = 0
    print(f"\n MAP-Elites+Prior mode | cells: {cmap.total_cells} | target: {args.problems} examples")
    print(f" Cell size: {args.cell_size} | Quality threshold: {args.quality_threshold}")
    print("─" * 65)
    # Hard budget so a stuck map cannot loop forever.
    max_calls = args.problems * 4
    while len(dataset) < args.problems and call_count < max_calls:
        cell = selector.select()
        existing = cmap.get_example(cell)
        prior_w = prior.cell_weight(cell)
        call_count += 1
        print(
            f" [{call_count:04d}] Cell {sorted(cell)} "
            f"| prior={prior_w:.3f} "
            f"| filled={cmap.filled_cells}/{cmap.total_cells} "
            f"| dataset={len(dataset)} ... ",
            end="", flush=True,
        )
        problem = call_api(
            client, lrm, cell, task_id,
            existing_example=existing,
            map_summary=cmap.fill_summary(),
        )
        if problem is None:
            print("SKIP (generation failed)")
            continue
        code = problem["code"]
        test_list = problem.get("test_list", [])
        is_valid, ast, error_msg = validator.parse(code)
        if is_valid is None:
            # Parser unreachable: accept the code and rely on keyword scan.
            is_valid, ast = True, {}
            if call_count == 1:
                print("\n Parser unavailable — using keyword fallback", flush=True)
        if is_valid is False:
            print(f"INVALID ({error_msg[:40]})")
            problem["_validation"] = {"valid": False, "error": error_msg}
            continue
        valid_count += 1
        quality, components = validator.cell_quality(
            code, ast, test_list, cell,
            alpha=args.alpha, beta=args.beta, gamma=args.gamma,
        )
        problem["_cell"] = sorted(cell)
        problem["_prior_weight"] = round(prior_w, 4)
        problem["_quality"] = components
        if components["fidelity"] < 1.0:
            # Reject: the example does not use every required construct.
            missing = set(cell) - set(components["detected"])
            print(f"MISSING constructs: {sorted(missing)}")
            continue
        updated = cmap.update(cell, problem, quality, components)
        if updated:
            dataset.append(problem)
            task_id += 1
        print(
            f"OK quality={quality:.3f} "
            f"fidelity={components['fidelity']:.2f} "
            f"prior={prior_w:.3f} "
            f"extra={len(components['extra'])}"
        )
        # Checkpoint every 50 accepted examples (only when the dataset grew,
        # otherwise 0 % 50 == 0 would fire on every early iteration).
        if updated and len(dataset) % 50 == 0:
            _save(dataset, output_path, cmap, prior=prior)
            freq = cmap.node_type_frequency()
            kl = prior.kl_divergence(freq)
            print("\n ── Checkpoint ──────────────────────────────────")
            print(f" Dataset: {len(dataset)} | Valid: {valid_count}/{call_count}")
            print(f" {cmap.fill_summary()}")
            print(f" KL(dataset ‖ prior): {kl:.4f} (lower = closer to production patterns)")
            print(f" Top-5 most frequent: {sorted(freq, key=freq.get, reverse=True)[:5]}")
            print(f" Top-5 least frequent: {sorted(freq, key=freq.get)[:5]}")
            print(" ────────────────────────────────────────────────\n")
        time.sleep(0.5)
    _save(dataset, output_path, cmap, prior=prior)
    return dataset, cmap, valid_count, call_count, prior
def _save(dataset: list, path: Path, cmap: CoverageMap, prior: ConstructPrior | None = None):
    """Write `dataset` as JSON to `path` plus a *_coverage_stats.json sidecar.

    The sidecar records fill/quality statistics from `cmap`; when `prior` is
    provided, the KL divergence of the dataset's construct frequencies versus
    the prior is included as well.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(dataset, f, ensure_ascii=False, indent=2)
    # Save coverage map statistics alongside dataset
    stats_path = path.with_name(path.stem + "_coverage_stats.json")
    freq = cmap.node_type_frequency()
    stats = {
        "total_cells": cmap.total_cells,
        "filled_cells": cmap.filled_cells,
        "fill_rate": round(cmap.fill_rate, 4),
        "distribution_entropy": cmap.distribution_entropy(),
        "node_type_frequency": freq,
        "low_quality_cells": len(cmap.get_low_quality_cells()),
        "empty_cells": len(cmap.get_empty_cells()),
    }
    if prior is not None:
        stats["kl_divergence_dataset_vs_prior"] = prior.kl_divergence(freq)
        stats["prior_summary"] = prior.coverage_summary()
    with open(stats_path, "w", encoding="utf-8") as f:
        json.dump(stats, f, ensure_ascii=False, indent=2)
def main():
    """CLI entry point: parse arguments, load the LRM, dispatch to the chosen
    pipeline, and print a final coverage report."""
    parser = argparse.ArgumentParser(
        description="AVAP Dataset Generator v2 — MAP-Elites Quality-Diversity Pipeline"
    )
    parser.add_argument("--lrm", default="avap.md")
    parser.add_argument("--output", default="output/mbpp_avap_v2.json")
    parser.add_argument("--problems", type=int, default=5000)
    parser.add_argument("--parser", default="http://localhost:8080",
                        help="AVAP parser URL")
    parser.add_argument("--cell-size", type=int, default=3,
                        help="Max constructs per cell: 2=pairs, 3=pairs+trios (default: 3)")
    parser.add_argument("--quality-threshold", type=float, default=0.80,
                        help="Min quality to consider a cell 'good' (default: 0.80)")
    # alpha/beta/gamma are the cell-quality mixing weights
    # (see CellValidator.cell_quality).
    parser.add_argument("--alpha", type=float, default=0.30,
                        help="Weight for bonus constructs in cell quality (default: 0.30)")
    parser.add_argument("--beta", type=float, default=0.20,
                        help="Weight for test quality in cell quality (default: 0.20)")
    parser.add_argument("--gamma", type=float, default=0.10,
                        help="Weight for code richness in cell quality (default: 0.10)")
    parser.add_argument(
        "--mode",
        choices=["map-elites-prior", "map-elites", "reward"],
        default="map-elites-prior",
        help=(
            "map-elites-prior: Candidate F — MAP-Elites + ConstructPrior (default)\n"
            "map-elites: Candidate E — MAP-Elites, uniform cell weighting\n"
            "reward: Candidate A — CW-Reward pool (comparison baseline)"
        ),
    )
    parser.add_argument(
        "--prior-map",
        default="construct_map.yaml",
        metavar="FILE",
        help=(
            "Path to construct_map.yaml generated by construct_prior.py.\n"
            "Generate it first: python construct_prior.py --generate-map\n"
            "Default: construct_map.yaml (in current directory)"
        ),
    )
    parser.add_argument(
        "--prior-epsilon",
        type=float,
        default=_PRIOR_EPSILON,
        help=f"Minimum prior weight for tail cells (default: {_PRIOR_EPSILON})",
    )
    parser.add_argument(
        "--prior-phase3-threshold",
        type=float,
        default=0.70,
        help=(
            "Quality threshold above which Phase 2 ends and tail (low-prior) "
            "cells become the focus. Default: 0.70"
        ),
    )
    parser.add_argument("--api-key", default=None)
    args = parser.parse_args()
    # The --api-key flag takes precedence over the environment variable.
    api_key = args.api_key or os.environ.get("ANTHROPIC_API_KEY")
    if not api_key:
        sys.exit("ERROR: ANTHROPIC_API_KEY not set.")
    lrm_path = Path(args.lrm)
    if not lrm_path.exists():
        sys.exit(f"ERROR: LRM '{lrm_path}' not found.")
    lrm = lrm_path.read_text(encoding="utf-8")
    output_path = Path(args.output)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    client = anthropic.Anthropic(api_key=api_key)
    mode_label = {
        "map-elites-prior": "Candidate F — MAP-Elites + ConstructPrior",
        "map-elites": "Candidate E — MAP-Elites (uniform)",
        "reward": "Candidate A — CW-Reward pool",
    }[args.mode]
    # Banner with the effective configuration.
    print("=" * 65)
    print(" AVAP Dataset Generator v2 — MAP-Elites Pipeline")
    print("=" * 65)
    print(f" Mode : {mode_label}")
    print(f" LRM : {lrm_path}")
    print(f" Output : {output_path}")
    print(f" Target examples: {args.problems}")
    print(f" Parser URL : {args.parser}")
    print(f" Cell size : {args.cell_size}")
    print(f" Quality thresh : {args.quality_threshold}")
    if args.mode == "map-elites-prior":
        yaml_exists = Path(args.prior_map).exists()
        print(f" Prior map : {args.prior_map} ({'✓ found' if yaml_exists else '✗ not found — will use static fallback'})")
        print(f" Prior epsilon : {args.prior_epsilon}")
    print("=" * 65)
    # Dispatch: the prior mode additionally returns the loaded prior so the
    # final report can print KL divergence.
    prior = None
    if args.mode == "map-elites-prior":
        result = run_map_elites_prior(args, client, lrm, output_path)
        dataset, cmap, valid_count, call_count, prior = result
    elif args.mode == "map-elites":
        dataset, cmap, valid_count, call_count = run_map_elites(args, client, lrm, output_path)
    else:
        sys.exit("ERROR: --mode reward (Candidate A) is not yet implemented in v2. "
                 "Use generate_mbap.py for the v1 reward baseline.")
    # Final report
    freq = cmap.node_type_frequency()
    entropy = cmap.distribution_entropy()
    print("\n" + "=" * 65)
    print(" Pipeline complete")
    print(f" Mode : {mode_label}")
    print(f" Total API calls : {call_count}")
    print(f" Valid examples : {valid_count} ({100*valid_count/max(call_count,1):.1f}%)")
    print(f" Dataset size : {len(dataset)}")
    print(f" {cmap.fill_summary()}")
    print(f" Distribution entropy : {entropy:.3f} bits (max={math.log2(len(NODE_TYPE_NAMES)):.2f})")
    if prior is not None:
        kl = prior.kl_divergence(freq)
        print(f" KL(dataset ‖ prior) : {kl:.4f} (0 = perfect alignment with production code)")
    print(f" Most covered : {sorted(freq, key=freq.get, reverse=True)[:5]}")
    print(f" Least covered : {sorted(freq, key=freq.get)[:5]}")
    print(f" Output : {output_path}")
    print("=" * 65)


if __name__ == "__main__":
    main()