assistance-engine/scripts/pipelines/flows/validate_synthetic_dataset.py

50 lines
1.4 KiB
Python

import typer
from loguru import logger
from scripts.pipelines.tasks.validate import (
load_tasks,
save_validated_tasks,
validate_all_tasks,
)
from src.config import settings
# Typer application object; CLI commands in this module register on it
# through the ``@app.command()`` decorator.
app = typer.Typer()
@app.command()
def validate_synthetic_dataset(
    dataset_path: str = "synthetic_datasets/mbpp_avap_prior.json",
    output_path: str = "synthetic_datasets/validated_mbpp_avap_prior_synthetic_dataset.json",
    api_url: str = settings.parser_url,
    timeout: int = 120,
) -> None:
    """Run the synthetic-dataset validation pipeline end to end.

    Both paths are joined onto ``settings.proj_root``. The tasks are then
    loaded with ``load_tasks``, handed to ``validate_all_tasks`` together
    with the API URL and timeout, and the result is persisted with
    ``save_validated_tasks``. The validation itself (which tasks are kept)
    is decided entirely by those helpers.

    Args:
        dataset_path: Location of the input synthetic dataset JSON file,
            relative to the project root.
        output_path: Location where the validated dataset JSON file is
            written, relative to the project root.
        api_url: URL of the validation API endpoint; defaults to
            ``settings.parser_url``.
        timeout: Timeout, in seconds, forwarded to the validation step.

    Returns:
        None
    """
    # Use fresh local names instead of rebinding the ``str`` parameters to
    # the joined path objects.
    project_root = settings.proj_root
    source_file = project_root / dataset_path
    target_file = project_root / output_path

    # Pipeline: load -> validate -> save.
    save_validated_tasks(
        validate_all_tasks(load_tasks(source_file), api_url, timeout),
        target_file,
    )
if __name__ == "__main__":
    # Script entry point: run the Typer app. Any exception that escapes it is
    # logged with its traceback and then re-raised, so the failure is recorded
    # without being swallowed.
    try:
        app()
    except Exception as error:
        logger.exception(error)
        raise