From 01ce959aabdcf7e3b4717416330f92d9b09eb216 Mon Sep 17 00:00:00 2001 From: acano Date: Thu, 12 Mar 2026 12:31:05 +0100 Subject: [PATCH] refactor: remove unused BNF file generator script --- .../pipelines/flows/bnf_files_generator.py | 244 ------------------ 1 file changed, 244 deletions(-) delete mode 100644 scripts/pipelines/flows/bnf_files_generator.py diff --git a/scripts/pipelines/flows/bnf_files_generator.py b/scripts/pipelines/flows/bnf_files_generator.py deleted file mode 100644 index 81e14e1..0000000 --- a/scripts/pipelines/flows/bnf_files_generator.py +++ /dev/null @@ -1,244 +0,0 @@ -""" -Generator for BNF specification files from AVAP documentation. - -This script extracts BNF specifications from the AVAP Language Reference Manual (LRM) -and generates individual text files for each BNF section. - -Output format: n0X_BNF.txt (where X is the section number) -Default output directory: ingestion/code/BNF/ -Default markdown source: docs/LRM/avap.md - -USAGE EXAMPLES: - -Use default configuration: - python scripts/pipelines/flows/bnf_files_generator.py - -Customize input and output paths: - python scripts/pipelines/flows/bnf_files_generator.py --markdown docs/LRM/avap.md --output ingestion/code - python scripts/pipelines/flows/bnf_files_generator.py -m docs/LRM/avap.md -o ingestion/code - -OPTIONS: - --markdown, -m: Path to the AVAP markdown file (relative to project root) - --output, -o: Output directory for BNF files (relative to project root) -""" - -import re -import typer -from pathlib import Path -from typing import List, Tuple, Optional - -app = typer.Typer() - - -class BNFExtractor: - """Extract BNF specifications from AVAP markdown documentation.""" - - def __init__(self, markdown_file: Path, output_dir: Path): - """ - Initialize BNF extractor. - - Args: - markdown_file: Path to the AVAP markdown file - output_dir: Directory where BNF files will be saved - """ - self.markdown_file = markdown_file - self.output_dir = output_dir - self.bnf_sections: List[Tuple[int, str, str]] = [] - - @staticmethod - def _roman_to_int(roman: str) -> int: - """ - Convert Roman numerals to integers. - - Args: - roman: Roman numeral string (e.g., 'I', 'IV', 'IX', 'XII') - - Returns: - Integer value of the Roman numeral - """ - roman_values = { - 'I': 1, 'V': 5, 'X': 10, 'L': 50, - 'C': 100, 'D': 500, 'M': 1000 - } - total = 0 - prev_value = 0 - - for char in reversed(roman): - value = roman_values.get(char, 0) - if value < prev_value: - total -= value - else: - total += value - prev_value = value - - return total - - def read_markdown_file(self) -> str: - """Read the markdown file content.""" - with open(self.markdown_file, "r", encoding="utf-8") as f: - return f.read() - - def extract_bnf_sections(self, content: str) -> List[Tuple[int, str, str]]: - """ - Extract all BNF specifications from markdown content. - - Pattern: ### Especificación BNF (Sección I) - ```bnf - ... BNF content ... - ``` - - Args: - content: Markdown file content - - Returns: - List of tuples: (section_number, section_title, bnf_content) - """ - bnf_sections = [] - - # Pattern to find BNF specification headers and extract Roman numerals - # Matches: ### Especificación BNF (Sección I), (Sección II), etc. - header_pattern = r"### Especificación BNF \(Sección ([IVXLCDM]+)\)" - - # Find all BNF headers with their positions - for match in re.finditer(header_pattern, content): - roman_numeral = match.group(1) - section_number = self._roman_to_int(roman_numeral) - header_start = match.start() - header_end = match.end() - - # Find the code block after this header - code_block_pattern = r"```bnf\n(.*?)```" - search_start = header_end - - code_match = re.search(code_block_pattern, content[search_start:], re.DOTALL) - - if code_match: - bnf_content = code_match.group(1).strip() - section_title = f"Especificación BNF - Sección {roman_numeral}" - bnf_sections.append((section_number, section_title, bnf_content)) - - self.bnf_sections = bnf_sections - return bnf_sections - - def format_bnf_file_content(self, section_number: int, title: str, bnf_content: str) -> str: - """ - Format BNF content for file output. - - Args: - section_number: Section number (1-9, etc.) - title: Section title - bnf_content: Raw BNF grammar content - - Returns: - BNF content without additional formatting - """ - return bnf_content - - def save_bnf_files(self) -> int: - """ - Save extracted BNF sections to individual files. - - File naming convention: n0X_BNF.txt (e.g., n01_BNF.txt, n02_BNF.txt, etc.) - - Returns: - Number of files created - """ - # Ensure output directory exists - self.output_dir.mkdir(parents=True, exist_ok=True) - - files_created = 0 - - for section_number, title, bnf_content in self.bnf_sections: - # Format filename with zero-padded section number - filename = f"n{section_number:02d}_BNF.txt" - filepath = self.output_dir / filename - - # Format and write file content - formatted_content = self.format_bnf_file_content( - section_number, title, bnf_content - ) - - with open(filepath, "w", encoding="utf-8") as f: - f.write(formatted_content) - - print(f"Created: {filepath}") - files_created += 1 - - return files_created - - def generate(self) -> Tuple[int, List[str]]: - """ - Execute the complete BNF extraction and file generation process. - - Returns: - Tuple of (number_of_files_created, list_of_file_paths) - """ - print(f"Reading markdown file: {self.markdown_file}") - content = self.read_markdown_file() - - print(f"Extracting BNF specifications...") - bnf_sections = self.extract_bnf_sections(content) - - print(f"Found {len(bnf_sections)} BNF sections:") - for section_number, title, _ in bnf_sections: - print(f" - {title}") - - print(f"\nSaving BNF files to: {self.output_dir}") - files_created = self.save_bnf_files() - - # Generate list of created file paths - file_paths = [ - str(self.output_dir / f"n{i:02d}_BNF.txt") - for i, _, _ in bnf_sections - ] - - return files_created, file_paths - - -@app.command() -def main( - markdown_file: str = typer.Option( - "docs/LRM/avap.md", - "--markdown", - "-m", - help="Path to AVAP markdown file (relative to project root)" - ), - output_dir: str = typer.Option( - "ingestion/code/BNF/", - "--output", - "-o", - help="Output directory for BNF files (relative to project root)" - ) -): - """Extract BNF specifications from AVAP documentation. - - Default behavior: - - Reads from: docs/LRM/avap.md - - Writes to: ingestion/code/BNF/ - """ - # Get project root directory (scripts/pipelines/flows -> project root) - script_dir = Path(__file__).parent - project_root = script_dir.parent.parent.parent - - # Convert relative paths to absolute - markdown_path = project_root / markdown_file - output_path = project_root / output_dir - - # Verify markdown file exists - if not markdown_path.exists(): - typer.echo(f"Error: Markdown file not found: {markdown_path}", err=True) - raise typer.Exit(code=1) - - # Create extractor and generate files - extractor = BNFExtractor(markdown_path, output_path) - files_created, file_paths = extractor.generate() - - print(f"\n{'='*80}") - print(f"BNF extraction complete!") - print(f"Total files created: {files_created}") - print(f"Output directory: {output_path}") - print(f"{'='*80}") - - -if __name__ == "__main__": - app()