refactor: remove unused BNF file generator script

This commit is contained in:
acano 2026-03-12 12:31:05 +01:00
parent 3463fb05e8
commit 01ce959aab
1 changed file with 0 additions and 244 deletions

View File

@@ -1,244 +0,0 @@
"""
Generator for BNF specification files from AVAP documentation.

This script extracts BNF specifications from the AVAP Language Reference Manual (LRM)
and generates an individual text file for each BNF section.

Output format: nXX_BNF.txt (where XX is the zero-padded, two-digit section number,
e.g. n01_BNF.txt)
Default output directory: ingestion/code/BNF/
Default markdown source: docs/LRM/avap.md

USAGE EXAMPLES:
    Use default configuration:
        python scripts/pipelines/flows/bnf_files_generator.py

    Customize input and output paths:
        python scripts/pipelines/flows/bnf_files_generator.py --markdown docs/LRM/avap.md --output ingestion/code
        python scripts/pipelines/flows/bnf_files_generator.py -m docs/LRM/avap.md -o ingestion/code

OPTIONS:
    --markdown, -m: Path to the AVAP markdown file (relative to project root)
    --output, -o: Output directory for BNF files (relative to project root)
"""
import re
import typer
from pathlib import Path
from typing import List, Tuple, Optional
# Typer application object; `main` is registered on it through the
# `@app.command()` decorator and it is invoked from the `__main__` guard.
app = typer.Typer()
class BNFExtractor:
    """Extract BNF specifications from AVAP markdown documentation.

    The extractor looks for headers of the form
    ``### Especificación BNF (Sección <ROMAN>)`` and, for each one, takes the
    first fenced ``bnf`` code block that appears before the next such header.
    Each grammar is then written to ``n<NN>_BNF.txt`` in ``output_dir``, where
    ``NN`` is the section number zero-padded to two digits.
    """

    # Header introducing a grammar; the group captures the Roman numeral.
    _HEADER_RE = re.compile(r"### Especificación BNF \(Sección ([IVXLCDM]+)\)")
    # Fenced ```bnf block; DOTALL lets the lazy body span several lines.
    _CODE_BLOCK_RE = re.compile(r"```bnf\n(.*?)```", re.DOTALL)

    def __init__(self, markdown_file: Path, output_dir: Path):
        """
        Initialize BNF extractor.

        Args:
            markdown_file: Path to the AVAP markdown file
            output_dir: Directory where BNF files will be saved
        """
        self.markdown_file = markdown_file
        self.output_dir = output_dir
        # Filled by extract_bnf_sections(): (section_number, title, bnf_content).
        self.bnf_sections: List[Tuple[int, str, str]] = []

    @staticmethod
    def _roman_to_int(roman: str) -> int:
        """
        Convert Roman numerals to integers.

        Args:
            roman: Roman numeral string (e.g., 'I', 'IV', 'IX', 'XII')

        Returns:
            Integer value of the Roman numeral. Characters that are not
            Roman digits contribute 0.
        """
        roman_values = {
            'I': 1, 'V': 5, 'X': 10, 'L': 50,
            'C': 100, 'D': 500, 'M': 1000,
        }
        total = 0
        prev_value = 0
        # Scan right-to-left: a digit smaller than the one to its right is
        # subtractive (the 'I' in 'IV'), otherwise it is additive.
        for char in reversed(roman):
            value = roman_values.get(char, 0)
            if value < prev_value:
                total -= value
            else:
                total += value
            prev_value = value
        return total

    def read_markdown_file(self) -> str:
        """Read the markdown file content as UTF-8 text."""
        with open(self.markdown_file, "r", encoding="utf-8") as f:
            return f.read()

    def extract_bnf_sections(self, content: str) -> List[Tuple[int, str, str]]:
        """
        Extract all BNF specifications from markdown content.

        Pattern: ### Especificación BNF (Sección I)
                 ```bnf
                 ... BNF content ...
                 ```

        The code block is searched only between a header and the next BNF
        header (or the end of the document), so a header that has no grammar
        block is skipped instead of being paired with a later section's block.

        Args:
            content: Markdown file content

        Returns:
            List of tuples: (section_number, section_title, bnf_content).
            The same list is stored on ``self.bnf_sections``.
        """
        bnf_sections = []
        headers = list(self._HEADER_RE.finditer(content))

        for index, header in enumerate(headers):
            # The region owned by this header ends where the next one starts.
            if index + 1 < len(headers):
                region_end = headers[index + 1].start()
            else:
                region_end = len(content)

            # pos/endpos bound the search without copying the document tail.
            code_match = self._CODE_BLOCK_RE.search(
                content, header.end(), region_end
            )
            if code_match is None:
                continue

            roman_numeral = header.group(1)
            section_number = self._roman_to_int(roman_numeral)
            section_title = f"Especificación BNF - Sección {roman_numeral}"
            bnf_content = code_match.group(1).strip()
            bnf_sections.append((section_number, section_title, bnf_content))

        self.bnf_sections = bnf_sections
        return bnf_sections

    def format_bnf_file_content(self, section_number: int, title: str, bnf_content: str) -> str:
        """
        Format BNF content for file output.

        Args:
            section_number: Section number (unused by the current format)
            title: Section title (unused by the current format)
            bnf_content: Raw BNF grammar content

        Returns:
            BNF content without additional formatting
        """
        return bnf_content

    def _section_path(self, section_number: int) -> Path:
        """Return the output path for a section: ``n<NN>_BNF.txt`` in output_dir."""
        return self.output_dir / f"n{section_number:02d}_BNF.txt"

    def save_bnf_files(self) -> int:
        """
        Save extracted BNF sections to individual files.

        File naming convention: nXX_BNF.txt (e.g., n01_BNF.txt, n02_BNF.txt, etc.)
        Sections that share a number map to the same file name, so the later
        one overwrites the earlier one.

        Returns:
            Number of files written
        """
        # Ensure output directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)

        files_created = 0
        for section_number, title, bnf_content in self.bnf_sections:
            filepath = self._section_path(section_number)
            formatted_content = self.format_bnf_file_content(
                section_number, title, bnf_content
            )
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(formatted_content)
            print(f"Created: {filepath}")
            files_created += 1
        return files_created

    def generate(self) -> Tuple[int, List[str]]:
        """
        Execute the complete BNF extraction and file generation process.

        Reads the markdown file, extracts the BNF sections, writes one file
        per section and prints progress along the way.

        Returns:
            Tuple of (number_of_files_created, list_of_file_paths)
        """
        print(f"Reading markdown file: {self.markdown_file}")
        content = self.read_markdown_file()

        print("Extracting BNF specifications...")
        bnf_sections = self.extract_bnf_sections(content)
        print(f"Found {len(bnf_sections)} BNF sections:")
        for _, title, _ in bnf_sections:
            print(f" - {title}")

        print(f"\nSaving BNF files to: {self.output_dir}")
        files_created = self.save_bnf_files()

        # Same helper as save_bnf_files(), so reported paths match written files.
        file_paths = [
            str(self._section_path(section_number))
            for section_number, _, _ in bnf_sections
        ]
        return files_created, file_paths
@app.command()
def main(
    markdown_file: str = typer.Option(
        "docs/LRM/avap.md",
        "--markdown",
        "-m",
        help="Path to AVAP markdown file (relative to project root)"
    ),
    output_dir: str = typer.Option(
        "ingestion/code/BNF/",
        "--output",
        "-o",
        help="Output directory for BNF files (relative to project root)"
    )
):
    """Extract BNF specifications from AVAP documentation.

    Default behavior:
    - Reads from: docs/LRM/avap.md
    - Writes to: ingestion/code/BNF/
    """
    # NOTE: the docstring above is kept verbatim because Typer shows it as the
    # command's --help text.
    #
    # Both options are joined onto the project root; the run exits with
    # code 1 when the markdown path is not an existing regular file.

    # This script lives in scripts/pipelines/flows/, so the project root is
    # three levels above the script directory. Resolve first: a relative
    # __file__ would otherwise collapse the parent chain to ".".
    project_root = Path(__file__).resolve().parents[3]

    # Convert relative paths to absolute
    markdown_path = project_root / markdown_file
    output_path = project_root / output_dir

    # is_file() rather than exists(): a directory at this path must be
    # reported here instead of failing later when the file is opened.
    if not markdown_path.is_file():
        typer.echo(f"Error: Markdown file not found: {markdown_path}", err=True)
        raise typer.Exit(code=1)

    # Create extractor and generate files
    extractor = BNFExtractor(markdown_path, output_path)
    files_created, _ = extractor.generate()

    separator = "=" * 80
    print(f"\n{separator}")
    print("BNF extraction complete!")
    print(f"Total files created: {files_created}")
    print(f"Output directory: {output_path}")
    print(separator)
# Script entry point: run the Typer application only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    app()