refactor: remove unused BNF file generator script
This commit is contained in:
parent
dc7568b622
commit
a4478cb7ff
|
|
@ -1,244 +0,0 @@
|
||||||
"""
|
|
||||||
Generator for BNF specification files from AVAP documentation.
|
|
||||||
|
|
||||||
This script extracts BNF specifications from the AVAP Language Reference Manual (LRM)
|
|
||||||
and generates individual text files for each BNF section.
|
|
||||||
|
|
||||||
Output format: nXX_BNF.txt (where XX is the section number, zero-padded to two digits, e.g. n01_BNF.txt)
|
|
||||||
Default output directory: ingestion/code/BNF/
|
|
||||||
Default markdown source: docs/LRM/avap.md
|
|
||||||
|
|
||||||
USAGE EXAMPLES:
|
|
||||||
|
|
||||||
Use default configuration:
|
|
||||||
python scripts/pipelines/flows/bnf_files_generator.py
|
|
||||||
|
|
||||||
Customize input and output paths:
|
|
||||||
python scripts/pipelines/flows/bnf_files_generator.py --markdown docs/LRM/avap.md --output ingestion/code
|
|
||||||
python scripts/pipelines/flows/bnf_files_generator.py -m docs/LRM/avap.md -o ingestion/code
|
|
||||||
|
|
||||||
OPTIONS:
|
|
||||||
--markdown, -m: Path to the AVAP markdown file (relative to project root)
|
|
||||||
--output, -o: Output directory for BNF files (relative to project root)
|
|
||||||
"""
|
|
||||||
|
|
||||||
import re
|
|
||||||
import typer
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import List, Tuple, Optional
|
|
||||||
|
|
||||||
app = typer.Typer()
|
|
||||||
|
|
||||||
|
|
||||||
class BNFExtractor:
    """Extract BNF specifications from AVAP markdown documentation.

    The extractor looks for headers of the form
    ``### Especificación BNF (Sección <ROMAN>)`` and, for each one, the first
    fenced ``bnf`` code block that belongs to that section. Each grammar is
    written to its own ``nXX_BNF.txt`` file in the output directory.
    """

    # Numeric value of every Roman numeral symbol accepted in section headers.
    _ROMAN_VALUES = {
        "I": 1, "V": 5, "X": 10, "L": 50,
        "C": 100, "D": 500, "M": 1000,
    }

    # Matches: ### Especificación BNF (Sección I), (Sección II), etc.
    _HEADER_PATTERN = re.compile(
        r"### Especificación BNF \(Sección ([IVXLCDM]+)\)"
    )

    # Matches a fenced ```bnf block; tolerates CRLF after the opening fence.
    _CODE_BLOCK_PATTERN = re.compile(r"```bnf\r?\n(.*?)```", re.DOTALL)

    def __init__(self, markdown_file: Path, output_dir: Path):
        """
        Initialize BNF extractor.

        Args:
            markdown_file: Path to the AVAP markdown file
            output_dir: Directory where BNF files will be saved
        """
        self.markdown_file = markdown_file
        self.output_dir = output_dir
        # Filled by extract_bnf_sections(): (section_number, title, grammar).
        self.bnf_sections: List[Tuple[int, str, str]] = []

    @staticmethod
    def _roman_to_int(roman: str) -> int:
        """
        Convert Roman numerals to integers.

        The numeral is scanned right to left: a symbol smaller than the one
        to its right is subtracted (the ``I`` in ``IV``), otherwise added.
        Characters that are not Roman symbols count as 0.

        Args:
            roman: Roman numeral string (e.g., 'I', 'IV', 'IX', 'XII')

        Returns:
            Integer value of the Roman numeral
        """
        total = 0
        prev_value = 0

        for char in reversed(roman):
            value = BNFExtractor._ROMAN_VALUES.get(char, 0)
            if value < prev_value:
                total -= value
            else:
                total += value
            prev_value = value

        return total

    def read_markdown_file(self) -> str:
        """Read and return the markdown file content (decoded as UTF-8)."""
        with open(self.markdown_file, "r", encoding="utf-8") as f:
            return f.read()

    def extract_bnf_sections(self, content: str) -> List[Tuple[int, str, str]]:
        """
        Extract all BNF specifications from markdown content.

        Pattern: ### Especificación BNF (Sección I)
                 ```bnf
                 ... BNF content ...
                 ```

        The search for a section's code block stops at the next BNF header,
        so a header without its own ``bnf`` block is skipped instead of being
        paired with the grammar of a later section.

        Args:
            content: Markdown file content

        Returns:
            List of tuples: (section_number, section_title, bnf_content),
            in document order. The same list is stored on
            ``self.bnf_sections``.
        """
        bnf_sections: List[Tuple[int, str, str]] = []
        headers = list(self._HEADER_PATTERN.finditer(content))

        for index, header in enumerate(headers):
            # A section ends where the next BNF header begins (or at EOF).
            if index + 1 < len(headers):
                section_end = headers[index + 1].start()
            else:
                section_end = len(content)

            code_match = self._CODE_BLOCK_PATTERN.search(
                content, header.end(), section_end
            )
            if code_match is None:
                continue

            roman_numeral = header.group(1)
            bnf_sections.append(
                (
                    self._roman_to_int(roman_numeral),
                    f"Especificación BNF - Sección {roman_numeral}",
                    code_match.group(1).strip(),
                )
            )

        self.bnf_sections = bnf_sections
        return bnf_sections

    def format_bnf_file_content(self, section_number: int, title: str, bnf_content: str) -> str:
        """
        Format BNF content for file output.

        Currently a pass-through: ``section_number`` and ``title`` are
        accepted but not used, and the grammar is returned unchanged.

        Args:
            section_number: Section number (1-9, etc.)
            title: Section title
            bnf_content: Raw BNF grammar content

        Returns:
            BNF content without additional formatting
        """
        return bnf_content

    def save_bnf_files(self) -> int:
        """
        Save extracted BNF sections to individual files.

        File naming convention: nXX_BNF.txt with a two-digit, zero-padded
        section number (e.g., n01_BNF.txt, n02_BNF.txt, etc.). The output
        directory is created if it does not exist, and existing files with
        the same name are overwritten.

        Returns:
            Number of files created
        """
        # Ensure output directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)

        files_created = 0

        for section_number, title, bnf_content in self.bnf_sections:
            filepath = self.output_dir / f"n{section_number:02d}_BNF.txt"
            formatted_content = self.format_bnf_file_content(
                section_number, title, bnf_content
            )
            filepath.write_text(formatted_content, encoding="utf-8")

            print(f"Created: {filepath}")
            files_created += 1

        return files_created

    def generate(self) -> Tuple[int, List[str]]:
        """
        Execute the complete BNF extraction and file generation process.

        Reads the markdown file, extracts every BNF section, writes one file
        per section and reports progress on stdout.

        Returns:
            Tuple of (number_of_files_created, list_of_file_paths)
        """
        print(f"Reading markdown file: {self.markdown_file}")
        content = self.read_markdown_file()

        print("Extracting BNF specifications...")
        bnf_sections = self.extract_bnf_sections(content)

        print(f"Found {len(bnf_sections)} BNF sections:")
        for _, title, _ in bnf_sections:
            print(f"  - {title}")

        print(f"\nSaving BNF files to: {self.output_dir}")
        files_created = self.save_bnf_files()

        # Mirror the naming scheme used by save_bnf_files().
        file_paths = [
            str(self.output_dir / f"n{number:02d}_BNF.txt")
            for number, _, _ in bnf_sections
        ]

        return files_created, file_paths
|
|
||||||
|
|
||||||
|
|
||||||
@app.command()
def main(
    markdown_file: str = typer.Option(
        "docs/LRM/avap.md",
        "--markdown",
        "-m",
        help="Path to AVAP markdown file (relative to project root)",
    ),
    output_dir: str = typer.Option(
        "ingestion/code/BNF/",
        "--output",
        "-o",
        help="Output directory for BNF files (relative to project root)",
    ),
):
    """Extract BNF specifications from AVAP documentation.

    Default behavior:
    - Reads from: docs/LRM/avap.md
    - Writes to: ingestion/code/BNF/
    """
    # This file sits three directories below the project root
    # (scripts/pipelines/flows), so climb three levels from its folder.
    project_root = Path(__file__).parent.parent.parent.parent

    # Both CLI options are interpreted relative to the project root.
    source_md = project_root / markdown_file
    target_dir = project_root / output_dir

    # Bail out with a non-zero exit status when the source is missing.
    if not source_md.exists():
        typer.echo(f"Error: Markdown file not found: {source_md}", err=True)
        raise typer.Exit(code=1)

    # Run the extraction; only the file count is needed for the summary.
    files_created, _ = BNFExtractor(source_md, target_dir).generate()

    rule = "=" * 80
    print(f"\n{rule}")
    print("BNF extraction complete!")
    print(f"Total files created: {files_created}")
    print(f"Output directory: {target_dir}")
    print(rule)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Run the Typer application only when executed as a script.
    app()
|
|
||||||
Loading…
Reference in New Issue