refactor: remove unused BNF file generator script
This commit is contained in:
parent
dc7568b622
commit
a4478cb7ff
|
|
@ -1,244 +0,0 @@
|
|||
"""
|
||||
Generator for BNF specification files from AVAP documentation.
|
||||
|
||||
This script extracts BNF specifications from the AVAP Language Reference Manual (LRM)
|
||||
and generates individual text files for each BNF section.
|
||||
|
||||
Output format: nXX_BNF.txt (where XX is the two-digit, zero-padded section number, e.g. n01_BNF.txt, n12_BNF.txt)
|
||||
Default output directory: ingestion/code/BNF/
|
||||
Default markdown source: docs/LRM/avap.md
|
||||
|
||||
USAGE EXAMPLES:
|
||||
|
||||
Use default configuration:
|
||||
python scripts/pipelines/flows/bnf_files_generator.py
|
||||
|
||||
Customize input and output paths:
|
||||
python scripts/pipelines/flows/bnf_files_generator.py --markdown docs/LRM/avap.md --output ingestion/code
|
||||
python scripts/pipelines/flows/bnf_files_generator.py -m docs/LRM/avap.md -o ingestion/code
|
||||
|
||||
OPTIONS:
|
||||
--markdown, -m: Path to the AVAP markdown file (relative to project root)
|
||||
--output, -o: Output directory for BNF files (relative to project root)
|
||||
"""
|
||||
|
||||
import re
|
||||
import typer
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional
|
||||
|
||||
# Typer application object; the `main` function in this file is registered on it
# through the `@app.command()` decorator and it is invoked by the `__main__` guard.
app = typer.Typer()
|
||||
|
||||
|
||||
class BNFExtractor:
    """Extract BNF specifications from AVAP markdown documentation.

    The extractor looks for headers of the form
    ``### Especificación BNF (Sección <ROMAN>)`` and, for each one, takes the
    first fenced ``bnf`` code block that appears before the next such header.
    Each extracted grammar is written to ``nXX_BNF.txt`` in the output
    directory, where ``XX`` is the zero-padded section number.
    """

    # Matches: ### Especificación BNF (Sección I), (Sección II), etc.
    # The capture group holds the Roman numeral of the section.
    _HEADER_PATTERN = re.compile(r"### Especificación BNF \(Sección ([IVXLCDM]+)\)")

    # Matches a fenced ```bnf block; the capture group is the block body.
    _CODE_BLOCK_PATTERN = re.compile(r"```bnf\n(.*?)```", re.DOTALL)

    def __init__(self, markdown_file: Path, output_dir: Path):
        """
        Initialize BNF extractor.

        Args:
            markdown_file: Path to the AVAP markdown file
            output_dir: Directory where BNF files will be saved
        """
        self.markdown_file = markdown_file
        self.output_dir = output_dir
        # Filled by extract_bnf_sections(): (section_number, title, bnf_content)
        self.bnf_sections: List[Tuple[int, str, str]] = []

    @staticmethod
    def _roman_to_int(roman: str) -> int:
        """
        Convert Roman numerals to integers.

        The numeral is scanned right to left; a symbol smaller than the one
        to its right is subtracted (e.g. the 'I' in 'IV'), otherwise added.
        Characters that are not Roman symbols contribute 0.

        Args:
            roman: Roman numeral string (e.g., 'I', 'IV', 'IX', 'XII')

        Returns:
            Integer value of the Roman numeral
        """
        roman_values = {
            'I': 1, 'V': 5, 'X': 10, 'L': 50,
            'C': 100, 'D': 500, 'M': 1000,
        }
        total = 0
        prev_value = 0

        for char in reversed(roman):
            value = roman_values.get(char, 0)
            if value < prev_value:
                total -= value
            else:
                total += value
            prev_value = value

        return total

    @staticmethod
    def _section_filename(section_number: int) -> str:
        """Return the output file name for a section (e.g. 1 -> 'n01_BNF.txt')."""
        return f"n{section_number:02d}_BNF.txt"

    def read_markdown_file(self) -> str:
        """Read the markdown file content as UTF-8 text."""
        with open(self.markdown_file, "r", encoding="utf-8") as f:
            return f.read()

    def extract_bnf_sections(self, content: str) -> List[Tuple[int, str, str]]:
        """
        Extract all BNF specifications from markdown content.

        Pattern: ### Especificación BNF (Sección I)
                 ```bnf
                 ... BNF content ...
                 ```

        The search for a section's code block stops at the next BNF header,
        so a header without its own ``bnf`` block is skipped instead of being
        paired with the grammar of a later section.

        Args:
            content: Markdown file content

        Returns:
            List of tuples: (section_number, section_title, bnf_content),
            in order of appearance. The same list is stored on
            ``self.bnf_sections``.
        """
        bnf_sections: List[Tuple[int, str, str]] = []
        headers = list(self._HEADER_PATTERN.finditer(content))

        for index, header in enumerate(headers):
            roman_numeral = header.group(1)

            # Limit the search window to this section: from the end of this
            # header up to the start of the next one (or the end of the text).
            if index + 1 < len(headers):
                search_end = headers[index + 1].start()
            else:
                search_end = len(content)

            code_match = self._CODE_BLOCK_PATTERN.search(
                content, header.end(), search_end
            )
            if code_match is None:
                continue

            bnf_sections.append((
                self._roman_to_int(roman_numeral),
                f"Especificación BNF - Sección {roman_numeral}",
                code_match.group(1).strip(),
            ))

        self.bnf_sections = bnf_sections
        return bnf_sections

    def format_bnf_file_content(self, section_number: int, title: str, bnf_content: str) -> str:
        """
        Format BNF content for file output.

        Currently a pass-through hook: ``section_number`` and ``title`` are
        accepted but not used.

        Args:
            section_number: Section number (1-9, etc.)
            title: Section title
            bnf_content: Raw BNF grammar content

        Returns:
            BNF content without additional formatting
        """
        return bnf_content

    def save_bnf_files(self) -> int:
        """
        Save extracted BNF sections to individual files.

        File naming convention: nXX_BNF.txt with a two-digit, zero-padded
        section number (e.g., n01_BNF.txt, n02_BNF.txt, n10_BNF.txt).
        The output directory is created if it does not exist.

        Returns:
            Number of files written (one per entry in ``self.bnf_sections``)
        """
        # Ensure output directory exists
        self.output_dir.mkdir(parents=True, exist_ok=True)

        files_created = 0

        for section_number, title, bnf_content in self.bnf_sections:
            filepath = self.output_dir / self._section_filename(section_number)

            formatted_content = self.format_bnf_file_content(
                section_number, title, bnf_content
            )

            with open(filepath, "w", encoding="utf-8") as f:
                f.write(formatted_content)

            print(f"Created: {filepath}")
            files_created += 1

        return files_created

    def generate(self) -> Tuple[int, List[str]]:
        """
        Execute the complete BNF extraction and file generation process.

        Reads the markdown file, extracts the BNF sections, writes one file
        per section and prints progress messages along the way.

        Returns:
            Tuple of (number_of_files_created, list_of_file_paths)
        """
        print(f"Reading markdown file: {self.markdown_file}")
        content = self.read_markdown_file()

        print("Extracting BNF specifications...")
        bnf_sections = self.extract_bnf_sections(content)

        print(f"Found {len(bnf_sections)} BNF sections:")
        for _, title, _ in bnf_sections:
            print(f" - {title}")

        print(f"\nSaving BNF files to: {self.output_dir}")
        files_created = self.save_bnf_files()

        # Paths are derived with the same helper save_bnf_files() uses, so the
        # reported names always match the files that were written.
        file_paths = [
            str(self.output_dir / self._section_filename(section_number))
            for section_number, _, _ in bnf_sections
        ]

        return files_created, file_paths
|
||||
|
||||
|
||||
@app.command()
def main(
    markdown_file: str = typer.Option(
        "docs/LRM/avap.md",
        "--markdown",
        "-m",
        help="Path to AVAP markdown file (relative to project root)"
    ),
    output_dir: str = typer.Option(
        "ingestion/code/BNF/",
        "--output",
        "-o",
        help="Output directory for BNF files (relative to project root)"
    )
):
    """Extract BNF specifications from AVAP documentation.

    Default behavior:
    - Reads from: docs/LRM/avap.md
    - Writes to: ingestion/code/BNF/
    """
    # Get project root directory (scripts/pipelines/flows -> project root):
    # three levels above the directory that contains this script.
    script_dir = Path(__file__).parent
    project_root = script_dir.parent.parent.parent

    # Resolve both options against the project root.
    markdown_path = project_root / markdown_file
    output_path = project_root / output_dir

    # Verify the markdown source is an existing regular file. is_file() also
    # rejects a directory, which would otherwise pass an exists() check and
    # then fail inside open() with a traceback instead of this clean exit.
    if not markdown_path.is_file():
        typer.echo(f"Error: Markdown file not found: {markdown_path}", err=True)
        raise typer.Exit(code=1)

    # Create extractor and generate files; only the count is reported here.
    extractor = BNFExtractor(markdown_path, output_path)
    files_created, _ = extractor.generate()

    print(f"\n{'='*80}")
    print("BNF extraction complete!")
    print(f"Total files created: {files_created}")
    print(f"Output directory: {output_path}")
    print(f"{'='*80}")
|
||||
|
||||
|
||||
# Script entry point: hand control to the Typer app only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    app()
|
||||
Loading…
Reference in New Issue