"""Command-line entry point for the Elasticsearch document ingestion pipeline."""

import logging

import typer
from loguru import logger

from scripts.pipelines.tasks.chunk import (
    fetch_documents,
    ingest_documents,
    process_documents,
)

app = typer.Typer()


# Runs the ingestion pipeline in three stages, logging progress before each:
#   1. fetch_documents(docs_folder_path, docs_extension)
#   2. process_documents(<result of step 1>)
#   3. ingest_documents(<result of step 2>, es_index, <remaining es_* options>)
#
# NOTE: documented with comments rather than a docstring on purpose. Typer
# surfaces a command's docstring as its --help text, so adding one would
# change the CLI output.
@app.command()
def elasticsearch_ingestion(
    # Folder handed to fetch_documents as the place to look for documents.
    docs_folder_path: str = "docs/samples",
    # Extensions handed to fetch_documents alongside the folder.
    docs_extension: list[str] = [".md", ".avap"],
    # Target index name; logged and forwarded to ingest_documents.
    es_index: str = "avap-docs-test-v3",
    # The remaining options are forwarded untouched to ingest_documents.
    # Presumably Elasticsearch client settings (timeout likely in seconds)
    # -- TODO confirm against scripts.pipelines.tasks.chunk.
    es_request_timeout: int = 120,
    es_max_retries: int = 5,
    es_retry_on_timeout: bool = True,
    # NOTE(review): defaults to True; the name suggests the index is dropped
    # before ingesting -- confirm in ingest_documents before running on
    # an index that holds data worth keeping.
    delete_es_index: bool = True,
):
    logger.info("Starting Elasticsearch ingestion pipeline...")

    # Stage 1: collect the input documents.
    logger.info(f"Fetching files from {docs_folder_path}...")
    source_files = fetch_documents(docs_folder_path, docs_extension)

    # Stage 2: turn the fetched documents into chunks.
    logger.info("Processing docs...")
    chunks = process_documents(source_files)

    # Stage 3: push the chunks to Elasticsearch. Arguments stay positional so
    # the call does not depend on ingest_documents' parameter names.
    logger.info(f"Ingesting chunks in Elasticsearch index: {es_index}...")
    ingest_documents(
        chunks,
        es_index,
        es_request_timeout,
        es_max_retries,
        es_retry_on_timeout,
        delete_es_index,
    )

    logger.info(f"Finished ingesting in {es_index}.")


if __name__ == "__main__":
    # Configure the standard-library root logger; this file's own messages
    # are emitted through loguru's `logger`.
    log_format = "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    try:
        app()
    except Exception as error:
        # Top-level boundary: record the failure with its traceback, then
        # re-raise so the process still terminates with the original error.
        logger.exception(error)
        raise