Update Elasticsearch index version and modify imports in ingestion and translation scripts

- Changed Elasticsearch index from "avap-docs-test-v3" to "avap-docs-test-v4" in elasticsearch_ingestion.py.
- Removed unused import SystemMessage from langchain_core.messages in translate_mbpp.py.
- Added import for Lark in chunk.py to support new functionality.
This commit is contained in:
acano 2026-03-19 11:30:00 +01:00
parent 868a17523a
commit 752bf9c7d9
4 changed files with 61257 additions and 14 deletions

File diff suppressed because one or more lines are too long

View File

@@ -17,7 +17,7 @@ def elasticsearch_ingestion(
docs_folder_path: str = "docs/samples",
output_path: str = "ingestion/chunks.json",
docs_extension: list[str] = [".md", ".avap"],
-es_index: str = "avap-docs-test-v3",
+es_index: str = "avap-docs-test-v4",
es_request_timeout: int = 120,
es_max_retries: int = 5,
es_retry_on_timeout: bool = True,

View File

@@ -6,7 +6,7 @@ import typer
from loguru import logger
from botocore.config import Config
from pathlib import Path
-from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.messages import HumanMessage
from src.utils.llm_factory import create_chat_model
from scripts.pipelines.tasks.prompts import get_prompt_mbpp

View File

@@ -4,6 +4,7 @@ from dataclasses import replace
from pathlib import Path
from typing import Any, Union
+from lark import Lark
from chonkie import (
Chunk,
ElasticHandshake,