modified config.py

This commit is contained in:
pseco 2026-03-11 10:41:28 +01:00
parent 0ed7dfc653
commit cd3922abbd
2 changed files with 167 additions and 42 deletions

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 5,
"id": "0a8abbfa", "id": "0a8abbfa",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -12,7 +12,7 @@
"True" "True"
] ]
}, },
"execution_count": 1, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -24,7 +24,7 @@
"from dataclasses import dataclass\n", "from dataclasses import dataclass\n",
"from pathlib import Path\n", "from pathlib import Path\n",
"from typing import Any, Dict, List, Optional, Tuple\n", "from typing import Any, Dict, List, Optional, Tuple\n",
"from bnf import grammar\n", "# from bnf import grammar\n",
"import nltk\n", "import nltk\n",
"from elasticsearch import Elasticsearch\n", "from elasticsearch import Elasticsearch\n",
"from langchain_core.documents import Document\n", "from langchain_core.documents import Document\n",
@ -33,26 +33,40 @@
"from lark import Lark, Token, Transformer, Tree\n", "from lark import Lark, Token, Transformer, Tree\n",
"from transformers import AutoConfig\n", "from transformers import AutoConfig\n",
"\n", "\n",
"from src.config import (DATA_DIR, ELASTICSEARCH_CODE_INDEX,\n", "from src.config import settings\n",
" ELASTICSEARCH_DOCS_INDEX, ELASTICSEARCH_INDEX,\n",
" ELASTICSEARCH_URL, HF_EMB_MODEL_NAME,\n",
" OLLAMA_EMB_MODEL_NAME, OLLAMA_LOCAL_URL,\n",
" OLLAMA_MODEL_NAME, OLLAMA_URL, PROJ_ROOT)\n",
"\n", "\n",
"nltk.download(\"punkt\", quiet=True)" "nltk.download(\"punkt\", quiet=True)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 6,
"id": "5c9d292b", "id": "5c9d292b",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"config = AutoConfig.from_pretrained(HF_EMB_MODEL_NAME)\n", "config = AutoConfig.from_pretrained(settings.hf_emb_model_name)\n",
"embedding_dim = config.hidden_size" "embedding_dim = config.hidden_size"
] ]
}, },
{
"cell_type": "code",
"execution_count": 9,
"id": "d2009c2b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"qwen3.5:2b\n"
]
}
],
"source": [
"print(settings.ollama_model_name)"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "baa779f3", "id": "baa779f3",

View File

@ -1,39 +1,150 @@
from pathlib import Path from pathlib import Path
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field
from dotenv import load_dotenv from dotenv import load_dotenv
import os from datetime import timedelta
import warnings
load_dotenv() load_dotenv()
class Settings(BaseSettings):
    """Application configuration read from the environment and ``.env``.

    Each annotated attribute is a pydantic-settings field with no default,
    so its value has to be supplied by an environment variable (or a
    ``.env`` entry) of the same name; matching is case-insensitive and
    unknown variables are ignored (see ``model_config``).

    Fields whose names end in an underscore hold directory locations as
    raw strings; the like-named property without the underscore returns
    the same value as a ``pathlib.Path``.

    The plain string fields are read directly as attributes. They must not
    be re-declared as same-named properties: such a property replaces the
    class attribute for the field, which turns the property object into
    the field's default and hides a missing environment variable.
    """

    # Directory locations, kept as strings; see the Path properties below.
    raw_path_: str
    data_path_: str
    processed_path_: str
    models_path_: str
    external_path_: str
    kubeconfig_path: str
    interim_path_: str

    # Service endpoints, index names, model names and credentials.
    database_url: str
    openai_api_key: str
    elasticsearch_index: str
    elasticsearch_docs_index: str
    elasticsearch_code_index: str
    llm_base_url: str
    ollama_url: str
    ollama_local_url: str
    langfuse_host: str
    elasticsearch_url: str
    elasticsearch_local_url: str
    ollama_model_name: str
    ollama_emb_model_name: str
    model_name: str
    hf_emb_model_name: str
    langfuse_public_key: str
    langfuse_secret_key: str
    hf_token: str

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
        # The ``model_name`` field starts with pydantic's protected
        # ``model_`` prefix; narrowing the protected namespaces keeps the
        # field name without a namespace-conflict warning.
        protected_namespaces=("settings_",),
    )

    @property
    def data_path(self) -> Path:
        """Return ``data_path_`` as a ``Path``."""
        return Path(self.data_path_)

    @property
    def models_path(self) -> Path:
        """Return ``models_path_`` as a ``Path``."""
        return Path(self.models_path_)

    @property
    def processed_path(self) -> Path:
        """Return ``processed_path_`` as a ``Path``."""
        return Path(self.processed_path_)

    @property
    def raw_path(self) -> Path:
        """Return ``raw_path_`` as a ``Path``."""
        return Path(self.raw_path_)

    @property
    def interim_path(self) -> Path:
        """Return ``interim_path_`` as a ``Path``."""
        return Path(self.interim_path_)

    @property
    def external_path(self) -> Path:
        """Return ``external_path_`` as a ``Path``."""
        return Path(self.external_path_)

    @property
    def proj_root(self) -> Path:
        """Return the directory one level above this module's directory."""
        return Path(__file__).resolve().parents[1]
# Shared module-level instance; other modules use it via
# ``from src.config import settings``.
settings = Settings()