modified config.py
This commit is contained in:
parent
0ed7dfc653
commit
cd3922abbd
|
|
@ -2,7 +2,7 @@
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 5,
|
||||||
"id": "0a8abbfa",
|
"id": "0a8abbfa",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
|
@ -12,7 +12,7 @@
|
||||||
"True"
|
"True"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 1,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
|
@ -24,7 +24,7 @@
|
||||||
"from dataclasses import dataclass\n",
|
"from dataclasses import dataclass\n",
|
||||||
"from pathlib import Path\n",
|
"from pathlib import Path\n",
|
||||||
"from typing import Any, Dict, List, Optional, Tuple\n",
|
"from typing import Any, Dict, List, Optional, Tuple\n",
|
||||||
"from bnf import grammar\n",
|
"# from bnf import grammar\n",
|
||||||
"import nltk\n",
|
"import nltk\n",
|
||||||
"from elasticsearch import Elasticsearch\n",
|
"from elasticsearch import Elasticsearch\n",
|
||||||
"from langchain_core.documents import Document\n",
|
"from langchain_core.documents import Document\n",
|
||||||
|
|
@ -33,26 +33,40 @@
|
||||||
"from lark import Lark, Token, Transformer, Tree\n",
|
"from lark import Lark, Token, Transformer, Tree\n",
|
||||||
"from transformers import AutoConfig\n",
|
"from transformers import AutoConfig\n",
|
||||||
"\n",
|
"\n",
|
||||||
"from src.config import (DATA_DIR, ELASTICSEARCH_CODE_INDEX,\n",
|
"from src.config import settings\n",
|
||||||
" ELASTICSEARCH_DOCS_INDEX, ELASTICSEARCH_INDEX,\n",
|
|
||||||
" ELASTICSEARCH_URL, HF_EMB_MODEL_NAME,\n",
|
|
||||||
" OLLAMA_EMB_MODEL_NAME, OLLAMA_LOCAL_URL,\n",
|
|
||||||
" OLLAMA_MODEL_NAME, OLLAMA_URL, PROJ_ROOT)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"nltk.download(\"punkt\", quiet=True)"
|
"nltk.download(\"punkt\", quiet=True)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 6,
|
||||||
"id": "5c9d292b",
|
"id": "5c9d292b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"config = AutoConfig.from_pretrained(HF_EMB_MODEL_NAME)\n",
|
"config = AutoConfig.from_pretrained(settings.hf_emb_model_name)\n",
|
||||||
"embedding_dim = config.hidden_size"
|
"embedding_dim = config.hidden_size"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "d2009c2b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"qwen3.5:2b\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(settings.ollama_model_name)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "baa779f3",
|
"id": "baa779f3",
|
||||||
|
|
|
||||||
163
src/config.py
163
src/config.py
|
|
@ -1,39 +1,150 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
from pydantic import Field
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
from datetime import timedelta
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY", "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")
|
class Settings(BaseSettings):
|
||||||
|
raw_path_: str
|
||||||
|
data_path_: str
|
||||||
|
processed_path_: str
|
||||||
|
models_path_: str
|
||||||
|
external_path_: str
|
||||||
|
kubeconfig_path: str
|
||||||
|
interim_path_: str
|
||||||
|
database_url: str
|
||||||
|
openai_api_key: str
|
||||||
|
elasticsearch_index: str
|
||||||
|
elasticsearch_docs_index: str
|
||||||
|
elasticsearch_code_index: str
|
||||||
|
llm_base_url: str
|
||||||
|
ollama_url: str
|
||||||
|
ollama_local_url: str
|
||||||
|
langfuse_host: str
|
||||||
|
elasticsearch_url: str
|
||||||
|
elasticsearch_local_url: str
|
||||||
|
ollama_model_name: str
|
||||||
|
ollama_emb_model_name: str
|
||||||
|
model_name: str
|
||||||
|
hf_emb_model_name: str
|
||||||
|
langfuse_public_key: str
|
||||||
|
langfuse_secret_key: str
|
||||||
|
hf_token: str
|
||||||
|
|
||||||
OLLAMA_URL=os.getenv("OLLAMA_URL", "http://host.docker.internal:11434")
|
model_config = SettingsConfigDict(
|
||||||
OLLAMA_LOCAL_URL=os.getenv("OLLAMA_LOCAL_URL", "http://localhost:11434")
|
env_file=".env",
|
||||||
OLLAMA_MODEL_NAME=os.getenv("OLLAMA_MODEL_NAME", "qwen3-0.6B:latest")
|
env_file_encoding="utf-8",
|
||||||
OLLAMA_EMB_MODEL_NAME=os.getenv("OLLAMA_EMB_MODEL_NAME", "qwen3-0.6B-emb:latest")
|
case_sensitive=False,
|
||||||
ELASTICSEARCH_DOCS_INDEX = os.getenv("ELASTICSEARCH_DOCS_INDEX")
|
extra="ignore",
|
||||||
ELASTICSEARCH_CODE_INDEX = os.getenv("ELASTICSEARCH_CODE_INDEX")
|
)
|
||||||
|
|
||||||
LANGFUSE_HOST=os.getenv("LANGFUSE_HOST", "http://45.77.119.180")
|
@property
|
||||||
LANGFUSE_PUBLIC_KEY=os.getenv("LANGFUSE_PUBLIC_KEY", "pk-lf-0e6db694-3e95-4dd4-aedf-5a2694267058")
|
def data_path(self) -> Path:
|
||||||
LANGFUSE_SECRET_KEY=os.getenv("LANGFUSE_SECRET_KEY", "sk-lf-dbf28bb9-15bb-4d03-a8c3-05caa3e3905f")
|
return Path(self.data_path_)
|
||||||
|
|
||||||
ELASTICSEARCH_URL=os.getenv("ELASTICSEARCH_URL", "http://host.docker.internal:9200")
|
@property
|
||||||
ELASTICSEARCH_LOCAL_URL=os.getenv("ELASTICSEARCH_LOCAL_URL", "http://localhost:9200")
|
def models_path(self) -> Path:
|
||||||
ELASTICSEARCH_INDEX=os.getenv("ELASTICSEARCH_INDEX", "avap-docs-test")
|
return Path(self.models_path_)
|
||||||
|
|
||||||
DATABASE_URL=os.getenv("DATABASE_URL", "postgresql://postgres:brunix_pass@host.docker.internal:5432/postgres")
|
@property
|
||||||
|
def processed_path(self) -> Path:
|
||||||
|
return Path(self.processed_path_)
|
||||||
|
|
||||||
KUBECONFIG_PATH=os.getenv("KUBECONFIG_PATH", "kubernetes/kubeconfig.yaml")
|
@property
|
||||||
|
def raw_path(self) -> Path:
|
||||||
|
return Path(self.raw_path_)
|
||||||
|
|
||||||
HF_TOKEN=os.getenv("HF_TOKEN", "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")
|
@property
|
||||||
HF_EMB_MODEL_NAME=os.getenv("HF_EMB_MODEL_NAME", "Qwen/Qwen3-Embedding-0.6B")
|
def interim_path(self) -> Path:
|
||||||
|
return Path(self.interim_path_)
|
||||||
|
|
||||||
PROJ_ROOT = Path(__file__).resolve().parents[1]
|
@property
|
||||||
|
def external_path(self) -> Path:
|
||||||
|
return Path(self.external_path_)
|
||||||
|
|
||||||
DATA_DIR=PROJ_ROOT / "data"
|
@property
|
||||||
MODELS_DIR=DATA_DIR / "models"
|
def proj_root(self) -> Path:
|
||||||
RAW_DIR=DATA_DIR / "raw"
|
return Path(__file__).resolve().parents[1]
|
||||||
PROCESSED_DIR=DATA_DIR / "processed"
|
|
||||||
INTERIM_DIR=DATA_DIR / "interim"
|
@property
|
||||||
EXTERNAL_DIR=DATA_DIR / "external"
|
def database_url(self) -> str:
|
||||||
DOCS_DIR=PROJ_ROOT / "docs"
|
return self.database_url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def openai_api_key(self) -> str:
|
||||||
|
return self.openai_api_key
|
||||||
|
|
||||||
|
@property
|
||||||
|
def elasticsearch_index(self) -> str:
|
||||||
|
return self.elasticsearch_index
|
||||||
|
|
||||||
|
@property
|
||||||
|
def elasticsearch_docs_index(self) -> str:
|
||||||
|
return self.elasticsearch_docs_index
|
||||||
|
|
||||||
|
@property
|
||||||
|
def elasticsearch_code_index(self) -> str:
|
||||||
|
return self.elasticsearch_code_index
|
||||||
|
|
||||||
|
@property
|
||||||
|
def llm_base_url(self) -> str:
|
||||||
|
return self.llm_base_url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ollama_url(self) -> str:
|
||||||
|
return self.ollama_url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ollama_local_url(self) -> str:
|
||||||
|
return self.ollama_local_url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def langfuse_host(self) -> str:
|
||||||
|
return self.langfuse_host
|
||||||
|
|
||||||
|
@property
|
||||||
|
def elasticsearch_url(self) -> str:
|
||||||
|
return self.elasticsearch_url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def elasticsearch_local_url(self) -> str:
|
||||||
|
return self.elasticsearch_local_url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ollama_model_name(self) -> str:
|
||||||
|
return self.ollama_model_name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ollama_emb_model_name(self) -> str:
|
||||||
|
return self.ollama_emb_model_name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def model_name(self) -> str:
|
||||||
|
return self.model_name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hf_emb_model_name(self) -> str:
|
||||||
|
return self.hf_emb_model_name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def langfuse_public_key(self) -> str:
|
||||||
|
return self.langfuse_public_key
|
||||||
|
|
||||||
|
@property
|
||||||
|
def langfuse_secret_key(self) -> str:
|
||||||
|
return self.langfuse_secret_key
|
||||||
|
|
||||||
|
@property
|
||||||
|
def hf_token(self) -> str:
|
||||||
|
return self.hf_token
|
||||||
|
|
||||||
|
@property
|
||||||
|
def kubeconfig_path(self) -> Path:
|
||||||
|
return Path(self.kubeconfig_path)
|
||||||
|
|
||||||
|
|
||||||
|
settings = Settings()
|
||||||
Loading…
Reference in New Issue