-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.py
57 lines (43 loc) · 1.63 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import os
from pathlib import Path
from chromadb.config import Settings
from langchain.document_loaders import TextLoader, PDFMinerLoader, CSVLoader, UnstructuredExcelLoader
from default_config import default_config
# Settings loaded from default_config; the values below are looked up in it by key.
_config = default_config

# Directory containing this file; the directories below are resolved against it.
BASE_DIR = Path(__file__).resolve().parent

# Directory holding the audio files.
AUDIO_DIR = BASE_DIR / _config["AUDIO_DIR"]

# Directory for Whisper's transcribed output.
TRANSCRIBE_DIR = BASE_DIR / _config["TRANSCRIBE_DIR"]

# Directory of additional documents for the knowledge base.
DOCUMENTS_DIR = BASE_DIR / _config["DOCUMENTS_DIR"]

# Audio file extensions supported so far (TODO: more will be added later).
SUPPORTED_AUDIO_FILE_EXTENSIONS = [".mp3", ".m4a", ".wav"]

# Name of the Whisper model to use.
WHISPER_MODEL_NAME = _config["WHISPER_MODEL_NAME"]

# Device type, i.e. 'cuda' or 'cpu'.
DEVICE_TYPE = _config["DEVICE_TYPE"]
# Maps each currently supported document extension to the loader class used
# to read it. Both Excel extensions (.xls and .xlsx) share one loader.
SUPPORTED_DOCUMENT_MAP = {
    ".txt": TextLoader,
    ".pdf": PDFMinerLoader,
    ".csv": CSVLoader,
    ".xls": UnstructuredExcelLoader,
    # This key was previously misspelled ".xlxs", which is not a real
    # Excel extension; ".xlsx" is the intended suffix.
    ".xlsx": UnstructuredExcelLoader,
}
# Default Instructor embedding model.
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"

# Directory where the knowledge base is persisted.
KB_DIR = BASE_DIR / "DB"

# Knowledge-base thread count: the CPU count reported by the OS, or 8 when
# os.cpu_count() cannot determine it (returns None).
KB_THREADS = os.cpu_count() or 8

# Chroma settings; persistence is pointed at KB_DIR and telemetry is disabled.
CHROMA_SETTINGS = Settings(
    anonymized_telemetry=False,
    persist_directory=str(KB_DIR),
    chroma_db_impl="duckdb+parquet",
)
# Default LLM model id and the basename of its weights file
# (see load_model.py for details).
LLM_MODEL_ID = "TheBloke/WizardLM-7B-uncensored-GPTQ"
LLM_MODEL_BASENAME = (
    "WizardLM-7B-uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
)