-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b258ec1
commit 6fb07d2
Showing
9 changed files
with
196 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
from collections.abc import Callable | ||
|
||
from danswer.llm.factory import get_default_llm | ||
from danswer.llm.utils import dict_based_prompt_to_langchain_prompt | ||
from danswer.prompts.secondary_llm_flows import LANGUAGE_REPHRASE_PROMPT | ||
from danswer.utils.logger import setup_logger | ||
from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel | ||
|
||
logger = setup_logger() | ||
|
||
|
||
def llm_rephrase_query(query: str, language: str) -> str:
    """Ask the default LLM to rephrase ``query`` in ``language``.

    A single user-role message is built from ``LANGUAGE_REPHRASE_PROMPT``,
    converted to a langchain prompt, and sent to the default LLM.

    Args:
        query: The user query to rephrase.
        language: Target language, substituted into the prompt as
            ``target_language``.

    Returns:
        Whatever the default LLM's ``invoke`` returns for the filled prompt.
    """
    rephrase_prompt = LANGUAGE_REPHRASE_PROMPT.format(
        query=query, target_language=language
    )
    filled_llm_prompt = dict_based_prompt_to_langchain_prompt(
        [{"role": "user", "content": rephrase_prompt}]
    )

    model_output = get_default_llm().invoke(filled_llm_prompt)
    # Logged at debug level so the raw rephrasing can be inspected.
    logger.debug(model_output)
    return model_output
|
||
|
||
def rephrase_query(
    query: str,
    multilingual_query_expansion: str,
    use_threads: bool = True,
) -> list[str]:
    """Rephrase ``query`` once per configured language.

    Args:
        query: The user query to rephrase.
        multilingual_query_expansion: Comma-separated language names, e.g.
            ``"English, French"``. Surrounding whitespace is stripped and
            blank entries (empty string, trailing or doubled commas) are
            ignored.
        use_threads: When True, the per-language rephrase calls are handed to
            ``run_functions_tuples_in_parallel``; otherwise they run one
            after another in this thread.

    Returns:
        One rephrased query per non-blank language, or an empty list when no
        languages are configured.
    """
    stripped = (
        language.strip() for language in multilingual_query_expansion.split(",")
    )
    # "".split(",") yields [""]; without this filter the LLM would be asked
    # to rephrase into an empty target language.
    languages = [language for language in stripped if language]
    if not languages:
        return []

    if use_threads:
        functions_with_args: list[tuple[Callable, tuple]] = [
            (llm_rephrase_query, (query, language)) for language in languages
        ]
        return run_functions_tuples_in_parallel(functions_with_args)

    return [llm_rephrase_query(query, language) for language in languages]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# This env template shows how to configure Danswer for multilingual use | ||
# In this case, it is configured for French and English | ||
# To use it, copy it to .env in the docker_compose directory. | ||
# Feel free to combine it with the other templates to suit your needs | ||
|
||
|
||
# A recent MIT license multilingual model: https://huggingface.co/intfloat/multilingual-e5-small | ||
DOCUMENT_ENCODER_MODEL="intfloat/multilingual-e5-small" | ||
|
||
# The model above is trained with the following prefix for queries and passages to improve retrieval | ||
# by letting the model know which of the two types is currently being embedded | ||
ASYM_QUERY_PREFIX="query: " | ||
ASYM_PASSAGE_PREFIX="passage: " | ||
|
||
# This varies from model to model; this one is tuned with normalization set to True | ||
NORMALIZE_EMBEDDINGS="True" | ||
|
||
# Due to the loss function used in training, this model outputs similarity scores from range ~0.6 to 1 | ||
SIM_SCORE_RANGE_LOW="0.6" | ||
SIM_SCORE_RANGE_HIGH="0.8" | ||
|
||
# No recent multilingual reranking models small enough to run on CPU, so turning it off | ||
SKIP_RERANKING="True" | ||
|
||
# Use LLM to determine if chunks are relevant to the query | ||
# may not work well for languages that do not have much training data in the LLM training set | ||
DISABLE_LLM_CHUNK_FILTER="True" | ||
|
||
# Rephrase the user query in specified languages using LLM, use comma separated values | ||
MULTILINGUAL_QUERY_EXPANSION="English, French" | ||
|
||
# Enables fine-grained embeddings for better retrieval | ||
# At the cost of indexing speed (~5x slower), query time is same speed | ||
ENABLE_MINI_CHUNK="True" | ||
|
||
# Stronger model will help with multilingual tasks | ||
GEN_AI_MODEL_VERSION="gpt-4" | ||
GEN_AI_API_KEY=<provide your api key> | ||
|
||
# More verbose logging if desired | ||
LOG_LEVEL="debug" |
6fb07d2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Successfully deployed to the following URLs:
internal-search – ./
internal-search-danswer.vercel.app
internal-search-git-main-danswer.vercel.app
internal-search.vercel.app
danswer.dev
www.danswer.dev