From 8a9b16fdd617e9325aaa81310b2edfa92d2a0c9f Mon Sep 17 00:00:00 2001 From: Alex Co Date: Fri, 13 Sep 2024 07:13:03 +0000 Subject: [PATCH] Make Internet Search a system persona Signed-off-by: Alex Co --- backend/danswer/chat/load_yamls.py | 12 +++++++++++- backend/danswer/chat/personas.yaml | 23 ++++++++++++----------- backend/danswer/chat/prompts.yaml | 15 +++++++++++++++ backend/danswer/tools/built_in_tools.py | 1 - backend/requirements/default.txt | 2 +- 5 files changed, 39 insertions(+), 14 deletions(-) diff --git a/backend/danswer/chat/load_yamls.py b/backend/danswer/chat/load_yamls.py index 0690f08b759..1839b3a5f23 100644 --- a/backend/danswer/chat/load_yamls.py +++ b/backend/danswer/chat/load_yamls.py @@ -96,7 +96,17 @@ def load_personas_from_yaml( # Set specific overrides for image generation persona if persona.get("image_generation"): llm_model_version_override = "gpt-4o" - + + # Load Internet Search Tool. + if persona.get("internet_search"): + internet_search_tool = ( + db_session.query(ToolDBModel) + .filter(ToolDBModel.name == "InternetSearchTool") + .first() + ) + if internet_search_tool: + tool_ids.append(internet_search_tool.id) + existing_persona = ( db_session.query(Persona) .filter(Persona.name == persona["name"]) diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml index 9955b1d73c5..4c97d79efbc 100644 --- a/backend/danswer/chat/personas.yaml +++ b/backend/danswer/chat/personas.yaml @@ -19,11 +19,11 @@ personas: # Default number of chunks to include as context, set to 0 to disable retrieval # Remove the field to set to the system default number of chunks/tokens to pass to Gen AI # Each chunk is 512 tokens long - num_chunks: 50 + num_chunks: 20 # Enable/Disable usage of the LLM chunk filter feature whereby each chunk is passed to the LLM to determine # if the chunk is useful or not towards the latest user query # This feature can be overriden for all personas via DISABLE_LLM_CHUNK_FILTER env variable - 
llm_relevance_filter: false + llm_relevance_filter: true # Enable/Disable usage of the LLM to extract query time filters including source type and time range filters llm_filter_extraction: true # Decay documents priority as they age, options are: @@ -44,11 +44,11 @@ personas: document_sets: [] icon_shape: 23013 icon_color: "#6FB1FF" - display_priority: 1 + display_priority: 0 is_visible: true - id: 1 - name: "General" + name: "General GPT" description: > Assistant with no access to documents. Chat with just the Large Language Model. prompts: @@ -60,16 +60,16 @@ personas: document_sets: [] icon_shape: 50910 icon_color: "#FF6F6F" - display_priority: 0 + display_priority: 1 is_visible: true - id: 2 - name: "Paraphrase" + name: "GPT Internet Search" description: > - Assistant that is heavily constrained and only provides exact quotes from Connected Sources. + Use this Assistant to search the Internet for you (via Bing) and get the answer prompts: - - "Paraphrase" - num_chunks: 10 + - "InternetSearch" + num_chunks: 0 llm_relevance_filter: true llm_filter_extraction: true recency_bias: "auto" @@ -77,7 +77,8 @@ personas: icon_shape: 45519 icon_color: "#6FFF8D" display_priority: 2 - is_visible: false + is_visible: true + internet_search: true - id: 3 @@ -95,4 +96,4 @@ personas: icon_color: "#9B59B6" image_generation: true display_priority: 3 - is_visible: true + is_visible: false diff --git a/backend/danswer/chat/prompts.yaml b/backend/danswer/chat/prompts.yaml index b3b9bae6467..4fe3c447495 100644 --- a/backend/danswer/chat/prompts.yaml +++ b/backend/danswer/chat/prompts.yaml @@ -107,3 +107,18 @@ prompts: directly from the documents. datetime_aware: true include_citations: true + + - name: "InternetSearch" + description: "Use this Assistant to search the Internet for you (via Bing) and get the answer" + system: > + You are an intelligent AI agent designed to assist users by providing accurate and relevant information through internet searches. 
Your primary objectives are: + Information Retrieval: Search the internet to find reliable and up-to-date information based on user queries. Ensure that the sources you reference are credible and trustworthy. + Context Understanding: Analyze user questions to understand context and intent. Provide answers that are directly related to the user's needs, offering additional context when necessary. + Summarization: When presenting information, summarize findings clearly and concisely. Highlight key points and relevant details to enhance user understanding. + User Engagement: Maintain a friendly and engaging tone in your responses. Encourage users to ask follow-up questions or request further information. + Privacy and Safety: Respect user privacy and ensure that any personal information is handled securely. Avoid sharing sensitive or inappropriate content. + Continuous Learning: Adapt and improve your responses based on user interactions and feedback. Stay updated with the latest information and trends to provide the best assistance. + task: > + Search the internet for relevant information based on the user query. Provide a concise summary of the findings and include the sources of information. 
+ datetime_aware: true + include_citations: true diff --git a/backend/danswer/tools/built_in_tools.py b/backend/danswer/tools/built_in_tools.py index 99b2ae3bbb6..1bfecef7ce4 100644 --- a/backend/danswer/tools/built_in_tools.py +++ b/backend/danswer/tools/built_in_tools.py @@ -146,7 +146,6 @@ def auto_add_search_tool_to_personas(db_session: Session) -> None: db_session.commit() logger.notice("Completed adding SearchTool to relevant Personas.") - _built_in_tools_cache: dict[int, Type[Tool]] | None = None diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index ee2c78dc1cf..82a1ee320c9 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -4,7 +4,7 @@ asyncpg==0.27.0 atlassian-python-api==3.37.0 beautifulsoup4==4.12.2 boto3==1.34.84 -celery[redis]==5.3.4 +celery==5.3.4 boto3==1.34.84 chardet==5.2.0 dask==2023.8.1