Skip to content

Commit

Permalink
Merge pull request #36 from mindvalley/chore/merge-upstream-2024052801
Browse files Browse the repository at this point in the history
chore/merge upstream 2024052801
  • Loading branch information
onimsha authored May 28, 2024
2 parents 6a5fd8f + 48fa342 commit 94c9e35
Show file tree
Hide file tree
Showing 13 changed files with 288 additions and 145 deletions.
20 changes: 18 additions & 2 deletions backend/danswer/chat/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,23 @@ prompts:
# System Prompt (as shown in UI)
system: >
You are EVE AI, a question answering system that is constantly learning and improving.
The current date is DANSWER_DATETIME_REPLACEMENT.
You can process and comprehend vast amounts of text and utilize this knowledge to provide grounded, accurate, and concise answers to diverse queries.
Develop a conversational AI system capable of accessing the company's workspace and document repository to provide insightful responses to user inquiries.
Utilize natural language processing techniques for document understanding and information extraction, ensuring the system comprehends the content and context of documents.
Integrate machine learning algorithms to refine the system's understanding and responsiveness over time.
Implement features like semantic search, document summarization, and entity recognition to enhance information retrieval accuracy and user experience.
You have access to Mindvalley's Jira, Confluence Github and Slack systems to answer the questions
You have access to Mindvalley's Jira, Confluence and Github to answer the questions
# Task Prompt (as shown in UI)
task: >
If asked to find stuff give the exact find you got, if asked for links provide the links.
Make sure you pay attention to the timeframe of the data if specified in the query; for example, if asked for the latest ticket, it should be the one with the most recent date you have in the knowledge base data set.
Do not use Knowledge base content older than 1 year in any response.
If specified for a certain data set, only pull information from that data set, such as Jira
# Inject a statement at the end of system prompt to inform the LLM of the current date/time
# If the DANSWER_DATETIME_REPLACEMENT is set, the date/time is inserted there instead
# Format looks like: "October 16, 2023 14:30"
datetime_aware: true
# Prompts the LLM to include citations in the form [1], [2] etc.
Expand All @@ -30,7 +33,16 @@ prompts:

- name: "OnlyLLM"
description: "Chat directly with the LLM!"
system: "You are a helpful assistant."
system: >
You are a helpful AI assistant. The current date is DANSWER_DATETIME_REPLACEMENT
You give concise responses to very simple questions, but provide more thorough responses to
more complex and open-ended questions.
You are happy to help with writing, analysis, question answering, math, coding and all sorts
of other tasks. You use markdown where reasonable and also for coding.
task: ""
datetime_aware: true
include_citations: true
Expand All @@ -41,6 +53,8 @@ prompts:
You are a text summarizing assistant that highlights the most important knowledge from the
context provided, prioritizing the information that relates to the user query.
The current date is DANSWER_DATETIME_REPLACEMENT.
You ARE NOT creative and always stick to the provided documents.
If there are no documents, refer to the conversation history.
Expand All @@ -58,6 +72,8 @@ prompts:
system: >
Quote and cite relevant information from provided context based on the user query.
The current date is DANSWER_DATETIME_REPLACEMENT.
You only provide quotes that are EXACT substrings from provided documents!
If there are no documents provided,
Expand Down
1 change: 1 addition & 0 deletions backend/danswer/configs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,3 +138,4 @@ class FileOrigin(str, Enum):
CHAT_UPLOAD = "chat_upload"
CHAT_IMAGE_GEN = "chat_image_gen"
CONNECTOR = "connector"
OTHER = "other"
47 changes: 46 additions & 1 deletion backend/danswer/danswerbot/slack/handlers/handle_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@
from slack_sdk import WebClient
from slack_sdk.errors import SlackApiError
from slack_sdk.models.blocks import DividerBlock
from slack_sdk.models.blocks import SectionBlock
from sqlalchemy.orm import Session

from danswer.configs.app_configs import DISABLE_GENERATIVE_AI
from danswer.configs.danswerbot_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_COT
from danswer.configs.danswerbot_configs import DANSWER_BOT_DISABLE_DOCS_ONLY_ANSWER
Expand Down Expand Up @@ -295,7 +297,7 @@ def handle_message(
logger=logger,
)
@rate_limits(client=client, channel=channel, thread_ts=message_ts_to_respond_to)
def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse:
def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | None:
action = "slack_message"
if is_bot_msg:
action = "slack_slash_message"
Expand Down Expand Up @@ -340,6 +342,9 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse:
- check_number_of_tokens(query_text)
)

if DISABLE_GENERATIVE_AI:
return None

# This also handles creating the query event in postgres
answer = get_search_answer(
query_req=new_message_request,
Expand Down Expand Up @@ -422,6 +427,46 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse:

return True

# Edge case handling, for tracking down the Slack usage issue
if answer is None:
assert DISABLE_GENERATIVE_AI is True
try:
respond_in_thread(
client=client,
channel=channel,
receiver_ids=send_to,
text="Hello! EVE AI has some results for you!",
blocks=[
SectionBlock(
text="EVE AI is down for maintenance.\nWe're working hard on recharging the AI!"
)
],
thread_ts=message_ts_to_respond_to,
# don't unfurl, since otherwise we will have 5+ previews which makes the message very long
unfurl=False,
)

# For DM (ephemeral message), we need to create a thread via a normal message so the user can see
# the ephemeral message. This also will give the user a notification which ephemeral message does not.
if respond_team_member_list:
respond_in_thread(
client=client,
channel=channel,
text=(
"👋 Hi, we've just gathered and forwarded the relevant "
+ "information to the team. They'll get back to you shortly!"
),
thread_ts=message_ts_to_respond_to,
)

return False

except Exception:
logger.exception(
f"Unable to process message - could not respond in slack in {num_retries} attempts"
)
return True

# Got an answer at this point, can remove reaction and give results
try:
update_emote_react(
Expand Down
11 changes: 9 additions & 2 deletions backend/danswer/db/tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,24 @@ def create_or_add_document_tag_list(
raise ValueError("Invalid Document, cannot attach Tags")

existing_tags_stmt = select(Tag).where(
Tag.tag_key == tag_key, Tag.tag_value.in_(tag_values), Tag.source == source
Tag.tag_key == tag_key,
Tag.tag_value.in_(valid_tag_values),
Tag.source == source,
)
existing_tags = list(db_session.execute(existing_tags_stmt).scalars().all())
existing_tag_values = {tag.tag_value for tag in existing_tags}

new_tags = []
for tag_value in tag_values:
for tag_value in valid_tag_values:
if tag_value not in existing_tag_values:
new_tag = Tag(tag_key=tag_key, tag_value=tag_value, source=source)
db_session.add(new_tag)
new_tags.append(new_tag)
existing_tag_values.add(tag_value)

logger.debug(
f"Created new tags: {', '.join([f'{tag.tag_key}:{tag.tag_value}' for tag in new_tags])}"
)

all_tags = existing_tags + new_tags

Expand Down
10 changes: 2 additions & 8 deletions backend/danswer/llm/answering/prompts/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@
from danswer.llm.utils import check_message_tokens
from danswer.llm.utils import get_default_llm_tokenizer
from danswer.llm.utils import translate_history_to_basemessages
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT
from danswer.prompts.prompt_utils import add_time_to_system_prompt
from danswer.prompts.prompt_utils import drop_messages_history_overflow
from danswer.prompts.prompt_utils import get_current_llm_day_time
from danswer.tools.message import ToolCallSummary


Expand All @@ -26,12 +25,7 @@ def default_build_system_message(
) -> SystemMessage | None:
system_prompt = prompt_config.system_prompt.strip()
if prompt_config.datetime_aware:
if system_prompt:
system_prompt += ADDITIONAL_INFO.format(
datetime_info=get_current_llm_day_time()
)
else:
system_prompt = get_current_llm_day_time()
system_prompt = add_time_to_system_prompt(system_prompt=system_prompt)

if not system_prompt:
return None
Expand Down
16 changes: 4 additions & 12 deletions backend/danswer/llm/answering/prompts/citations_prompt.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from functools import lru_cache

from langchain.schema.messages import HumanMessage
from langchain.schema.messages import SystemMessage

Expand All @@ -15,14 +13,13 @@
from danswer.llm.utils import build_content_with_imgs
from danswer.llm.utils import check_number_of_tokens
from danswer.llm.utils import get_max_input_tokens
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT
from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT
from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT_FOR_TOOL_CALLING
from danswer.prompts.prompt_utils import add_time_to_system_prompt
from danswer.prompts.prompt_utils import build_complete_context_str
from danswer.prompts.prompt_utils import build_task_prompt_reminders
from danswer.prompts.prompt_utils import get_current_llm_day_time
from danswer.prompts.token_counts import ADDITIONAL_INFO_TOKEN_CNT
from danswer.prompts.token_counts import (
CHAT_USER_PROMPT_WITH_CONTEXT_OVERHEAD_TOKEN_CNT,
Expand Down Expand Up @@ -117,19 +114,14 @@ def compute_max_llm_input_tokens(llm_config: LLMConfig) -> int:
return input_tokens - _MISC_BUFFER


def build_citations_system_message(
    prompt_config: PromptConfig,
) -> SystemMessage:
    """Build the system message for citation-style question answering.

    Starts from the configured system prompt, appends the citation
    requirement when requested, and injects the current date/time for
    datetime-aware prompts.

    Args:
        prompt_config: prompt settings (system prompt text plus the
            include_citations / datetime_aware flags).

    Returns:
        A SystemMessage wrapping the assembled system prompt.
    """
    # NOTE: no @lru_cache here — PromptConfig is not guaranteed hashable,
    # and the prompt text depends on the current time anyway.
    system_prompt = prompt_config.system_prompt.strip()
    # Only ask for inline citations when the prompt config requests them.
    if prompt_config.include_citations:
        system_prompt += REQUIRE_CITATION_STATEMENT
    # Delegates placeholder substitution vs. appending to the shared helper.
    if prompt_config.datetime_aware:
        system_prompt = add_time_to_system_prompt(system_prompt=system_prompt)

    return SystemMessage(content=system_prompt)

Expand Down
44 changes: 40 additions & 4 deletions backend/danswer/llm/chat_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,25 @@ def __init__(
def _log_prompt(prompt: LanguageModelInput) -> None:
if isinstance(prompt, list):
for ind, msg in enumerate(prompt):
logger.debug(f"Message {ind}:\n{msg.content}")
if isinstance(msg, AIMessageChunk):
if msg.content:
log_msg = msg.content
elif msg.tool_call_chunks:
log_msg = "Tool Calls: " + str(
[
{
key: value
for key, value in tool_call.items()
if key != "index"
}
for tool_call in msg.tool_call_chunks
]
)
else:
log_msg = ""
logger.debug(f"Message {ind}:\n{log_msg}")
else:
logger.debug(f"Message {ind}:\n{msg.content}")
if isinstance(prompt, str):
logger.debug(f"Prompt:\n{prompt}")

Expand Down Expand Up @@ -332,6 +350,24 @@ def stream(

yield message_chunk

full_output = output.content if output else ""
if LOG_ALL_MODEL_INTERACTIONS:
logger.debug(f"Raw Model Output:\n{full_output}")
if LOG_ALL_MODEL_INTERACTIONS and output:
content = output.content or ""
if isinstance(output, AIMessage):
if content:
log_msg = content
elif output.tool_calls:
log_msg = "Tool Calls: " + str(
[
{
key: value
for key, value in tool_call.items()
if key != "index"
}
for tool_call in output.tool_calls
]
)
else:
log_msg = ""
logger.debug(f"Raw Model Output:\n{log_msg}")
else:
logger.debug(f"Raw Model Output:\n{content}")
13 changes: 0 additions & 13 deletions backend/danswer/prompts/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,3 @@
SOURCES_KEY = "sources"

DEFAULT_IGNORE_STATEMENT = " Ignore any context documents that are not relevant."

REQUIRE_CITATION_STATEMENT = """
Cite relevant statements INLINE using the format [1], [2], [3], etc to reference the document number, \
DO NOT provide a reference section at the end and DO NOT provide any links following the citations.
""".rstrip()

NO_CITATION_STATEMENT = """
Do not provide any citations even if there are examples in the chat history.
""".rstrip()

CITATION_REMINDER = """
Remember to provide inline citations in the format [1], [2], [3], etc.
"""
35 changes: 33 additions & 2 deletions backend/danswer/prompts/prompt_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,49 @@
from danswer.configs.constants import DocumentSource
from danswer.db.models import Prompt
from danswer.llm.answering.models import PromptConfig
from danswer.prompts.chat_prompts import ADDITIONAL_INFO
from danswer.prompts.chat_prompts import CITATION_REMINDER
from danswer.prompts.constants import CODE_BLOCK_PAT
from danswer.prompts.direct_qa_prompts import LANGUAGE_HINT
from danswer.search.models import InferenceChunk


MOST_BASIC_PROMPT = "You are a helpful AI assistant."
DANSWER_DATETIME_REPLACEMENT = "DANSWER_DATETIME_REPLACEMENT"
BASIC_TIME_STR = "The current date is {datetime_info}."


def get_current_llm_day_time(
    include_day_of_week: bool = True, full_sentence: bool = True
) -> str:
    """Return the current date/time formatted for inclusion in a prompt.

    Args:
        include_day_of_week: prepend the weekday name to the timestamp
            (only consulted when full_sentence is False).
        full_sentence: wrap the weekday and timestamp in a complete
            sentence; takes precedence over include_day_of_week.

    Returns:
        The formatted current date/time string.
    """
    current_datetime = datetime.now()
    # Format looks like: "October 16, 2023 14:30"
    formatted_datetime = current_datetime.strftime("%B %d, %Y %H:%M")
    day_of_week = current_datetime.strftime("%A")
    if full_sentence:
        return f"The current day and time is {day_of_week} {formatted_datetime}"
    if include_day_of_week:
        return f"{day_of_week} {formatted_datetime}"
    return f"{formatted_datetime}"


def add_time_to_system_prompt(system_prompt: str) -> str:
    """Inject the current date/time into a system prompt.

    If the prompt contains the DANSWER_DATETIME_REPLACEMENT placeholder,
    substitute the bare timestamp in place. Otherwise append a full
    date/time sentence, falling back to a minimal assistant prompt plus
    the date when the system prompt is empty.

    Args:
        system_prompt: the (possibly empty) system prompt text.

    Returns:
        The system prompt with the current date/time included.
    """
    if DANSWER_DATETIME_REPLACEMENT in system_prompt:
        return system_prompt.replace(
            DANSWER_DATETIME_REPLACEMENT,
            get_current_llm_day_time(full_sentence=False, include_day_of_week=False),
        )

    if system_prompt:
        return system_prompt + ADDITIONAL_INFO.format(
            datetime_info=get_current_llm_day_time()
        )
    else:
        return (
            MOST_BASIC_PROMPT
            + " "
            + BASIC_TIME_STR.format(datetime_info=get_current_llm_day_time())
        )


def build_task_prompt_reminders(
Expand Down
1 change: 1 addition & 0 deletions backend/danswer/prompts/token_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

LANGUAGE_HINT_TOKEN_CNT = check_number_of_tokens(LANGUAGE_HINT)

# If the date/time is inserted directly as a replacement in the prompt, this is a slight over count
ADDITIONAL_INFO_TOKEN_CNT = check_number_of_tokens(
ADDITIONAL_INFO.format(datetime_info=get_current_llm_day_time())
)
Loading

0 comments on commit 94c9e35

Please sign in to comment.