From 483399b2603d401d7edf818b457119eb9b95e124 Mon Sep 17 00:00:00 2001 From: Jacob Lee Date: Tue, 17 Oct 2023 18:16:03 -0700 Subject: [PATCH] Adds Kay and YDC (#13) * Fix overly large payloads, retry on tab switch * Adds Kay and YDC --- README.md | 8 +- main.py | 10 ++ nextjs/app/components/ChatWindow.tsx | 121 +++++++++++----------- nextjs/app/components/DefaultQuestion.tsx | 15 +++ nextjs/app/components/SourceBubble.tsx | 22 +--- poetry.lock | 16 ++- pyproject.toml | 1 + 7 files changed, 114 insertions(+), 79 deletions(-) create mode 100644 nextjs/app/components/DefaultQuestion.tsx diff --git a/README.md b/README.md index a6a78cb..aa1db6c 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,16 @@ The code includes a simple backup that uses the Google Custom Search Engine for export OPENAI_API_KEY= export TAVILY_API_KEY= -# if you'd like to use the backup retriever +# if you'd like to use the You.com retriever +export YDC_API_KEY= + +# if you'd like to use the Google retriever export GOOGLE_CSE_ID= export GOOGLE_API_KEY= +# if you'd like to use the Kay.ai retriever +export KAY_API_KEY= + # for tracing export LANGCHAIN_TRACING_V2=true export LANGCHAIN_ENDPOINT="https://api.smith.langchain.com" diff --git a/main.py b/main.py index 1700f6b..2acbffd 100644 --- a/main.py +++ b/main.py @@ -18,6 +18,7 @@ TavilySearchAPIRetriever) from langchain.retrievers.document_compressors import ( DocumentCompressorPipeline, EmbeddingsFilter) +from langchain.retrievers.kay import KayAiRetriever from langchain.retrievers.you import YouRetriever from langchain.schema import Document from langchain.schema.document import Document @@ -173,6 +174,14 @@ def get_retriever(): you_retriever = ContextualCompressionRetriever( base_compressor=pipeline_compressor, base_retriever=base_you_retriever ) + base_kay_retriever = KayAiRetriever.create( + dataset_id="company", + data_types=["10-K", "10-Q", "PressRelease"], + num_contexts=3, + ) + kay_retriever = ContextualCompressionRetriever( + base_compressor=pipeline_compressor, base_retriever=base_kay_retriever + ) return tavily_retriever.configurable_alternatives( # This gives this field an id # When configuring the end runnable, we can then use this id to configure this field @@ -180,6 +189,7 @@ def get_retriever(): default_key="tavily", google=google_retriever, you=you_retriever, + kay=kay_retriever, ).with_config(run_name="FinalSourceRetriever") diff --git a/nextjs/app/components/ChatWindow.tsx b/nextjs/app/components/ChatWindow.tsx index 8ba2194..b50e5a3 100644 --- a/nextjs/app/components/ChatWindow.tsx +++ b/nextjs/app/components/ChatWindow.tsx @@ -24,6 +24,9 @@ import { } from "@chakra-ui/react"; import { ArrowUpIcon } from "@chakra-ui/icons"; import { Source } from "./SourceBubble"; +import { DefaultQuestion } from "./DefaultQuestion"; + +type RetrieverName = "tavily" | "kay" | "you" | "google"; export function ChatWindow(props: { apiBaseUrl: string; @@ -35,7 +38,7 @@ export function ChatWindow(props: { const [messages, setMessages] = useState>([]); const [input, setInput] = useState(""); const [isLoading, setIsLoading] = useState(false); - const [retriever, setRetriever] = useState("tavily"); + const [retriever, setRetriever] = useState("tavily"); const [llm, setLlm] = useState("openai"); const [chatHistory, setChatHistory] = useState< @@ -141,7 +144,8 @@ export function ChatWindow(props: { sources = streamedResponse.logs[ sourceStepName ].final_output.documents.map((doc: Record) => ({ - url: doc.metadata.source, + url: doc.metadata.source ?? doc.metadata.data_source_link, + defaultSourceUrl: retriever === "you" ? "https://you.com" : "", title: doc.metadata.title, images: doc.metadata.images, })); @@ -183,6 +187,30 @@ export function ChatWindow(props: { } }; + const defaultQuestions = [ + "what is langchain?", + "history of mesopotamia", + "how to build a discord bot", + "leonardo dicaprio girlfriend", + "fun gift ideas for software engineers", + "how does a prism separate light", + "what bear is best", + ]; + + const DEFAULT_QUESTIONS: Record = { + tavily: defaultQuestions, + you: defaultQuestions, + google: defaultQuestions, + kay: [ + "Is Johnson & Johnson increasing its marketing budget?", + "How is Lululemon adapting to new customer trends?", + "Which industries are growing in recent 10-Q reports?", + "Who are Etsy’s competitors?", + "Which companies reported data breaches?", + "What were the biggest strategy changes made by Roku in 2023?", + ], + }; + const sendInitialQuestion = async (question: string) => { await sendMessage(question); }; @@ -216,9 +244,12 @@ export function ChatWindow(props: {
Powered by - setRetriever(e.target.value as RetrieverName)} + > - {/* */} + + and @@ -285,64 +316,34 @@ export function ChatWindow(props: { {messages.length === 0 ? (
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - what is langchain? -
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - history of mesopotamia -
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - how to build a discord bot -
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - leonardo dicaprio girlfriend -
+ {DEFAULT_QUESTIONS[retriever] + .slice(0, 4) + .map((defaultQuestion, i) => { + return ( + + sendInitialQuestion( + (e.target as HTMLDivElement).innerText, + ) + } + > + ); + })}
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - fun gift ideas for software engineers -
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - how does a prism separate light -
-
- sendInitialQuestion((e.target as HTMLDivElement).innerText) - } - className="bg-stone-700 px-2 py-1 mx-2 rounded cursor-pointer justify-center text-stone-200 hover:bg-stone-500" - > - what bear is best -
+ {DEFAULT_QUESTIONS[retriever].slice(4).map((defaultQuestion, i) => { + return ( + + sendInitialQuestion((e.target as HTMLDivElement).innerText) + } + > + ); + })}
) : ( diff --git a/nextjs/app/components/DefaultQuestion.tsx b/nextjs/app/components/DefaultQuestion.tsx new file mode 100644 index 0000000..201e1db --- /dev/null +++ b/nextjs/app/components/DefaultQuestion.tsx @@ -0,0 +1,15 @@ +import { MouseEventHandler } from "react"; + +export function DefaultQuestion(props: { + question: string; + onMouseUp: MouseEventHandler; +}) { + return ( +
+ {props.question} +
+ ); +} diff --git a/nextjs/app/components/SourceBubble.tsx b/nextjs/app/components/SourceBubble.tsx index 6a031f2..4f21243 100644 --- a/nextjs/app/components/SourceBubble.tsx +++ b/nextjs/app/components/SourceBubble.tsx @@ -5,6 +5,7 @@ export type Source = { url: string; title: string; images: string[]; + defaultSourceUrl?: string; }; export function SourceBubble(props: { @@ -14,26 +15,13 @@ export function SourceBubble(props: { onMouseEnter: () => any; onMouseLeave: () => any; }) { - const cumulativeOffset = function (element: HTMLElement | null) { - var top = 0, - left = 0; - do { - top += element?.offsetTop || 0; - left += element?.offsetLeft || 0; - element = (element?.offsetParent as HTMLElement) || null; - } while (element); - - return { - top: top, - left: left, - }; - }; - - const hostname = new URL(props.source.url).hostname.replace("www.", ""); + const hostname = new URL( + props.source.url ?? props.source.defaultSourceUrl, + ).hostname.replace("www.", ""); return (