From 2418ff4e90d6872069e56e2ab03115fe331f7807 Mon Sep 17 00:00:00 2001
From: Rohan Rao <158347344+rohrao@users.noreply.github.com>
Date: Tue, 20 Aug 2024 13:27:27 -0700
Subject: [PATCH 1/4] Update main.py for new API Catalog models

---
 community/5_mins_rag_no_gpu/main.py | 79 +++++++++++------------------
 1 file changed, 31 insertions(+), 48 deletions(-)

diff --git a/community/5_mins_rag_no_gpu/main.py b/community/5_mins_rag_no_gpu/main.py
index 5640f8f5..86071cff 100644
--- a/community/5_mins_rag_no_gpu/main.py
+++ b/community/5_mins_rag_no_gpu/main.py
@@ -22,56 +22,44 @@
 
 import streamlit as st
 import os
+from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import DirectoryLoader
+from langchain_community.vectorstores import FAISS
+import pickle
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
 
-st.set_page_config(layout = "wide")
+st.set_page_config(layout="wide")
 
+# Component #1 - Document Upload
 with st.sidebar:
     DOCS_DIR = os.path.abspath("./uploaded_docs")
     if not os.path.exists(DOCS_DIR):
         os.makedirs(DOCS_DIR)
     st.subheader("Add to the Knowledge Base")
     with st.form("my-form", clear_on_submit=True):
-        uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files = True)
+        uploaded_files = st.file_uploader("Upload a file to the Knowledge Base:", accept_multiple_files=True)
         submitted = st.form_submit_button("Upload!")
 
     if uploaded_files and submitted:
         for uploaded_file in uploaded_files:
             st.success(f"File {uploaded_file.name} uploaded successfully!")
-            with open(os.path.join(DOCS_DIR, uploaded_file.name),"wb") as f:
+            with open(os.path.join(DOCS_DIR, uploaded_file.name), "wb") as f:
                 f.write(uploaded_file.read())
 
-############################################
 # Component #2 - Embedding Model and LLM
-############################################
+llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
+document_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type="passage")
+#query_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type="query")
 
-from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
-
-# make sure to export your NVIDIA AI Playground key as NVIDIA_API_KEY!
-llm = ChatNVIDIA(model="ai-llama3-70b")
-document_embedder = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="passage")
-query_embedder = NVIDIAEmbeddings(model="ai-embed-qa-4", model_type="query")
-
-############################################
 # Component #3 - Vector Database Store
-############################################
-
-from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.document_loaders import DirectoryLoader
-from langchain_community.vectorstores import FAISS
-import pickle
-
 with st.sidebar:
-    # Option for using an existing vector store
     use_existing_vector_store = st.radio("Use existing vector store if available", ["Yes", "No"], horizontal=True)
 
-# Path to the vector store file
 vector_store_path = "vectorstore.pkl"
-
-# Load raw documents from the directory
 raw_documents = DirectoryLoader(DOCS_DIR).load()
 
-
-# Check for existing vector store file
 vector_store_exists = os.path.exists(vector_store_path)
 vectorstore = None
 if use_existing_vector_store == "Yes" and vector_store_exists:
@@ -81,9 +69,9 @@
         st.success("Existing vector store loaded successfully.")
 else:
     with st.sidebar:
-        if raw_documents:
+        if raw_documents and use_existing_vector_store == "Yes":
             with st.spinner("Splitting documents into chunks..."):
-                text_splitter = CharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
+                text_splitter = CharacterTextSplitter(chunk_size=512, chunk_overlap=200)
                 documents = text_splitter.split_documents(raw_documents)
 
             with st.spinner("Adding document chunks to vector database..."):
@@ -96,10 +84,7 @@
         else:
             st.warning("No documents available to process!", icon="⚠️")
 
-############################################
 # Component #4 - LLM Response Generation and Chat
-############################################
-
 st.subheader("Chat with your AI Assistant, Envie!")
 
 if "messages" not in st.session_state:
@@ -109,34 +94,32 @@
     with st.chat_message(message["role"]):
         st.markdown(message["content"])
 
-from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
-
-prompt_template = ChatPromptTemplate.from_messages(
-    [("system", "You are a helpful AI assistant named Envie. You will reply to questions only based on the context that you are provided. If something is out of context, you will refrain from replying and politely decline to respond to the user."), ("user", "{input}")]
-)
-user_input = st.chat_input("Can you tell me what NVIDIA is known for?")
-llm = ChatNVIDIA(model="ai-llama3-70b")
+prompt_template = ChatPromptTemplate.from_messages([
+    ("system", "You are a helpful AI assistant named Envie. If provided with context, use it to inform your responses. If no context is available, use your general knowledge to provide a helpful response."),
+    ("human", "{input}")
+])
 
 chain = prompt_template | llm | StrOutputParser()
 
-if user_input and vectorstore!=None:
+user_input = st.chat_input("Can you tell me what NVIDIA is known for?")
+
+if user_input:
     st.session_state.messages.append({"role": "user", "content": user_input})
-    retriever = vectorstore.as_retriever()
-    docs = retriever.invoke(user_input)
     with st.chat_message("user"):
         st.markdown(user_input)
 
-    context = ""
-    for doc in docs:
-        context += doc.page_content + "\n\n"
-
-    augmented_user_input = "Context: " + context + "\n\nQuestion: " + user_input + "\n"
-
     with st.chat_message("assistant"):
         message_placeholder = st.empty()
         full_response = ""
 
+        if vectorstore is not None and use_existing_vector_store == "Yes":
+            retriever = vectorstore.as_retriever()
+            docs = retriever.invoke(user_input)
+            context = "\n\n".join([doc.page_content for doc in docs])
+            augmented_user_input = f"Context: {context}\n\nQuestion: {user_input}\n"
+        else:
+            augmented_user_input = f"Question: {user_input}\n"
+
         for response in chain.stream({"input": augmented_user_input}):
             full_response += response
             message_placeholder.markdown(full_response + "▌")

From c10a71d9207afde1f7b6dc1017bc7e425959e5f0 Mon Sep 17 00:00:00 2001
From: Rohan Rao <158347344+rohrao@users.noreply.github.com>
Date: Tue, 20 Aug 2024 13:46:27 -0700
Subject: [PATCH 2/4] Update main.py with new API Catalog models

---
 community/5_mins_rag_no_gpu/main.py | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/community/5_mins_rag_no_gpu/main.py b/community/5_mins_rag_no_gpu/main.py
index 86071cff..04a0fdba 100644
--- a/community/5_mins_rag_no_gpu/main.py
+++ b/community/5_mins_rag_no_gpu/main.py
@@ -16,10 +16,6 @@
 # This is a simple standalone implementation showing rag pipeline using Nvidia AI Foundational models.
 # It uses a simple Streamlit UI and one file implementation of a minimalistic RAG pipeline.
 
-############################################
-# Component #1 - Document Loader
-############################################
-
 import streamlit as st
 import os
 from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
@@ -50,8 +46,7 @@
 
 # Component #2 - Embedding Model and LLM
 llm = ChatNVIDIA(model="meta/llama3-70b-instruct")
-document_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type="passage")
-#query_embedder = NVIDIAEmbeddings(model="NV-Embed-QA", model_type="query")
+document_embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage")
 
 # Component #3 - Vector Database Store
 with st.sidebar:

From 2e4feb1ee1e9e8ce073135830ce65ee7f7214209 Mon Sep 17 00:00:00 2001
From: Rohan Rao <158347344+rohrao@users.noreply.github.com>
Date: Tue, 20 Aug 2024 13:47:00 -0700
Subject: [PATCH 3/4] Update requirements.txt to remove versioning on LangChain
 connector

---
 community/5_mins_rag_no_gpu/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/community/5_mins_rag_no_gpu/requirements.txt b/community/5_mins_rag_no_gpu/requirements.txt
index abd21ba6..72047cae 100644
--- a/community/5_mins_rag_no_gpu/requirements.txt
+++ b/community/5_mins_rag_no_gpu/requirements.txt
@@ -2,4 +2,4 @@ streamlit==1.30.0
 faiss-cpu==1.7.4
 langchain==0.1.20
 unstructured[all-docs]==0.11.2
-langchain-nvidia-ai-endpoints==0.0.19
+langchain-nvidia-ai-endpoints

From c484e5a1458058710ddecd847df7857d91c33bda Mon Sep 17 00:00:00 2001
From: Rohan Rao <158347344+rohrao@users.noreply.github.com>
Date: Tue, 20 Aug 2024 13:53:34 -0700
Subject: [PATCH 4/4] Create README.md for 5min RAG

---
 community/5_mins_rag_no_gpu/README.md | 48 +++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 community/5_mins_rag_no_gpu/README.md

diff --git a/community/5_mins_rag_no_gpu/README.md b/community/5_mins_rag_no_gpu/README.md
new file mode 100644
index 00000000..86a9fa4b
--- /dev/null
+++ b/community/5_mins_rag_no_gpu/README.md
@@ -0,0 +1,48 @@
+# RAG in 5 Minutes
+
+This implementation is tied to the [YouTube video on NVIDIA Developer](https://youtu.be/N_OOfkEWcOk).
+
+This is a simple standalone implementation showing a minimal RAG pipeline that uses models available from [NVIDIA API Catalog](https://catalog.ngc.nvidia.com/ai-foundation-models).
+The catalog enables you to experience state-of-the-art LLMs accelerated by NVIDIA.
+Developers get free credits for 10K requests to any of the models.
+
+The example uses an [integration package to LangChain](https://python.langchain.com/docs/integrations/providers/nvidia) to access the models.
+NVIDIA engineers develop, test, and maintain the open source integration.
+This example uses a simple [Streamlit](https://streamlit.io/) based user interface and has a one-file implementation.
+Because the example uses the models from the NVIDIA API Catalog, you do not need a GPU to run the example.
+
+### Steps
+
+1. Create a Python virtual environment and activate it:
+
+   ```console
+   python3 -m virtualenv genai
+   source genai/bin/activate
+   ```
+
+1. From the root of this repository, `GenerativeAIExamples`, install the requirements:
+
+   ```console
+   pip install -r community/5_mins_rag_no_gpu/requirements.txt
+   ```
+
+1. Add your NVIDIA API key as an environment variable:
+
+   ```console
+   export NVIDIA_API_KEY="nvapi-*"
+   ```
+
+   If you don't already have an API key, visit the [NVIDIA API Catalog](https://build.ngc.nvidia.com/explore/), select any model, then click `Get API Key`.
+
+1. Run the example using Streamlit:
+
+   ```console
+   streamlit run community/5_mins_rag_no_gpu/main.py
+   ```
+
+1. Test the deployed example by going to `http://<host_ip>:8501` in a web browser.
+
+   Click **Browse Files** and select your knowledge source.
+   After selecting, click **Upload!** to complete the ingestion process.
+
+You are all set now! Try out queries related to the knowledge base using text from the user interface.