-
Notifications
You must be signed in to change notification settings - Fork 210
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #273 from Yash-Bhatnagar-02/patch-1
Query to multi PDFs
- Loading branch information
Showing
4 changed files
with
177 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Query with multiple PDFs | ||
|
||
### **Goal** | ||
Developed a model that takes multiple PDFs as input, after which the user can ask questions about the uploaded PDFs. | ||
|
||
### **Description** | ||
Users can upload multiple PDFs (up to 200 MB per file) and ask questions related to them. An interactive UI was created using Streamlit and deployed on Streamlit Community Cloud. | ||
|
||
### **What I Have Done!** | ||
1. Upload multiple PDFs. | ||
2. Ask questions related to the PDFs. | ||
3. Get suitable answers. | ||
4. Summarize the PDFs. | ||
|
||
### **Libraries Needed** | ||
- `streamlit` | ||
- `textwrap` | ||
|
||
### **Usage** | ||
Set up and run the Streamlit application: | ||
|
||
1. Clone the repo: | ||
```bash | ||
git clone https://github.com/Yash-Bhatnagar-02/Chat-with-multiple-PDFs.git | ||
``` | ||
|
||
2. Install requirements in the terminal: | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
3. Create an environment file (`.env`) in the root directory of | ||
your project folder and add your Google API key: | ||
```bash | ||
GOOGLE_API_KEY="" | ||
``` | ||
|
||
4. Run the application with the following command: | ||
```bash | ||
streamlit run app.py | ||
``` | ||
|
||
|
||
### How to Use: | ||
- Upon starting, upload the PDF(s). | ||
- Type any question related to the PDF. | ||
- A response will be generated containing the suitable answer. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import streamlit as st | ||
import textwrap | ||
from PyPDF2 import PdfReader | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
import os | ||
from langchain_google_genai import GoogleGenerativeAIEmbeddings | ||
import google.generativeai as genai | ||
|
||
from langchain.vectorstores import FAISS | ||
from langchain_google_genai import ChatGoogleGenerativeAI | ||
from langchain.chains.question_answering import load_qa_chain | ||
from langchain.prompts import PromptTemplate | ||
from dotenv import load_dotenv | ||
|
||
|
||
|
||
# Load GOOGLE_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# Authenticate the google-generativeai client used for embeddings and chat.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
||
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: Iterable of file-like objects accepted by ``PyPDF2.PdfReader``
            (e.g. Streamlit ``UploadedFile`` objects).

    Returns:
        str: All pages' text, in upload order then page order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned/image-only pages); the original unguarded
            # `text += page.extract_text()` raised TypeError in that case.
            text += page.extract_text() or ""
    return text
|
||
def get_text_chunks(text):
    """Split ``text`` into large overlapping chunks suitable for embedding.

    Args:
        text: The full concatenated PDF text.

    Returns:
        list[str]: Chunks of up to 10,000 characters with 1,000-character
        overlap between consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
|
||
def get_vector_store(text_chunks):
    """Embed ``text_chunks`` and persist a FAISS index to ./faiss_index.

    Uses Google's "models/embedding-001" embedding model; the saved index is
    later reloaded by ``user_input`` for similarity search.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")
|
||
def get_conversational_chain():
    """Build a "stuff"-type QA chain over Gemini Pro.

    The prompt instructs the model to answer only from the retrieved context
    and to say "answer is not available in the context" otherwise.

    Returns:
        A ``load_qa_chain`` chain expecting ``input_documents`` and
        ``question`` inputs.
    """
    template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n
Answer:
"""

    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
|
||
def user_input(user_question):
    """Answer ``user_question`` against the locally saved FAISS index.

    Embeds the question, retrieves the most similar chunks from the
    "faiss_index" store built by ``get_vector_store``, runs the QA chain,
    and writes the reply into the Streamlit page.
    """
    wait = st.text("Please Wait...")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # Guard: without this, asking a question before any PDF has been
    # processed crashes with a raw traceback instead of a readable message.
    if not os.path.isdir("faiss_index"):
        wait.empty()
        st.error("No index found — please upload and process PDFs first.")
        return

    # allow_dangerous_deserialization: the index is produced locally by this
    # app (not untrusted input), so unpickling it here is acceptable.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True)

    # Removed leftover debug `print(response)` — it leaked full responses
    # to the server console on every query.
    wait.empty()
    st.write("Reply: ", response["output_text"])
|
||
def main():
    """Streamlit entry point: render the chat UI and the PDF-upload sidebar."""
    st.set_page_config("PDF Querying")  # fixed typo: was "PDF Quering"
    st.header("Chat with Multiple PDFs!")

    # Full-page background image — Streamlit has no native option for this,
    # hence the raw-HTML workaround with unsafe_allow_html.
    page_bg_img = '''
<style>
body {
background-image: url("https://images.unsplash.com/photo-1542281286-9e0a16bb7366");
background-size: cover;
}
</style>
'''
    st.markdown(page_bg_img, unsafe_allow_html=True)

    # Question form: submitting re-runs the script with `user_question` set.
    with st.form(key="my_form"):
        user_question = st.text_area(label="AMA! 😎", max_chars=40, key="query")
        submit_button = st.form_submit_button(label="Submit")

        if user_question:
            user_input(user_question)

    # Sidebar: upload PDFs and build the FAISS index from their text.
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files 📚 ", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Guard: processing an empty upload list would crash
                # FAISS.from_texts with an opaque error.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")
        st.write('Made with <3 by *[Yash Bhatnagar](https://www.linkedin.com/in/yash-bhatnagar-203aa622a/)*')
|
||
|
||
# Launch the app only when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[theme] | ||
primaryColor="#F9E79F" | ||
backgroundColor="#FFFFFF" | ||
secondaryBackgroundColor="#F0F2F6" | ||
textColor="#262730" | ||
font="sans serif" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
streamlit | ||
google-generativeai | ||
python-dotenv | ||
langchain | ||
PyPDF2 | ||
faiss-cpu | ||
langchain_google_genai | ||
langchain-community |