-
Notifications
You must be signed in to change notification settings - Fork 210
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #273 from Yash-Bhatnagar-02/patch-1
Query to multi PDFs
- Loading branch information
Showing
4 changed files
with
177 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Query with multiple PDFs | ||
|
||
### **Goal** | ||
Developed a model that takes multiple PDFs as input, after which the user can ask questions about the uploaded PDFs. | ||
|
||
### **Description** | ||
Users can upload multiple PDFs (up to 200 MB per file) and ask questions related to them. An interactive UI was created using Streamlit and deployed on Streamlit Community Cloud. | ||
|
||
### **What I Have Done!** | ||
1. Upload multiple PDFs. | ||
2. Ask questions related to the PDFs. | ||
3. Get suitable answers. | ||
4. Summarize the PDFs. | ||
|
||
### **Libraries Needed** | ||
- `streamlit` | ||
- `textwrap` | ||
|
||
### **Usage** | ||
Set up and run the Streamlit application: | ||
|
||
1. Clone the repo: | ||
```bash | ||
git clone https://github.com/Yash-Bhatnagar-02/Chat-with-multiple-PDFs.git | ||
``` | ||
|
||
2. Install requirements in the terminal: | ||
```bash | ||
pip install -r requirements.txt | ||
``` | ||
|
||
3. Create an environment file (`.env`) in the root directory of | ||
your project folder and add your Google API key: | ||
```bash | ||
GOOGLE_API_KEY="" | ||
``` | ||
|
||
4. Run the application with the following command: | ||
```bash | ||
streamlit run app.py | ||
``` | ||
|
||
|
||
### How to Use: | ||
- Upon starting, upload the PDF(s). | ||
- Type any question related to the PDF. | ||
- A response will be generated containing the suitable answer. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import streamlit as st | ||
import textwrap | ||
from PyPDF2 import PdfReader | ||
from langchain.text_splitter import RecursiveCharacterTextSplitter | ||
import os | ||
from langchain_google_genai import GoogleGenerativeAIEmbeddings | ||
import google.generativeai as genai | ||
|
||
from langchain.vectorstores import FAISS | ||
from langchain_google_genai import ChatGoogleGenerativeAI | ||
from langchain.chains.question_answering import load_qa_chain | ||
from langchain.prompts import PromptTemplate | ||
from dotenv import load_dotenv | ||
|
||
|
||
|
||
# Load GOOGLE_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# Authenticate the google-generativeai client used for embeddings and chat.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
||
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: Iterable of file-like objects accepted by ``PyPDF2.PdfReader``
            (e.g. Streamlit ``UploadedFile`` objects).

    Returns:
        str: All pages' text, in upload order then page order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no extractable text
            # (e.g. scanned/image-only pages); the original unguarded
            # `text += page.extract_text()` raised TypeError in that case.
            text += page.extract_text() or ""
    return text
|
||
def get_text_chunks(text):
    """Split ``text`` into large overlapping chunks suitable for embedding.

    Args:
        text: The full concatenated PDF text.

    Returns:
        list[str]: Chunks of up to 10,000 characters with 1,000-character
        overlap between consecutive chunks.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
|
||
def get_vector_store(text_chunks):
    """Embed ``text_chunks`` and persist a FAISS index to ./faiss_index.

    Uses Google's "models/embedding-001" embedding model; the saved index is
    later reloaded by ``user_input`` for similarity search.
    """
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    index = FAISS.from_texts(text_chunks, embedding=embedder)
    index.save_local("faiss_index")
|
||
def get_conversational_chain():
    """Build a "stuff"-type QA chain over Gemini Pro.

    The prompt instructs the model to answer only from the retrieved context
    and to say "answer is not available in the context" otherwise.

    Returns:
        A ``load_qa_chain`` chain expecting ``input_documents`` and
        ``question`` inputs.
    """
    template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n
Answer:
"""

    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
|
||
def user_input(user_question):
    """Answer ``user_question`` against the locally saved FAISS index.

    Embeds the question, retrieves the most similar chunks from the
    "faiss_index" store built by ``get_vector_store``, runs the QA chain,
    and writes the reply into the Streamlit page.
    """
    wait = st.text("Please Wait...")
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

    # Guard: without this, asking a question before any PDF has been
    # processed crashes with a raw traceback instead of a readable message.
    if not os.path.isdir("faiss_index"):
        wait.empty()
        st.error("No index found — please upload and process PDFs first.")
        return

    # allow_dangerous_deserialization: the index is produced locally by this
    # app (not untrusted input), so unpickling it here is acceptable.
    new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    docs = new_db.similarity_search(user_question)

    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True)

    # Removed leftover debug `print(response)` — it leaked full responses
    # to the server console on every query.
    wait.empty()
    st.write("Reply: ", response["output_text"])
|
||
def main():
    """Streamlit entry point: render the chat UI and the PDF-upload sidebar."""
    st.set_page_config("PDF Querying")  # fixed typo: was "PDF Quering"
    st.header("Chat with Multiple PDFs!")

    # Full-page background image — Streamlit has no native option for this,
    # hence the raw-HTML workaround with unsafe_allow_html.
    page_bg_img = '''
<style>
body {
background-image: url("https://images.unsplash.com/photo-1542281286-9e0a16bb7366");
background-size: cover;
}
</style>
'''
    st.markdown(page_bg_img, unsafe_allow_html=True)

    # Question form: submitting re-runs the script with `user_question` set.
    with st.form(key="my_form"):
        user_question = st.text_area(label="AMA! 😎", max_chars=40, key="query")
        submit_button = st.form_submit_button(label="Submit")

        if user_question:
            user_input(user_question)

    # Sidebar: upload PDFs and build the FAISS index from their text.
    with st.sidebar:
        st.title("Menu:")
        pdf_docs = st.file_uploader("Upload your PDF Files 📚 ", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_docs:
                # Guard: processing an empty upload list would crash
                # FAISS.from_texts with an opaque error.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing..."):
                    raw_text = get_pdf_text(pdf_docs)
                    text_chunks = get_text_chunks(raw_text)
                    get_vector_store(text_chunks)
                    st.success("Done")
        st.write('Made with <3 by *[Yash Bhatnagar](https://www.linkedin.com/in/yash-bhatnagar-203aa622a/)*')
|
||
|
||
# Launch the app only when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
[theme] | ||
primaryColor="#F9E79F" | ||
backgroundColor="#FFFFFF" | ||
secondaryBackgroundColor="#F0F2F6" | ||
textColor="#262730" | ||
font="sans serif" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
streamlit | ||
google-generativeai | ||
python-dotenv | ||
langchain | ||
PyPDF2 | ||
faiss-cpu | ||
langchain_google_genai | ||
langchain-community |