# app.py
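"""Streamlit chat app: upload a PDF, index it with OpenAI embeddings + FAISS,
and chat with it through a LangChain ConversationalRetrievalChain."""
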
from PyPDF2 import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
import pickle
from pathlib import Path
from dotenv import load_dotenv
import os
import streamlit as st
from streamlit_chat import message
import io
import asyncio

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# vectors = getDocEmbeds("gpt4.pdf")
# qa = ChatVectorDBChain.from_llm(ChatOpenAI(model_name="gpt-3.5-turbo"), vectors, return_source_documents=True)


async def main():

    async def storeDocEmbeds(file, filename):
        # Read the PDF and concatenate the text of every page that yields any
        reader = PdfReader(file)
        corpus = ''.join([p.extract_text() for p in reader.pages if p.extract_text()])

        # Split the text into overlapping chunks sized for embedding
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.split_text(corpus)

        # Embed the chunks and cache the FAISS index on disk
        embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        vectors = FAISS.from_texts(chunks, embeddings)
        with open(filename + ".pkl", "wb") as f:
            pickle.dump(vectors, f)

    async def getDocEmbeds(file, filename):
        # Build the index on first use, then reuse the pickled copy on later runs
        if not os.path.isfile(filename + ".pkl"):
            await storeDocEmbeds(file, filename)
        with open(filename + ".pkl", "rb") as f:
            vectors = pickle.load(f)
        return vectors

    async def conversational_chat(query):
        # Send the query plus the accumulated history so follow-ups stay in context
        result = qa({"question": query, "chat_history": st.session_state['history']})
        st.session_state['history'].append((query, result["answer"]))
        return result["answer"]

    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    chain = load_qa_chain(llm, chain_type="stuff")  # built but unused; the retrieval chain below answers queries

    if 'history' not in st.session_state:
        st.session_state['history'] = []

    # Creating the chatbot interface
    st.title("PDFChat :")

    if 'ready' not in st.session_state:
        st.session_state['ready'] = False

    uploaded_file = st.file_uploader("Choose a file", type="pdf")

    if uploaded_file is not None:
        with st.spinner("Processing..."):
            uploaded_file.seek(0)
            file = uploaded_file.read()
            vectors = await getDocEmbeds(io.BytesIO(file), uploaded_file.name)
            qa = ConversationalRetrievalChain.from_llm(
                ChatOpenAI(model_name="gpt-3.5-turbo"),
                retriever=vectors.as_retriever(),
                return_source_documents=True,
            )
        st.session_state['ready'] = True

    st.divider()

    if st.session_state['ready']:
        if 'generated' not in st.session_state:
            st.session_state['generated'] = ["Welcome! You can now ask any questions regarding " + uploaded_file.name]
        if 'past' not in st.session_state:
            st.session_state['past'] = ["Hey!"]

        # Container for the chat history
        response_container = st.container()
        # Container for the text box
        container = st.container()

        with container:
            with st.form(key='my_form', clear_on_submit=True):
                user_input = st.text_input("Query:", placeholder="e.g: Summarize the paper in a few sentences", key='input')
                submit_button = st.form_submit_button(label='Send')

            if submit_button and user_input:
                output = await conversational_chat(user_input)
                st.session_state['past'].append(user_input)
                st.session_state['generated'].append(output)

        if st.session_state['generated']:
            with response_container:
                for i in range(len(st.session_state['generated'])):
                    message(st.session_state["past"][i], is_user=True, key=str(i) + '_user', avatar_style="thumbs")
                    message(st.session_state["generated"][i], key=str(i), avatar_style="fun-emoji")


if __name__ == "__main__":
    asyncio.run(main())
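
# How to run (a minimal sketch, assuming the standard Streamlit CLI and a .env
# file next to this script; the variable name matches the os.getenv call above):
#
#   echo "OPENAI_API_KEY=sk-..." > .env
#   streamlit run app.py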