
Commit

Added Comments to increase code readability
halcyon-past committed Oct 5, 2024
1 parent 132efe1 commit 00cdcd5
Showing 1 changed file with 77 additions and 32 deletions.
109 changes: 77 additions & 32 deletions Advanced_Projects/EDUHELPER/main.py
@@ -10,83 +10,128 @@
 from langchain.prompts import PromptTemplate
 from dotenv import load_dotenv
 
+# Load environment variables from .env file and configure Google API
+# This is crucial for securely managing API keys and other sensitive information
 load_dotenv()
 os.getenv("GOOGLE_API_KEY")
 genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
 
 def get_pdf_text(pdf_docs):
-    text=""
+    """
+    Extract text content from uploaded PDF documents.
+
+    Args:
+        pdf_docs (list): List of uploaded PDF file objects.
+
+    Returns:
+        str: Concatenated text content from all pages of all PDFs.
+    """
+    text = ""
     for pdf in pdf_docs:
-        pdf_reader= PdfReader(pdf)
+        pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
-            text+= page.extract_text()
-    return text
-
-
+            text += page.extract_text()
+    return text
+
 def get_text_chunks(text):
+    """
+    Split the extracted text into smaller, overlapping chunks for better processing.
+
+    Args:
+        text (str): The full text extracted from PDFs.
+
+    Returns:
+        list: A list of text chunks.
+    """
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     chunks = text_splitter.split_text(text)
     return chunks
 
-
 def get_vector_store(text_chunks):
-    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
+    """
+    Create a vector store from the text chunks using FAISS and Google's embedding model.
+
+    Args:
+        text_chunks (list): List of text chunks to be embedded and stored.
+
+    Side effect:
+        Saves the vector store locally for future use.
+    """
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
     vector_store.save_local("faiss_index")
 
-
 def get_conversational_chain():
-
+    """
+    Set up the conversational chain for question answering using a custom prompt template.
+
+    Returns:
+        Chain: A LangChain QA chain configured with the Gemini Pro model and custom prompt.
+    """
     prompt_template = """
     Answer the question in detail using the provided context. If the answer cannot be found
     in the context or can't be answered with the knowledge you already have, respond with
     'answer not available in the context'. Do not provide any misleading or made-up information
-    untill and unless the question requires you to generate content based on the given context.\n\n
+    until and unless the question requires you to generate content based on the given context.\n\n
     Context:\n {context}?\n
     Question: \n{question}\n
     Answer:
     """
 
-    model = ChatGoogleGenerativeAI(model="gemini-pro",
-                             temperature=0.7)
-
-    prompt = PromptTemplate(template = prompt_template, input_variables = ["context", "question"])
+    # Initialize the Gemini Pro model with a slight randomness in responses
+    model = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.7)
+
+    # Create a prompt template for consistent questioning
+    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+
+    # Set up the QA chain with the model and prompt
     chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
 
     return chain
 
-
-
 def user_input(user_question):
-    embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")
+    """
+    Process user input, search for relevant information, and generate a response.
+
+    Args:
+        user_question (str): The question input by the user.
+
+    Side effect:
+        Displays the AI-generated answer in the Streamlit app.
+    """
+    # Load the previously saved vector store
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
     new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
 
+    # Perform a similarity search to find relevant document chunks
    docs = new_db.similarity_search(user_question)
 
+    # Get the QA chain and run the query
     chain = get_conversational_chain()
 
-
     response = chain(
-        {"input_documents":docs, "question": user_question}
-        , return_only_outputs=True)
-
-    print(response)
-    st.write(response["output_text"]+"\n\nNOTE:\nThese Responses are generated by AI so they may not be accurate, please verify the answers from the original sources")
-
-
+        {"input_documents": docs, "question": user_question},
+        return_only_outputs=True
+    )
+
+    # Display the response in the Streamlit app
+    print(response)  # For debugging purposes
+    st.write(response["output_text"] + "\n\nNOTE:\nThese Responses are generated by AI so they may not be accurate, please verify the answers from the original sources")
+
 def main():
-    st.set_page_config("EDUHELPER",page_icon="📚")
+    """
+    Main function to set up and run the Streamlit app interface.
+
+    This function defines the layout and interaction flow of the app.
+    """
+    # Configure the Streamlit page
+    st.set_page_config("EDUHELPER", page_icon="📚")
     st.header("EDUHELPER: Chat with the PDF Files")
 
+    # Main area for user input and displaying responses
     user_question = st.text_input("Ask a Question from the PDF Files")
 
     if user_question:
         user_input(user_question)
 
+    # Sidebar for PDF upload and processing
     with st.sidebar:
         st.title("Menu:")
         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
@@ -97,6 +142,7 @@ def main():
                 get_vector_store(text_chunks)
                 st.success("Done")
 
+    # Footer with creator information
     html_temp = """
     <div style="text-align: center; font-size: 14px; padding: 5px;">
         Created by Aritro Saha -
@@ -107,7 +153,6 @@
     """
     st.markdown(html_temp, unsafe_allow_html=True)
 
-
-
+# Entry point of the script
 if __name__ == "__main__":
     main()
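
The changed file wires these helpers into a small retrieval pipeline: extract PDF text, chunk it, embed the chunks into a local FAISS index, then retrieve and answer. A minimal usage sketch outside the Streamlit UI, assuming it is run from the EDUHELPER folder so main.py is importable, GOOGLE_API_KEY is set in a .env file, and notes.pdf is a hypothetical input file:

# Hypothetical sketch, not part of the commit: exercises the helpers directly.
from main import get_pdf_text, get_text_chunks, get_vector_store, user_input

with open("notes.pdf", "rb") as pdf_file:               # example file name, not in the repo
    raw_text = get_pdf_text([pdf_file])                 # 1. extract text from every page
chunks = get_text_chunks(raw_text)                      # 2. split into 10k-character overlapping chunks
get_vector_store(chunks)                                # 3. embed the chunks and save ./faiss_index
user_input("What topics does the document cover?")      # 4. retrieve relevant chunks and answer;
                                                        #    the raw response is printed, while
                                                        #    st.write only renders inside the
                                                        #    running Streamlit app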
