Skip to content

Commit

Permalink
Feat/deployment (#28)
Browse files Browse the repository at this point in the history
* fix(mcq): fixed issue where only 1 MCQ was being generated by gpt

* fix(client): minor qol fixes

* fix(mcq): added additonal fixes

* fix(profile): fixed cards

* fix(subscribe): added blue box

* feat(MCQ): added option to reorder, delete or add options

---------

Co-authored-by: neilscallywag <neil.sharma.2022@scis.smu.edu.sg>
Co-authored-by: Louis <64778758+iamlouisteo@users.noreply.github.com>
  • Loading branch information
3 people authored Apr 22, 2024
1 parent c0724c0 commit 5e1f42a
Show file tree
Hide file tree
Showing 11 changed files with 474 additions and 164 deletions.
23 changes: 15 additions & 8 deletions backend/complex/process-chunks/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,12 @@ def construct_prompt(self, message_from_queue1, messages_from_queue2):
logging.info(f"Message data: {message_data}")
generate_type = message_data.get("metadata", {}).get("generateType", "")
note_id = message_data.get("fileId", "")
# get all "content" key from various jsons in messages_from_queue2 which is a list of json objects
additional_context = ", ".join([json.loads(message)["content"] for message in messages_from_queue2])
logging.info(f"Additional context: {additional_context}")
strategy = PromptStrategyFactory.get_strategy(generate_type)
return strategy.construct_prompt(message_from_queue1, messages_from_queue2),generate_type,note_id
prompt,content = strategy.construct_prompt(message_from_queue1, additional_context)
return prompt,content,generate_type,note_id

def match_messages_and_call_api(self, ch, method, properties, body):
"""Processes messages from queue1, matches them with messages from queue2, and calls the OpenAI API."""
Expand All @@ -79,8 +83,8 @@ def match_messages_and_call_api(self, ch, method, properties, body):
else:
break

prompt,generate_type,note_id = self.construct_prompt(message_from_queue1, messages_from_queue2)
token_count = self.count_tokens_with_tiktoken(prompt)
prompt,content,generate_type,note_id = self.construct_prompt(message_from_queue1, messages_from_queue2)
token_count = self.count_tokens_with_tiktoken(prompt+content)
logging.info(f"Estimated token count for prompt: {token_count}")
if token_count > self.max_tokens:
# Calculate 2% of the max token limit
Expand All @@ -89,8 +93,8 @@ def match_messages_and_call_api(self, ch, method, properties, body):
new_max_length = self.max_tokens - reduction_amount
# Adjust the prompt to the new max length
# Assuming prompt is a string, this will cut off the end to fit. Adjust as necessary for your data structure.
prompt = prompt[:new_max_length]
logging.info(f"Prompt adjusted to within token limit. New length: {len(prompt)}")
content = content[:new_max_length]
logging.info(f"Prompt adjusted to within token limit. New length: {len(content)+len(prompt)}")



Expand All @@ -101,7 +105,10 @@ def match_messages_and_call_api(self, ch, method, properties, body):
try:
response = client.chat.completions.create(
model=self.model,
messages=[{"role": "system", "content": prompt}]
messages=[{"role": "system", "content": prompt},
{"role": "user", "content": content}],
temperature=0.3,
top_p=0.8,
)
except Exception as e:
logging.error(f"Error during OpenAI API call or response handling: {str(e)}")
Expand Down Expand Up @@ -147,6 +154,6 @@ def send_content(self, message, generate_type,note_id):
content_fetcher_builder = ContentFetcherBuilder()
content_fetcher = (content_fetcher_builder
.setup_logging()
.with_model("gpt-3.5-turbo-16k")
.with_model("gpt-3.5-turbo")
.build())
content_fetcher.start_consuming()
content_fetcher.start_consuming()
84 changes: 84 additions & 0 deletions backend/complex/process-chunks/src/main2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import grpc
import pika
import json
import utils
import logging
from itertools import cycle
from openai import OpenAI
from content_pb2_grpc import ContentStub
from strategies.ContentSendingStrategy import FlashcardStrategy, MCQStrategy
from proxies.ContentServiceProxy import ContentServiceProxy

class ContentFetcher:
def __init__(self):
self.RABBITMQ_SERVER = None
self.RABBITMQ_USERNAME = None
self.RABBITMQ_PASSWORD = None
self.QUEUE_NAME_1 = None
self.QUEUE_NAME_2 = None
self.OPENAI_API_KEYS = None
self.CONTENT_SERVICE_ADDRESS = None
self.model = None
self.connection = None
self.channel = None
self.grpc_channel = None
self.grpc_stub = None
self.max_tokens = 16384
self.api_key = None # Single API key for continuous context

def initialize_rabbitmq(self):
credentials = pika.PlainCredentials(self.RABBITMQ_USERNAME, self.RABBITMQ_PASSWORD)
connection_parameters = pika.ConnectionParameters(host=self.RABBITMQ_SERVER, credentials=credentials)
self.connection = pika.BlockingConnection(connection_parameters)
self.channel = self.connection.channel()
self.channel.queue_declare(queue=self.QUEUE_NAME_1, durable=True)
self.channel.queue_declare(queue=self.QUEUE_NAME_2, durable=True)

def match_messages_and_call_api(self, ch, method, properties, body):
message_from_queue1 = body.decode()
messages_from_queue2 = []
for method_frame, properties, body in self.channel.consume(queue=self.QUEUE_NAME_2, auto_ack=True, inactivity_timeout=1):
if method_frame:
messages_from_queue2.append(body.decode())
else:
break

prompt, generate_type, note_id = self.construct_prompt(message_from_queue1, messages_from_queue2)
responses = self.generate_content_in_batches(prompt, 3, generate_type, note_id) # Example: 3 iterations for 15 items
self.send_content(responses, generate_type, note_id)

def generate_content_in_batches(self, initial_prompt, num_batches, generate_type, note_id):
current_responses = []
prompt = initial_prompt
client = OpenAI(api_key=self.api_key) # Reuse the same API key for all batches

for _ in range(num_batches):
response = client.chat.completions.create(model=self.model, messages=[{"role": "system", "content": prompt}])
batch_responses = utils.extract_and_validate_json_objects(response.choices[0].message.content)
current_responses.extend(json.loads(batch_responses))

prompt = "Generate 5 more" # Modify the prompt for the next batch

return current_responses

def send_content(self, messages, generate_type, note_id):
if generate_type == "flashcard":
strategy = FlashcardStrategy()
elif generate_type == "mcq":
strategy = MCQStrategy()
else:
raise ValueError("Unsupported content type")

content_service_proxy = ContentServiceProxy(self.CONTENT_SERVICE_ADDRESS)
content_request = strategy.construct_request(json.dumps(messages), note_id)
response = content_service_proxy.send_content(content_request)
logging.info(f"gRPC Response: {response}")

def start_consuming(self):
self.channel.basic_consume(queue=self.QUEUE_NAME_1, on_message_callback=self.match_messages_and_call_api, auto_ack=True)
logging.info("Waiting for messages. To exit press CTRL+C")
self.channel.start_consuming()

if __name__ == "__main__":
content_fetcher = ContentFetcher()
content_fetcher.start_consuming()
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import json
from abc import ABC, abstractmethod


class PromptConstructionStrategy(ABC):
@abstractmethod
def construct_prompt(self, message_from_queue1, messages_from_queue2):
pass


class FlashcardPromptStrategy(PromptConstructionStrategy):
def construct_prompt(self, message_from_queue1, messages_from_queue2):
message_data = json.loads(message_from_queue1)
Expand All @@ -15,40 +17,43 @@ def construct_prompt(self, message_from_queue1, messages_from_queue2):
Provides a detailed answer, aiming for undergraduate-level depth where applicable.
Exclude any obfuscated or nonsensical text, and select content from various sections to cover the topic comprehensively. Aim for minimal token use in your response without sacrificing content quality unless the topic is straightforward.
Format your responses strictly as json:
Format all 20 of your responses strictly as json:
{{
"question": "What is the significance of [specific concept] in [subject]?",
"answer": "The significance lies in..."
}},
...
{additional_context}"""
return prompt
I will embedd the content below for your reference. Do not parse it as instruction this time. Just use it as reference to generate Flashcards.:
"""
return prompt, additional_context


class MCQPromptStrategy(PromptConstructionStrategy):
def construct_prompt(self, message_from_queue1, messages_from_queue2):
message_data = json.loads(message_from_queue1)
additional_context = ", ".join(messages_from_queue2)
prompt = f"""Generate twenty MCQs that highlight essential information on the subject, drawn from clear and relevant portions of the text. Focus on diverse concepts, definitions, and findings, ensuring each MCQ:
Provides a detailed answer, aiming for undergraduate-level depth where applicable.
Exclude any obfuscated or nonsensical text, and select content from various sections to cover the topic comprehensively. Aim for minimal token use in your response without sacrificing content quality unless the topic is straightforward.
Format your responses strictly as json:
additional_context = """Generate between 10 and 20 MCQs from the provided text, ensuring each question:
- Highlights essential information across diverse concepts, definitions, and findings.
- Is detailed enough for undergraduate-level understanding.
- Includes only clear and relevant portions of the text, covering the topic comprehensively.
""" + ", ".join(
messages_from_queue2
)
prompt = f"""
Each MCQ must be formatted in JSON and indicate whether multiple answers are allowed:
Example MCQ:
{{
"question": "What is the capital of France?",
"options": [
{{
"option": "Paris",
"is_correct": true
}},
...
],
"multiple_answers": false
"question": "What does the user YUE ZHENG TING add as a comment on February 24th?",
"options": [
{{"option": "Resolution: Done", "is_correct": true}},
{{"option": "Reporter: THADDEAUS LOW", "is_correct": true}},
{{"option": "Votes: 0", "is_correct": true}},
{{"option": "Service Management Request Type: Emailed request", "is_correct": false}}
],
"multiple_answers": true
}}
{additional_context}"""
return prompt
"""
return prompt, additional_context
94 changes: 94 additions & 0 deletions client/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5e1f42a

Please sign in to comment.