From aab24cdd33e6f14019451892ad91da39f6920a94 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 13:27:34 +0000 Subject: [PATCH 01/26] add firestore_sdk ad session_manager --- src/services/firestore_sdk.py | 58 ++++++++++++++++++++++++++ src/utils/session_manager.py | 78 +++++++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 src/services/firestore_sdk.py create mode 100644 src/utils/session_manager.py diff --git a/src/services/firestore_sdk.py b/src/services/firestore_sdk.py new file mode 100644 index 0000000..4ca0e7b --- /dev/null +++ b/src/services/firestore_sdk.py @@ -0,0 +1,58 @@ +import logging +from typing import Dict, Literal + +from firebase_admin.firestore import client, firestore + +firestore_client = client() +server_timestamp = firestore.SERVER_TIMESTAMP +increment = firestore.Increment +arrayUnion = firestore.ArrayUnion +arrayRemove = firestore.ArrayRemove + + +Collection = Literal["audiora_sessions", "audiora_audiocasts"] + +collections: Dict[Collection, Collection] = { + "audiora_sessions": "audiora_sessions", + "audiora_audiocasts": "audiora_audiocasts", +} + + +class DBManager: + def __init__(self, scope: str): + self.logger = logging.getLogger(scope) + + @property + def timestamp(self): + return server_timestamp + + def _get_collection(self, collection: Collection): + return firestore_client.collection(collections[collection]) + + def _create_document(self, collection: Collection, data: Dict): + return self._get_collection(collection).add( + {**data, "created_at": self.timestamp, "updated_at": self.timestamp} + ) + + def _set_document(self, collection: Collection, doc_id: str, data: Dict): + return ( + self._get_collection(collection) + .document(doc_id) + .set({**data, "created_at": self.timestamp, "updated_at": self.timestamp}) + ) + + def _update_document(self, collection: Collection, doc_id: str, data: Dict): + return ( + self._get_collection(collection) + .document(doc_id) + .update({**data, "updated_at": self.timestamp}) + ) + + def _delete_document(self, collection: Collection, doc_id: str): + return self._get_collection(collection).document(doc_id).delete() + + def _get_document(self, collection: Collection, doc_id: str): + return self._get_collection(collection).document(doc_id).get() + + def _get_documents(self, collection: Collection): + return self._get_collection(collection).stream() diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py new file mode 100644 index 0000000..69c80ee --- /dev/null +++ b/src/utils/session_manager.py @@ -0,0 +1,78 @@ +from dataclasses import dataclass +from typing import Dict, List, Optional + +from src.services.firestore_sdk import ( + Collection, + DBManager, + arrayRemove, + arrayUnion, + collections, +) +from src.utils.chat_utils import SessionChatMessage + + +@dataclass +class ChatMetadata: + source: str + transcript: str + + +@dataclass +class SessionModel: + id: str + chats: List[SessionChatMessage] + metadata: Optional[ChatMetadata] + + +class SessionManager(DBManager): + collection: Collection = collections["audiora_sessions"] + + def __init__(self, session_id: str): + super().__init__(scope="ChatManager") + + self.doc_id = session_id + session_doc = self._get_document(self.collection, self.doc_id) + # if the collection does not exist, create it + if not session_doc.exists: + payload = SessionModel(id=self.doc_id, chats=[], metadata=None) + self._set_document(self.collection, self.doc_id, payload.__dict__) + + def _update(self, data: Dict): + return self._update_document(self.collection, self.doc_id, data) + + def _update_source(self, source: str): + return self._update({"metadata.source": source}) + + def _update_transcript(self, transcript: str): + return self._update({"metadata.transcript": transcript}) + + def _add_chat(self, chat: SessionChatMessage): + return self._update_document( + self.collection, self.doc_id, {"chats": arrayUnion(chat)} + ) + + def _delete_chat(self, chat_id: str): + doc = self._get_document(self.collection, self.doc_id) + if not doc.exists: + return + + chat_to_remove = [chat for chat in doc.get("chats") if chat.id == chat_id] + self._update_document( + self.collection, + self.doc_id, + {"chats": arrayRemove(chat_to_remove)}, + ) + + def _get_chat(self, chat_id: str) -> SessionChatMessage | None: + doc = self._get_document(self.collection, self.doc_id) + if not doc.exists: + return None + + return [chat for chat in doc.get("chats") if chat.id == chat_id][0] + + def _get_chats(self) -> List[SessionChatMessage] | None: + doc = self._get_document(self.collection, self.doc_id) + if not doc.exists: + return None + + return doc.get("chats") From 4fe5a504c2a331c9a9fc075fe9af59b1c0a06442 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 13:29:59 +0000 Subject: [PATCH 02/26] save user chats on firestore --- src/utils/chat_utils.py | 6 ++++-- src/utils/main_utils.py | 14 +++++--------- src/utils/session_manager.py | 4 ++-- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/utils/chat_utils.py b/src/utils/chat_utils.py index 6942bfb..e627b98 100644 --- a/src/utils/chat_utils.py +++ b/src/utils/chat_utils.py @@ -1,7 +1,8 @@ +import uuid from typing import Dict, List, Literal import streamlit as st -from pydantic import BaseModel +from pydantic import BaseModel, Field ContentCategory = Literal[ "podcast", @@ -49,8 +50,9 @@ class SessionChatMessage(BaseModel): - role: Literal["user", "assistant"] + id: str = Field(default_factory=lambda: str(uuid.uuid4())) content: str + role: Literal["user", "assistant"] class SessionChatRequest(BaseModel): diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py index ac75136..bb214e0 100644 --- a/src/utils/main_utils.py +++ b/src/utils/main_utils.py @@ -15,6 +15,7 @@ SessionChatRequest, content_categories, ) +from src.utils.session_manager import SessionManager class GenerateAudioCastRequest(BaseModel): @@ -36,21 +37,16 @@ class GenerateAudioCastResponse(BaseModel): def chat(session_id: str, request: SessionChatRequest): message = request.message content_category = request.content_category + db = SessionManager(session_id) - if session_id not in chat_sessions: - chat_sessions[session_id] = [] - - chat_sessions[session_id].append(message) + db._add_chat(message) def on_finish(text: str): - chat_sessions[session_id].append( - SessionChatMessage(role="assistant", content=text) - ) - # log text and other metadata to database + db._add_chat(SessionChatMessage(role="assistant", content=text)) generator = chat_request( content_category=content_category, - previous_messages=chat_sessions[session_id], + previous_messages=db._get_chats(), on_finish=on_finish, ) diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py index 69c80ee..96c4305 100644 --- a/src/utils/session_manager.py +++ b/src/utils/session_manager.py @@ -70,9 +70,9 @@ def _get_chat(self, chat_id: str) -> SessionChatMessage | None: return [chat for chat in doc.get("chats") if chat.id == chat_id][0] - def _get_chats(self) -> List[SessionChatMessage] | None: + def _get_chats(self) -> List[SessionChatMessage]: doc = self._get_document(self.collection, self.doc_id) if not doc.exists: - return None + return [] return doc.get("chats") From f988cc5f745565d428dee3937e3e37f575b20d51 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 13:45:31 +0000 Subject: [PATCH 03/26] pass down session_id for a deterministic workflow --- app.py | 6 ++--- pages/audiocast.py | 4 ++-- src/uis/audioui.py | 6 ++--- src/uis/chatui.py | 7 +++--- src/utils/chat_thread.py | 43 +++++++++++++++++++++++++++-------- src/utils/main_utils.py | 24 ++++++++----------- src/utils/render_audiocast.py | 5 ++-- src/utils/session_state.py | 2 ++ 8 files changed, 60 insertions(+), 37 deletions(-) diff --git a/app.py b/app.py index 7bfdf36..d8b0108 100644 --- a/app.py +++ b/app.py @@ -17,7 +17,7 @@ async def main(): # Sidebar for content type selection st.sidebar.title("Audiocast Info") - init_session_state() + session_id = init_session_state() if st.session_state.content_category: st.sidebar.subheader( @@ -32,9 +32,9 @@ async def main(): uichat = st.empty() if not st.session_state.user_specification: with uichat.container(): - await chatui(uichat) + await chatui(session_id, uichat) else: - await audioui(uichat) + await audioui(session_id, uichat) if __name__ == "__main__": diff --git a/pages/audiocast.py b/pages/audiocast.py index 1666a9c..0dc3f85 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -14,7 +14,7 @@ def navigate_to_home(): async def render_audiocast_page(): st.set_page_config(page_title="Audiora | Share Page", page_icon="🎧") - audiocast_id = st.query_params.get("uuid") + audiocast_id = st.query_params.get("session_id") if audiocast_id: # Display audiocast content @@ -34,7 +34,7 @@ async def render_audiocast_page(): st.error(f"Error loading audiocast: {str(e)}") else: st.warning( - "Audiocast ID is missing in the URL. Expected URL format: ?uuid=your-audiocast-id" + "Audiocast ID is missing in the URL. Expected URL format: ?session_id=your-audiocast-id" ) st.markdown("---") diff --git a/src/uis/audioui.py b/src/uis/audioui.py index d3b8188..1680323 100644 --- a/src/uis/audioui.py +++ b/src/uis/audioui.py @@ -5,7 +5,7 @@ from src.utils.render_audiocast import render_audiocast -async def audioui(uichat: DeltaGenerator): +async def audioui(session_id: str, uichat: DeltaGenerator): """ Audiocast interface """ @@ -17,7 +17,7 @@ async def audioui(uichat: DeltaGenerator): summary = st.session_state.user_specification content_category = st.session_state.content_category - await use_audiocast_request(summary, content_category) + await use_audiocast_request(session_id, summary, content_category) else: st.info("Audiocast generation completed!") - render_audiocast() + render_audiocast(session_id) diff --git a/src/uis/chatui.py b/src/uis/chatui.py index 31bf729..02dcfe1 100644 --- a/src/uis/chatui.py +++ b/src/uis/chatui.py @@ -10,7 +10,7 @@ from src.utils.render_chat import render_chat_history -async def chatui(uichat: DeltaGenerator): +async def chatui(session_id: str, uichat: DeltaGenerator): """ Chat interface """ @@ -27,12 +27,13 @@ async def chatui(uichat: DeltaGenerator): content_category = st.session_state.content_category if st.session_state.example_prompt: - handle_example_prompt(content_category) + prompt = st.session_state.example_prompt + handle_example_prompt(session_id, prompt, content_category) if st.session_state.prompt: prompt = st.session_state.prompt st.session_state.prompt = None - ai_message = handle_user_prompt(prompt, content_category) + ai_message = handle_user_prompt(session_id, prompt, content_category) if isinstance(ai_message, str): await evaluate_final_response(ai_message, content_category) diff --git a/src/utils/chat_thread.py b/src/utils/chat_thread.py index 904287a..78b3561 100644 --- a/src/utils/chat_thread.py +++ b/src/utils/chat_thread.py @@ -14,10 +14,14 @@ termination_suffix = "Please click the button below to start generating the audiocast." -def generate_stream_response(prompt: str, content_category: ContentCategory): +def generate_stream_response( + session_id: str, + prompt: str, + content_category: ContentCategory, +): with st.spinner("Generating response..."): response_generator = chat( - st.session_state.chat_session_id, + session_id, SessionChatRequest( message=SessionChatMessage(role="user", content=prompt), content_category=content_category, @@ -27,12 +31,17 @@ def generate_stream_response(prompt: str, content_category: ContentCategory): return response_generator -def handle_example_prompt(content_category: ContentCategory): +def handle_example_prompt( + session_id: str, + prompt: str, + content_category: ContentCategory, +): """Handle selected example prompt""" - prompt = st.session_state.example_prompt with st.chat_message("assistant"): - response_generator = generate_stream_response(prompt, content_category) + response_generator = generate_stream_response( + session_id, prompt, content_category + ) ai_message = st.write_stream(response_generator) st.session_state.example_prompt = None @@ -45,12 +54,20 @@ def handle_example_prompt(content_category: ContentCategory): st.error("Failed to generate AI response. Please try again.") -def handle_user_prompt(prompt: str, content_category: ContentCategory): +def handle_user_prompt( + session_id: str, + prompt: str, + content_category: ContentCategory, +): """ Handle user input prompt """ with st.chat_message("assistant"): - response_generator = generate_stream_response(prompt, content_category) + response_generator = generate_stream_response( + session_id, + prompt, + content_category, + ) ai_message = st.write_stream(response_generator) if ai_message: @@ -110,7 +127,11 @@ def onclick(summary: str): st.rerun() -async def use_audiocast_request(summary: str, content_category: ContentCategory): +async def use_audiocast_request( + session_id: str, + summary: str, + content_category: ContentCategory, +): """ Call audiocast creating workflow @@ -121,7 +142,11 @@ async def use_audiocast_request(summary: str, content_category: ContentCategory) try: with st.spinner("Generating your audiocast..."): audiocast_response = await generate_audiocast( - GenerateAudioCastRequest(summary=summary, category=content_category) + GenerateAudioCastRequest( + sessionId=session_id, + summary=summary, + category=content_category, + ) ) print(f"Generate AudioCast Response: {audiocast_response}") diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py index bb214e0..d29d6d5 100644 --- a/src/utils/main_utils.py +++ b/src/utils/main_utils.py @@ -1,6 +1,4 @@ -import uuid from pathlib import Path -from typing import Dict, List import streamlit as st from pydantic import BaseModel @@ -19,21 +17,17 @@ class GenerateAudioCastRequest(BaseModel): + sessionId: str summary: str category: str class GenerateAudioCastResponse(BaseModel): - uuid: str url: str script: str source_content: str -# Store chat sessions (in-memory for now, should be moved to a database in production) -chat_sessions: Dict[str, List[SessionChatMessage]] = {} - - def chat(session_id: str, request: SessionChatRequest): message = request.message content_category = request.content_category @@ -57,8 +51,10 @@ async def generate_audiocast(request: GenerateAudioCastRequest): """ Generate an audiocast based on a summary of user's request """ + session_id = request.sessionId summary = request.summary category = request.category + if category not in content_categories: raise Exception("Invalid content category") @@ -93,21 +89,21 @@ async def generate_audiocast(request: GenerateAudioCastRequest): AudioSynthesizer().enhance_audio_minimal(Path(output_file)) print(f"output_file: {output_file}") - # unique ID for the audiocast - uniq_id = str(uuid.uuid4()) - # TODO: Use a background service # STEP 4: Ingest audio file to a storage service (e.g., GCS, S3) with container.container(): try: container.info("Storing a copy of your audiocast...") storage_manager = StorageManager() - storage_manager.upload_audio_to_gcs(output_file, uniq_id) + storage_manager.upload_audio_to_gcs(output_file, session_id) except Exception as e: print(f"Error while storing audiocast: {str(e)}") + db = SessionManager(session_id) + db._update_source(source_content) + db._update_transcript(audio_script) + response = GenerateAudioCastResponse( - uuid=uniq_id, url=output_file, script=audio_script, source_content=source_content, @@ -116,10 +112,10 @@ async def generate_audiocast(request: GenerateAudioCastRequest): return response.model_dump() -def get_audiocast_uri(uuid: str): +def get_audiocast_uri(session_id: str): """ Get the URI for the audiocast """ storage_manager = StorageManager() - filepath = storage_manager.download_from_gcs(uuid) + filepath = storage_manager.download_from_gcs(session_id) return filepath diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py index 5daa902..ac75489 100644 --- a/src/utils/render_audiocast.py +++ b/src/utils/render_audiocast.py @@ -8,13 +8,12 @@ class GenerateAudiocastDict(TypedDict): - uuid: str url: str script: str source_content: str -def render_audiocast(): +def render_audiocast(session_id: str): """ Render the audiocast based on the user's preferences - Display current audiocast if available @@ -33,7 +32,7 @@ def render_audiocast(): st.sidebar.subheader("Audiocast Source") st.sidebar.markdown(current_audiocast["source_content"]) - share_url = f"{APP_URL}/audiocast?uuid={current_audiocast['uuid']}" + share_url = f"{APP_URL}/audiocast?session_id={session_id}" st.text_input("Share this audiocast:", share_url) share_col, restart_row = st.columns(2, vertical_alignment="bottom") diff --git a/src/utils/session_state.py b/src/utils/session_state.py index 1386e1e..d19164d 100644 --- a/src/utils/session_state.py +++ b/src/utils/session_state.py @@ -33,6 +33,8 @@ def init_session_state(): if "current_audiocast" not in st.session_state: st.session_state.current_audiocast = None + return cast(str, st.session_state.chat_session_id) + def reset_session(): """ From 95c3538c8c65317f8d6bd6bf0084a6075020ac1e Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 14:16:41 +0000 Subject: [PATCH 04/26] handle conversion of chat object to/fro a dict --- src/utils/main_utils.py | 8 ++------ src/utils/render_chat.py | 2 +- src/utils/session_manager.py | 24 +++++++++++++++++++----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py index d29d6d5..c56ebe1 100644 --- a/src/utils/main_utils.py +++ b/src/utils/main_utils.py @@ -29,23 +29,19 @@ class GenerateAudioCastResponse(BaseModel): def chat(session_id: str, request: SessionChatRequest): - message = request.message content_category = request.content_category db = SessionManager(session_id) - - db._add_chat(message) + db._add_chat(request.message) def on_finish(text: str): db._add_chat(SessionChatMessage(role="assistant", content=text)) - generator = chat_request( + return chat_request( content_category=content_category, previous_messages=db._get_chats(), on_finish=on_finish, ) - return generator - async def generate_audiocast(request: GenerateAudioCastRequest): """ diff --git a/src/utils/render_chat.py b/src/utils/render_chat.py index 2569b37..b891136 100644 --- a/src/utils/render_chat.py +++ b/src/utils/render_chat.py @@ -20,7 +20,7 @@ def on_value_change(): with col1: st.selectbox( "Select Content Category", - content_categories, + ["", *content_categories], format_func=lambda x: x.title(), key="selected_content_category", on_change=on_value_change, diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py index 96c4305..f1e71fb 100644 --- a/src/utils/session_manager.py +++ b/src/utils/session_manager.py @@ -1,5 +1,5 @@ from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Dict, List, Optional, cast from src.services.firestore_sdk import ( Collection, @@ -48,7 +48,7 @@ def _update_transcript(self, transcript: str): def _add_chat(self, chat: SessionChatMessage): return self._update_document( - self.collection, self.doc_id, {"chats": arrayUnion(chat)} + self.collection, self.doc_id, {"chats": arrayUnion([chat.__dict__])} ) def _delete_chat(self, chat_id: str): @@ -60,7 +60,7 @@ def _delete_chat(self, chat_id: str): self._update_document( self.collection, self.doc_id, - {"chats": arrayRemove(chat_to_remove)}, + {"chats": arrayRemove([chat_to_remove.__dict__])}, ) def _get_chat(self, chat_id: str) -> SessionChatMessage | None: @@ -68,11 +68,25 @@ def _get_chat(self, chat_id: str) -> SessionChatMessage | None: if not doc.exists: return None - return [chat for chat in doc.get("chats") if chat.id == chat_id][0] + item = [chat for chat in doc.get("chats") if chat.id == chat_id][0] + if item: + return SessionChatMessage( + content=item["content"], + id=item["id"], + role=item["role"], + ) def _get_chats(self) -> List[SessionChatMessage]: doc = self._get_document(self.collection, self.doc_id) if not doc.exists: return [] - return doc.get("chats") + chats = cast(Dict, doc.get("chats")) + return [ + SessionChatMessage( + content=chat["content"], + id=chat["id"], + role=chat["role"], + ) + for chat in chats + ] From c458fe067ecd6a1f61e769e2c43121d7bf49afa8 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 15:22:54 +0000 Subject: [PATCH 05/26] remove references to langchain --- src/utils/content_generator.py | 75 ---------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 src/utils/content_generator.py diff --git a/src/utils/content_generator.py b/src/utils/content_generator.py deleted file mode 100644 index 6f289ef..0000000 --- a/src/utils/content_generator.py +++ /dev/null @@ -1,75 +0,0 @@ -from typing import Dict, List - -from langchain.chains import LLMChain -from langchain.llms import OpenAI -from langchain.prompts import PromptTemplate - - -class ContentGenerator: - def __init__(self): - self.llm = OpenAI(temperature=0.7) - self.prompt_templates = { - "story": PromptTemplate( - input_variables=["query"], - template="""Create an engaging story about {query}. - Make it captivating and suitable for audio narration. - Include vivid descriptions and natural dialogue.""", - ), - "podcast": PromptTemplate( - input_variables=["query"], - template="""Create an informative podcast script about {query}. - Structure it like a professional podcast with clear sections, - engaging facts, and natural transitions.""", - ), - "sermon": PromptTemplate( - input_variables=["query"], - template="""Create an inspiring sermon about {query}. - Include spiritual insights, relevant scriptures, - and practical applications for daily life.""", - ), - "science": PromptTemplate( - input_variables=["query"], - template="""Create an educational scientific explanation about {query}. - Make it engaging and accessible while maintaining accuracy. - Include recent research and fascinating details.""", - ), - } - - def generate_content( - self, query: str, content_category: str, chat_history: List[Dict] - ) -> str: - # Get the appropriate prompt template - prompt_template = self.prompt_templates.get(content_category) - if not prompt_template: - raise ValueError(f"Invalid content type: {content_category}") - - # Create and run the chain - chain = LLMChain(llm=self.llm, prompt=prompt_template) - response = chain.run(query=query) - - return response - - def refine_with_chat_history(self, content: str, chat_history: List[Dict]) -> str: - # Use chat history to refine the content if needed - relevant_context = "\n".join( - [ - f"{msg['role']}: {msg['content']}" - for msg in chat_history[-3:] # Use last 3 messages for context - ] - ) - - refine_prompt = PromptTemplate( - input_variables=["content", "context"], - template="""Given this conversation context: - {context} - - Please refine this content to better match the user's needs: - {content} - - Refined content:""", - ) - - chain = LLMChain(llm=self.llm, prompt=refine_prompt) - refined_content = chain.run(content=content, context=relevant_context) - - return refined_content From 38585feef8e8ae5952132ba87b297255c7cf54f8 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 15:23:43 +0000 Subject: [PATCH 06/26] reuse a previously downloaded audiofile if it's processable --- src/services/storage.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/services/storage.py b/src/services/storage.py index 1580a9c..1e14bca 100644 --- a/src/services/storage.py +++ b/src/services/storage.py @@ -1,3 +1,4 @@ +import os from dataclasses import dataclass from io import BytesIO from pathlib import Path @@ -5,6 +6,7 @@ from uuid import uuid4 from google.cloud import storage +from pydub import AudioSegment from src.env_var import BUCKET_NAME @@ -70,8 +72,15 @@ def download_from_gcs(self, filename: str): """ blobname = f"{BLOB_BASE_URI}/{filename}" blob = bucket.blob(blobname) - tmp_file_path = f"/tmp/{str(uuid4())}" - blob.download_to_filename(tmp_file_path) + tmp_file_path = f"/tmp/{filename}" + if os.path.exists(tmp_file_path): + try: + audio = AudioSegment.from_file(tmp_file_path) + if audio.duration_seconds > 0: + return tmp_file_path + except Exception: + os.remove(tmp_file_path) + blob.download_to_filename(tmp_file_path) return tmp_file_path From 658f6afa8596a9fc93a8948fdccf692e4fb2e30e Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 15:24:24 +0000 Subject: [PATCH 07/26] render audiocast metdata on share page --- pages/audiocast.py | 47 +++++++++++++++++++++++++++-------- src/utils/main_utils.py | 29 +++++++++++++++++++-- src/utils/render_audiocast.py | 1 + src/utils/session_manager.py | 20 +++++++++++++++ 4 files changed, 85 insertions(+), 12 deletions(-) diff --git a/pages/audiocast.py b/pages/audiocast.py index 0dc3f85..cfc7a6f 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -1,9 +1,11 @@ import asyncio from pathlib import Path +import pyperclip import streamlit as st -from src.utils.main_utils import get_audiocast_uri +from src.env_var import APP_URL +from src.utils.main_utils import get_audiocast def navigate_to_home(): @@ -14,21 +16,46 @@ def navigate_to_home(): async def render_audiocast_page(): st.set_page_config(page_title="Audiora | Share Page", page_icon="🎧") - audiocast_id = st.query_params.get("session_id") + session_id = st.query_params.get("session_id") - if audiocast_id: + if session_id: # Display audiocast content - st.title("🎧 Audiocast Player") - st.write(f"Playing audiocast: {audiocast_id}") + st.title("🎧 Audiora") + st.subheader("Share Page ") + + st.markdown(f"#### Viewing audiocast: {session_id}") try: with st.spinner("Loading audiocast..."): - audio_path = get_audiocast_uri(audiocast_id) - st.audio(audio_path) + audiocast = get_audiocast(session_id) + + # Audio player + st.audio(audiocast["url"]) + + # Transcript + with st.expander("Show Transcript"): + st.write(audiocast["script"]) + + # Metadata + st.sidebar.subheader("Audiocast Source") + st.sidebar.markdown(audiocast["source_content"]) + + share_url = f"{APP_URL}/audiocast?session_id={session_id}" + st.text_input("Share this audiocast:", share_url) + + share_col, restart_row = st.columns(2, vertical_alignment="bottom") + + with share_col: + if st.button("Copy Share link", use_container_width=True): + pyperclip.copy(share_url) + st.session_state.show_copy_success = True + + with restart_row: + if st.button("Create your Audiocast", use_container_width=True): + navigate_to_home() - # TODO: Fetch audiocast metadata from the database - st.subheader("Audiocast Details") - st.write("Created: 2024-03-20") + if audiocast["created_at"]: + st.markdown(f"> Created: {audiocast["created_at"]}") except Exception as e: st.error(f"Error loading audiocast: {str(e)}") diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py index c56ebe1..6083a40 100644 --- a/src/utils/main_utils.py +++ b/src/utils/main_utils.py @@ -1,3 +1,4 @@ +from datetime import datetime from pathlib import Path import streamlit as st @@ -26,6 +27,7 @@ class GenerateAudioCastResponse(BaseModel): url: str script: str source_content: str + created_at: str | None def chat(session_id: str, request: SessionChatRequest): @@ -103,15 +105,38 @@ async def generate_audiocast(request: GenerateAudioCastRequest): url=output_file, script=audio_script, source_content=source_content, + created_at=datetime.now().strftime("%Y-%m-%d %H:%M"), ) return response.model_dump() -def get_audiocast_uri(session_id: str): +def get_audiocast(session_id: str): """ Get the URI for the audiocast """ storage_manager = StorageManager() filepath = storage_manager.download_from_gcs(session_id) - return filepath + + session_data = SessionManager(session_id).data() + if not session_data: + raise Exception(f"Audiocast not found for session_id: {session_id}") + + metadata = session_data.metadata + source = metadata.source if metadata else "" + transcript = metadata.transcript if metadata else "" + + created_at: str | None = None + if session_data.created_at: + created_at = datetime.fromisoformat(session_data.created_at).strftime( + "%Y-%m-%d %H:%M" + ) + + response = GenerateAudioCastResponse( + url=filepath, + script=transcript, + source_content=source, + created_at=created_at, + ) + + return response.model_dump() diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py index ac75489..b7000e0 100644 --- a/src/utils/render_audiocast.py +++ b/src/utils/render_audiocast.py @@ -11,6 +11,7 @@ class GenerateAudiocastDict(TypedDict): url: str script: str source_content: str + created_at: str | None def render_audiocast(session_id: str): diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py index f1e71fb..3d9fc94 100644 --- a/src/utils/session_manager.py +++ b/src/utils/session_manager.py @@ -22,6 +22,7 @@ class SessionModel: id: str chats: List[SessionChatMessage] metadata: Optional[ChatMetadata] + created_at: Optional[str] = None class SessionManager(DBManager): @@ -40,6 +41,25 @@ def __init__(self, session_id: str): def _update(self, data: Dict): return self._update_document(self.collection, self.doc_id, data) + def data(self) -> SessionModel | None: + doc = self._get_document(self.collection, self.doc_id) + + data = doc.to_dict() + if not doc.exists or not data: + return None + + metadata = data["metadata"] or {} + + return SessionModel( + id=data["id"], + chats=data["chats"], + metadata=ChatMetadata( + source=metadata.get("source", ""), + transcript=metadata.get("transcript", ""), + ), + created_at=str(data["created_at"]), + ) + def _update_source(self, source: str): return self._update({"metadata.source": source}) From 6e0dcb8c494f5acb67e81a91f734d13671932bb3 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 16:23:05 +0000 Subject: [PATCH 08/26] cleanup --- src/utils/audio_manager_utils.py | 8 +++----- tests/__init__.py | 0 2 files changed, 3 insertions(+), 5 deletions(-) create mode 100644 tests/__init__.py diff --git a/src/utils/audio_manager_utils.py b/src/utils/audio_manager_utils.py index 1c73551..fe30fdc 100644 --- a/src/utils/audio_manager_utils.py +++ b/src/utils/audio_manager_utils.py @@ -41,9 +41,7 @@ def __init__(self) -> None: def _create_voice_mapping(self, tags: List[str], voices: List[Any]): """Create mapping of tags to voices""" - available_voices = voices[: len(tags)] - if len(available_voices) < len(tags): - available_voices = list(islice(cycle(voices), len(tags))) + available_voices = list(islice(cycle(voices), len(tags))) return dict(zip(tags, available_voices)) def _prepare_speech_jobs( @@ -120,8 +118,8 @@ def split_content(self, content: str, tags: List[str]) -> List[Tuple[str, str]]: # Regular expression pattern to match Tag0, Tag1, ..., TagN speaker dialogues matches = re.findall(r"<(Speaker\d+)>(.*?)", content, re.DOTALL) return [ - (str(person), " ".join(content.split()).strip()) - for person, content in matches + (str(speaker), " ".join(content_part.split()).strip()) + for speaker, content_part in matches ] @staticmethod diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 From 271c611a229ff2b4fe47ddc4c473fee7f4d625e6 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 16:23:53 +0000 Subject: [PATCH 09/26] temp remove audio_enchancement --- src/utils/main_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py index 6083a40..c9c3537 100644 --- a/src/utils/main_utils.py +++ b/src/utils/main_utils.py @@ -1,12 +1,12 @@ from datetime import datetime -from pathlib import Path import streamlit as st from pydantic import BaseModel from src.services.storage import StorageManager from src.utils.audio_manager import AudioManager -from src.utils.audio_synthesizer import AudioSynthesizer + +# from src.utils.audio_synthesizer import AudioSynthesizer from src.utils.audiocast_request import AudioScriptMaker, generate_source_content from src.utils.chat_request import chat_request from src.utils.chat_utils import ( @@ -83,8 +83,8 @@ async def generate_audiocast(request: GenerateAudioCastRequest): container.info("Generating audio...") output_file = await AudioManager().generate_speech(audio_script) - container.info("Enhancing audio quality...") - AudioSynthesizer().enhance_audio_minimal(Path(output_file)) + # container.info("Enhancing audio quality...") + # AudioSynthesizer().enhance_audio_minimal(Path(output_file)) print(f"output_file: {output_file}") # TODO: Use a background service From 3420a46f031abe2c87c88f1a6f096921c6e5e0ee Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 18:18:22 +0000 Subject: [PATCH 10/26] sanitize audiocast transcript --- pages/audiocast.py | 3 ++- src/utils/render_audiocast.py | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pages/audiocast.py b/pages/audiocast.py index cfc7a6f..326964b 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -6,6 +6,7 @@ from src.env_var import APP_URL from src.utils.main_utils import get_audiocast +from src.utils.render_audiocast import parse_ai_script def navigate_to_home(): @@ -34,7 +35,7 @@ async def render_audiocast_page(): # Transcript with st.expander("Show Transcript"): - st.write(audiocast["script"]) + st.markdown(parse_ai_script(audiocast["script"])) # Metadata st.sidebar.subheader("Audiocast Source") diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py index b7000e0..556070a 100644 --- a/src/utils/render_audiocast.py +++ b/src/utils/render_audiocast.py @@ -1,3 +1,4 @@ +import re from typing import TypedDict import pyperclip @@ -14,6 +15,11 @@ class GenerateAudiocastDict(TypedDict): created_at: str | None +def parse_ai_script(ai_script: str): + matches = re.findall(r"<(Speaker\d+)>(.*?)", ai_script, re.DOTALL) + return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches]) + + def render_audiocast(session_id: str): """ Render the audiocast based on the user's preferences @@ -27,7 +33,7 @@ def render_audiocast(session_id: str): # Transcript with st.expander("Show Transcript"): - st.write(current_audiocast["script"]) + st.markdown(parse_ai_script(current_audiocast["script"])) # Metadata st.sidebar.subheader("Audiocast Source") From be5299f73803d041f9e5c60c7a0590dff8435e37 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 18:57:08 +0000 Subject: [PATCH 11/26] add elevenlabs client --- requirements.txt | 1 + src/services/elevenlabs_client.py | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 src/services/elevenlabs_client.py diff --git a/requirements.txt b/requirements.txt index c2e38c3..cf5d21f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ asyncio openai anthropic +elevenlabs pyperclip python-multipart diff --git a/src/services/elevenlabs_client.py b/src/services/elevenlabs_client.py new file mode 100644 index 0000000..0aa9559 --- /dev/null +++ b/src/services/elevenlabs_client.py @@ -0,0 +1,11 @@ +from elevenlabs.client import ElevenLabs + +from src.env_var import ELEVENLABS_API_KEY + +client = ElevenLabs( + api_key=ELEVENLABS_API_KEY, +) + + +def get_elevenlabs_client(): + return client From 421dbcbfa55aeae99a8ba460eddffed3601e07c2 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 18:59:30 +0000 Subject: [PATCH 12/26] add __text_to_speech_elevenlabs; cleanup --- src/utils/audio_manager.py | 43 +++++++++++++---- src/utils/audio_manager_utils.py | 43 ++++------------- src/utils/generate_speech_utils.py | 76 ++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 42 deletions(-) create mode 100644 src/utils/generate_speech_utils.py diff --git a/src/utils/audio_manager.py b/src/utils/audio_manager.py index f07cf7b..6241037 100644 --- a/src/utils/audio_manager.py +++ b/src/utils/audio_manager.py @@ -4,16 +4,16 @@ import re import uuid from pathlib import Path -from typing import List, Optional +from typing import List, Optional, Tuple from src.utils.audio_manager_utils import ( AudioManagerConfig, AudioManagerSpeechGenerator, ContentSplitter, - openai_voices, ) from src.utils.audio_synthesizer import AudioSynthesizer from src.utils.clean_tss_markup import clean_tss_markup +from src.utils.generate_speech_utils import elevenlabs_voices, openai_voices logger = logging.getLogger(__name__) @@ -57,23 +57,30 @@ async def text_to_speech(self, audio_script: str, output_file: str): tags = self._get_tags(audio_script) audio_script = clean_tss_markup(audio_script, tags) + nway_content = self.split_content(audio_script, tags) + print(f"nway_content: {nway_content}") + if self.config.tts_provider == "openai": - return await self.__text_to_speech_openai(audio_script, output_file, tags) + return await self.__text_to_speech_openai(nway_content, output_file, tags) + elif self.config.tts_provider == "elevenlabs": + return await self.__text_to_speech_elevenlabs( + nway_content, output_file, tags + ) else: raise Exception("Invalid TTS model specified") async def __text_to_speech_openai( - self, audio_script: str, output_file: str, tags: List[str] + self, + nway_content: List[Tuple[str, str]], + output_file: str, + tags: List[str], ): try: - nway_content = self.split_content(audio_script, tags) - print(f"nway_content: {nway_content}") - jobs = self._prepare_speech_jobs( nway_content, tags, openai_voices, self.config.temp_audio_dir ) - audio_files = await self._process_speech_jobs(jobs) + audio_files = await self._process_speech_jobs(jobs, provider="openai") if not audio_files: raise Exception("No audio files were generated") @@ -83,6 +90,26 @@ async def __text_to_speech_openai( except Exception as e: raise Exception(f"Error converting text to speech with OpenAI: {str(e)}") + async def __text_to_speech_elevenlabs( + self, nway_content: List[Tuple[str, str]], output_file: str, tags: List[str] + ): + try: + jobs = self._prepare_speech_jobs( + nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir + ) + + audio_files = await self._process_speech_jobs(jobs, provider="elevenlabs") + if not audio_files: + raise Exception("No audio files were generated") + + await self.__finalize(audio_files, output_file) + logger.info(f"Audio saved to {output_file}") + + except Exception as e: + raise Exception( + f"Error converting text to speech with Elevenlabs: {str(e)}" + ) + async def __finalize( self, audio_files: List[str], output_file: str, enhance_audio=False ) -> None: diff --git a/src/utils/audio_manager_utils.py b/src/utils/audio_manager_utils.py index fe30fdc..a27dbd5 100644 --- a/src/utils/audio_manager_utils.py +++ b/src/utils/audio_manager_utils.py @@ -6,26 +6,14 @@ from functools import partial from itertools import cycle, islice from pathlib import Path -from typing import Any, List, Literal, Optional, Tuple +from typing import Any, List, Optional, Tuple -from src.services.openai_client import get_openai - -OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"] -openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"] - - -@dataclass -class SpeechJob: - content: str - voice: OpenaiVoice - output_file: str - tag: str - index: int +from src.utils.generate_speech_utils import GenerateSpeech, SpeechJob, TTSProvider @dataclass class AudioManagerConfig: - tts_provider: Optional[Literal["openai"]] = "openai" + tts_provider: Optional[TTSProvider] = "openai" temp_audio_dir: str = field(default_factory=lambda: "/tmp/audiocast") outdir_base: str = field(default_factory=lambda: "/tmp/audiocast/output") @@ -73,27 +61,14 @@ def _prepare_speech_jobs( return jobs - def _generate_speech(self, job: SpeechJob) -> str: - try: - response = get_openai().audio.speech.create( - input=job.content, - model="tts-1-hd", - voice=job.voice, - ) - - with open(job.output_file, "wb") as file: - file.write(response.content) - - print(f"Generated speech for tag {job.tag} at index {job.index}") - return job.output_file - except Exception as e: - print(f"Failed to generate speech for tag {job.tag}: {str(e)}") - return "" - - async def _process_speech_jobs(self, jobs: List[SpeechJob]) -> List[str]: + async def _process_speech_jobs( + self, jobs: List[SpeechJob], provider: TTSProvider + ) -> List[str]: loop = asyncio.get_event_loop() tasks = [ - loop.run_in_executor(self.executor, partial(self._generate_speech, job)) + loop.run_in_executor( + self.executor, partial(GenerateSpeech(provider).run, job) + ) for job in jobs ] diff --git a/src/utils/generate_speech_utils.py b/src/utils/generate_speech_utils.py new file mode 100644 index 0000000..29d62cf --- /dev/null +++ b/src/utils/generate_speech_utils.py @@ -0,0 +1,76 @@ +from dataclasses import dataclass +from io import BytesIO +from typing import List, Literal + +from elevenlabs import VoiceSettings + +from src.services.elevenlabs_client import get_elevenlabs_client +from src.services.openai_client import get_openai + +TTSProvider = Literal["openai", "elevenlabs"] +OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"] +openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"] + +ElevenLabsVoice = Literal[ + "Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam" +] +elevenlabs_voices = ["Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"] + + +@dataclass +class SpeechJob: + content: str + voice: OpenaiVoice + output_file: str + tag: str + index: int + + +class GenerateSpeech: + provider: TTSProvider + + def __init__(self, provider: TTSProvider): + self.provider = provider + + def run(self, job: SpeechJob): + """Generate speech using the specified provider""" + try: + content = ( + self.__use_openai(job) + if self.provider == "elevenlabs" + else self.__use_elevenlabs(job) + ) + + with open(job.output_file, "wb") as file: + file.write(content) + + print(f"Generated speech for tag {job.tag} at index {job.index}") + return job.output_file + except Exception as e: + print(f"Failed to generate speech for tag {job.tag}: {str(e)}") + return "" + + def __use_openai(self, job: SpeechJob): + response = get_openai().audio.speech.create( + input=job.content, model="tts-1-hd", voice=job.voice + ) + return response.content + + def __use_elevenlabs(self, job: SpeechJob): + response = get_elevenlabs_client().text_to_speech.convert( + voice_id=job.voice, + output_format="mp3_22050_32", + text=job.content, + model_id="eleven_turbo_v2_5", # use the turbo model for low latency + voice_settings=VoiceSettings( + stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True + ), + ) + + buffer = BytesIO() + for chunk in response: + if chunk: + buffer.write(chunk) + + buffer.seek(0) + return buffer.getvalue() From ccd7f124d94bbc37706f37a21fb6f599ede49bf3 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 19:03:46 +0000 Subject: [PATCH 13/26] use dry in text_to_speech --- src/utils/audio_manager.py | 41 +++++++++++++------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/src/utils/audio_manager.py b/src/utils/audio_manager.py index 6241037..e4ee35c 100644 --- a/src/utils/audio_manager.py +++ b/src/utils/audio_manager.py @@ -42,7 +42,6 @@ async def generate_speech(self, audio_script: str): """ output_file = f"{self.config.outdir_base}/{str(uuid.uuid4())}.mp3" await self.text_to_speech(audio_script, output_file) - return output_file async def text_to_speech(self, audio_script: str, output_file: str): @@ -61,50 +60,38 @@ async def text_to_speech(self, audio_script: str, output_file: str): print(f"nway_content: {nway_content}") if self.config.tts_provider == "openai": - return await self.__text_to_speech_openai(nway_content, output_file, tags) + audio_files = await self.__text_to_speech_openai(nway_content, tags) elif self.config.tts_provider == "elevenlabs": - return await self.__text_to_speech_elevenlabs( - nway_content, output_file, tags - ) + audio_files = await self.__text_to_speech_elevenlabs(nway_content, tags) else: raise Exception("Invalid TTS model specified") + if not audio_files: + raise Exception("No audio files were generated") + + await self.__finalize(audio_files, output_file) + logger.info(f"Audio saved to {output_file}") + async def __text_to_speech_openai( - self, - nway_content: List[Tuple[str, str]], - output_file: str, - tags: List[str], - ): + self, nway_content: List[Tuple[str, str]], tags: List[str] + ) -> List[str]: try: jobs = self._prepare_speech_jobs( nway_content, tags, openai_voices, self.config.temp_audio_dir ) - audio_files = await self._process_speech_jobs(jobs, provider="openai") - if not audio_files: - raise Exception("No audio files were generated") - - await self.__finalize(audio_files, output_file) - logger.info(f"Audio saved to {output_file}") - + return await self._process_speech_jobs(jobs, provider="openai") except Exception as e: raise Exception(f"Error converting text to speech with OpenAI: {str(e)}") async def __text_to_speech_elevenlabs( - self, nway_content: List[Tuple[str, str]], output_file: str, tags: List[str] - ): + self, nway_content: List[Tuple[str, str]], tags: List[str] + ) -> List[str]: try: jobs = self._prepare_speech_jobs( nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir ) - - audio_files = await self._process_speech_jobs(jobs, provider="elevenlabs") - if not audio_files: - raise Exception("No audio files were generated") - - await self.__finalize(audio_files, output_file) - logger.info(f"Audio saved to {output_file}") - + return await self._process_speech_jobs(jobs, provider="elevenlabs") except Exception as e: raise Exception( f"Error converting text to speech with Elevenlabs: {str(e)}" From ad76ef307e610bbc04cc006de153008407fb6534 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 19:11:33 +0000 Subject: [PATCH 14/26] only lint on python versions 3.11 and 3.12 --- .github/workflows/deploy.yml | 11 +++++++---- .github/workflows/ruff.yml | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 317f1f2..5b858cf 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -48,8 +48,9 @@ jobs: timeout-minutes: 5 steps: - uses: actions/checkout@v4 - - id: setup-python - uses: actions/setup-python@v5 + with: + fetch-depth: 0 + - uses: actions/setup-python@v5 with: python-version: "3.12" cache: "pip" # caching pip dependencies @@ -69,6 +70,8 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: "3.12" @@ -104,9 +107,9 @@ jobs: - run: curl -f "${{ steps.deploy.outputs.url }}" - uses: marocchino/sticky-pull-request-comment@v2 with: - header: app + header: audiora message: | - app: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }}) + audiora: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }}) promote: runs-on: ubuntu-latest diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml index e921c07..b7791ba 100644 --- a/.github/workflows/ruff.yml +++ b/.github/workflows/ruff.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.11", "3.12"] steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} From f14f71d6f19f0d61313468fbc661136b89d6ce9d Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 19:24:41 +0000 Subject: [PATCH 15/26] add write permission to deploy job for marocchino/sticky-pull-request-comment --- .github/workflows/deploy.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 5b858cf..a99135b 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -68,6 +68,8 @@ jobs: runs-on: ubuntu-latest needs: [prepare, lint] timeout-minutes: 10 + permissions: + pull-requests: write steps: - uses: actions/checkout@v4 with: From 162f8860a8e870650b8bd48d9f8532419867e92f Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Thu, 31 Oct 2024 20:36:49 +0000 Subject: [PATCH 16/26] use eleven_multilingual_v2 model for improved stability, accuracy and quality --- src/utils/audio_manager.py | 2 +- src/utils/audio_manager_utils.py | 10 ++++- src/utils/decorators.py | 36 +++++++++++++++++ src/utils/generate_speech_utils.py | 62 +++++++++++++++++++++--------- src/utils/main_utils.py | 10 ++--- 5 files changed, 93 insertions(+), 27 deletions(-) create mode 100644 src/utils/decorators.py diff --git a/src/utils/audio_manager.py b/src/utils/audio_manager.py index e4ee35c..848b162 100644 --- a/src/utils/audio_manager.py +++ b/src/utils/audio_manager.py @@ -55,8 +55,8 @@ async def text_to_speech(self, audio_script: str, output_file: str): """ tags = self._get_tags(audio_script) audio_script = clean_tss_markup(audio_script, tags) - nway_content = self.split_content(audio_script, tags) + print(f"nway_content: {nway_content}") if self.config.tts_provider == "openai": diff --git a/src/utils/audio_manager_utils.py b/src/utils/audio_manager_utils.py index a27dbd5..e0e9be0 100644 --- a/src/utils/audio_manager_utils.py +++ b/src/utils/audio_manager_utils.py @@ -8,7 +8,13 @@ from pathlib import Path from typing import Any, List, Optional, Tuple -from src.utils.generate_speech_utils import GenerateSpeech, SpeechJob, TTSProvider +from src.utils.generate_speech_utils import ( + ElevenLabsVoice, + GenerateSpeech, + OpenaiVoice, + SpeechJob, + TTSProvider, +) @dataclass @@ -36,7 +42,7 @@ def _prepare_speech_jobs( self, nway_content: List[Tuple[str, str]], tags: List[str], - voices: List[Any], + voices: List[OpenaiVoice] | List[ElevenLabsVoice], temp_audio_dir: str, ): jobs: List[SpeechJob] = [] diff --git a/src/utils/decorators.py b/src/utils/decorators.py new file mode 100644 index 0000000..25be2ad --- /dev/null +++ b/src/utils/decorators.py @@ -0,0 +1,36 @@ +import asyncio +from functools import wraps +from time import time + + +def process_time(): + """Print process execution time for a given function""" + + def decorator(func): + if asyncio.iscoroutinefunction(func): + + @wraps(func) + async def async_wrapper(*args, **kwargs): + start_time = time() + response = await func(*args, **kwargs) + + time_diff = f"{(time() - start_time):.2f}s" + print(f"Execution time for {func.__name__}: {time_diff}") + + return response + + return async_wrapper + + @wraps(func) + def wrapper(*args, **kwargs): + start_time = time() + response = func(*args, **kwargs) + + time_diff = f"{(time() - start_time):.2f}s" + print(f"Execution time for {func.__name__}: {time_diff}") + + return response + + return wrapper + + return decorator diff --git a/src/utils/generate_speech_utils.py b/src/utils/generate_speech_utils.py index 29d62cf..6a35589 100644 --- a/src/utils/generate_speech_utils.py +++ b/src/utils/generate_speech_utils.py @@ -1,26 +1,44 @@ from dataclasses import dataclass from io import BytesIO -from typing import List, Literal - -from elevenlabs import VoiceSettings +from typing import Dict, List, Literal from src.services.elevenlabs_client import get_elevenlabs_client from src.services.openai_client import get_openai +from src.utils.decorators import process_time TTSProvider = Literal["openai", "elevenlabs"] + OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"] openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"] ElevenLabsVoice = Literal[ "Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam" ] -elevenlabs_voices = ["Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"] +elevenlabs_voices: List[ElevenLabsVoice] = [ + "Adam", + "Sarah", + "Laura", + "Charlie", + "George", + "Charlotte", + "Liam", +] + +elevenlabs_voice_to_id: Dict[ElevenLabsVoice, str] = { + "Adam": "pNInz6obpgDQGcFmaJgB", + "Sarah": "EXAVITQu4vr4xnSDxMaL", + "Laura": "FGY2WhTYpPnrIDTdsKH5", + "Charlie": "IKne3meq5aSn9XLyUdCD", + "George": "JBFqnCBsd6RMkjVDRZzb", + "Charlotte": "XB0fDUnXU5powFXDhCwa", + "Liam": "TX3LPaxmHKxFdv7VOQHJ", +} @dataclass class SpeechJob: content: str - voice: OpenaiVoice + voice: OpenaiVoice | ElevenLabsVoice output_file: str tag: str index: int @@ -35,11 +53,10 @@ def __init__(self, provider: TTSProvider): def run(self, job: SpeechJob): """Generate speech using the specified provider""" try: - content = ( - self.__use_openai(job) - if self.provider == "elevenlabs" - else self.__use_elevenlabs(job) - ) + if self.provider == "elevenlabs": + content = self.__use_elevenlabs(job) + else: + content = self.__use_openai(job) with open(job.output_file, "wb") as file: file.write(content) @@ -47,24 +64,33 @@ def run(self, job: SpeechJob): print(f"Generated speech for tag {job.tag} at index {job.index}") return job.output_file except Exception as e: - print(f"Failed to generate speech for tag {job.tag}: {str(e)}") + print(f"Failed to generate speech for tag: {job.tag}. Error: {str(e)}") return "" + @process_time() def __use_openai(self, job: SpeechJob): + if job.voice not in openai_voices: + raise ValueError("Wrong voice specification for openai tts") + response = get_openai().audio.speech.create( input=job.content, model="tts-1-hd", voice=job.voice ) return response.content + @process_time() def __use_elevenlabs(self, job: SpeechJob): - response = get_elevenlabs_client().text_to_speech.convert( - voice_id=job.voice, - output_format="mp3_22050_32", + if job.voice not in elevenlabs_voices: + raise ValueError("Wrong voice specification for elevenlabs tts") + # response = get_elevenlabs_client().text_to_speech.convert( + # model_id="eleven_turbo_v2_5", # use the turbo model for low latency + # text=job.content, + # voice_id=elevenlabs_voice_to_id[job.voice], + # output_format="mp3_22050_32", + # ) + response = get_elevenlabs_client().generate( + model="eleven_multilingual_v2", text=job.content, - model_id="eleven_turbo_v2_5", # use the turbo model for low latency - voice_settings=VoiceSettings( - stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True - ), + voice=job.voice, ) buffer = BytesIO() diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py index c9c3537..8268538 100644 --- a/src/utils/main_utils.py +++ b/src/utils/main_utils.py @@ -4,9 +4,7 @@ from pydantic import BaseModel from src.services.storage import StorageManager -from src.utils.audio_manager import AudioManager - -# from src.utils.audio_synthesizer import AudioSynthesizer +from src.utils.audio_manager import AudioManager, AudioManagerConfig from src.utils.audiocast_request import AudioScriptMaker, generate_source_content from src.utils.chat_request import chat_request from src.utils.chat_utils import ( @@ -81,10 +79,10 @@ async def generate_audiocast(request: GenerateAudioCastRequest): # STEP 3: Generate audio from the audio script with container.container(): container.info("Generating audio...") - output_file = await AudioManager().generate_speech(audio_script) + output_file = await AudioManager( + custom_config=AudioManagerConfig(tts_provider="elevenlabs") + ).generate_speech(audio_script) - # container.info("Enhancing audio quality...") - # AudioSynthesizer().enhance_audio_minimal(Path(output_file)) print(f"output_file: {output_file}") # TODO: Use a background service From b5e7230c46274c89082884b7bd786b21c567dce0 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 17:09:57 +0000 Subject: [PATCH 17/26] Refactor audiocast page to include waveform visualization --- pages/audiocast.py | 14 +++++++- requirements.txt | 6 +++- src/utils/audio_to_video.py | 38 +++++++++++++++++++++ src/utils/waveform_utils.py | 68 +++++++++++++++++++++++++++++++++++++ 4 files changed, 124 insertions(+), 2 deletions(-) create mode 100644 src/utils/audio_to_video.py create mode 100644 src/utils/waveform_utils.py diff --git a/pages/audiocast.py b/pages/audiocast.py index 326964b..ec9c8ed 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -7,6 +7,7 @@ from src.env_var import APP_URL from src.utils.main_utils import get_audiocast from src.utils.render_audiocast import parse_ai_script +from src.utils.waveform_utils import download_waveform_video, render_waveform def navigate_to_home(): @@ -23,13 +24,24 @@ async def render_audiocast_page(): # Display audiocast content st.title("🎧 Audiora") st.subheader("Share Page ") - st.markdown(f"#### Viewing audiocast: {session_id}") try: with st.spinner("Loading audiocast..."): audiocast = get_audiocast(session_id) + # Create placeholder for visualization + if audiocast["url"]: + viz = st.empty() + with viz.container(): + try: + video_path = render_waveform(session_id, audiocast["url"]) + if video_path: + # Download video + download_waveform_video(str(video_path)) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") + # Audio player st.audio(audiocast["url"]) diff --git a/requirements.txt b/requirements.txt index cf5d21f..2169a7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,7 +11,7 @@ python-multipart python-slugify python-dotenv pydub - +pydantic firebase-admin google-auth @@ -19,4 +19,8 @@ google-cloud-storage google-api-python-client google-generativeai +ffmpeg-python +seewav +watchdog + ruff \ No newline at end of file diff --git a/src/utils/audio_to_video.py b/src/utils/audio_to_video.py new file mode 100644 index 0000000..4ae3fe8 --- /dev/null +++ b/src/utils/audio_to_video.py @@ -0,0 +1,38 @@ +import os +import subprocess + + +def create_video_from_audio(audio_path: str, image_path: str, output_path: str): + """Create a video with audio and spectrogram overlay.""" + cmd = [ + "ffmpeg", + "-y", + "-loop", + "1", + "-i", + image_path, + "-i", + audio_path, + "-c:v", + "libx264", + "-tune", + "stillimage", + "-c:a", + "aac", + "-b:a", + "192k", + "-pix_fmt", + "yuv420p", + "-shortest", + output_path, + ] + + try: + subprocess.run(cmd, check=True) + os.remove(image_path) # Clean up temporary spectrogram + return True + except subprocess.CalledProcessError as e: + print(f"Error during video creation: {str(e)}") + return False + except Exception as e: + print(f"Error during video creation: {str(e)}") diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py new file mode 100644 index 0000000..828962c --- /dev/null +++ b/src/utils/waveform_utils.py @@ -0,0 +1,68 @@ +import os +import tempfile +from pathlib import Path + +import streamlit as st +from pydub import AudioSegment +from seewav import visualize + + +def generate_waveform_video(output_path: Path, audio_path: str) -> Path: + """Generate waveform video from audio file using SeeWav.""" + with tempfile.TemporaryDirectory() as temp_dir: + visualize( + audio=Path(audio_path), + tmp=Path(temp_dir), + out=output_path, + bars=60, + speed=4, + time=0.4, + rate=60, + size=(200, 200), + fg_color=(0.0, 1.0, 0.6), # Bright green. Try 0.2 0.2 0.2 for dark green + bg_color=(0.05, 0.05, 0.05), # Near black + ) + return output_path + + +def render_waveform(session_id: str, audio_path: str): + """Render waveform visualization from audio file.""" + tmp_directory = Path("/tmp/audiora/waveforms") + tmp_directory.mkdir(parents=True, exist_ok=True) + tmp_vid_path = tmp_directory / f"{session_id}.mp4" + + video_path = None + if os.path.exists(tmp_vid_path): + try: + mp4_version = AudioSegment.from_file(str(tmp_vid_path), "mp4") + if mp4_version.duration_seconds > 0: + video_path = tmp_vid_path + except Exception: + os.remove(tmp_vid_path) + + try: + if not video_path: + with st.spinner("Generating waveform visualization..."): + video_path = generate_waveform_video(tmp_vid_path, audio_path) + + with open(video_path, "rb") as video_file: + video_bytes = video_file.read() + st.video(video_bytes, autoplay=True) + # st.video(str(video_path), autoplay=True) + + return video_path + except Exception as e: + st.error(f"Error generating visualization: {str(e)}") + + +def download_waveform_video(video_path: str): + """Download video with waveform""" + gen_video, _ = st.columns(2) + with gen_video: + with open(video_path, "rb") as f: + st.download_button( + label="Download Video with waveform", + data=f, + file_name="audio_visualization.mp4", + mime="video/mp4", + ) From ec95a252768ff6a5ce45076c85669da39c82a7a6 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 17:29:17 +0000 Subject: [PATCH 18/26] put waveform viz in an expander --- pages/audiocast.py | 12 ++++++++---- src/utils/render_audiocast.py | 13 +++++++++++++ src/utils/waveform_utils.py | 5 +++-- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/pages/audiocast.py b/pages/audiocast.py index ec9c8ed..d5f60d3 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -30,8 +30,11 @@ async def render_audiocast_page(): with st.spinner("Loading audiocast..."): audiocast = get_audiocast(session_id) + # Audio player + st.audio(audiocast["url"]) + # Create placeholder for visualization - if audiocast["url"]: + with st.expander("Show Waveform Visualization"): viz = st.empty() with viz.container(): try: @@ -42,9 +45,6 @@ async def render_audiocast_page(): except Exception as e: st.error(f"Error rendering waveform: {str(e)}") - # Audio player - st.audio(audiocast["url"]) - # Transcript with st.expander("Show Transcript"): st.markdown(parse_ai_script(audiocast["script"])) @@ -67,6 +67,10 @@ async def render_audiocast_page(): if st.button("Create your Audiocast", use_container_width=True): navigate_to_home() + if st.session_state.get("show_copy_success", False): + st.session_state.show_copy_succes = False + st.success("Share link copied successfully!", icon="✅") + if audiocast["created_at"]: st.markdown(f"> Created: {audiocast["created_at"]}") diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py index 556070a..0a2ec65 100644 --- a/src/utils/render_audiocast.py +++ b/src/utils/render_audiocast.py @@ -6,6 +6,7 @@ from src.env_var import APP_URL from src.utils.session_state import reset_session +from src.utils.waveform_utils import download_waveform_video, render_waveform class GenerateAudiocastDict(TypedDict): @@ -31,6 +32,18 @@ def render_audiocast(session_id: str): # Audio player st.audio(current_audiocast["url"]) + # Create placeholder for visualization + with st.expander("Show Waveform Visualization"): + viz = st.empty() + with viz.container(): + try: + video_path = render_waveform(session_id, current_audiocast["url"]) + if video_path: + # Download video + download_waveform_video(str(video_path)) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") + # Transcript with st.expander("Show Transcript"): st.markdown(parse_ai_script(current_audiocast["script"])) diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py index 828962c..742c8b8 100644 --- a/src/utils/waveform_utils.py +++ b/src/utils/waveform_utils.py @@ -17,8 +17,8 @@ def generate_waveform_video(output_path: Path, audio_path: str) -> Path: bars=60, speed=4, time=0.4, - rate=60, - size=(200, 200), + # rate=60, + size=(120, 68), fg_color=(0.0, 1.0, 0.6), # Bright green. Try 0.2 0.2 0.2 for dark green bg_color=(0.05, 0.05, 0.05), # Near black ) @@ -65,4 +65,5 @@ def download_waveform_video(video_path: str): data=f, file_name="audio_visualization.mp4", mime="video/mp4", + use_container_width=True ) From efedaa6a7644518ad7dee300abc0ade0cb98ef77 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 17:42:55 +0000 Subject: [PATCH 19/26] cleanup --- pages/audiocast.py | 49 ++++++------------------- src/utils/render_audiocast.py | 47 +++--------------------- src/utils/render_audiocast_utils.py | 55 +++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 80 deletions(-) create mode 100644 src/utils/render_audiocast_utils.py diff --git a/pages/audiocast.py b/pages/audiocast.py index d5f60d3..8292e47 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -1,18 +1,15 @@ import asyncio -from pathlib import Path +from typing import cast import pyperclip import streamlit as st -from src.env_var import APP_URL from src.utils.main_utils import get_audiocast -from src.utils.render_audiocast import parse_ai_script -from src.utils.waveform_utils import download_waveform_video, render_waveform - - -def navigate_to_home(): - main_script = str(Path(__file__).parent.parent / "app.py") - st.switch_page(main_script) +from src.utils.render_audiocast_utils import ( + GenerateAudiocastDict, + navigate_to_home, + render_audiocast_handler, +) async def render_audiocast_page(): @@ -28,33 +25,9 @@ async def render_audiocast_page(): try: with st.spinner("Loading audiocast..."): - audiocast = get_audiocast(session_id) - - # Audio player - st.audio(audiocast["url"]) - - # Create placeholder for visualization - with st.expander("Show Waveform Visualization"): - viz = st.empty() - with viz.container(): - try: - video_path = render_waveform(session_id, audiocast["url"]) - if video_path: - # Download video - download_waveform_video(str(video_path)) - except Exception as e: - st.error(f"Error rendering waveform: {str(e)}") - - # Transcript - with st.expander("Show Transcript"): - st.markdown(parse_ai_script(audiocast["script"])) - - # Metadata - st.sidebar.subheader("Audiocast Source") - st.sidebar.markdown(audiocast["source_content"]) - - share_url = f"{APP_URL}/audiocast?session_id={session_id}" - st.text_input("Share this audiocast:", share_url) + audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id)) + + share_url = render_audiocast_handler(session_id, audiocast) share_col, restart_row = st.columns(2, vertical_alignment="bottom") @@ -83,8 +56,8 @@ async def render_audiocast_page(): st.markdown("---") - cola, _ = st.columns([3, 5]) - with cola: + col1, _ = st.columns([3, 5]) + with col1: if st.button("← Back to Home", use_container_width=True): navigate_to_home() diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py index 0a2ec65..08227cf 100644 --- a/src/utils/render_audiocast.py +++ b/src/utils/render_audiocast.py @@ -1,24 +1,11 @@ -import re -from typing import TypedDict - import pyperclip import streamlit as st -from src.env_var import APP_URL +from src.utils.render_audiocast_utils import ( + GenerateAudiocastDict, + render_audiocast_handler, +) from src.utils.session_state import reset_session -from src.utils.waveform_utils import download_waveform_video, render_waveform - - -class GenerateAudiocastDict(TypedDict): - url: str - script: str - source_content: str - created_at: str | None - - -def parse_ai_script(ai_script: str): - matches = re.findall(r"<(Speaker\d+)>(.*?)", ai_script, re.DOTALL) - return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches]) def render_audiocast(session_id: str): @@ -29,31 +16,7 @@ def render_audiocast(session_id: str): st.markdown("#### Your Audiocast") current_audiocast: GenerateAudiocastDict = st.session_state.current_audiocast - # Audio player - st.audio(current_audiocast["url"]) - - # Create placeholder for visualization - with st.expander("Show Waveform Visualization"): - viz = st.empty() - with viz.container(): - try: - video_path = render_waveform(session_id, current_audiocast["url"]) - if video_path: - # Download video - download_waveform_video(str(video_path)) - except Exception as e: - st.error(f"Error rendering waveform: {str(e)}") - - # Transcript - with st.expander("Show Transcript"): - st.markdown(parse_ai_script(current_audiocast["script"])) - - # Metadata - st.sidebar.subheader("Audiocast Source") - st.sidebar.markdown(current_audiocast["source_content"]) - - share_url = f"{APP_URL}/audiocast?session_id={session_id}" - st.text_input("Share this audiocast:", share_url) + share_url = render_audiocast_handler(session_id, current_audiocast) share_col, restart_row = st.columns(2, vertical_alignment="bottom") diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py new file mode 100644 index 0000000..3538c2c --- /dev/null +++ b/src/utils/render_audiocast_utils.py @@ -0,0 +1,55 @@ +import re +from pathlib import Path +from typing import TypedDict + +import streamlit as st + +from src.env_var import APP_URL +from src.utils.waveform_utils import download_waveform_video, render_waveform + + +def navigate_to_home(): + main_script = str(Path(__file__).parent.parent / "app.py") + st.switch_page(main_script) + + +def parse_ai_script(ai_script: str): + matches = re.findall(r"<(Speaker\d+)>(.*?)", ai_script, re.DOTALL) + return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches]) + + +class GenerateAudiocastDict(TypedDict): + url: str + script: str + source_content: str + created_at: str | None + + +def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict): + # Audio player + st.audio(audiocast["url"]) + + # Create placeholder for visualization + with st.expander("Show Waveform Visualization"): + viz = st.empty() + with viz.container(): + try: + video_path = render_waveform(session_id, audiocast["url"]) + if video_path: + # Download video + download_waveform_video(str(video_path)) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") + + # Transcript + with st.expander("Show Transcript"): + st.markdown(parse_ai_script(audiocast["script"])) + + # Metadata + st.sidebar.subheader("Audiocast Source") + st.sidebar.markdown(audiocast["source_content"]) + + share_url = f"{APP_URL}/audiocast?session_id={session_id}" + st.text_input("Share this audiocast:", share_url) + + return share_url From d5308ba90e0ab163b4b92d3e7dc882bf400bc8c7 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 18:01:43 +0000 Subject: [PATCH 20/26] move download_waveform_video internal to render_waveform --- src/utils/render_audiocast_utils.py | 16 ++++++--------- src/utils/waveform_utils.py | 32 +++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 14 deletions(-) diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py index 3538c2c..28a6d2f 100644 --- a/src/utils/render_audiocast_utils.py +++ b/src/utils/render_audiocast_utils.py @@ -5,7 +5,7 @@ import streamlit as st from src.env_var import APP_URL -from src.utils.waveform_utils import download_waveform_video, render_waveform +from src.utils.waveform_utils import render_waveform def navigate_to_home(): @@ -31,15 +31,11 @@ def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict): # Create placeholder for visualization with st.expander("Show Waveform Visualization"): - viz = st.empty() - with viz.container(): - try: - video_path = render_waveform(session_id, audiocast["url"]) - if video_path: - # Download video - download_waveform_video(str(video_path)) - except Exception as e: - st.error(f"Error rendering waveform: {str(e)}") + # with st.container(): + try: + render_waveform(session_id, audiocast["url"]) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") # Transcript with st.expander("Show Transcript"): diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py index 742c8b8..1e0f39d 100644 --- a/src/utils/waveform_utils.py +++ b/src/utils/waveform_utils.py @@ -45,12 +45,36 @@ def render_waveform(session_id: str, audio_path: str): with st.spinner("Generating waveform visualization..."): video_path = generate_waveform_video(tmp_vid_path, audio_path) + # st.video(str(video_path), autoplay=True) with open(video_path, "rb") as video_file: video_bytes = video_file.read() - st.video(video_bytes, autoplay=True) - # st.video(str(video_path), autoplay=True) + # st.video(video_bytes, autoplay=True) + st.markdown( + f""" + +
+ +
+ """, + unsafe_allow_html=True, + ) - return video_path + download_waveform_video(str(video_path)) except Exception as e: st.error(f"Error generating visualization: {str(e)}") @@ -65,5 +89,5 @@ def download_waveform_video(video_path: str): data=f, file_name="audio_visualization.mp4", mime="video/mp4", - use_container_width=True + use_container_width=True, ) From 753b3578b0821716afa5639b4c4aefbfb0cc4d82 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 19:01:17 +0000 Subject: [PATCH 21/26] allow toggling waveform visualizer --- pages/audiocast.py | 30 ++++++++++++------------ src/utils/render_audiocast_utils.py | 36 +++++++++++++++++++++++------ src/utils/waveform_utils.py | 26 +-------------------- 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/pages/audiocast.py b/pages/audiocast.py index 8292e47..0307cc1 100644 --- a/pages/audiocast.py +++ b/pages/audiocast.py @@ -21,31 +21,31 @@ async def render_audiocast_page(): # Display audiocast content st.title("🎧 Audiora") st.subheader("Share Page ") - st.markdown(f"#### Viewing audiocast: {session_id}") + st.markdown(f"##### Viewing audiocast: _{session_id}_") try: with st.spinner("Loading audiocast..."): audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id)) - share_url = render_audiocast_handler(session_id, audiocast) + share_url = render_audiocast_handler(session_id, audiocast) - share_col, restart_row = st.columns(2, vertical_alignment="bottom") + share_col, restart_row = st.columns(2, vertical_alignment="bottom") - with share_col: - if st.button("Copy Share link", use_container_width=True): - pyperclip.copy(share_url) - st.session_state.show_copy_success = True + with share_col: + if st.button("Copy Share link", use_container_width=True): + pyperclip.copy(share_url) + st.session_state.show_copy_success = True - with restart_row: - if st.button("Create your Audiocast", use_container_width=True): - navigate_to_home() + with restart_row: + if st.button("Create your Audiocast", use_container_width=True): + navigate_to_home() - if st.session_state.get("show_copy_success", False): - st.session_state.show_copy_succes = False - st.success("Share link copied successfully!", icon="✅") + if st.session_state.get("show_copy_success", False): + st.session_state.show_copy_succes = False + st.success("Share link copied successfully!", icon="✅") - if audiocast["created_at"]: - st.markdown(f"> Created: {audiocast["created_at"]}") + if audiocast["created_at"]: + st.markdown(f"> Created: {audiocast["created_at"]}") except Exception as e: st.error(f"Error loading audiocast: {str(e)}") diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py index 28a6d2f..857a2ee 100644 --- a/src/utils/render_audiocast_utils.py +++ b/src/utils/render_audiocast_utils.py @@ -29,18 +29,40 @@ def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict): # Audio player st.audio(audiocast["url"]) - # Create placeholder for visualization - with st.expander("Show Waveform Visualization"): - # with st.container(): - try: - render_waveform(session_id, audiocast["url"]) - except Exception as e: - st.error(f"Error rendering waveform: {str(e)}") + st.markdown("---") + + col1, _ = st.columns([4, 1]) + with col1: + + def toggle_show_waveform(): + st.session_state.show_waveform = not st.session_state.get("show_waveform") + + button_label = ( + "Hide Waveform Visualization" + if st.session_state.get("show_waveform") + else "Show Waveform Visualization" + ) + + st.button( + button_label, + on_click=toggle_show_waveform, + use_container_width=True, + ) + + if st.session_state.get("show_waveform"): + try: + render_waveform(session_id, audiocast["url"]) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") + + st.markdown("---") # Transcript with st.expander("Show Transcript"): st.markdown(parse_ai_script(audiocast["script"])) + st.markdown("---") + # Metadata st.sidebar.subheader("Audiocast Source") st.sidebar.markdown(audiocast["source_content"]) diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py index 1e0f39d..7e98aac 100644 --- a/src/utils/waveform_utils.py +++ b/src/utils/waveform_utils.py @@ -48,31 +48,7 @@ def render_waveform(session_id: str, audio_path: str): # st.video(str(video_path), autoplay=True) with open(video_path, "rb") as video_file: video_bytes = video_file.read() - # st.video(video_bytes, autoplay=True) - st.markdown( - f""" - -
- -
- """, - unsafe_allow_html=True, - ) + st.video(video_bytes, autoplay=True) download_waveform_video(str(video_path)) except Exception as e: From c5b9ac1eba58fcf85aabed61b4e546531bb9e6e1 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 19:39:08 +0000 Subject: [PATCH 22/26] save waveform to gcs --- src/services/storage.py | 24 +++++++++++++++++------- src/utils/waveform_utils.py | 14 ++++++++++++++ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/src/services/storage.py b/src/services/storage.py index 1e14bca..8710062 100644 --- a/src/services/storage.py +++ b/src/services/storage.py @@ -20,13 +20,6 @@ def listBlobs(prefix): return [blob for blob in blobs] -def check_file_exists(root_path: str, filename: str): - """check if a file exists in the bucket""" - blobname = f"{root_path}/{filename}" - blobs = listBlobs(prefix=root_path) - return any(blob.name == blobname for blob in blobs) - - @dataclass class UploadItemParams: content_type: str @@ -35,6 +28,12 @@ class UploadItemParams: class StorageManager: + def check_blob_exists(self, root_path: str, filename: str): + """check if a file exists in the bucket""" + blobname = f"{root_path}/{filename}" + blobs = listBlobs(prefix=root_path) + return any(blob.name == blobname for blob in blobs) + def upload_to_gcs( self, item: str | Path | BytesIO, blobname: str, params: UploadItemParams ): @@ -66,6 +65,17 @@ def upload_audio_to_gcs(self, tmp_audio_path: str, filename=str(uuid4())): return f"gs://{BUCKET_NAME}/{blobname}" + def upload_video_to_gcs(self, tmp_video_path: str, filename=str(uuid4())): + """upload audio file to GCS""" + blobname = f"{BLOB_BASE_URI}/{filename}" + self.upload_to_gcs( + Path(tmp_video_path), + blobname, + UploadItemParams(content_type="video/mp4"), + ) + + return f"gs://{BUCKET_NAME}/{blobname}" + def download_from_gcs(self, filename: str): """ Download any item on GCS to disk diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py index 7e98aac..9da3f1c 100644 --- a/src/utils/waveform_utils.py +++ b/src/utils/waveform_utils.py @@ -6,6 +6,14 @@ from pydub import AudioSegment from seewav import visualize +from src.services.storage import BLOB_BASE_URI, StorageManager + + +def save_waveform_video_to_gcs(session_id: str, video_path: str): + """Ingest waveform visualization to GCS.""" + full_path = StorageManager().upload_video_to_gcs(video_path, f"{session_id}.mp4") + return full_path + def generate_waveform_video(output_path: Path, audio_path: str) -> Path: """Generate waveform video from audio file using SeeWav.""" @@ -39,11 +47,17 @@ def render_waveform(session_id: str, audio_path: str): video_path = tmp_vid_path except Exception: os.remove(tmp_vid_path) + else: + blobname = f"{session_id}.mp4" + exists = StorageManager().check_blob_exists(BLOB_BASE_URI, blobname) + if exists: + video_path = StorageManager().download_from_gcs(blobname) try: if not video_path: with st.spinner("Generating waveform visualization..."): video_path = generate_waveform_video(tmp_vid_path, audio_path) + save_waveform_video_to_gcs(session_id, str(video_path)) # st.video(str(video_path), autoplay=True) with open(video_path, "rb") as video_file: From 5cad8448e70faac99a33a0128aaa3191f4089ff5 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 19:42:19 +0000 Subject: [PATCH 23/26] reshuffle dependencies in requirements.txt --- requirements.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2169a7a..f18a5f7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ +pydantic + streamlit httpx asyncio @@ -6,12 +8,14 @@ openai anthropic elevenlabs -pyperclip python-multipart python-slugify python-dotenv +ffmpeg-python + pydub -pydantic +pyperclip +seewav firebase-admin google-auth @@ -19,8 +23,5 @@ google-cloud-storage google-api-python-client google-generativeai -ffmpeg-python -seewav watchdog - ruff \ No newline at end of file From 0a8b98f13241d5786acdd997bec6bfc4a423ad94 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 20:19:45 +0000 Subject: [PATCH 24/26] add pycairo to deps --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index f18a5f7..834ebf4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ ffmpeg-python pydub pyperclip seewav +pycairo firebase-admin google-auth From bf69acff6bdb76aee78cacb283bdfde5db70b408 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 20:21:34 +0000 Subject: [PATCH 25/26] fix reference to pyproject.toml --- pyprojec.toml => pyproject.toml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pyprojec.toml => pyproject.toml (100%) diff --git a/pyprojec.toml b/pyproject.toml similarity index 100% rename from pyprojec.toml rename to pyproject.toml From e44aef6eec17683d359d05ca68d0c8c7d25a1a85 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Fri, 1 Nov 2024 20:27:50 +0000 Subject: [PATCH 26/26] add deps for cairo library --- Dockerfile | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index fa87d0f..ae23133 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,9 +8,14 @@ ENV PYTHONDONTWRITEBYTECODE 1 WORKDIR /app -# Install FFmpeg and any other required dependencies -RUN apt-get -yqq update && apt-get -yqq install build-essential ffmpeg && \ - rm -rf /var/lib/apt/lists/* +# Install FFmpeg, Cairo, and any other required dependencies +RUN apt-get -yqq update && apt-get -yqq install \ + build-essential \ + ffmpeg \ + libcairo2-dev \ + pkg-config \ + python3-dev \ + && rm -rf /var/lib/apt/lists/* COPY . ./