From aab24cdd33e6f14019451892ad91da39f6920a94 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 13:27:34 +0000
Subject: [PATCH 01/26] add firestore_sdk and session_manager

---
 src/services/firestore_sdk.py | 58 ++++++++++++++++++++++++++
 src/utils/session_manager.py  | 78 +++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 src/services/firestore_sdk.py
 create mode 100644 src/utils/session_manager.py

diff --git a/src/services/firestore_sdk.py b/src/services/firestore_sdk.py
new file mode 100644
index 0000000..4ca0e7b
--- /dev/null
+++ b/src/services/firestore_sdk.py
@@ -0,0 +1,58 @@
+import logging
+from typing import Dict, Literal
+
+from firebase_admin.firestore import client, firestore
+
+firestore_client = client()
+server_timestamp = firestore.SERVER_TIMESTAMP
+increment = firestore.Increment
+arrayUnion = firestore.ArrayUnion
+arrayRemove = firestore.ArrayRemove
+
+
+Collection = Literal["audiora_sessions", "audiora_audiocasts"]
+
+collections: Dict[Collection, Collection] = {
+    "audiora_sessions": "audiora_sessions",
+    "audiora_audiocasts": "audiora_audiocasts",
+}
+
+
+class DBManager:
+    def __init__(self, scope: str):
+        self.logger = logging.getLogger(scope)
+
+    @property
+    def timestamp(self):
+        return server_timestamp
+
+    def _get_collection(self, collection: Collection):
+        return firestore_client.collection(collections[collection])
+
+    def _create_document(self, collection: Collection, data: Dict):
+        return self._get_collection(collection).add(
+            {**data, "created_at": self.timestamp, "updated_at": self.timestamp}
+        )
+
+    def _set_document(self, collection: Collection, doc_id: str, data: Dict):
+        return (
+            self._get_collection(collection)
+            .document(doc_id)
+            .set({**data, "created_at": self.timestamp, "updated_at": self.timestamp})
+        )
+
+    def _update_document(self, collection: Collection, doc_id: str, data: Dict):
+        return (
+            self._get_collection(collection)
+            .document(doc_id)
+            .update({**data, "updated_at": self.timestamp})
+        )
+
+    def _delete_document(self, collection: Collection, doc_id: str):
+        return self._get_collection(collection).document(doc_id).delete()
+
+    def _get_document(self, collection: Collection, doc_id: str):
+        return self._get_collection(collection).document(doc_id).get()
+
+    def _get_documents(self, collection: Collection):
+        return self._get_collection(collection).stream()
diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py
new file mode 100644
index 0000000..69c80ee
--- /dev/null
+++ b/src/utils/session_manager.py
@@ -0,0 +1,78 @@
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+
+from src.services.firestore_sdk import (
+    Collection,
+    DBManager,
+    arrayRemove,
+    arrayUnion,
+    collections,
+)
+from src.utils.chat_utils import SessionChatMessage
+
+
+@dataclass
+class ChatMetadata:
+    source: str
+    transcript: str
+
+
+@dataclass
+class SessionModel:
+    id: str
+    chats: List[SessionChatMessage]
+    metadata: Optional[ChatMetadata]
+
+
+class SessionManager(DBManager):
+    collection: Collection = collections["audiora_sessions"]
+
+    def __init__(self, session_id: str):
+        super().__init__(scope="ChatManager")
+
+        self.doc_id = session_id
+        session_doc = self._get_document(self.collection, self.doc_id)
+        # if the collection does not exist, create it
+        if not session_doc.exists:
+            payload = SessionModel(id=self.doc_id, chats=[], metadata=None)
+            self._set_document(self.collection, self.doc_id, payload.__dict__)
+
+    def _update(self, data: Dict):
+        return self._update_document(self.collection, self.doc_id, data)
+
+    def _update_source(self, source: str):
+        return self._update({"metadata.source": source})
+
+    def _update_transcript(self, transcript: str):
+        return self._update({"metadata.transcript": transcript})
+
+    def _add_chat(self, chat: SessionChatMessage):
+        return self._update_document(
+            self.collection, self.doc_id, {"chats": arrayUnion(chat)}
+        )
+
+    def _delete_chat(self, chat_id: str):
+        doc = self._get_document(self.collection, self.doc_id)
+        if not doc.exists:
+            return
+
+        chat_to_remove = [chat for chat in doc.get("chats") if chat.id == chat_id]
+        self._update_document(
+            self.collection,
+            self.doc_id,
+            {"chats": arrayRemove(chat_to_remove)},
+        )
+
+    def _get_chat(self, chat_id: str) -> SessionChatMessage | None:
+        doc = self._get_document(self.collection, self.doc_id)
+        if not doc.exists:
+            return None
+
+        return [chat for chat in doc.get("chats") if chat.id == chat_id][0]
+
+    def _get_chats(self) -> List[SessionChatMessage] | None:
+        doc = self._get_document(self.collection, self.doc_id)
+        if not doc.exists:
+            return None
+
+        return doc.get("chats")

From 4fe5a504c2a331c9a9fc075fe9af59b1c0a06442 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 13:29:59 +0000
Subject: [PATCH 02/26] save user chats on firestore

---
 src/utils/chat_utils.py      |  6 ++++--
 src/utils/main_utils.py      | 14 +++++---------
 src/utils/session_manager.py |  4 ++--
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/src/utils/chat_utils.py b/src/utils/chat_utils.py
index 6942bfb..e627b98 100644
--- a/src/utils/chat_utils.py
+++ b/src/utils/chat_utils.py
@@ -1,7 +1,8 @@
+import uuid
 from typing import Dict, List, Literal
 
 import streamlit as st
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 
 ContentCategory = Literal[
     "podcast",
@@ -49,8 +50,9 @@
 
 
 class SessionChatMessage(BaseModel):
-    role: Literal["user", "assistant"]
+    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
     content: str
+    role: Literal["user", "assistant"]
 
 
 class SessionChatRequest(BaseModel):
diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py
index ac75136..bb214e0 100644
--- a/src/utils/main_utils.py
+++ b/src/utils/main_utils.py
@@ -15,6 +15,7 @@
     SessionChatRequest,
     content_categories,
 )
+from src.utils.session_manager import SessionManager
 
 
 class GenerateAudioCastRequest(BaseModel):
@@ -36,21 +37,16 @@ class GenerateAudioCastResponse(BaseModel):
 def chat(session_id: str, request: SessionChatRequest):
     message = request.message
     content_category = request.content_category
+    db = SessionManager(session_id)
 
-    if session_id not in chat_sessions:
-        chat_sessions[session_id] = []
-
-    chat_sessions[session_id].append(message)
+    db._add_chat(message)
 
     def on_finish(text: str):
-        chat_sessions[session_id].append(
-            SessionChatMessage(role="assistant", content=text)
-        )
-        # log text and other metadata to database
+        db._add_chat(SessionChatMessage(role="assistant", content=text))
 
     generator = chat_request(
         content_category=content_category,
-        previous_messages=chat_sessions[session_id],
+        previous_messages=db._get_chats(),
         on_finish=on_finish,
     )
 
diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py
index 69c80ee..96c4305 100644
--- a/src/utils/session_manager.py
+++ b/src/utils/session_manager.py
@@ -70,9 +70,9 @@ def _get_chat(self, chat_id: str) -> SessionChatMessage | None:
 
         return [chat for chat in doc.get("chats") if chat.id == chat_id][0]
 
-    def _get_chats(self) -> List[SessionChatMessage] | None:
+    def _get_chats(self) -> List[SessionChatMessage]:
         doc = self._get_document(self.collection, self.doc_id)
         if not doc.exists:
-            return None
+            return []
 
         return doc.get("chats")

From f988cc5f745565d428dee3937e3e37f575b20d51 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 13:45:31 +0000
Subject: [PATCH 03/26] pass down session_id for a deterministic workflow

---
 app.py                        |  6 ++---
 pages/audiocast.py            |  4 ++--
 src/uis/audioui.py            |  6 ++---
 src/uis/chatui.py             |  7 +++---
 src/utils/chat_thread.py      | 43 +++++++++++++++++++++++++++--------
 src/utils/main_utils.py       | 24 ++++++++-----------
 src/utils/render_audiocast.py |  5 ++--
 src/utils/session_state.py    |  2 ++
 8 files changed, 60 insertions(+), 37 deletions(-)

diff --git a/app.py b/app.py
index 7bfdf36..d8b0108 100644
--- a/app.py
+++ b/app.py
@@ -17,7 +17,7 @@ async def main():
     # Sidebar for content type selection
     st.sidebar.title("Audiocast Info")
 
-    init_session_state()
+    session_id = init_session_state()
 
     if st.session_state.content_category:
         st.sidebar.subheader(
@@ -32,9 +32,9 @@ async def main():
     uichat = st.empty()
     if not st.session_state.user_specification:
         with uichat.container():
-            await chatui(uichat)
+            await chatui(session_id, uichat)
     else:
-        await audioui(uichat)
+        await audioui(session_id, uichat)
 
 
 if __name__ == "__main__":
diff --git a/pages/audiocast.py b/pages/audiocast.py
index 1666a9c..0dc3f85 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -14,7 +14,7 @@ def navigate_to_home():
 async def render_audiocast_page():
     st.set_page_config(page_title="Audiora | Share Page", page_icon="🎧")
 
-    audiocast_id = st.query_params.get("uuid")
+    audiocast_id = st.query_params.get("session_id")
 
     if audiocast_id:
         # Display audiocast content
@@ -34,7 +34,7 @@ async def render_audiocast_page():
             st.error(f"Error loading audiocast: {str(e)}")
     else:
         st.warning(
-            "Audiocast ID is missing in the URL. Expected URL format: ?uuid=your-audiocast-id"
+            "Audiocast ID is missing in the URL. Expected URL format: ?session_id=your-audiocast-id"
         )
 
         st.markdown("---")
diff --git a/src/uis/audioui.py b/src/uis/audioui.py
index d3b8188..1680323 100644
--- a/src/uis/audioui.py
+++ b/src/uis/audioui.py
@@ -5,7 +5,7 @@
 from src.utils.render_audiocast import render_audiocast
 
 
-async def audioui(uichat: DeltaGenerator):
+async def audioui(session_id: str, uichat: DeltaGenerator):
     """
     Audiocast interface
     """
@@ -17,7 +17,7 @@ async def audioui(uichat: DeltaGenerator):
 
         summary = st.session_state.user_specification
         content_category = st.session_state.content_category
-        await use_audiocast_request(summary, content_category)
+        await use_audiocast_request(session_id, summary, content_category)
     else:
         st.info("Audiocast generation completed!")
-        render_audiocast()
+        render_audiocast(session_id)
diff --git a/src/uis/chatui.py b/src/uis/chatui.py
index 31bf729..02dcfe1 100644
--- a/src/uis/chatui.py
+++ b/src/uis/chatui.py
@@ -10,7 +10,7 @@
 from src.utils.render_chat import render_chat_history
 
 
-async def chatui(uichat: DeltaGenerator):
+async def chatui(session_id: str, uichat: DeltaGenerator):
     """
     Chat interface
     """
@@ -27,12 +27,13 @@ async def chatui(uichat: DeltaGenerator):
         content_category = st.session_state.content_category
 
         if st.session_state.example_prompt:
-            handle_example_prompt(content_category)
+            prompt = st.session_state.example_prompt
+            handle_example_prompt(session_id, prompt, content_category)
 
         if st.session_state.prompt:
             prompt = st.session_state.prompt
             st.session_state.prompt = None
-            ai_message = handle_user_prompt(prompt, content_category)
+            ai_message = handle_user_prompt(session_id, prompt, content_category)
 
             if isinstance(ai_message, str):
                 await evaluate_final_response(ai_message, content_category)
diff --git a/src/utils/chat_thread.py b/src/utils/chat_thread.py
index 904287a..78b3561 100644
--- a/src/utils/chat_thread.py
+++ b/src/utils/chat_thread.py
@@ -14,10 +14,14 @@
 termination_suffix = "Please click the button below to start generating the audiocast."
 
 
-def generate_stream_response(prompt: str, content_category: ContentCategory):
+def generate_stream_response(
+    session_id: str,
+    prompt: str,
+    content_category: ContentCategory,
+):
     with st.spinner("Generating response..."):
         response_generator = chat(
-            st.session_state.chat_session_id,
+            session_id,
             SessionChatRequest(
                 message=SessionChatMessage(role="user", content=prompt),
                 content_category=content_category,
@@ -27,12 +31,17 @@ def generate_stream_response(prompt: str, content_category: ContentCategory):
     return response_generator
 
 
-def handle_example_prompt(content_category: ContentCategory):
+def handle_example_prompt(
+    session_id: str,
+    prompt: str,
+    content_category: ContentCategory,
+):
     """Handle selected example prompt"""
-    prompt = st.session_state.example_prompt
 
     with st.chat_message("assistant"):
-        response_generator = generate_stream_response(prompt, content_category)
+        response_generator = generate_stream_response(
+            session_id, prompt, content_category
+        )
         ai_message = st.write_stream(response_generator)
         st.session_state.example_prompt = None
 
@@ -45,12 +54,20 @@ def handle_example_prompt(content_category: ContentCategory):
             st.error("Failed to generate AI response. Please try again.")
 
 
-def handle_user_prompt(prompt: str, content_category: ContentCategory):
+def handle_user_prompt(
+    session_id: str,
+    prompt: str,
+    content_category: ContentCategory,
+):
     """
     Handle user input prompt
     """
     with st.chat_message("assistant"):
-        response_generator = generate_stream_response(prompt, content_category)
+        response_generator = generate_stream_response(
+            session_id,
+            prompt,
+            content_category,
+        )
         ai_message = st.write_stream(response_generator)
 
         if ai_message:
@@ -110,7 +127,11 @@ def onclick(summary: str):
             st.rerun()
 
 
-async def use_audiocast_request(summary: str, content_category: ContentCategory):
+async def use_audiocast_request(
+    session_id: str,
+    summary: str,
+    content_category: ContentCategory,
+):
     """
     Call audiocast creating workflow
 
@@ -121,7 +142,11 @@ async def use_audiocast_request(summary: str, content_category: ContentCategory)
     try:
         with st.spinner("Generating your audiocast..."):
             audiocast_response = await generate_audiocast(
-                GenerateAudioCastRequest(summary=summary, category=content_category)
+                GenerateAudioCastRequest(
+                    sessionId=session_id,
+                    summary=summary,
+                    category=content_category,
+                )
             )
             print(f"Generate AudioCast Response: {audiocast_response}")
 
diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py
index bb214e0..d29d6d5 100644
--- a/src/utils/main_utils.py
+++ b/src/utils/main_utils.py
@@ -1,6 +1,4 @@
-import uuid
 from pathlib import Path
-from typing import Dict, List
 
 import streamlit as st
 from pydantic import BaseModel
@@ -19,21 +17,17 @@
 
 
 class GenerateAudioCastRequest(BaseModel):
+    sessionId: str
     summary: str
     category: str
 
 
 class GenerateAudioCastResponse(BaseModel):
-    uuid: str
     url: str
     script: str
     source_content: str
 
 
-# Store chat sessions (in-memory for now, should be moved to a database in production)
-chat_sessions: Dict[str, List[SessionChatMessage]] = {}
-
-
 def chat(session_id: str, request: SessionChatRequest):
     message = request.message
     content_category = request.content_category
@@ -57,8 +51,10 @@ async def generate_audiocast(request: GenerateAudioCastRequest):
     """
     Generate an audiocast based on a summary of user's request
     """
+    session_id = request.sessionId
     summary = request.summary
     category = request.category
+
     if category not in content_categories:
         raise Exception("Invalid content category")
 
@@ -93,21 +89,21 @@ async def generate_audiocast(request: GenerateAudioCastRequest):
         AudioSynthesizer().enhance_audio_minimal(Path(output_file))
         print(f"output_file: {output_file}")
 
-    # unique ID for the audiocast
-    uniq_id = str(uuid.uuid4())
-
     # TODO: Use a background service
     # STEP 4: Ingest audio file to a storage service (e.g., GCS, S3)
     with container.container():
         try:
             container.info("Storing a copy of your audiocast...")
             storage_manager = StorageManager()
-            storage_manager.upload_audio_to_gcs(output_file, uniq_id)
+            storage_manager.upload_audio_to_gcs(output_file, session_id)
         except Exception as e:
             print(f"Error while storing audiocast: {str(e)}")
 
+    db = SessionManager(session_id)
+    db._update_source(source_content)
+    db._update_transcript(audio_script)
+
     response = GenerateAudioCastResponse(
-        uuid=uniq_id,
         url=output_file,
         script=audio_script,
         source_content=source_content,
@@ -116,10 +112,10 @@ async def generate_audiocast(request: GenerateAudioCastRequest):
     return response.model_dump()
 
 
-def get_audiocast_uri(uuid: str):
+def get_audiocast_uri(session_id: str):
     """
     Get the URI for the audiocast
     """
     storage_manager = StorageManager()
-    filepath = storage_manager.download_from_gcs(uuid)
+    filepath = storage_manager.download_from_gcs(session_id)
     return filepath
diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py
index 5daa902..ac75489 100644
--- a/src/utils/render_audiocast.py
+++ b/src/utils/render_audiocast.py
@@ -8,13 +8,12 @@
 
 
 class GenerateAudiocastDict(TypedDict):
-    uuid: str
     url: str
     script: str
     source_content: str
 
 
-def render_audiocast():
+def render_audiocast(session_id: str):
     """
     Render the audiocast based on the user's preferences
     - Display current audiocast if available
@@ -33,7 +32,7 @@ def render_audiocast():
     st.sidebar.subheader("Audiocast Source")
     st.sidebar.markdown(current_audiocast["source_content"])
 
-    share_url = f"{APP_URL}/audiocast?uuid={current_audiocast['uuid']}"
+    share_url = f"{APP_URL}/audiocast?session_id={session_id}"
     st.text_input("Share this audiocast:", share_url)
 
     share_col, restart_row = st.columns(2, vertical_alignment="bottom")
diff --git a/src/utils/session_state.py b/src/utils/session_state.py
index 1386e1e..d19164d 100644
--- a/src/utils/session_state.py
+++ b/src/utils/session_state.py
@@ -33,6 +33,8 @@ def init_session_state():
     if "current_audiocast" not in st.session_state:
         st.session_state.current_audiocast = None
 
+    return cast(str, st.session_state.chat_session_id)
+
 
 def reset_session():
     """

From 95c3538c8c65317f8d6bd6bf0084a6075020ac1e Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 14:16:41 +0000
Subject: [PATCH 04/26] handle conversion of chat object to/from a dict

---
 src/utils/main_utils.py      |  8 ++------
 src/utils/render_chat.py     |  2 +-
 src/utils/session_manager.py | 24 +++++++++++++++++++-----
 3 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py
index d29d6d5..c56ebe1 100644
--- a/src/utils/main_utils.py
+++ b/src/utils/main_utils.py
@@ -29,23 +29,19 @@ class GenerateAudioCastResponse(BaseModel):
 
 
 def chat(session_id: str, request: SessionChatRequest):
-    message = request.message
     content_category = request.content_category
     db = SessionManager(session_id)
-
-    db._add_chat(message)
+    db._add_chat(request.message)
 
     def on_finish(text: str):
         db._add_chat(SessionChatMessage(role="assistant", content=text))
 
-    generator = chat_request(
+    return chat_request(
         content_category=content_category,
         previous_messages=db._get_chats(),
         on_finish=on_finish,
     )
 
-    return generator
-
 
 async def generate_audiocast(request: GenerateAudioCastRequest):
     """
diff --git a/src/utils/render_chat.py b/src/utils/render_chat.py
index 2569b37..b891136 100644
--- a/src/utils/render_chat.py
+++ b/src/utils/render_chat.py
@@ -20,7 +20,7 @@ def on_value_change():
         with col1:
             st.selectbox(
                 "Select Content Category",
-                content_categories,
+                ["", *content_categories],
                 format_func=lambda x: x.title(),
                 key="selected_content_category",
                 on_change=on_value_change,
diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py
index 96c4305..f1e71fb 100644
--- a/src/utils/session_manager.py
+++ b/src/utils/session_manager.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, cast
 
 from src.services.firestore_sdk import (
     Collection,
@@ -48,7 +48,7 @@ def _update_transcript(self, transcript: str):
 
     def _add_chat(self, chat: SessionChatMessage):
         return self._update_document(
-            self.collection, self.doc_id, {"chats": arrayUnion(chat)}
+            self.collection, self.doc_id, {"chats": arrayUnion([chat.__dict__])}
         )
 
     def _delete_chat(self, chat_id: str):
@@ -60,7 +60,7 @@ def _delete_chat(self, chat_id: str):
         self._update_document(
             self.collection,
             self.doc_id,
-            {"chats": arrayRemove(chat_to_remove)},
+            {"chats": arrayRemove([chat_to_remove.__dict__])},
         )
 
     def _get_chat(self, chat_id: str) -> SessionChatMessage | None:
@@ -68,11 +68,25 @@ def _get_chat(self, chat_id: str) -> SessionChatMessage | None:
         if not doc.exists:
             return None
 
-        return [chat for chat in doc.get("chats") if chat.id == chat_id][0]
+        item = [chat for chat in doc.get("chats") if chat.id == chat_id][0]
+        if item:
+            return SessionChatMessage(
+                content=item["content"],
+                id=item["id"],
+                role=item["role"],
+            )
 
     def _get_chats(self) -> List[SessionChatMessage]:
         doc = self._get_document(self.collection, self.doc_id)
         if not doc.exists:
             return []
 
-        return doc.get("chats")
+        chats = cast(Dict, doc.get("chats"))
+        return [
+            SessionChatMessage(
+                content=chat["content"],
+                id=chat["id"],
+                role=chat["role"],
+            )
+            for chat in chats
+        ]

From c458fe067ecd6a1f61e769e2c43121d7bf49afa8 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 15:22:54 +0000
Subject: [PATCH 05/26] remove references to langchain

---
 src/utils/content_generator.py | 75 ----------------------------------
 1 file changed, 75 deletions(-)
 delete mode 100644 src/utils/content_generator.py

diff --git a/src/utils/content_generator.py b/src/utils/content_generator.py
deleted file mode 100644
index 6f289ef..0000000
--- a/src/utils/content_generator.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from typing import Dict, List
-
-from langchain.chains import LLMChain
-from langchain.llms import OpenAI
-from langchain.prompts import PromptTemplate
-
-
-class ContentGenerator:
-    def __init__(self):
-        self.llm = OpenAI(temperature=0.7)
-        self.prompt_templates = {
-            "story": PromptTemplate(
-                input_variables=["query"],
-                template="""Create an engaging story about {query}. 
-                Make it captivating and suitable for audio narration. 
-                Include vivid descriptions and natural dialogue.""",
-            ),
-            "podcast": PromptTemplate(
-                input_variables=["query"],
-                template="""Create an informative podcast script about {query}.
-                Structure it like a professional podcast with clear sections,
-                engaging facts, and natural transitions.""",
-            ),
-            "sermon": PromptTemplate(
-                input_variables=["query"],
-                template="""Create an inspiring sermon about {query}.
-                Include spiritual insights, relevant scriptures,
-                and practical applications for daily life.""",
-            ),
-            "science": PromptTemplate(
-                input_variables=["query"],
-                template="""Create an educational scientific explanation about {query}.
-                Make it engaging and accessible while maintaining accuracy.
-                Include recent research and fascinating details.""",
-            ),
-        }
-
-    def generate_content(
-        self, query: str, content_category: str, chat_history: List[Dict]
-    ) -> str:
-        # Get the appropriate prompt template
-        prompt_template = self.prompt_templates.get(content_category)
-        if not prompt_template:
-            raise ValueError(f"Invalid content type: {content_category}")
-
-        # Create and run the chain
-        chain = LLMChain(llm=self.llm, prompt=prompt_template)
-        response = chain.run(query=query)
-
-        return response
-
-    def refine_with_chat_history(self, content: str, chat_history: List[Dict]) -> str:
-        # Use chat history to refine the content if needed
-        relevant_context = "\n".join(
-            [
-                f"{msg['role']}: {msg['content']}"
-                for msg in chat_history[-3:]  # Use last 3 messages for context
-            ]
-        )
-
-        refine_prompt = PromptTemplate(
-            input_variables=["content", "context"],
-            template="""Given this conversation context:
-            {context}
-            
-            Please refine this content to better match the user's needs:
-            {content}
-            
-            Refined content:""",
-        )
-
-        chain = LLMChain(llm=self.llm, prompt=refine_prompt)
-        refined_content = chain.run(content=content, context=relevant_context)
-
-        return refined_content

From 38585feef8e8ae5952132ba87b297255c7cf54f8 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 15:23:43 +0000
Subject: [PATCH 06/26] reuse a previously downloaded audiofile if it's
 processable

---
 src/services/storage.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/services/storage.py b/src/services/storage.py
index 1580a9c..1e14bca 100644
--- a/src/services/storage.py
+++ b/src/services/storage.py
@@ -1,3 +1,4 @@
+import os
 from dataclasses import dataclass
 from io import BytesIO
 from pathlib import Path
@@ -5,6 +6,7 @@
 from uuid import uuid4
 
 from google.cloud import storage
+from pydub import AudioSegment
 
 from src.env_var import BUCKET_NAME
 
@@ -70,8 +72,15 @@ def download_from_gcs(self, filename: str):
         """
         blobname = f"{BLOB_BASE_URI}/{filename}"
         blob = bucket.blob(blobname)
-        tmp_file_path = f"/tmp/{str(uuid4())}"
 
-        blob.download_to_filename(tmp_file_path)
+        tmp_file_path = f"/tmp/{filename}"
+        if os.path.exists(tmp_file_path):
+            try:
+                audio = AudioSegment.from_file(tmp_file_path)
+                if audio.duration_seconds > 0:
+                    return tmp_file_path
+            except Exception:
+                os.remove(tmp_file_path)
 
+        blob.download_to_filename(tmp_file_path)
         return tmp_file_path

From 658f6afa8596a9fc93a8948fdccf692e4fb2e30e Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 15:24:24 +0000
Subject: [PATCH 07/26] render audiocast metadata on share page

---
 pages/audiocast.py            | 47 +++++++++++++++++++++++++++--------
 src/utils/main_utils.py       | 29 +++++++++++++++++++--
 src/utils/render_audiocast.py |  1 +
 src/utils/session_manager.py  | 20 +++++++++++++++
 4 files changed, 85 insertions(+), 12 deletions(-)

diff --git a/pages/audiocast.py b/pages/audiocast.py
index 0dc3f85..cfc7a6f 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -1,9 +1,11 @@
 import asyncio
 from pathlib import Path
 
+import pyperclip
 import streamlit as st
 
-from src.utils.main_utils import get_audiocast_uri
+from src.env_var import APP_URL
+from src.utils.main_utils import get_audiocast
 
 
 def navigate_to_home():
@@ -14,21 +16,46 @@ def navigate_to_home():
 async def render_audiocast_page():
     st.set_page_config(page_title="Audiora | Share Page", page_icon="🎧")
 
-    audiocast_id = st.query_params.get("session_id")
+    session_id = st.query_params.get("session_id")
 
-    if audiocast_id:
+    if session_id:
         # Display audiocast content
-        st.title("🎧 Audiocast Player")
-        st.write(f"Playing audiocast: {audiocast_id}")
+        st.title("🎧 Audiora")
+        st.subheader("Share Page ")
+
+        st.markdown(f"#### Viewing audiocast: {session_id}")
 
         try:
             with st.spinner("Loading audiocast..."):
-                audio_path = get_audiocast_uri(audiocast_id)
-                st.audio(audio_path)
+                audiocast = get_audiocast(session_id)
+
+                # Audio player
+                st.audio(audiocast["url"])
+
+                # Transcript
+                with st.expander("Show Transcript"):
+                    st.write(audiocast["script"])
+
+                # Metadata
+                st.sidebar.subheader("Audiocast Source")
+                st.sidebar.markdown(audiocast["source_content"])
+
+                share_url = f"{APP_URL}/audiocast?session_id={session_id}"
+                st.text_input("Share this audiocast:", share_url)
+
+                share_col, restart_row = st.columns(2, vertical_alignment="bottom")
+
+                with share_col:
+                    if st.button("Copy Share link", use_container_width=True):
+                        pyperclip.copy(share_url)
+                        st.session_state.show_copy_success = True
+
+                with restart_row:
+                    if st.button("Create your Audiocast", use_container_width=True):
+                        navigate_to_home()
 
-                # TODO: Fetch audiocast metadata from the database
-                st.subheader("Audiocast Details")
-                st.write("Created: 2024-03-20")
+                if audiocast["created_at"]:
+                    st.markdown(f"> Created: {audiocast["created_at"]}")
 
         except Exception as e:
             st.error(f"Error loading audiocast: {str(e)}")
diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py
index c56ebe1..6083a40 100644
--- a/src/utils/main_utils.py
+++ b/src/utils/main_utils.py
@@ -1,3 +1,4 @@
+from datetime import datetime
 from pathlib import Path
 
 import streamlit as st
@@ -26,6 +27,7 @@ class GenerateAudioCastResponse(BaseModel):
     url: str
     script: str
     source_content: str
+    created_at: str | None
 
 
 def chat(session_id: str, request: SessionChatRequest):
@@ -103,15 +105,38 @@ async def generate_audiocast(request: GenerateAudioCastRequest):
         url=output_file,
         script=audio_script,
         source_content=source_content,
+        created_at=datetime.now().strftime("%Y-%m-%d %H:%M"),
     )
 
     return response.model_dump()
 
 
-def get_audiocast_uri(session_id: str):
+def get_audiocast(session_id: str):
     """
     Get the URI for the audiocast
     """
     storage_manager = StorageManager()
     filepath = storage_manager.download_from_gcs(session_id)
-    return filepath
+
+    session_data = SessionManager(session_id).data()
+    if not session_data:
+        raise Exception(f"Audiocast not found for session_id: {session_id}")
+
+    metadata = session_data.metadata
+    source = metadata.source if metadata else ""
+    transcript = metadata.transcript if metadata else ""
+
+    created_at: str | None = None
+    if session_data.created_at:
+        created_at = datetime.fromisoformat(session_data.created_at).strftime(
+            "%Y-%m-%d %H:%M"
+        )
+
+    response = GenerateAudioCastResponse(
+        url=filepath,
+        script=transcript,
+        source_content=source,
+        created_at=created_at,
+    )
+
+    return response.model_dump()
diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py
index ac75489..b7000e0 100644
--- a/src/utils/render_audiocast.py
+++ b/src/utils/render_audiocast.py
@@ -11,6 +11,7 @@ class GenerateAudiocastDict(TypedDict):
     url: str
     script: str
     source_content: str
+    created_at: str | None
 
 
 def render_audiocast(session_id: str):
diff --git a/src/utils/session_manager.py b/src/utils/session_manager.py
index f1e71fb..3d9fc94 100644
--- a/src/utils/session_manager.py
+++ b/src/utils/session_manager.py
@@ -22,6 +22,7 @@ class SessionModel:
     id: str
     chats: List[SessionChatMessage]
     metadata: Optional[ChatMetadata]
+    created_at: Optional[str] = None
 
 
 class SessionManager(DBManager):
@@ -40,6 +41,25 @@ def __init__(self, session_id: str):
     def _update(self, data: Dict):
         return self._update_document(self.collection, self.doc_id, data)
 
+    def data(self) -> SessionModel | None:
+        """Return the stored session as a SessionModel, or None if absent/empty."""
+        doc = self._get_document(self.collection, self.doc_id)
+        data = doc.to_dict()
+        if not doc.exists or not data:
+            return None
+
+        # Tolerate missing fields; str(None) would yield the truthy string "None".
+        metadata, created_at = data.get("metadata") or {}, data.get("created_at")
+        return SessionModel(
+            id=data["id"],
+            chats=data["chats"],
+            metadata=ChatMetadata(
+                source=metadata.get("source", ""),
+                transcript=metadata.get("transcript", ""),
+            ),
+            created_at=str(created_at) if created_at else None,
+        )
+
     def _update_source(self, source: str):
         return self._update({"metadata.source": source})
 

From 6e0dcb8c494f5acb67e81a91f734d13671932bb3 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 16:23:05 +0000
Subject: [PATCH 08/26] cleanup

---
 src/utils/audio_manager_utils.py | 8 +++-----
 tests/__init__.py                | 0
 2 files changed, 3 insertions(+), 5 deletions(-)
 create mode 100644 tests/__init__.py

diff --git a/src/utils/audio_manager_utils.py b/src/utils/audio_manager_utils.py
index 1c73551..fe30fdc 100644
--- a/src/utils/audio_manager_utils.py
+++ b/src/utils/audio_manager_utils.py
@@ -41,9 +41,7 @@ def __init__(self) -> None:
 
     def _create_voice_mapping(self, tags: List[str], voices: List[Any]):
         """Create mapping of tags to voices"""
-        available_voices = voices[: len(tags)]
-        if len(available_voices) < len(tags):
-            available_voices = list(islice(cycle(voices), len(tags)))
+        available_voices = list(islice(cycle(voices), len(tags)))
         return dict(zip(tags, available_voices))
 
     def _prepare_speech_jobs(
@@ -120,8 +118,8 @@ def split_content(self, content: str, tags: List[str]) -> List[Tuple[str, str]]:
         # Regular expression pattern to match Tag0, Tag1, ..., TagN speaker dialogues
         matches = re.findall(r"<(Speaker\d+)>(.*?)</Speaker\d+>", content, re.DOTALL)
         return [
-            (str(person), " ".join(content.split()).strip())
-            for person, content in matches
+            (str(speaker), " ".join(content_part.split()).strip())
+            for speaker, content_part in matches
         ]
 
     @staticmethod
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29

From 271c611a229ff2b4fe47ddc4c473fee7f4d625e6 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 16:23:53 +0000
Subject: [PATCH 09/26] temp remove audio_enhancement

---
 src/utils/main_utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py
index 6083a40..c9c3537 100644
--- a/src/utils/main_utils.py
+++ b/src/utils/main_utils.py
@@ -1,12 +1,12 @@
 from datetime import datetime
-from pathlib import Path
 
 import streamlit as st
 from pydantic import BaseModel
 
 from src.services.storage import StorageManager
 from src.utils.audio_manager import AudioManager
-from src.utils.audio_synthesizer import AudioSynthesizer
+
+# from src.utils.audio_synthesizer import AudioSynthesizer
 from src.utils.audiocast_request import AudioScriptMaker, generate_source_content
 from src.utils.chat_request import chat_request
 from src.utils.chat_utils import (
@@ -83,8 +83,8 @@ async def generate_audiocast(request: GenerateAudioCastRequest):
         container.info("Generating audio...")
         output_file = await AudioManager().generate_speech(audio_script)
 
-        container.info("Enhancing audio quality...")
-        AudioSynthesizer().enhance_audio_minimal(Path(output_file))
+        # container.info("Enhancing audio quality...")
+        # AudioSynthesizer().enhance_audio_minimal(Path(output_file))
         print(f"output_file: {output_file}")
 
     # TODO: Use a background service

From 3420a46f031abe2c87c88f1a6f096921c6e5e0ee Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 18:18:22 +0000
Subject: [PATCH 10/26] sanitize audiocast transcript

---
 pages/audiocast.py            | 3 ++-
 src/utils/render_audiocast.py | 8 +++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/pages/audiocast.py b/pages/audiocast.py
index cfc7a6f..326964b 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -6,6 +6,7 @@
 
 from src.env_var import APP_URL
 from src.utils.main_utils import get_audiocast
+from src.utils.render_audiocast import parse_ai_script
 
 
 def navigate_to_home():
@@ -34,7 +35,7 @@ async def render_audiocast_page():
 
                 # Transcript
                 with st.expander("Show Transcript"):
-                    st.write(audiocast["script"])
+                    st.markdown(parse_ai_script(audiocast["script"]))
 
                 # Metadata
                 st.sidebar.subheader("Audiocast Source")
diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py
index b7000e0..556070a 100644
--- a/src/utils/render_audiocast.py
+++ b/src/utils/render_audiocast.py
@@ -1,3 +1,4 @@
+import re
 from typing import TypedDict
 
 import pyperclip
@@ -14,6 +15,11 @@ class GenerateAudiocastDict(TypedDict):
     created_at: str | None
 
 
+def parse_ai_script(ai_script: str):
+    matches = re.findall(r"<(Speaker\d+)>(.*?)</Speaker\d+>", ai_script, re.DOTALL)
+    return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches])
+
+
 def render_audiocast(session_id: str):
     """
     Render the audiocast based on the user's preferences
@@ -27,7 +33,7 @@ def render_audiocast(session_id: str):
 
     # Transcript
     with st.expander("Show Transcript"):
-        st.write(current_audiocast["script"])
+        st.markdown(parse_ai_script(current_audiocast["script"]))
 
     # Metadata
     st.sidebar.subheader("Audiocast Source")

From be5299f73803d041f9e5c60c7a0590dff8435e37 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 18:57:08 +0000
Subject: [PATCH 11/26] add elevenlabs client

---
 requirements.txt                  |  1 +
 src/services/elevenlabs_client.py | 11 +++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 src/services/elevenlabs_client.py

diff --git a/requirements.txt b/requirements.txt
index c2e38c3..cf5d21f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ asyncio
 
 openai
 anthropic
+elevenlabs
 
 pyperclip
 python-multipart
diff --git a/src/services/elevenlabs_client.py b/src/services/elevenlabs_client.py
new file mode 100644
index 0000000..0aa9559
--- /dev/null
+++ b/src/services/elevenlabs_client.py
@@ -0,0 +1,11 @@
+from elevenlabs.client import ElevenLabs
+
+from src.env_var import ELEVENLABS_API_KEY
+
+client = ElevenLabs(
+    api_key=ELEVENLABS_API_KEY,
+)
+
+
+def get_elevenlabs_client():
+    return client

From 421dbcbfa55aeae99a8ba460eddffed3601e07c2 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 18:59:30 +0000
Subject: [PATCH 12/26] add __text_to_speech_elevenlabs; cleanup

---
 src/utils/audio_manager.py         | 43 +++++++++++++----
 src/utils/audio_manager_utils.py   | 43 ++++-------------
 src/utils/generate_speech_utils.py | 76 ++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 42 deletions(-)
 create mode 100644 src/utils/generate_speech_utils.py

diff --git a/src/utils/audio_manager.py b/src/utils/audio_manager.py
index f07cf7b..6241037 100644
--- a/src/utils/audio_manager.py
+++ b/src/utils/audio_manager.py
@@ -4,16 +4,16 @@
 import re
 import uuid
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 from src.utils.audio_manager_utils import (
     AudioManagerConfig,
     AudioManagerSpeechGenerator,
     ContentSplitter,
-    openai_voices,
 )
 from src.utils.audio_synthesizer import AudioSynthesizer
 from src.utils.clean_tss_markup import clean_tss_markup
+from src.utils.generate_speech_utils import elevenlabs_voices, openai_voices
 
 logger = logging.getLogger(__name__)
 
@@ -57,23 +57,30 @@ async def text_to_speech(self, audio_script: str, output_file: str):
         tags = self._get_tags(audio_script)
         audio_script = clean_tss_markup(audio_script, tags)
 
+        nway_content = self.split_content(audio_script, tags)
+        print(f"nway_content: {nway_content}")
+
         if self.config.tts_provider == "openai":
-            return await self.__text_to_speech_openai(audio_script, output_file, tags)
+            return await self.__text_to_speech_openai(nway_content, output_file, tags)
+        elif self.config.tts_provider == "elevenlabs":
+            return await self.__text_to_speech_elevenlabs(
+                nway_content, output_file, tags
+            )
         else:
             raise Exception("Invalid TTS model specified")
 
     async def __text_to_speech_openai(
-        self, audio_script: str, output_file: str, tags: List[str]
+        self,
+        nway_content: List[Tuple[str, str]],
+        output_file: str,
+        tags: List[str],
     ):
         try:
-            nway_content = self.split_content(audio_script, tags)
-            print(f"nway_content: {nway_content}")
-
             jobs = self._prepare_speech_jobs(
                 nway_content, tags, openai_voices, self.config.temp_audio_dir
             )
 
-            audio_files = await self._process_speech_jobs(jobs)
+            audio_files = await self._process_speech_jobs(jobs, provider="openai")
             if not audio_files:
                 raise Exception("No audio files were generated")
 
@@ -83,6 +90,26 @@ async def __text_to_speech_openai(
         except Exception as e:
             raise Exception(f"Error converting text to speech with OpenAI: {str(e)}")
 
+    async def __text_to_speech_elevenlabs(
+        self, nway_content: List[Tuple[str, str]], output_file: str, tags: List[str]
+    ):
+        try:
+            jobs = self._prepare_speech_jobs(
+                nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir
+            )
+
+            audio_files = await self._process_speech_jobs(jobs, provider="elevenlabs")
+            if not audio_files:
+                raise Exception("No audio files were generated")
+
+            await self.__finalize(audio_files, output_file)
+            logger.info(f"Audio saved to {output_file}")
+
+        except Exception as e:
+            raise Exception(
+                f"Error converting text to speech with Elevenlabs: {str(e)}"
+            )
+
     async def __finalize(
         self, audio_files: List[str], output_file: str, enhance_audio=False
     ) -> None:
diff --git a/src/utils/audio_manager_utils.py b/src/utils/audio_manager_utils.py
index fe30fdc..a27dbd5 100644
--- a/src/utils/audio_manager_utils.py
+++ b/src/utils/audio_manager_utils.py
@@ -6,26 +6,14 @@
 from functools import partial
 from itertools import cycle, islice
 from pathlib import Path
-from typing import Any, List, Literal, Optional, Tuple
+from typing import Any, List, Optional, Tuple
 
-from src.services.openai_client import get_openai
-
-OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"]
-openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"]
-
-
-@dataclass
-class SpeechJob:
-    content: str
-    voice: OpenaiVoice
-    output_file: str
-    tag: str
-    index: int
+from src.utils.generate_speech_utils import GenerateSpeech, SpeechJob, TTSProvider
 
 
 @dataclass
 class AudioManagerConfig:
-    tts_provider: Optional[Literal["openai"]] = "openai"
+    tts_provider: Optional[TTSProvider] = "openai"
     temp_audio_dir: str = field(default_factory=lambda: "/tmp/audiocast")
     outdir_base: str = field(default_factory=lambda: "/tmp/audiocast/output")
 
@@ -73,27 +61,14 @@ def _prepare_speech_jobs(
 
         return jobs
 
-    def _generate_speech(self, job: SpeechJob) -> str:
-        try:
-            response = get_openai().audio.speech.create(
-                input=job.content,
-                model="tts-1-hd",
-                voice=job.voice,
-            )
-
-            with open(job.output_file, "wb") as file:
-                file.write(response.content)
-
-            print(f"Generated speech for tag {job.tag} at index {job.index}")
-            return job.output_file
-        except Exception as e:
-            print(f"Failed to generate speech for tag {job.tag}: {str(e)}")
-            return ""
-
-    async def _process_speech_jobs(self, jobs: List[SpeechJob]) -> List[str]:
+    async def _process_speech_jobs(
+        self, jobs: List[SpeechJob], provider: TTSProvider
+    ) -> List[str]:
         loop = asyncio.get_event_loop()
         tasks = [
-            loop.run_in_executor(self.executor, partial(self._generate_speech, job))
+            loop.run_in_executor(
+                self.executor, partial(GenerateSpeech(provider).run, job)
+            )
             for job in jobs
         ]
 
diff --git a/src/utils/generate_speech_utils.py b/src/utils/generate_speech_utils.py
new file mode 100644
index 0000000..29d62cf
--- /dev/null
+++ b/src/utils/generate_speech_utils.py
@@ -0,0 +1,76 @@
+from dataclasses import dataclass
+from io import BytesIO
+from typing import List, Literal
+
+from elevenlabs import VoiceSettings
+
+from src.services.elevenlabs_client import get_elevenlabs_client
+from src.services.openai_client import get_openai
+
+TTSProvider = Literal["openai", "elevenlabs"]
+OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"]
+openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"]
+
+ElevenLabsVoice = Literal[
+    "Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"
+]
+elevenlabs_voices = ["Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"]
+
+
+@dataclass
+class SpeechJob:
+    content: str
+    voice: OpenaiVoice
+    output_file: str
+    tag: str
+    index: int
+
+
+class GenerateSpeech:
+    provider: TTSProvider
+
+    def __init__(self, provider: TTSProvider):
+        self.provider = provider
+
+    def run(self, job: SpeechJob):
+        """Generate speech using the specified provider"""
+        try:
+            content = (
+                self.__use_openai(job)
+                if self.provider == "elevenlabs"
+                else self.__use_elevenlabs(job)
+            )
+
+            with open(job.output_file, "wb") as file:
+                file.write(content)
+
+            print(f"Generated speech for tag {job.tag} at index {job.index}")
+            return job.output_file
+        except Exception as e:
+            print(f"Failed to generate speech for tag {job.tag}: {str(e)}")
+            return ""
+
+    def __use_openai(self, job: SpeechJob):
+        response = get_openai().audio.speech.create(
+            input=job.content, model="tts-1-hd", voice=job.voice
+        )
+        return response.content
+
+    def __use_elevenlabs(self, job: SpeechJob):
+        response = get_elevenlabs_client().text_to_speech.convert(
+            voice_id=job.voice,
+            output_format="mp3_22050_32",
+            text=job.content,
+            model_id="eleven_turbo_v2_5",  # use the turbo model for low latency
+            voice_settings=VoiceSettings(
+                stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True
+            ),
+        )
+
+        buffer = BytesIO()
+        for chunk in response:
+            if chunk:
+                buffer.write(chunk)
+
+        buffer.seek(0)
+        return buffer.getvalue()

From ccd7f124d94bbc37706f37a21fb6f599ede49bf3 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 19:03:46 +0000
Subject: [PATCH 13/26] use dry in text_to_speech

---
 src/utils/audio_manager.py | 41 +++++++++++++-------------------------
 1 file changed, 14 insertions(+), 27 deletions(-)

diff --git a/src/utils/audio_manager.py b/src/utils/audio_manager.py
index 6241037..e4ee35c 100644
--- a/src/utils/audio_manager.py
+++ b/src/utils/audio_manager.py
@@ -42,7 +42,6 @@ async def generate_speech(self, audio_script: str):
         """
         output_file = f"{self.config.outdir_base}/{str(uuid.uuid4())}.mp3"
         await self.text_to_speech(audio_script, output_file)
-
         return output_file
 
     async def text_to_speech(self, audio_script: str, output_file: str):
@@ -61,50 +60,38 @@ async def text_to_speech(self, audio_script: str, output_file: str):
         print(f"nway_content: {nway_content}")
 
         if self.config.tts_provider == "openai":
-            return await self.__text_to_speech_openai(nway_content, output_file, tags)
+            audio_files = await self.__text_to_speech_openai(nway_content, tags)
         elif self.config.tts_provider == "elevenlabs":
-            return await self.__text_to_speech_elevenlabs(
-                nway_content, output_file, tags
-            )
+            audio_files = await self.__text_to_speech_elevenlabs(nway_content, tags)
         else:
             raise Exception("Invalid TTS model specified")
 
+        if not audio_files:
+            raise Exception("No audio files were generated")
+
+        await self.__finalize(audio_files, output_file)
+        logger.info(f"Audio saved to {output_file}")
+
     async def __text_to_speech_openai(
-        self,
-        nway_content: List[Tuple[str, str]],
-        output_file: str,
-        tags: List[str],
-    ):
+        self, nway_content: List[Tuple[str, str]], tags: List[str]
+    ) -> List[str]:
         try:
             jobs = self._prepare_speech_jobs(
                 nway_content, tags, openai_voices, self.config.temp_audio_dir
             )
 
-            audio_files = await self._process_speech_jobs(jobs, provider="openai")
-            if not audio_files:
-                raise Exception("No audio files were generated")
-
-            await self.__finalize(audio_files, output_file)
-            logger.info(f"Audio saved to {output_file}")
-
+            return await self._process_speech_jobs(jobs, provider="openai")
         except Exception as e:
             raise Exception(f"Error converting text to speech with OpenAI: {str(e)}")
 
     async def __text_to_speech_elevenlabs(
-        self, nway_content: List[Tuple[str, str]], output_file: str, tags: List[str]
-    ):
+        self, nway_content: List[Tuple[str, str]], tags: List[str]
+    ) -> List[str]:
         try:
             jobs = self._prepare_speech_jobs(
                 nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir
             )
-
-            audio_files = await self._process_speech_jobs(jobs, provider="elevenlabs")
-            if not audio_files:
-                raise Exception("No audio files were generated")
-
-            await self.__finalize(audio_files, output_file)
-            logger.info(f"Audio saved to {output_file}")
-
+            return await self._process_speech_jobs(jobs, provider="elevenlabs")
         except Exception as e:
             raise Exception(
                 f"Error converting text to speech with Elevenlabs: {str(e)}"

From ad76ef307e610bbc04cc006de153008407fb6534 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 19:11:33 +0000
Subject: [PATCH 14/26] only lint on python versions 3.11 and 3.12

---
 .github/workflows/deploy.yml | 11 +++++++----
 .github/workflows/ruff.yml   |  2 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 317f1f2..5b858cf 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -48,8 +48,9 @@ jobs:
     timeout-minutes: 5
     steps:
       - uses: actions/checkout@v4
-      - id: setup-python
-        uses: actions/setup-python@v5
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
           cache: "pip" # caching pip dependencies
@@ -69,6 +70,8 @@ jobs:
     timeout-minutes: 10
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
@@ -104,9 +107,9 @@ jobs:
       - run: curl -f "${{ steps.deploy.outputs.url }}"
       - uses: marocchino/sticky-pull-request-comment@v2
         with:
-          header: app
+          header: audiora
           message: |
-            app: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }})
+            audiora: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }})
 
   promote:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
index e921c07..b7791ba 100644
--- a/.github/workflows/ruff.yml
+++ b/.github/workflows/ruff.yml
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12"]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}

From f14f71d6f19f0d61313468fbc661136b89d6ce9d Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 19:24:41 +0000
Subject: [PATCH 15/26] add write permission to deploy job for
 marocchino/sticky-pull-request-comment

---
 .github/workflows/deploy.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 5b858cf..a99135b 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -68,6 +68,8 @@ jobs:
     runs-on: ubuntu-latest
     needs: [prepare, lint]
     timeout-minutes: 10
+    permissions: 
+      pull-requests: write 
     steps:
       - uses: actions/checkout@v4
         with:

From 162f8860a8e870650b8bd48d9f8532419867e92f Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Thu, 31 Oct 2024 20:36:49 +0000
Subject: [PATCH 16/26] use eleven_multilingual_v2 model for improved
 stability, accuracy and quality

---
 src/utils/audio_manager.py         |  2 +-
 src/utils/audio_manager_utils.py   | 10 ++++-
 src/utils/decorators.py            | 36 +++++++++++++++++
 src/utils/generate_speech_utils.py | 62 +++++++++++++++++++++---------
 src/utils/main_utils.py            | 10 ++---
 5 files changed, 93 insertions(+), 27 deletions(-)
 create mode 100644 src/utils/decorators.py

diff --git a/src/utils/audio_manager.py b/src/utils/audio_manager.py
index e4ee35c..848b162 100644
--- a/src/utils/audio_manager.py
+++ b/src/utils/audio_manager.py
@@ -55,8 +55,8 @@ async def text_to_speech(self, audio_script: str, output_file: str):
         """
         tags = self._get_tags(audio_script)
         audio_script = clean_tss_markup(audio_script, tags)
-
         nway_content = self.split_content(audio_script, tags)
+
         print(f"nway_content: {nway_content}")
 
         if self.config.tts_provider == "openai":
diff --git a/src/utils/audio_manager_utils.py b/src/utils/audio_manager_utils.py
index a27dbd5..e0e9be0 100644
--- a/src/utils/audio_manager_utils.py
+++ b/src/utils/audio_manager_utils.py
@@ -8,7 +8,13 @@
 from pathlib import Path
 from typing import Any, List, Optional, Tuple
 
-from src.utils.generate_speech_utils import GenerateSpeech, SpeechJob, TTSProvider
+from src.utils.generate_speech_utils import (
+    ElevenLabsVoice,
+    GenerateSpeech,
+    OpenaiVoice,
+    SpeechJob,
+    TTSProvider,
+)
 
 
 @dataclass
@@ -36,7 +42,7 @@ def _prepare_speech_jobs(
         self,
         nway_content: List[Tuple[str, str]],
         tags: List[str],
-        voices: List[Any],
+        voices: List[OpenaiVoice] | List[ElevenLabsVoice],
         temp_audio_dir: str,
     ):
         jobs: List[SpeechJob] = []
diff --git a/src/utils/decorators.py b/src/utils/decorators.py
new file mode 100644
index 0000000..25be2ad
--- /dev/null
+++ b/src/utils/decorators.py
@@ -0,0 +1,36 @@
+import asyncio
+from functools import wraps
+from time import time
+
+
+def process_time():
+    """Print process execution time for a given function"""
+
+    def decorator(func):
+        if asyncio.iscoroutinefunction(func):
+
+            @wraps(func)
+            async def async_wrapper(*args, **kwargs):
+                start_time = time()
+                response = await func(*args, **kwargs)
+
+                time_diff = f"{(time() - start_time):.2f}s"
+                print(f"Execution time for {func.__name__}: {time_diff}")
+
+                return response
+
+            return async_wrapper
+
+        @wraps(func)
+        def wrapper(*args, **kwargs):
+            start_time = time()
+            response = func(*args, **kwargs)
+
+            time_diff = f"{(time() - start_time):.2f}s"
+            print(f"Execution time for {func.__name__}: {time_diff}")
+
+            return response
+
+        return wrapper
+
+    return decorator
diff --git a/src/utils/generate_speech_utils.py b/src/utils/generate_speech_utils.py
index 29d62cf..6a35589 100644
--- a/src/utils/generate_speech_utils.py
+++ b/src/utils/generate_speech_utils.py
@@ -1,26 +1,44 @@
 from dataclasses import dataclass
 from io import BytesIO
-from typing import List, Literal
-
-from elevenlabs import VoiceSettings
+from typing import Dict, List, Literal
 
 from src.services.elevenlabs_client import get_elevenlabs_client
 from src.services.openai_client import get_openai
+from src.utils.decorators import process_time
 
 TTSProvider = Literal["openai", "elevenlabs"]
+
 OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"]
 openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"]
 
 ElevenLabsVoice = Literal[
     "Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"
 ]
-elevenlabs_voices = ["Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"]
+elevenlabs_voices: List[ElevenLabsVoice] = [
+    "Adam",
+    "Sarah",
+    "Laura",
+    "Charlie",
+    "George",
+    "Charlotte",
+    "Liam",
+]
+
+elevenlabs_voice_to_id: Dict[ElevenLabsVoice, str] = {
+    "Adam": "pNInz6obpgDQGcFmaJgB",
+    "Sarah": "EXAVITQu4vr4xnSDxMaL",
+    "Laura": "FGY2WhTYpPnrIDTdsKH5",
+    "Charlie": "IKne3meq5aSn9XLyUdCD",
+    "George": "JBFqnCBsd6RMkjVDRZzb",
+    "Charlotte": "XB0fDUnXU5powFXDhCwa",
+    "Liam": "TX3LPaxmHKxFdv7VOQHJ",
+}
 
 
 @dataclass
 class SpeechJob:
     content: str
-    voice: OpenaiVoice
+    voice: OpenaiVoice | ElevenLabsVoice
     output_file: str
     tag: str
     index: int
@@ -35,11 +53,10 @@ def __init__(self, provider: TTSProvider):
     def run(self, job: SpeechJob):
         """Generate speech using the specified provider"""
         try:
-            content = (
-                self.__use_openai(job)
-                if self.provider == "elevenlabs"
-                else self.__use_elevenlabs(job)
-            )
+            if self.provider == "elevenlabs":
+                content = self.__use_elevenlabs(job)
+            else:
+                content = self.__use_openai(job)
 
             with open(job.output_file, "wb") as file:
                 file.write(content)
@@ -47,24 +64,33 @@ def run(self, job: SpeechJob):
             print(f"Generated speech for tag {job.tag} at index {job.index}")
             return job.output_file
         except Exception as e:
-            print(f"Failed to generate speech for tag {job.tag}: {str(e)}")
+            print(f"Failed to generate speech for tag: {job.tag}. Error: {str(e)}")
             return ""
 
+    @process_time()
     def __use_openai(self, job: SpeechJob):
+        if job.voice not in openai_voices:
+            raise ValueError("Wrong voice specification for openai tts")
+
         response = get_openai().audio.speech.create(
             input=job.content, model="tts-1-hd", voice=job.voice
         )
         return response.content
 
+    @process_time()
     def __use_elevenlabs(self, job: SpeechJob):
-        response = get_elevenlabs_client().text_to_speech.convert(
-            voice_id=job.voice,
-            output_format="mp3_22050_32",
+        if job.voice not in elevenlabs_voices:
+            raise ValueError("Wrong voice specification for elevenlabs tts")
+        # response = get_elevenlabs_client().text_to_speech.convert(
+        #     model_id="eleven_turbo_v2_5", # use the turbo model for low latency
+        #     text=job.content,
+        #     voice_id=elevenlabs_voice_to_id[job.voice],
+        #     output_format="mp3_22050_32",
+        # )
+        response = get_elevenlabs_client().generate(
+            model="eleven_multilingual_v2",
             text=job.content,
-            model_id="eleven_turbo_v2_5",  # use the turbo model for low latency
-            voice_settings=VoiceSettings(
-                stability=0.0, similarity_boost=1.0, style=0.0, use_speaker_boost=True
-            ),
+            voice=job.voice,
         )
 
         buffer = BytesIO()
diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py
index c9c3537..8268538 100644
--- a/src/utils/main_utils.py
+++ b/src/utils/main_utils.py
@@ -4,9 +4,7 @@
 from pydantic import BaseModel
 
 from src.services.storage import StorageManager
-from src.utils.audio_manager import AudioManager
-
-# from src.utils.audio_synthesizer import AudioSynthesizer
+from src.utils.audio_manager import AudioManager, AudioManagerConfig
 from src.utils.audiocast_request import AudioScriptMaker, generate_source_content
 from src.utils.chat_request import chat_request
 from src.utils.chat_utils import (
@@ -81,10 +79,10 @@ async def generate_audiocast(request: GenerateAudioCastRequest):
     # STEP 3: Generate audio from the audio script
     with container.container():
         container.info("Generating audio...")
-        output_file = await AudioManager().generate_speech(audio_script)
+        output_file = await AudioManager(
+            custom_config=AudioManagerConfig(tts_provider="elevenlabs")
+        ).generate_speech(audio_script)
 
-        # container.info("Enhancing audio quality...")
-        # AudioSynthesizer().enhance_audio_minimal(Path(output_file))
         print(f"output_file: {output_file}")
 
     # TODO: Use a background service

From b5e7230c46274c89082884b7bd786b21c567dce0 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 17:09:57 +0000
Subject: [PATCH 17/26] Refactor audiocast page to include waveform
 visualization

---
 pages/audiocast.py          | 14 +++++++-
 requirements.txt            |  6 +++-
 src/utils/audio_to_video.py | 38 +++++++++++++++++++++
 src/utils/waveform_utils.py | 68 +++++++++++++++++++++++++++++++++++++
 4 files changed, 124 insertions(+), 2 deletions(-)
 create mode 100644 src/utils/audio_to_video.py
 create mode 100644 src/utils/waveform_utils.py

diff --git a/pages/audiocast.py b/pages/audiocast.py
index 326964b..ec9c8ed 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -7,6 +7,7 @@
 from src.env_var import APP_URL
 from src.utils.main_utils import get_audiocast
 from src.utils.render_audiocast import parse_ai_script
+from src.utils.waveform_utils import download_waveform_video, render_waveform
 
 
 def navigate_to_home():
@@ -23,13 +24,24 @@ async def render_audiocast_page():
         # Display audiocast content
         st.title("🎧 Audiora")
         st.subheader("Share Page ")
-
         st.markdown(f"#### Viewing audiocast: {session_id}")
 
         try:
             with st.spinner("Loading audiocast..."):
                 audiocast = get_audiocast(session_id)
 
+                # Create placeholder for visualization
+                if audiocast["url"]:
+                    viz = st.empty()
+                    with viz.container():
+                        try:
+                            video_path = render_waveform(session_id, audiocast["url"])
+                            if video_path:
+                                # Download video
+                                download_waveform_video(str(video_path))
+                        except Exception as e:
+                            st.error(f"Error rendering waveform: {str(e)}")
+
                 # Audio player
                 st.audio(audiocast["url"])
 
diff --git a/requirements.txt b/requirements.txt
index cf5d21f..2169a7a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,7 +11,7 @@ python-multipart
 python-slugify
 python-dotenv
 pydub
-
+pydantic
 
 firebase-admin
 google-auth
@@ -19,4 +19,8 @@ google-cloud-storage
 google-api-python-client
 google-generativeai
 
+ffmpeg-python
+seewav
+watchdog
+
 ruff
\ No newline at end of file
diff --git a/src/utils/audio_to_video.py b/src/utils/audio_to_video.py
new file mode 100644
index 0000000..4ae3fe8
--- /dev/null
+++ b/src/utils/audio_to_video.py
@@ -0,0 +1,38 @@
+import os
+import subprocess
+
+
+def create_video_from_audio(audio_path: str, image_path: str, output_path: str):
+    """Create a video from a looping still image (spectrogram) and an audio track."""
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-loop",
+        "1",
+        "-i",
+        image_path,
+        "-i",
+        audio_path,
+        "-c:v",
+        "libx264",
+        "-tune",
+        "stillimage",
+        "-c:a",
+        "aac",
+        "-b:a",
+        "192k",
+        "-pix_fmt",
+        "yuv420p",
+        "-shortest",
+        output_path,
+    ]
+
+    try:
+        subprocess.run(cmd, check=True)
+        os.remove(image_path)  # Clean up temporary spectrogram
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"Error during video creation: {str(e)}")
+        return False
+    except Exception as e:
+        print(f"Error during video creation: {str(e)}")
diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py
new file mode 100644
index 0000000..828962c
--- /dev/null
+++ b/src/utils/waveform_utils.py
@@ -0,0 +1,68 @@
+import os
+import tempfile
+from pathlib import Path
+
+import streamlit as st
+from pydub import AudioSegment
+from seewav import visualize
+
+
+def generate_waveform_video(output_path: Path, audio_path: str) -> Path:
+    """Generate waveform video from audio file using SeeWav."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        visualize(
+            audio=Path(audio_path),
+            tmp=Path(temp_dir),
+            out=output_path,
+            bars=60,
+            speed=4,
+            time=0.4,
+            rate=60,
+            size=(200, 200),
+            fg_color=(0.0, 1.0, 0.6),  # Bright green. Try 0.2 0.2 0.2 for dark green
+            bg_color=(0.05, 0.05, 0.05),  # Near black
+        )
+        return output_path
+
+
+def render_waveform(session_id: str, audio_path: str):
+    """Render waveform visualization from audio file."""
+    tmp_directory = Path("/tmp/audiora/waveforms")
+    tmp_directory.mkdir(parents=True, exist_ok=True)
+    tmp_vid_path = tmp_directory / f"{session_id}.mp4"
+
+    video_path = None
+    if os.path.exists(tmp_vid_path):
+        try:
+            mp4_version = AudioSegment.from_file(str(tmp_vid_path), "mp4")
+            if mp4_version.duration_seconds > 0:
+                video_path = tmp_vid_path
+        except Exception:
+            os.remove(tmp_vid_path)
+
+    try:
+        if not video_path:
+            with st.spinner("Generating waveform visualization..."):
+                video_path = generate_waveform_video(tmp_vid_path, audio_path)
+
+        with open(video_path, "rb") as video_file:
+            video_bytes = video_file.read()
+            st.video(video_bytes, autoplay=True)
+            # st.video(str(video_path), autoplay=True)
+
+        return video_path
+    except Exception as e:
+        st.error(f"Error generating visualization: {str(e)}")
+
+
+def download_waveform_video(video_path: str):
+    """Download video with waveform"""
+    gen_video, _ = st.columns(2)
+    with gen_video:
+        with open(video_path, "rb") as f:
+            st.download_button(
+                label="Download Video with waveform",
+                data=f,
+                file_name="audio_visualization.mp4",
+                mime="video/mp4",
+            )

From ec95a252768ff6a5ce45076c85669da39c82a7a6 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 17:29:17 +0000
Subject: [PATCH 18/26] put waveform viz in an expander

---
 pages/audiocast.py            | 12 ++++++++----
 src/utils/render_audiocast.py | 13 +++++++++++++
 src/utils/waveform_utils.py   |  5 +++--
 3 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/pages/audiocast.py b/pages/audiocast.py
index ec9c8ed..d5f60d3 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -30,8 +30,11 @@ async def render_audiocast_page():
             with st.spinner("Loading audiocast..."):
                 audiocast = get_audiocast(session_id)
 
+                # Audio player
+                st.audio(audiocast["url"])
+
                 # Create placeholder for visualization
-                if audiocast["url"]:
+                with st.expander("Show Waveform Visualization"):
                     viz = st.empty()
                     with viz.container():
                         try:
@@ -42,9 +45,6 @@ async def render_audiocast_page():
                         except Exception as e:
                             st.error(f"Error rendering waveform: {str(e)}")
 
-                # Audio player
-                st.audio(audiocast["url"])
-
                 # Transcript
                 with st.expander("Show Transcript"):
                     st.markdown(parse_ai_script(audiocast["script"]))
@@ -67,6 +67,10 @@ async def render_audiocast_page():
                     if st.button("Create your Audiocast", use_container_width=True):
                         navigate_to_home()
 
+                if st.session_state.get("show_copy_success", False):
+                    st.session_state.show_copy_succes = False
+                    st.success("Share link copied successfully!", icon="✅")
+
                 if audiocast["created_at"]:
                     st.markdown(f"> Created: {audiocast["created_at"]}")
 
diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py
index 556070a..0a2ec65 100644
--- a/src/utils/render_audiocast.py
+++ b/src/utils/render_audiocast.py
@@ -6,6 +6,7 @@
 
 from src.env_var import APP_URL
 from src.utils.session_state import reset_session
+from src.utils.waveform_utils import download_waveform_video, render_waveform
 
 
 class GenerateAudiocastDict(TypedDict):
@@ -31,6 +32,18 @@ def render_audiocast(session_id: str):
     # Audio player
     st.audio(current_audiocast["url"])
 
+    # Create placeholder for visualization
+    with st.expander("Show Waveform Visualization"):
+        viz = st.empty()
+        with viz.container():
+            try:
+                video_path = render_waveform(session_id, current_audiocast["url"])
+                if video_path:
+                    # Download video
+                    download_waveform_video(str(video_path))
+            except Exception as e:
+                st.error(f"Error rendering waveform: {str(e)}")
+
     # Transcript
     with st.expander("Show Transcript"):
         st.markdown(parse_ai_script(current_audiocast["script"]))
diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py
index 828962c..742c8b8 100644
--- a/src/utils/waveform_utils.py
+++ b/src/utils/waveform_utils.py
@@ -17,8 +17,8 @@ def generate_waveform_video(output_path: Path, audio_path: str) -> Path:
             bars=60,
             speed=4,
             time=0.4,
-            rate=60,
-            size=(200, 200),
+            # rate=60,
+            size=(120, 68),
             fg_color=(0.0, 1.0, 0.6),  # Bright green. Try 0.2 0.2 0.2 for dark green
             bg_color=(0.05, 0.05, 0.05),  # Near black
         )
@@ -65,4 +65,5 @@ def download_waveform_video(video_path: str):
                 data=f,
                 file_name="audio_visualization.mp4",
                 mime="video/mp4",
+                use_container_width=True
             )

From efedaa6a7644518ad7dee300abc0ade0cb98ef77 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 17:42:55 +0000
Subject: [PATCH 19/26] cleanup

---
 pages/audiocast.py                  | 49 ++++++-------------------
 src/utils/render_audiocast.py       | 47 +++---------------------
 src/utils/render_audiocast_utils.py | 55 +++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+), 80 deletions(-)
 create mode 100644 src/utils/render_audiocast_utils.py

diff --git a/pages/audiocast.py b/pages/audiocast.py
index d5f60d3..8292e47 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -1,18 +1,15 @@
 import asyncio
-from pathlib import Path
+from typing import cast
 
 import pyperclip
 import streamlit as st
 
-from src.env_var import APP_URL
 from src.utils.main_utils import get_audiocast
-from src.utils.render_audiocast import parse_ai_script
-from src.utils.waveform_utils import download_waveform_video, render_waveform
-
-
-def navigate_to_home():
-    main_script = str(Path(__file__).parent.parent / "app.py")
-    st.switch_page(main_script)
+from src.utils.render_audiocast_utils import (
+    GenerateAudiocastDict,
+    navigate_to_home,
+    render_audiocast_handler,
+)
 
 
 async def render_audiocast_page():
@@ -28,33 +25,9 @@ async def render_audiocast_page():
 
         try:
             with st.spinner("Loading audiocast..."):
-                audiocast = get_audiocast(session_id)
-
-                # Audio player
-                st.audio(audiocast["url"])
-
-                # Create placeholder for visualization
-                with st.expander("Show Waveform Visualization"):
-                    viz = st.empty()
-                    with viz.container():
-                        try:
-                            video_path = render_waveform(session_id, audiocast["url"])
-                            if video_path:
-                                # Download video
-                                download_waveform_video(str(video_path))
-                        except Exception as e:
-                            st.error(f"Error rendering waveform: {str(e)}")
-
-                # Transcript
-                with st.expander("Show Transcript"):
-                    st.markdown(parse_ai_script(audiocast["script"]))
-
-                # Metadata
-                st.sidebar.subheader("Audiocast Source")
-                st.sidebar.markdown(audiocast["source_content"])
-
-                share_url = f"{APP_URL}/audiocast?session_id={session_id}"
-                st.text_input("Share this audiocast:", share_url)
+                audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id))
+
+                share_url = render_audiocast_handler(session_id, audiocast)
 
                 share_col, restart_row = st.columns(2, vertical_alignment="bottom")
 
@@ -83,8 +56,8 @@ async def render_audiocast_page():
 
         st.markdown("---")
 
-        cola, _ = st.columns([3, 5])
-        with cola:
+        col1, _ = st.columns([3, 5])
+        with col1:
             if st.button("← Back to Home", use_container_width=True):
                 navigate_to_home()
 
diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py
index 0a2ec65..08227cf 100644
--- a/src/utils/render_audiocast.py
+++ b/src/utils/render_audiocast.py
@@ -1,24 +1,11 @@
-import re
-from typing import TypedDict
-
 import pyperclip
 import streamlit as st
 
-from src.env_var import APP_URL
+from src.utils.render_audiocast_utils import (
+    GenerateAudiocastDict,
+    render_audiocast_handler,
+)
 from src.utils.session_state import reset_session
-from src.utils.waveform_utils import download_waveform_video, render_waveform
-
-
-class GenerateAudiocastDict(TypedDict):
-    url: str
-    script: str
-    source_content: str
-    created_at: str | None
-
-
-def parse_ai_script(ai_script: str):
-    matches = re.findall(r"<(Speaker\d+)>(.*?)</Speaker\d+>", ai_script, re.DOTALL)
-    return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches])
 
 
 def render_audiocast(session_id: str):
@@ -29,31 +16,7 @@ def render_audiocast(session_id: str):
     st.markdown("#### Your Audiocast")
     current_audiocast: GenerateAudiocastDict = st.session_state.current_audiocast
 
-    # Audio player
-    st.audio(current_audiocast["url"])
-
-    # Create placeholder for visualization
-    with st.expander("Show Waveform Visualization"):
-        viz = st.empty()
-        with viz.container():
-            try:
-                video_path = render_waveform(session_id, current_audiocast["url"])
-                if video_path:
-                    # Download video
-                    download_waveform_video(str(video_path))
-            except Exception as e:
-                st.error(f"Error rendering waveform: {str(e)}")
-
-    # Transcript
-    with st.expander("Show Transcript"):
-        st.markdown(parse_ai_script(current_audiocast["script"]))
-
-    # Metadata
-    st.sidebar.subheader("Audiocast Source")
-    st.sidebar.markdown(current_audiocast["source_content"])
-
-    share_url = f"{APP_URL}/audiocast?session_id={session_id}"
-    st.text_input("Share this audiocast:", share_url)
+    share_url = render_audiocast_handler(session_id, current_audiocast)
 
     share_col, restart_row = st.columns(2, vertical_alignment="bottom")
 
diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py
new file mode 100644
index 0000000..3538c2c
--- /dev/null
+++ b/src/utils/render_audiocast_utils.py
@@ -0,0 +1,55 @@
+import re
+from pathlib import Path
+from typing import TypedDict
+
+import streamlit as st
+
+from src.env_var import APP_URL
+from src.utils.waveform_utils import download_waveform_video, render_waveform
+
+
+def navigate_to_home():
+    main_script = str(Path(__file__).parent.parent / "app.py")
+    st.switch_page(main_script)
+
+
+def parse_ai_script(ai_script: str):
+    matches = re.findall(r"<(Speaker\d+)>(.*?)</Speaker\d+>", ai_script, re.DOTALL)
+    return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches])
+
+
+class GenerateAudiocastDict(TypedDict):
+    url: str
+    script: str
+    source_content: str
+    created_at: str | None
+
+
+def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict):
+    # Audio player
+    st.audio(audiocast["url"])
+
+    # Create placeholder for visualization
+    with st.expander("Show Waveform Visualization"):
+        viz = st.empty()
+        with viz.container():
+            try:
+                video_path = render_waveform(session_id, audiocast["url"])
+                if video_path:
+                    # Download video
+                    download_waveform_video(str(video_path))
+            except Exception as e:
+                st.error(f"Error rendering waveform: {str(e)}")
+
+    # Transcript
+    with st.expander("Show Transcript"):
+        st.markdown(parse_ai_script(audiocast["script"]))
+
+    # Metadata
+    st.sidebar.subheader("Audiocast Source")
+    st.sidebar.markdown(audiocast["source_content"])
+
+    share_url = f"{APP_URL}/audiocast?session_id={session_id}"
+    st.text_input("Share this audiocast:", share_url)
+
+    return share_url

From d5308ba90e0ab163b4b92d3e7dc882bf400bc8c7 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 18:01:43 +0000
Subject: [PATCH 20/26] move download_waveform_video internal to
 render_waveform

---
 src/utils/render_audiocast_utils.py | 16 ++++++---------
 src/utils/waveform_utils.py         | 32 +++++++++++++++++++++++++----
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py
index 3538c2c..28a6d2f 100644
--- a/src/utils/render_audiocast_utils.py
+++ b/src/utils/render_audiocast_utils.py
@@ -5,7 +5,7 @@
 import streamlit as st
 
 from src.env_var import APP_URL
-from src.utils.waveform_utils import download_waveform_video, render_waveform
+from src.utils.waveform_utils import render_waveform
 
 
 def navigate_to_home():
@@ -31,15 +31,11 @@ def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict):
 
     # Create placeholder for visualization
     with st.expander("Show Waveform Visualization"):
-        viz = st.empty()
-        with viz.container():
-            try:
-                video_path = render_waveform(session_id, audiocast["url"])
-                if video_path:
-                    # Download video
-                    download_waveform_video(str(video_path))
-            except Exception as e:
-                st.error(f"Error rendering waveform: {str(e)}")
+        # with st.container():
+        try:
+            render_waveform(session_id, audiocast["url"])
+        except Exception as e:
+            st.error(f"Error rendering waveform: {str(e)}")
 
     # Transcript
     with st.expander("Show Transcript"):
diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py
index 742c8b8..1e0f39d 100644
--- a/src/utils/waveform_utils.py
+++ b/src/utils/waveform_utils.py
@@ -45,12 +45,36 @@ def render_waveform(session_id: str, audio_path: str):
             with st.spinner("Generating waveform visualization..."):
                 video_path = generate_waveform_video(tmp_vid_path, audio_path)
 
+        # st.video(str(video_path), autoplay=True)
         with open(video_path, "rb") as video_file:
             video_bytes = video_file.read()
-            st.video(video_bytes, autoplay=True)
-            # st.video(str(video_path), autoplay=True)
+            # st.video(video_bytes, autoplay=True)
+        st.markdown(
+            f"""
+            <style>
+            .video-container {{
+                position: relative;
+                width: 100%;
+                max-width: 640px; /* 16:9 aspect ratio for width */
+                height: 240px; /* Fixed height */
+            }}
+            .video-container video {{
+                width: 100%;
+                height: 100%;
+                object-fit: cover;
+                background-color: transparent; /* Set background to transparent */
+            }}
+            </style>
+            <div class="video-container">
+                <video autoplay loop muted playsinline style="border:none;">
+                    <source src="data:video/mp4;base64,{video_bytes.hex()}" type="video/mp4">
+                </video>
+            </div>
+            """,
+            unsafe_allow_html=True,
+        )
 
-        return video_path
+        download_waveform_video(str(video_path))
     except Exception as e:
         st.error(f"Error generating visualization: {str(e)}")
 
@@ -65,5 +89,5 @@ def download_waveform_video(video_path: str):
                 data=f,
                 file_name="audio_visualization.mp4",
                 mime="video/mp4",
-                use_container_width=True
+                use_container_width=True,
             )

From 753b3578b0821716afa5639b4c4aefbfb0cc4d82 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 19:01:17 +0000
Subject: [PATCH 21/26] allow toggling waveform visualizer

---
 pages/audiocast.py                  | 30 ++++++++++++------------
 src/utils/render_audiocast_utils.py | 36 +++++++++++++++++++++++------
 src/utils/waveform_utils.py         | 26 +--------------------
 3 files changed, 45 insertions(+), 47 deletions(-)

diff --git a/pages/audiocast.py b/pages/audiocast.py
index 8292e47..0307cc1 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -21,31 +21,31 @@ async def render_audiocast_page():
         # Display audiocast content
         st.title("🎧 Audiora")
         st.subheader("Share Page ")
-        st.markdown(f"#### Viewing audiocast: {session_id}")
+        st.markdown(f"##### Viewing audiocast: _{session_id}_")
 
         try:
             with st.spinner("Loading audiocast..."):
                 audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id))
 
-                share_url = render_audiocast_handler(session_id, audiocast)
+            share_url = render_audiocast_handler(session_id, audiocast)
 
-                share_col, restart_row = st.columns(2, vertical_alignment="bottom")
+            share_col, restart_row = st.columns(2, vertical_alignment="bottom")
 
-                with share_col:
-                    if st.button("Copy Share link", use_container_width=True):
-                        pyperclip.copy(share_url)
-                        st.session_state.show_copy_success = True
+            with share_col:
+                if st.button("Copy Share link", use_container_width=True):
+                    pyperclip.copy(share_url)
+                    st.session_state.show_copy_success = True
 
-                with restart_row:
-                    if st.button("Create your Audiocast", use_container_width=True):
-                        navigate_to_home()
+            with restart_row:
+                if st.button("Create your Audiocast", use_container_width=True):
+                    navigate_to_home()
 
-                if st.session_state.get("show_copy_success", False):
-                    st.session_state.show_copy_succes = False
-                    st.success("Share link copied successfully!", icon="✅")
+            if st.session_state.get("show_copy_success", False):
+                st.session_state.show_copy_succes = False
+                st.success("Share link copied successfully!", icon="✅")
 
-                if audiocast["created_at"]:
-                    st.markdown(f"> Created: {audiocast["created_at"]}")
+            if audiocast["created_at"]:
+                st.markdown(f"> Created: {audiocast["created_at"]}")
 
         except Exception as e:
             st.error(f"Error loading audiocast: {str(e)}")
diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py
index 28a6d2f..857a2ee 100644
--- a/src/utils/render_audiocast_utils.py
+++ b/src/utils/render_audiocast_utils.py
@@ -29,18 +29,40 @@ def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict):
     # Audio player
     st.audio(audiocast["url"])
 
-    # Create placeholder for visualization
-    with st.expander("Show Waveform Visualization"):
-        # with st.container():
-        try:
-            render_waveform(session_id, audiocast["url"])
-        except Exception as e:
-            st.error(f"Error rendering waveform: {str(e)}")
+    st.markdown("---")
+
+    col1, _ = st.columns([4, 1])
+    with col1:
+
+        def toggle_show_waveform():
+            st.session_state.show_waveform = not st.session_state.get("show_waveform")
+
+        button_label = (
+            "Hide Waveform Visualization"
+            if st.session_state.get("show_waveform")
+            else "Show Waveform Visualization"
+        )
+
+        st.button(
+            button_label,
+            on_click=toggle_show_waveform,
+            use_container_width=True,
+        )
+
+        if st.session_state.get("show_waveform"):
+            try:
+                render_waveform(session_id, audiocast["url"])
+            except Exception as e:
+                st.error(f"Error rendering waveform: {str(e)}")
+
+    st.markdown("---")
 
     # Transcript
     with st.expander("Show Transcript"):
         st.markdown(parse_ai_script(audiocast["script"]))
 
+    st.markdown("---")
+
     # Metadata
     st.sidebar.subheader("Audiocast Source")
     st.sidebar.markdown(audiocast["source_content"])
diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py
index 1e0f39d..7e98aac 100644
--- a/src/utils/waveform_utils.py
+++ b/src/utils/waveform_utils.py
@@ -48,31 +48,7 @@ def render_waveform(session_id: str, audio_path: str):
         # st.video(str(video_path), autoplay=True)
         with open(video_path, "rb") as video_file:
             video_bytes = video_file.read()
-            # st.video(video_bytes, autoplay=True)
-        st.markdown(
-            f"""
-            <style>
-            .video-container {{
-                position: relative;
-                width: 100%;
-                max-width: 640px; /* 16:9 aspect ratio for width */
-                height: 240px; /* Fixed height */
-            }}
-            .video-container video {{
-                width: 100%;
-                height: 100%;
-                object-fit: cover;
-                background-color: transparent; /* Set background to transparent */
-            }}
-            </style>
-            <div class="video-container">
-                <video autoplay loop muted playsinline style="border:none;">
-                    <source src="data:video/mp4;base64,{video_bytes.hex()}" type="video/mp4">
-                </video>
-            </div>
-            """,
-            unsafe_allow_html=True,
-        )
+            st.video(video_bytes, autoplay=True)
 
         download_waveform_video(str(video_path))
     except Exception as e:

From c5b9ac1eba58fcf85aabed61b4e546531bb9e6e1 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 19:39:08 +0000
Subject: [PATCH 22/26] save waveform to gcs

---
 src/services/storage.py     | 24 +++++++++++++++++-------
 src/utils/waveform_utils.py | 14 ++++++++++++++
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/services/storage.py b/src/services/storage.py
index 1e14bca..8710062 100644
--- a/src/services/storage.py
+++ b/src/services/storage.py
@@ -20,13 +20,6 @@ def listBlobs(prefix):
     return [blob for blob in blobs]
 
 
-def check_file_exists(root_path: str, filename: str):
-    """check if a file exists in the bucket"""
-    blobname = f"{root_path}/{filename}"
-    blobs = listBlobs(prefix=root_path)
-    return any(blob.name == blobname for blob in blobs)
-
-
 @dataclass
 class UploadItemParams:
     content_type: str
@@ -35,6 +28,12 @@ class UploadItemParams:
 
 
 class StorageManager:
+    def check_blob_exists(self, root_path: str, filename: str):
+        """check if a file exists in the bucket"""
+        blobname = f"{root_path}/{filename}"
+        blobs = listBlobs(prefix=root_path)
+        return any(blob.name == blobname for blob in blobs)
+
     def upload_to_gcs(
         self, item: str | Path | BytesIO, blobname: str, params: UploadItemParams
     ):
@@ -66,6 +65,17 @@ def upload_audio_to_gcs(self, tmp_audio_path: str, filename=str(uuid4())):
 
         return f"gs://{BUCKET_NAME}/{blobname}"
 
+    def upload_video_to_gcs(self, tmp_video_path: str, filename=str(uuid4())):
+        """upload video file to GCS"""
+        blobname = f"{BLOB_BASE_URI}/{filename}"
+        self.upload_to_gcs(
+            Path(tmp_video_path),
+            blobname,
+            UploadItemParams(content_type="video/mp4"),
+        )
+
+        return f"gs://{BUCKET_NAME}/{blobname}"
+
     def download_from_gcs(self, filename: str):
         """
         Download any item on GCS to disk
diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py
index 7e98aac..9da3f1c 100644
--- a/src/utils/waveform_utils.py
+++ b/src/utils/waveform_utils.py
@@ -6,6 +6,14 @@
 from pydub import AudioSegment
 from seewav import visualize
 
+from src.services.storage import BLOB_BASE_URI, StorageManager
+
+
+def save_waveform_video_to_gcs(session_id: str, video_path: str):
+    """Ingest waveform visualization to GCS."""
+    full_path = StorageManager().upload_video_to_gcs(video_path, f"{session_id}.mp4")
+    return full_path
+
 
 def generate_waveform_video(output_path: Path, audio_path: str) -> Path:
     """Generate waveform video from audio file using SeeWav."""
@@ -39,11 +47,17 @@ def render_waveform(session_id: str, audio_path: str):
                 video_path = tmp_vid_path
         except Exception:
             os.remove(tmp_vid_path)
+    else:
+        blobname = f"{session_id}.mp4"
+        exists = StorageManager().check_blob_exists(BLOB_BASE_URI, blobname)
+        if exists:
+            video_path = StorageManager().download_from_gcs(blobname)
 
     try:
         if not video_path:
             with st.spinner("Generating waveform visualization..."):
                 video_path = generate_waveform_video(tmp_vid_path, audio_path)
+                save_waveform_video_to_gcs(session_id, str(video_path))
 
         # st.video(str(video_path), autoplay=True)
         with open(video_path, "rb") as video_file:

From 5cad8448e70faac99a33a0128aaa3191f4089ff5 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 19:42:19 +0000
Subject: [PATCH 23/26] reshuffle dependencies in requirements.txt

---
 requirements.txt | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 2169a7a..f18a5f7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+pydantic
+
 streamlit
 httpx
 asyncio
@@ -6,12 +8,14 @@ openai
 anthropic
 elevenlabs
 
-pyperclip
 python-multipart
 python-slugify
 python-dotenv
+ffmpeg-python
+
 pydub
-pydantic
+pyperclip
+seewav
 
 firebase-admin
 google-auth
@@ -19,8 +23,5 @@ google-cloud-storage
 google-api-python-client
 google-generativeai
 
-ffmpeg-python
-seewav
 watchdog
-
 ruff
\ No newline at end of file

From 0a8b98f13241d5786acdd997bec6bfc4a423ad94 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 20:19:45 +0000
Subject: [PATCH 24/26] add pycairo to deps

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index f18a5f7..834ebf4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,6 +16,7 @@ ffmpeg-python
 pydub
 pyperclip
 seewav
+pycairo
 
 firebase-admin
 google-auth

From bf69acff6bdb76aee78cacb283bdfde5db70b408 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 20:21:34 +0000
Subject: [PATCH 25/26] fix reference to pyproject.toml

---
 pyprojec.toml => pyproject.toml | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename pyprojec.toml => pyproject.toml (100%)

diff --git a/pyprojec.toml b/pyproject.toml
similarity index 100%
rename from pyprojec.toml
rename to pyproject.toml

From e44aef6eec17683d359d05ca68d0c8c7d25a1a85 Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha <nwaughac@gmail.com>
Date: Fri, 1 Nov 2024 20:27:50 +0000
Subject: [PATCH 26/26] add deps for cairo library

---
 Dockerfile | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index fa87d0f..ae23133 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,9 +8,14 @@ ENV PYTHONDONTWRITEBYTECODE 1
 
 WORKDIR /app
 
-# Install FFmpeg and any other required dependencies
-RUN apt-get -yqq update && apt-get -yqq install build-essential ffmpeg && \
-    rm -rf /var/lib/apt/lists/*
+# Install FFmpeg, Cairo, and any other required dependencies
+RUN apt-get -yqq update && apt-get -yqq install \
+    build-essential \
+    ffmpeg \
+    libcairo2-dev \
+    pkg-config \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
 
 COPY . ./