From 8664cbbfec2a64400da8c21fa5fb9e9a7ba10afb Mon Sep 17 00:00:00 2001
From: Chukwuma Nwaugha
Date: Fri, 1 Nov 2024 20:37:25 +0000
Subject: [PATCH] Render audio spectrogram (#6)

* add firestore_sdk and session_manager
* save user chats on firestore
* pass down session_id for a deterministic workflow
* handle conversion of chat object to/from a dict
* remove references to langchain
* reuse a previously downloaded audio file if it's processable
* render audiocast metadata on share page
* cleanup
* temp remove audio_enchancement
* sanitize audiocast transcript
* add elevenlabs client
* add __text_to_speech_elevenlabs; cleanup
* use DRY in text_to_speech
* only lint on python versions 3.11 and 3.12
* add write permission to deploy job for marocchino/sticky-pull-request-comment
* use eleven_multilingual_v2 model for improved stability, accuracy and quality
* Refactor audiocast page to include waveform visualization
* put waveform viz in an expander
* cleanup
* move download_waveform_video internal to render_waveform
* allow toggling waveform visualizer
* save waveform to GCS
* reshuffle dependencies in requirements.txt
* add pycairo to deps
* fix reference to pyproject.toml
* add deps for cairo library
---
 Dockerfile                          | 11 ++--
 pages/audiocast.py                  | 59 +++++++++-----------
 pyprojec.toml => pyproject.toml     |  0
 requirements.txt                    | 10 +++-
 src/services/storage.py             | 24 ++++++---
 src/utils/audio_to_video.py         | 38 +++++++++++++
 src/utils/render_audiocast.py       | 34 ++----------
 src/utils/render_audiocast_utils.py | 73 +++++++++++++++++++++++++
 src/utils/waveform_utils.py         | 83 +++++++++++++++++++++++++++++
 9 files changed, 256 insertions(+), 76 deletions(-)
 rename pyprojec.toml => pyproject.toml (100%)
 create mode 100644 src/utils/audio_to_video.py
 create mode 100644 src/utils/render_audiocast_utils.py
 create mode 100644 src/utils/waveform_utils.py

diff --git a/Dockerfile b/Dockerfile
index fa87d0f..ae23133 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -8,9 +8,14 @@ ENV PYTHONDONTWRITEBYTECODE 1
 
 WORKDIR /app
 
-# Install FFmpeg and any other required dependencies
-RUN apt-get -yqq update && apt-get -yqq install build-essential ffmpeg && \
-    rm -rf /var/lib/apt/lists/*
+# Install FFmpeg, Cairo, and any other required dependencies
+RUN apt-get -yqq update && apt-get -yqq install \
+    build-essential \
+    ffmpeg \
+    libcairo2-dev \
+    pkg-config \
+    python3-dev \
+    && rm -rf /var/lib/apt/lists/*
 
 COPY . ./
diff --git a/pages/audiocast.py b/pages/audiocast.py
index 326964b..0307cc1 100644
--- a/pages/audiocast.py
+++ b/pages/audiocast.py
@@ -1,17 +1,15 @@
 import asyncio
-from pathlib import Path
+from typing import cast
 
 import pyperclip
 import streamlit as st
 
-from src.env_var import APP_URL
 from src.utils.main_utils import get_audiocast
-from src.utils.render_audiocast import parse_ai_script
-
-
-def navigate_to_home():
-    main_script = str(Path(__file__).parent.parent / "app.py")
-    st.switch_page(main_script)
+from src.utils.render_audiocast_utils import (
+    GenerateAudiocastDict,
+    navigate_to_home,
+    render_audiocast_handler,
+)
 
 
 async def render_audiocast_page():
@@ -23,40 +21,31 @@ async def render_audiocast_page():
     # Display audiocast content
     st.title("🎧 Audiora")
     st.subheader("Share Page ")
-
-    st.markdown(f"#### Viewing audiocast: {session_id}")
+    st.markdown(f"##### Viewing audiocast: _{session_id}_")
 
     try:
         with st.spinner("Loading audiocast..."):
-            audiocast = get_audiocast(session_id)
-
-        # Audio player
-        st.audio(audiocast["url"])
-
-        # Transcript
-        with st.expander("Show Transcript"):
-            st.markdown(parse_ai_script(audiocast["script"]))
+            audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id))
 
-        # Metadata
-        st.sidebar.subheader("Audiocast Source")
-        st.sidebar.markdown(audiocast["source_content"])
+        share_url = render_audiocast_handler(session_id, audiocast)
 
-        share_url = f"{APP_URL}/audiocast?session_id={session_id}"
-        st.text_input("Share this audiocast:", share_url)
+        share_col, restart_row = st.columns(2, vertical_alignment="bottom")
 
-        share_col, restart_row = st.columns(2, vertical_alignment="bottom")
+        with share_col:
+            if st.button("Copy Share link", use_container_width=True):
+                pyperclip.copy(share_url)
+                st.session_state.show_copy_success = True
 
-        with share_col:
-            if st.button("Copy Share link", use_container_width=True):
-                pyperclip.copy(share_url)
-                st.session_state.show_copy_success = True
+        with restart_row:
+            if st.button("Create your Audiocast", use_container_width=True):
+                navigate_to_home()
 
-        with restart_row:
-            if st.button("Create your Audiocast", use_container_width=True):
-                navigate_to_home()
+        if st.session_state.get("show_copy_success", False):
+            st.session_state.show_copy_success = False
+            st.success("Share link copied successfully!", icon="✅")
 
-        if audiocast["created_at"]:
-            st.markdown(f"> Created: {audiocast["created_at"]}")
+        if audiocast["created_at"]:
+            st.markdown(f"> Created: {audiocast["created_at"]}")
     except Exception as e:
         st.error(f"Error loading audiocast: {str(e)}")
@@ -67,8 +56,8 @@ async def render_audiocast_page():
 
     st.markdown("---")
 
-    cola, _ = st.columns([3, 5])
-    with cola:
+    col1, _ = st.columns([3, 5])
+    with col1:
         if st.button("← Back to Home", use_container_width=True):
             navigate_to_home()
 
diff --git a/pyprojec.toml b/pyproject.toml
similarity index 100%
rename from pyprojec.toml
rename to pyproject.toml
diff --git a/requirements.txt b/requirements.txt
index cf5d21f..834ebf4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+pydantic
+
 streamlit
 httpx
 asyncio
@@ -6,12 +8,15 @@ openai
 anthropic
 elevenlabs
 
-pyperclip
 python-multipart
 python-slugify
 python-dotenv
-pydub
 
+ffmpeg-python
+pydub
+pyperclip
+seewav
+pycairo
 
 firebase-admin
 google-auth
@@ -19,4 +24,5 @@ google-cloud-storage
 google-api-python-client
 google-generativeai
 
+watchdog
 ruff
\ No newline at end of file
diff --git a/src/services/storage.py b/src/services/storage.py
index 1e14bca..8710062 100644
--- a/src/services/storage.py
+++ b/src/services/storage.py
@@ -20,13 +20,6 @@ def listBlobs(prefix):
     return [blob for blob in blobs]
 
 
-def check_file_exists(root_path: str, filename: str):
-    """check if a file exists in the bucket"""
-    blobname = f"{root_path}/{filename}"
-    blobs = listBlobs(prefix=root_path)
-    return any(blob.name == blobname for blob in blobs)
-
-
 @dataclass
 class UploadItemParams:
     content_type: str
@@ -35,6 +28,12 @@ class UploadItemParams:
 
 
 class StorageManager:
+    def check_blob_exists(self, root_path: str, filename: str):
+        """check if a file exists in the bucket"""
+        blobname = f"{root_path}/{filename}"
+        blobs = listBlobs(prefix=root_path)
+        return any(blob.name == blobname for blob in blobs)
+
     def upload_to_gcs(
         self, item: str | Path | BytesIO, blobname: str, params: UploadItemParams
     ):
@@ -66,6 +65,17 @@ def upload_audio_to_gcs(self, tmp_audio_path: str, filename=str(uuid4())):
 
         return f"gs://{BUCKET_NAME}/{blobname}"
 
+    def upload_video_to_gcs(self, tmp_video_path: str, filename=str(uuid4())):
+        """upload video file to GCS"""
+        blobname = f"{BLOB_BASE_URI}/{filename}"
+        self.upload_to_gcs(
+            Path(tmp_video_path),
+            blobname,
+            UploadItemParams(content_type="video/mp4"),
+        )
+
+        return f"gs://{BUCKET_NAME}/{blobname}"
+
     def download_from_gcs(self, filename: str):
         """
         Download any item on GCS to disk
diff --git a/src/utils/audio_to_video.py b/src/utils/audio_to_video.py
new file mode 100644
index 0000000..4ae3fe8
--- /dev/null
+++ b/src/utils/audio_to_video.py
@@ -0,0 +1,38 @@
+import os
+import subprocess
+
+
+def create_video_from_audio(audio_path: str, image_path: str, output_path: str):
+    """Create a video with audio and spectrogram overlay."""
+    cmd = [
+        "ffmpeg",
+        "-y",
+        "-loop",
+        "1",
+        "-i",
+        image_path,
+        "-i",
+        audio_path,
+        "-c:v",
+        "libx264",
+        "-tune",
+        "stillimage",
+        "-c:a",
+        "aac",
+        "-b:a",
+        "192k",
+        "-pix_fmt",
+        "yuv420p",
+        "-shortest",
+        output_path,
+    ]
+
+    try:
+        subprocess.run(cmd, check=True)
+        os.remove(image_path)  # Clean up temporary spectrogram
+        return True
+    except subprocess.CalledProcessError as e:
+        print(f"Error during video creation: {str(e)}")
+        return False
+    except Exception as e:
+        print(f"Error during video creation: {str(e)}")
diff --git a/src/utils/render_audiocast.py b/src/utils/render_audiocast.py
index 556070a..08227cf 100644
--- a/src/utils/render_audiocast.py
+++ b/src/utils/render_audiocast.py
@@ -1,25 +1,13 @@
-import re
-from typing import TypedDict
-
 import pyperclip
 import streamlit as st
 
-from src.env_var import APP_URL
 from src.utils.main_utils import get_audiocast
+from src.utils.render_audiocast_utils import (
+    GenerateAudiocastDict,
+    render_audiocast_handler,
+)
 from src.utils.session_state import reset_session
 
 
-class GenerateAudiocastDict(TypedDict):
-    url: str
-    script: str
-    source_content: str
-    created_at: str | None
-
-
-def parse_ai_script(ai_script: str):
-    matches = re.findall(r"<(Speaker\d+)>(.*?)</Speaker\d+>", ai_script, re.DOTALL)
-    return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches])
-
-
 def render_audiocast(session_id: str):
     """
     Render the audiocast based on the user's preferences
@@ -28,19 +16,7 @@ def render_audiocast(session_id: str):
     st.markdown("#### Your Audiocast")
     current_audiocast: GenerateAudiocastDict = st.session_state.current_audiocast
 
-    # Audio player
-    st.audio(current_audiocast["url"])
-
-    # Transcript
-    with st.expander("Show Transcript"):
-        st.markdown(parse_ai_script(current_audiocast["script"]))
-
-    # Metadata
-    st.sidebar.subheader("Audiocast Source")
-    st.sidebar.markdown(current_audiocast["source_content"])
-
-    share_url = f"{APP_URL}/audiocast?session_id={session_id}"
f"{APP_URL}/audiocast?session_id={session_id}" - st.text_input("Share this audiocast:", share_url) + share_url = render_audiocast_handler(session_id, current_audiocast) share_col, restart_row = st.columns(2, vertical_alignment="bottom") diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py new file mode 100644 index 0000000..857a2ee --- /dev/null +++ b/src/utils/render_audiocast_utils.py @@ -0,0 +1,73 @@ +import re +from pathlib import Path +from typing import TypedDict + +import streamlit as st + +from src.env_var import APP_URL +from src.utils.waveform_utils import render_waveform + + +def navigate_to_home(): + main_script = str(Path(__file__).parent.parent / "app.py") + st.switch_page(main_script) + + +def parse_ai_script(ai_script: str): + matches = re.findall(r"<(Speaker\d+)>(.*?)", ai_script, re.DOTALL) + return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches]) + + +class GenerateAudiocastDict(TypedDict): + url: str + script: str + source_content: str + created_at: str | None + + +def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict): + # Audio player + st.audio(audiocast["url"]) + + st.markdown("---") + + col1, _ = st.columns([4, 1]) + with col1: + + def toggle_show_waveform(): + st.session_state.show_waveform = not st.session_state.get("show_waveform") + + button_label = ( + "Hide Waveform Visualization" + if st.session_state.get("show_waveform") + else "Show Waveform Visualization" + ) + + st.button( + button_label, + on_click=toggle_show_waveform, + use_container_width=True, + ) + + if st.session_state.get("show_waveform"): + try: + render_waveform(session_id, audiocast["url"]) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") + + st.markdown("---") + + # Transcript + with st.expander("Show Transcript"): + st.markdown(parse_ai_script(audiocast["script"])) + + st.markdown("---") + + # Metadata + st.sidebar.subheader("Audiocast Source") + st.sidebar.markdown(audiocast["source_content"]) + + share_url = f"{APP_URL}/audiocast?session_id={session_id}" + st.text_input("Share this audiocast:", share_url) + + return share_url diff --git a/src/utils/waveform_utils.py b/src/utils/waveform_utils.py new file mode 100644 index 0000000..9da3f1c --- /dev/null +++ b/src/utils/waveform_utils.py @@ -0,0 +1,83 @@ +import os +import tempfile +from pathlib import Path + +import streamlit as st +from pydub import AudioSegment +from seewav import visualize + +from src.services.storage import BLOB_BASE_URI, StorageManager + + +def save_waveform_video_to_gcs(session_id: str, video_path: str): + """Ingest waveform visualization to GCS.""" + full_path = StorageManager().upload_video_to_gcs(video_path, f"{session_id}.mp4") + return full_path + + +def generate_waveform_video(output_path: Path, audio_path: str) -> Path: + """Generate waveform video from audio file using SeeWav.""" + with tempfile.TemporaryDirectory() as temp_dir: + visualize( + audio=Path(audio_path), + tmp=Path(temp_dir), + out=output_path, + bars=60, + speed=4, + time=0.4, + # rate=60, + size=(120, 68), + fg_color=(0.0, 1.0, 0.6), # Bright green. 
+            bg_color=(0.05, 0.05, 0.05),  # Near black
+        )
+        return output_path
+
+
+def render_waveform(session_id: str, audio_path: str):
+    """Render waveform visualization from audio file."""
+    tmp_directory = Path("/tmp/audiora/waveforms")
+    tmp_directory.mkdir(parents=True, exist_ok=True)
+    tmp_vid_path = tmp_directory / f"{session_id}.mp4"
+
+    video_path = None
+    if os.path.exists(tmp_vid_path):
+        try:
+            mp4_version = AudioSegment.from_file(str(tmp_vid_path), "mp4")
+            if mp4_version.duration_seconds > 0:
+                video_path = tmp_vid_path
+        except Exception:
+            os.remove(tmp_vid_path)
+    else:
+        blobname = f"{session_id}.mp4"
+        exists = StorageManager().check_blob_exists(BLOB_BASE_URI, blobname)
+        if exists:
+            video_path = StorageManager().download_from_gcs(blobname)
+
+    try:
+        if not video_path:
+            with st.spinner("Generating waveform visualization..."):
+                video_path = generate_waveform_video(tmp_vid_path, audio_path)
+                save_waveform_video_to_gcs(session_id, str(video_path))
+
+        # st.video(str(video_path), autoplay=True)
+        with open(video_path, "rb") as video_file:
+            video_bytes = video_file.read()
+            st.video(video_bytes, autoplay=True)
+
+        download_waveform_video(str(video_path))
+    except Exception as e:
+        st.error(f"Error generating visualization: {str(e)}")
+
+
+def download_waveform_video(video_path: str):
+    """Download video with waveform."""
+    gen_video, _ = st.columns(2)
+    with gen_video:
+        with open(video_path, "rb") as f:
+            st.download_button(
+                label="Download Video with waveform",
+                data=f,
+                file_name="audio_visualization.mp4",
+                mime="video/mp4",
+                use_container_width=True,
+            )
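
For orientation, a minimal usage sketch of the waveform pipeline introduced above: generate a seewav video for an audiocast, then persist it via the new video upload path. The session id, audio path, and output directory are hypothetical placeholders, and configured GCS credentials are assumed; this is not part of the patch itself.

    # sketch: generate a waveform video and upload it to GCS
    from pathlib import Path

    from src.utils.waveform_utils import generate_waveform_video, save_waveform_video_to_gcs

    session_id = "demo-session"                           # hypothetical session id
    audio_path = "/tmp/audiora/demo-session.mp3"          # hypothetical local audio file
    output_path = Path("/tmp/audiora/waveforms") / f"{session_id}.mp4"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Render the bar-style waveform video with seewav, then store it as video/mp4
    video_path = generate_waveform_video(output_path, audio_path)
    gcs_uri = save_waveform_video_to_gcs(session_id, str(video_path))
    print(gcs_uri)  # e.g. gs://<bucket>/<blob-base>/demo-session.mp4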