Skip to content

Commit

Permalink
Render audio spectrogram (#6)
Browse files Browse the repository at this point in the history
* add firestore_sdk and session_manager

* save user chats on firestore

* pass down session_id for a deterministic workflow

* handle conversion of chat object to/from a dict

* remove references to langchain

* reuse a previously downloaded audiofile if it's processable

* render audiocast metadata on share page

* cleanup

* temp remove audio_enchancement

* sanitize audiocast transcript

* add elevenlabs client

* add __text_to_speech_elevenlabs; cleanup

* use dry in text_to_speech

* only lint on python versions 3.11 and 3.12

* add write permission to deploy job for marocchino/sticky-pull-request-comment

* use eleven_multilingual_v2 model for improved stability, accuracy and quality

* Refactor audiocast page to include waveform visualization

* put waveform viz in an expander

* cleanup

* move download_waveform_video internal to render_waveform

* allow toggling waveform visualizer

* save waveform to gcs

* reshuffle dependencies in requirements.txt

* add pycairo to deps

* fix reference to pyproject.toml

* add deps for cairo library
  • Loading branch information
nwaughachukwuma authored Nov 1, 2024
1 parent e8b0f4f commit 8664cbb
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 76 deletions.
11 changes: 8 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,14 @@ ENV PYTHONDONTWRITEBYTECODE 1

WORKDIR /app

# Install FFmpeg and any other required dependencies
RUN apt-get -yqq update && apt-get -yqq install build-essential ffmpeg && \
rm -rf /var/lib/apt/lists/*
# Install FFmpeg, Cairo, and any other required dependencies
RUN apt-get -yqq update && apt-get -yqq install \
build-essential \
ffmpeg \
libcairo2-dev \
pkg-config \
python3-dev \
&& rm -rf /var/lib/apt/lists/*

COPY . ./

Expand Down
59 changes: 24 additions & 35 deletions pages/audiocast.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import asyncio
from pathlib import Path
from typing import cast

import pyperclip
import streamlit as st

from src.env_var import APP_URL
from src.utils.main_utils import get_audiocast
from src.utils.render_audiocast import parse_ai_script


def navigate_to_home():
    """Switch the Streamlit app to the ``app.py`` script two directories up.

    The target path is built relative to this file and handed to
    ``st.switch_page`` as a string.
    """
    base_dir = Path(__file__).parent.parent
    st.switch_page(str(base_dir / "app.py"))
from src.utils.render_audiocast_utils import (
GenerateAudiocastDict,
navigate_to_home,
render_audiocast_handler,
)


async def render_audiocast_page():
Expand All @@ -23,40 +21,31 @@ async def render_audiocast_page():
# Display audiocast content
st.title("🎧 Audiora")
st.subheader("Share Page ")

st.markdown(f"#### Viewing audiocast: {session_id}")
st.markdown(f"##### Viewing audiocast: _{session_id}_")

try:
with st.spinner("Loading audiocast..."):
audiocast = get_audiocast(session_id)

# Audio player
st.audio(audiocast["url"])

# Transcript
with st.expander("Show Transcript"):
st.markdown(parse_ai_script(audiocast["script"]))
audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id))

# Metadata
st.sidebar.subheader("Audiocast Source")
st.sidebar.markdown(audiocast["source_content"])
share_url = render_audiocast_handler(session_id, audiocast)

share_url = f"{APP_URL}/audiocast?session_id={session_id}"
st.text_input("Share this audiocast:", share_url)
share_col, restart_row = st.columns(2, vertical_alignment="bottom")

share_col, restart_row = st.columns(2, vertical_alignment="bottom")
with share_col:
if st.button("Copy Share link", use_container_width=True):
pyperclip.copy(share_url)
st.session_state.show_copy_success = True

with share_col:
if st.button("Copy Share link", use_container_width=True):
pyperclip.copy(share_url)
st.session_state.show_copy_success = True
with restart_row:
if st.button("Create your Audiocast", use_container_width=True):
navigate_to_home()

with restart_row:
if st.button("Create your Audiocast", use_container_width=True):
navigate_to_home()
# Show the confirmation once, then clear the flag so it does not reappear on
# the next rerun. The flag must be reset under the same key that the
# "Copy Share link" button sets ("show_copy_success"); a misspelled key would
# leave the real flag set and repeat the message on every rerun.
if st.session_state.get("show_copy_success", False):
    st.session_state.show_copy_success = False
    st.success("Share link copied successfully!", icon="✅")

if audiocast["created_at"]:
    # Single quotes inside the f-string keep this line valid on Python < 3.12,
    # where reusing the enclosing quote character is a syntax error.
    st.markdown(f"> Created: {audiocast['created_at']}")
if audiocast["created_at"]:
    # Single quotes inside the f-string keep this line valid on Python < 3.12,
    # where reusing the enclosing quote character is a syntax error.
    st.markdown(f"> Created: {audiocast['created_at']}")

except Exception as e:
st.error(f"Error loading audiocast: {str(e)}")
Expand All @@ -67,8 +56,8 @@ async def render_audiocast_page():

st.markdown("---")

cola, _ = st.columns([3, 5])
with cola:
col1, _ = st.columns([3, 5])
with col1:
if st.button("← Back to Home", use_container_width=True):
navigate_to_home()

Expand Down
File renamed without changes.
10 changes: 8 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
pydantic

streamlit
httpx
asyncio
Expand All @@ -6,17 +8,21 @@ openai
anthropic
elevenlabs

pyperclip
python-multipart
python-slugify
python-dotenv
pydub
ffmpeg-python

pydub
pyperclip
seewav
pycairo

firebase-admin
google-auth
google-cloud-storage
google-api-python-client
google-generativeai

watchdog
ruff
24 changes: 17 additions & 7 deletions src/services/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@ def listBlobs(prefix):
return [blob for blob in blobs]


def check_file_exists(root_path: str, filename: str):
    """Return True if ``root_path/filename`` is among the blobs listed under ``root_path``."""
    blobname = f"{root_path}/{filename}"
    for candidate in listBlobs(prefix=root_path):
        if candidate.name == blobname:
            return True
    return False


@dataclass
class UploadItemParams:
content_type: str
Expand All @@ -35,6 +28,12 @@ class UploadItemParams:


class StorageManager:
def check_blob_exists(self, root_path: str, filename: str):
    """Report whether the blob ``root_path/filename`` is listed under ``root_path``."""
    blobname = f"{root_path}/{filename}"
    for listed in listBlobs(prefix=root_path):
        if listed.name == blobname:
            return True
    return False

def upload_to_gcs(
self, item: str | Path | BytesIO, blobname: str, params: UploadItemParams
):
Expand Down Expand Up @@ -66,6 +65,17 @@ def upload_audio_to_gcs(self, tmp_audio_path: str, filename=str(uuid4())):

return f"gs://{BUCKET_NAME}/{blobname}"

def upload_video_to_gcs(self, tmp_video_path: str, filename: str | None = None):
    """Upload the video at ``tmp_video_path`` to GCS with content type ``video/mp4``.

    Args:
        tmp_video_path: Path of the video file handed to ``upload_to_gcs``.
        filename: Blob name placed under ``BLOB_BASE_URI``. When omitted, a
            fresh UUID is generated for each call.

    Returns:
        The ``gs://`` URI of the target blob.
    """
    # A default of ``str(uuid4())`` in the signature is evaluated once, when
    # the function is defined, so every call without a filename would reuse
    # the same blob name. Generate the fallback per call instead.
    if filename is None:
        filename = str(uuid4())

    blobname = f"{BLOB_BASE_URI}/{filename}"
    self.upload_to_gcs(
        Path(tmp_video_path),
        blobname,
        UploadItemParams(content_type="video/mp4"),
    )

    return f"gs://{BUCKET_NAME}/{blobname}"

def download_from_gcs(self, filename: str):
"""
Download any item on GCS to disk
Expand Down
38 changes: 38 additions & 0 deletions src/utils/audio_to_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import subprocess


def create_video_from_audio(audio_path: str, image_path: str, output_path: str):
    """Create a video that shows a still image for the length of an audio track.

    Runs ``ffmpeg`` with ``image_path`` looped as the video input (libx264,
    ``-tune stillimage``, ``yuv420p``) and ``audio_path`` encoded as 192k AAC,
    stopping at the shorter stream (``-shortest``). An existing
    ``output_path`` is overwritten (``-y``). On success the image file is
    removed.

    Args:
        audio_path: Audio file used as the sound track.
        image_path: Image file used as the video frame; deleted on success.
        output_path: Destination of the generated video.

    Returns:
        True if ffmpeg completed successfully, False on any failure.
    """
    cmd = [
        "ffmpeg",
        "-y",
        "-loop",
        "1",
        "-i",
        image_path,
        "-i",
        audio_path,
        "-c:v",
        "libx264",
        "-tune",
        "stillimage",
        "-c:a",
        "aac",
        "-b:a",
        "192k",
        "-pix_fmt",
        "yuv420p",
        "-shortest",
        output_path,
    ]

    try:
        subprocess.run(cmd, check=True)
    except Exception as e:
        # Covers a non-zero ffmpeg exit (CalledProcessError) as well as a
        # missing ffmpeg binary or invalid arguments. Every failure path must
        # return False so callers never receive an implicit None.
        print(f"Error during video creation: {str(e)}")
        return False

    # Cleanup is best-effort: the video already exists at this point, so a
    # failure to delete the temporary image must not be reported as a failed
    # video creation.
    try:
        os.remove(image_path)  # Clean up temporary spectrogram
    except OSError as e:
        print(f"Could not remove temporary image: {str(e)}")

    return True
34 changes: 5 additions & 29 deletions src/utils/render_audiocast.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
import re
from typing import TypedDict

import pyperclip
import streamlit as st

from src.env_var import APP_URL
from src.utils.render_audiocast_utils import (
GenerateAudiocastDict,
render_audiocast_handler,
)
from src.utils.session_state import reset_session


class GenerateAudiocastDict(TypedDict):
# Typed shape of the audiocast record read by the rendering code in this file.
# Value passed to st.audio to play the audiocast.
url: str
# Script text containing <SpeakerN>...</SpeakerN> tags; parsed by parse_ai_script.
script: str
# Source material, rendered as markdown in the sidebar.
source_content: str
# Creation time as a string, or None; exact format is not shown here — TODO confirm.
created_at: str | None


def parse_ai_script(ai_script: str):
    """Convert ``<SpeakerN>...</SpeakerN>`` segments into markdown paragraphs.

    Each matched segment becomes ``**SpeakerN**: content``; segments are joined
    by a blank line. Text outside speaker tags is dropped.
    """
    speaker_pattern = re.compile(r"<(Speaker\d+)>(.*?)</Speaker\d+>", re.DOTALL)
    paragraphs = (
        f"**{speaker}**: {content}"
        for speaker, content in speaker_pattern.findall(ai_script)
    )
    return "\n\n".join(paragraphs)


def render_audiocast(session_id: str):
"""
Render the audiocast based on the user's preferences
Expand All @@ -28,19 +16,7 @@ def render_audiocast(session_id: str):
st.markdown("#### Your Audiocast")
current_audiocast: GenerateAudiocastDict = st.session_state.current_audiocast

# Audio player
st.audio(current_audiocast["url"])

# Transcript
with st.expander("Show Transcript"):
st.markdown(parse_ai_script(current_audiocast["script"]))

# Metadata
st.sidebar.subheader("Audiocast Source")
st.sidebar.markdown(current_audiocast["source_content"])

share_url = f"{APP_URL}/audiocast?session_id={session_id}"
st.text_input("Share this audiocast:", share_url)
share_url = render_audiocast_handler(session_id, current_audiocast)

share_col, restart_row = st.columns(2, vertical_alignment="bottom")

Expand Down
73 changes: 73 additions & 0 deletions src/utils/render_audiocast_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import re
from pathlib import Path
from typing import TypedDict

import streamlit as st

from src.env_var import APP_URL
from src.utils.waveform_utils import render_waveform


def navigate_to_home():
    """Switch the Streamlit app back to the main ``app.py`` page.

    This module lives in ``src/utils/``, so ``app.py`` is looked up three
    levels above this file (``parents[2]``). Going up only two levels, as the
    equivalent helper under ``pages/`` did, would resolve to ``src/app.py``.

    NOTE(review): assumes ``app.py`` sits at the repository root, which is
    where the ``pages/`` version of this helper pointed — confirm.
    """
    project_root = Path(__file__).resolve().parents[2]
    st.switch_page(str(project_root / "app.py"))


def parse_ai_script(ai_script: str):
    """Render speaker-tagged script text as markdown.

    Every ``<SpeakerN>...</SpeakerN>`` segment is turned into a
    ``**SpeakerN**: content`` paragraph; paragraphs are separated by a blank
    line. Anything outside the tags is ignored.
    """
    rendered = []
    for turn in re.finditer(
        r"<(Speaker\d+)>(.*?)</Speaker\d+>", ai_script, flags=re.DOTALL
    ):
        speaker, content = turn.groups()
        rendered.append(f"**{speaker}**: {content}")
    return "\n\n".join(rendered)


class GenerateAudiocastDict(TypedDict):
# Typed shape of the audiocast record consumed by render_audiocast_handler.
# Value passed to st.audio and to render_waveform.
url: str
# Script text containing <SpeakerN>...</SpeakerN> tags; parsed by parse_ai_script.
script: str
# Source material, rendered as markdown in the sidebar.
source_content: str
# Creation time as a string, or None; exact format is not shown here — TODO confirm.
created_at: str | None


def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict) -> str:
"""Render the shared parts of an audiocast view and return its share URL.

Draws, in order: the audio player, a button that toggles the waveform
visualization, the transcript expander, the source content in the sidebar,
and a text input holding the share link.

Args:
    session_id: Identifier used in the share URL and passed to
        ``render_waveform``.
    audiocast: Record whose ``url``, ``script`` and ``source_content``
        entries are read here.

Returns:
    The share URL built from ``APP_URL`` and ``session_id``.
"""
# Audio player
st.audio(audiocast["url"])

st.markdown("---")

col1, _ = st.columns([4, 1])
with col1:

# Button callback: flips the ``show_waveform`` flag in session state
# (a missing flag is treated as falsy, so the first click turns it on).
def toggle_show_waveform():
st.session_state.show_waveform = not st.session_state.get("show_waveform")

# Label reflects the current state of the toggle.
button_label = (
"Hide Waveform Visualization"
if st.session_state.get("show_waveform")
else "Show Waveform Visualization"
)

st.button(
button_label,
on_click=toggle_show_waveform,
use_container_width=True,
)

# Only render the waveform while the toggle is on; a failure is shown as an
# error message instead of aborting the rest of the page.
if st.session_state.get("show_waveform"):
try:
render_waveform(session_id, audiocast["url"])
except Exception as e:
st.error(f"Error rendering waveform: {str(e)}")

st.markdown("---")

# Transcript
with st.expander("Show Transcript"):
st.markdown(parse_ai_script(audiocast["script"]))

st.markdown("---")

# Metadata
st.sidebar.subheader("Audiocast Source")
st.sidebar.markdown(audiocast["source_content"])

# Share link for this session, shown in a text input and returned to the caller.
share_url = f"{APP_URL}/audiocast?session_id={session_id}"
st.text_input("Share this audiocast:", share_url)

return share_url
Loading

0 comments on commit 8664cbb

Please sign in to comment.