From ca62856c575222bb0922d1acbe412208a70f5bcd Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Wed, 6 Nov 2024 13:04:16 +0000 Subject: [PATCH] Move business logic to fastapi (#7) * add firestore_sdk ad session_manager * save user chats on firestore * pass down session_id for a deterministic workflow * handle conversion of chat object to/fro a dict * remove references to langchain * reuse a previously downloaded audiofile if it's processable * render audiocast metdata on share page * cleanup * temp remove audio_enchancement * sanitize audiocast transcript * add elevenlabs client * add __text_to_speech_elevenlabs; cleanup * use dry in text_to_speech * only lint on python versions 3.11 and 3.12 * add write permission to deploy job for marocchino/sticky-pull-request-comment * use eleven_multilingual_v2 model for improved stability, accuracy and quality * Refactor audiocast page to include waveform visualization * put waveform viz in an expander * cleanup * move download_waveform_video internal to render_waveform * allow toggling waveform visualizer * save waveform to gcs * reshuffle dependencies in requirements.txt * add pycairo to deps * add a server app * add header and process_time middlewares * make utils a shared folder * use root level services in app dir * write an improved init_project workflow * move streamlit related packages into app folder * convert app.src.utils into a module * install setup tools and create a setup file for utils_pkg * remove streamlit content from chat_utils * cleanup remnants * more cleanup * update package imports and initialization logic for utils_pkg * hit the backend in for chat workflow in generate_stream_response * add generate_audiocast endpoint handler * create a handler for get_audiocast by session_id * abstract waveform_utils; save wave_form on_generate audiocast * hit the server for generate_audiocast and get_audiocast * abstract audiocast inteface definitions * move audiocast_request to server * remove unused artifacts * save and update metadata.info * Remove audiocast page and add docstrings to session manager methods * revise github actions workflow files * add deploy_server.yml * rename deploy server workflow to reflect server deployment * upgrade to the latest version of pip * cleanup * remove the underscore in the service name * remove unused env variables * cleanup * add dockerfile for deploying the server * populate a dockerignore file on the fly * remove unused context from gcloudignore * localize the .*ignore files * move the dockerfile to root project pre-deploy step * specify corret path to streamlit index * Add debug output for Docker and gcloud ignore files; set no_traffic to false for deployment * add a gunicorn config file * rename server to api * rename all footprints of server to api * refactor: update workflow names and paths for cloudrun deployment * bump version to 1.0.1 and update project description * move chat_request to api utils * move api only utils into api folder * rename utils_pkg to shared_utils_pkg * add pre-commit config * refactor: clean up sidebar and improve metadata subscription handling * feat: add Streamlit configuration for browser and client settings * log the project_id in init_admin_sdk * remove reference_code from .gitignore --- .env.example | 3 +- .github/workflows/deploy_api.yml | 138 +++++++++++++++++ .../workflows/{deploy.yml => deploy_app.yml} | 46 ++++-- .gitignore | 7 +- .pre-commit-config.yaml | 18 +++ README.md | 1 + api/.dockerignore | 5 + api/.gcloudignore | 5 + api/Dockerfile | 34 +++++ api/gunicorn_config.py | 3 + api/src/__init__.py | 35 +++++ api/src/main.py | 89 +++++++++++ {src/services => api/src/utils}/__init__.py | 0 {src => api/src}/utils/audio_manager.py | 34 +---- {src => api/src}/utils/audio_manager_utils.py | 25 +--- {src => api/src}/utils/audio_synthesizer.py | 0 {src => api/src}/utils/audio_to_video.py | 0 {src => api/src}/utils/audiocast_request.py | 26 +--- {src => api/src}/utils/chat_request.py | 5 +- {src => api/src}/utils/clean_tss_markup.py | 0 api/src/utils/generate_audiocast.py | 79 ++++++++++ .../src}/utils/generate_speech_utils.py | 14 +- api/src/utils/get_audiocast.py | 41 +++++ .../src/utils/prompt_templates}/__init__.py | 0 .../prompt_templates/source_content_prompt.py | 2 +- .../prompt_templates/streamline_audio.py | 0 .../src}/utils/prompt_templates/tts_prompt.py | 2 +- {src => api/tests}/__init__.py | 2 +- {tests => api/tests}/test_speech_gen.py | 0 app/.dockerignore | 5 + app/.gcloudignore | 5 + app/.streamlit/config.toml | 6 + Dockerfile => app/Dockerfile | 3 +- .../prompt_templates => app}/__init__.py | 0 app/_init_project.py | 30 ++++ app.py => app/index.py | 22 ++- {pages => app/pages}/audiocast.py | 22 +-- {tests => app/src}/__init__.py | 0 app/src/utils/__init__.py | 0 {src => app/src}/utils/chat_thread.py | 42 +++--- {src => app/src}/utils/custom_components.py | 0 app/src/utils/display_example_cards.py | 43 ++++++ app/src/utils/metadata_subscription.py | 29 ++++ {src => app/src}/utils/render_audiocast.py | 0 app/src/utils/render_audiocast_utils.py | 78 ++++++++++ {src => app/src}/utils/render_chat.py | 3 +- .../src/utils/render_waveform.py | 39 +---- {src => app/src}/utils/session_state.py | 3 +- app/uis/__init__.py | 0 {src => app}/uis/audioui.py | 3 +- {src => app}/uis/chatui.py | 7 +- src/env_var.py => env_var.py | 4 +- pyproject.toml | 1 - requirements.txt | 9 +- services/__init__.py | 3 + services/admin_sdk.py | 10 ++ .../services => services}/anthropic_client.py | 2 +- .../elevenlabs_client.py | 2 +- {src/services => services}/firestore_sdk.py | 16 +- {src/services => services}/gemini_client.py | 2 +- {src/services => services}/openai_client.py | 2 +- services/setup.py | 10 ++ {src/services => services}/storage.py | 2 +- {src/utils => shared_utils_pkg}/__init__.py | 0 shared_utils_pkg/audiocast_utils.py | 25 ++++ {src/utils => shared_utils_pkg}/chat_utils.py | 41 ----- {src/utils => shared_utils_pkg}/decorators.py | 0 .../session_manager.py | 34 ++++- shared_utils_pkg/setup.py | 10 ++ shared_utils_pkg/waveform_utils.py | 57 +++++++ src/services/admin_sdk.py | 10 -- src/utils/main_utils.py | 140 ------------------ src/utils/render_audiocast_utils.py | 52 ------- 73 files changed, 932 insertions(+), 454 deletions(-) create mode 100644 .github/workflows/deploy_api.yml rename .github/workflows/{deploy.yml => deploy_app.yml} (79%) create mode 100644 .pre-commit-config.yaml create mode 100644 api/.dockerignore create mode 100644 api/.gcloudignore create mode 100644 api/Dockerfile create mode 100644 api/gunicorn_config.py create mode 100644 api/src/__init__.py create mode 100644 api/src/main.py rename {src/services => api/src/utils}/__init__.py (100%) rename {src => api/src}/utils/audio_manager.py (76%) rename {src => api/src}/utils/audio_manager_utils.py (85%) rename {src => api/src}/utils/audio_synthesizer.py (100%) rename {src => api/src}/utils/audio_to_video.py (100%) rename {src => api/src}/utils/audiocast_request.py (84%) rename {src => api/src}/utils/chat_request.py (95%) rename {src => api/src}/utils/clean_tss_markup.py (100%) create mode 100644 api/src/utils/generate_audiocast.py rename {src => api/src}/utils/generate_speech_utils.py (87%) create mode 100644 api/src/utils/get_audiocast.py rename {src/uis => api/src/utils/prompt_templates}/__init__.py (100%) rename {src => api/src}/utils/prompt_templates/source_content_prompt.py (95%) rename {src => api/src}/utils/prompt_templates/streamline_audio.py (100%) rename {src => api/src}/utils/prompt_templates/tts_prompt.py (98%) rename {src => api/tests}/__init__.py (56%) rename {tests => api/tests}/test_speech_gen.py (100%) create mode 100644 app/.dockerignore create mode 100644 app/.gcloudignore create mode 100644 app/.streamlit/config.toml rename Dockerfile => app/Dockerfile (88%) rename {src/utils/prompt_templates => app}/__init__.py (100%) create mode 100644 app/_init_project.py rename app.py => app/index.py (64%) rename {pages => app/pages}/audiocast.py (80%) rename {tests => app/src}/__init__.py (100%) create mode 100644 app/src/utils/__init__.py rename {src => app/src}/utils/chat_thread.py (82%) rename {src => app/src}/utils/custom_components.py (100%) create mode 100644 app/src/utils/display_example_cards.py create mode 100644 app/src/utils/metadata_subscription.py rename {src => app/src}/utils/render_audiocast.py (100%) create mode 100644 app/src/utils/render_audiocast_utils.py rename {src => app/src}/utils/render_chat.py (95%) rename src/utils/waveform_utils.py => app/src/utils/render_waveform.py (54%) rename {src => app/src}/utils/session_state.py (96%) create mode 100644 app/uis/__init__.py rename {src => app}/uis/audioui.py (99%) rename {src => app}/uis/chatui.py (91%) rename src/env_var.py => env_var.py (86%) create mode 100644 services/__init__.py create mode 100644 services/admin_sdk.py rename {src/services => services}/anthropic_client.py (86%) rename {src/services => services}/elevenlabs_client.py (77%) rename {src/services => services}/firestore_sdk.py (76%) rename {src/services => services}/gemini_client.py (96%) rename {src/services => services}/openai_client.py (69%) create mode 100644 services/setup.py rename {src/services => services}/storage.py (98%) rename {src/utils => shared_utils_pkg}/__init__.py (100%) create mode 100644 shared_utils_pkg/audiocast_utils.py rename {src/utils => shared_utils_pkg}/chat_utils.py (63%) rename {src/utils => shared_utils_pkg}/decorators.py (100%) rename {src/utils => shared_utils_pkg}/session_manager.py (73%) create mode 100644 shared_utils_pkg/setup.py create mode 100644 shared_utils_pkg/waveform_utils.py delete mode 100644 src/services/admin_sdk.py delete mode 100644 src/utils/main_utils.py delete mode 100644 src/utils/render_audiocast_utils.py diff --git a/.env.example b/.env.example index acfa63c..a9d64a2 100644 --- a/.env.example +++ b/.env.example @@ -7,4 +7,5 @@ GEMINI_API_KEY="your-gemini-api-key" ELEVENLABS_API_KEY="your-elevenlabs-api-key" BUCKET_NAME="your-bucket-name" -APP_URL=http://localhost:8501 \ No newline at end of file +APP_URL=http://localhost:8501 +API_URL=http://localhost:8585 \ No newline at end of file diff --git a/.github/workflows/deploy_api.yml b/.github/workflows/deploy_api.yml new file mode 100644 index 0000000..eec6901 --- /dev/null +++ b/.github/workflows/deploy_api.yml @@ -0,0 +1,138 @@ +name: Deploy api to cloudrun +on: + pull_request: + paths: + - "api/**" + - "services/**" + - "utils_pkg/**" + - ".github/workflows/deploy_api.yml" + push: + branches: + - main + paths: + - "api/**" + - "services/**" + - "utils_pkg/**" + - ".github/workflows/deploy_api.yml" + tags: + - "release-*" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + CI: true + PROJECT_ID: ${{ secrets.PROJECT_ID }} + SERVICE: audiora-api + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }} + +jobs: + prepare: + runs-on: ubuntu-latest + outputs: + SHORT_SHA: ${{ steps.prepare_env.outputs.SHORT_SHA }} + VERSION: ${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.number) || format('main-{0}', steps.prepare_env.outputs.SHORT_SHA) }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - id: prepare_env + run: | + echo "TAGGED=${{ startsWith(github.ref, 'refs/tags/api') }}" >> $GITHUB_OUTPUT + + SHORT_SHA=$(git rev-parse --short HEAD) + echo "SHORT_SHA=$SHORT_SHA" >> $GITHUB_OUTPUT + + RAW=$(git branch -r --contains $SHORT_SHA) + TAG_BRANCH_NAME="${RAW##*/}" + echo "TAG_BRANCH_NAME=$TAG_BRANCH_NAME" >> $GITHUB_OUTPUT + + deploy: + runs-on: ubuntu-latest + needs: [prepare] + timeout-minutes: 10 + permissions: + pull-requests: write + env: + VERSION: ${{ needs.prepare.outputs.VERSION }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: "pip" # caching pip dependencies + check-latest: true + + - uses: google-github-actions/auth@v2 + with: + credentials_json: "${{ secrets.GCP_SA_KEY }}" + + - uses: google-github-actions/setup-gcloud@v2 + - run: gcloud config set app/cloud_build_timeout 300 + + - run: | + cp ./api/.dockerignore .dockerignore + cp ./api/.gcloudignore .gcloudignore + cp ./api/Dockerfile Dockerfile + + cat .dockerignore + cat .gcloudignore + cat Dockerfile + + - name: Deploy to Cloud Run + id: deploy + uses: google-github-actions/deploy-cloudrun@v2 + with: + service: ${{ env.SERVICE }} + source: ./ + tag: ${{ env.VERSION }} + no_traffic: true + timeout: "5m" + gcloud_version: "482.0.0" + flags: "--allow-unauthenticated --memory=32Gi --cpu=8 --execution-environment=gen2 --concurrency=80 --max-instances=10" + env_vars: | + ENV=prod + OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} + GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} + ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} + ELEVENLABS_API_KEY=${{ secrets.ELEVENLABS_API_KEY }} + + - run: curl -f "${{ steps.deploy.outputs.url }}" + - uses: marocchino/sticky-pull-request-comment@v2 + with: + header: audiora-api + message: | + audiora-api: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }}) + + Docs: + Swagger: ${{ steps.deploy.outputs.url }}/docs + Redoc: ${{ steps.deploy.outputs.url }}/redoc + + promote: + runs-on: ubuntu-latest + if: ${{ github.ref == 'refs/heads/main' }} + needs: [prepare, deploy] + timeout-minutes: 3 + steps: + - uses: google-github-actions/auth@v2 + with: + credentials_json: "${{ secrets.GCP_SA_KEY }}" + - uses: google-github-actions/setup-gcloud@v2 + - run: gcloud run services update-traffic ${{ env.SERVICE }} --to-tags=${{ needs.prepare.outputs.VERSION }}=100 --project=${{ env.PROJECT_ID }} --region=us-central1 + + cleanup: + runs-on: ubuntu-latest + needs: promote + timeout-minutes: 3 + steps: + - uses: google-github-actions/auth@v2 + with: + credentials_json: "${{ secrets.GCP_SA_KEY }}" + - uses: google-github-actions/setup-gcloud@v2 + - name: cleanup older revisions + run: | + gcloud run revisions list --service=${{ env.SERVICE }} --project=${{ env.PROJECT_ID }} --region=us-central1 --sort-by=CREATE_TIME --format="value(REVISION)" | tail -n +4 | xargs -I {} gcloud run revisions delete {} --project=${{ env.PROJECT_ID }} --region=us-central1 --quiet diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy_app.yml similarity index 79% rename from .github/workflows/deploy.yml rename to .github/workflows/deploy_app.yml index 84b78c0..fb917c9 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy_app.yml @@ -1,19 +1,19 @@ -name: Deploy app to Google Cloudrun +name: Deploy app to cloudrun on: pull_request: paths: - - "src/**" - - ".streamlit" - - "Dockerfile" - - ".github/workflows/deploy.yml" + - "app/**" + - "services/**" + - "utils_pkg/**" + - ".github/workflows/deploy_app.yml" push: branches: - main paths: - - "src/**" - - ".streamlit" - - "Dockerfile" - - ".github/workflows/deploy.yml" + - "app/**" + - "services/**" + - "utils_pkg/**" + - ".github/workflows/deploy_app.yml" tags: - "release-*" @@ -34,6 +34,7 @@ jobs: prepare: runs-on: ubuntu-latest outputs: + SHORT_SHA: ${{ steps.prepare_env.outputs.SHORT_SHA }} VERSION: ${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.number) || format('main-{0}', steps.prepare_env.outputs.SHORT_SHA) }} steps: - uses: actions/checkout@v4 @@ -42,7 +43,13 @@ jobs: - id: prepare_env run: | echo "TAGGED=${{ startsWith(github.ref, 'refs/tags/api') }}" >> $GITHUB_OUTPUT - echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + SHORT_SHA=$(git rev-parse --short HEAD) + echo "SHORT_SHA=$SHORT_SHA" >> $GITHUB_OUTPUT + + RAW=$(git branch -r --contains $SHORT_SHA) + TAG_BRANCH_NAME="${RAW##*/}" + echo "TAG_BRANCH_NAME=$TAG_BRANCH_NAME" >> $GITHUB_OUTPUT deploy: runs-on: ubuntu-latest @@ -50,10 +57,10 @@ jobs: timeout-minutes: 10 permissions: pull-requests: write + env: + VERSION: ${{ needs.prepare.outputs.VERSION }} steps: - uses: actions/checkout@v4 - with: - fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: "3.12" @@ -67,15 +74,25 @@ jobs: - uses: google-github-actions/setup-gcloud@v2 - run: gcloud config set app/cloud_build_timeout 300 + - run: | + cp ./app/.dockerignore .dockerignore + cp ./app/.gcloudignore .gcloudignore + cp ./app/Dockerfile Dockerfile + + cat .dockerignore + cat .gcloudignore + cat Dockerfile + - id: deploy uses: google-github-actions/deploy-cloudrun@v2 with: service: ${{ env.SERVICE }} source: ./ - tag: ${{ needs.prepare.outputs.VERSION }} + tag: ${{ env.VERSION }} no_traffic: true timeout: "5m" gcloud_version: "482.0.0" + flags: "--allow-unauthenticated --memory=32Gi --cpu=8 --execution-environment=gen2 --concurrency=80 --max-instances=10" env_vars: | ENV=prod OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} @@ -83,8 +100,6 @@ jobs: ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} ELEVENLABS_API_KEY=${{ secrets.ELEVENLABS_API_KEY }} - flags: "--allow-unauthenticated --memory=32Gi --cpu=8 --execution-environment=gen2 --concurrency=80 --max-instances=10" - - run: curl -f "${{ steps.deploy.outputs.url }}" - uses: marocchino/sticky-pull-request-comment@v2 with: @@ -101,7 +116,6 @@ jobs: - uses: google-github-actions/auth@v2 with: credentials_json: "${{ secrets.GCP_SA_KEY }}" - - uses: google-github-actions/setup-gcloud@v2 - run: gcloud run services update-traffic ${{ env.SERVICE }} --to-tags=${{ needs.prepare.outputs.VERSION }}=100 --project=${{ env.PROJECT_ID }} --region=us-central1 diff --git a/.gitignore b/.gitignore index d4e04e5..3bdf731 100644 --- a/.gitignore +++ b/.gitignore @@ -7,10 +7,7 @@ __pycache__ .env.* !.env.example -reference_code - keys/ - +.ruff_cache/ node_modules/ - -.DS_Store \ No newline at end of file +.DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..c3d5aee --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,18 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files + - id: requirements-txt-fixer + - id: check-yaml + - id: check-toml + - id: check-xml + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.7.2 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format diff --git a/README.md b/README.md index 248ef0a..a12c0fc 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,7 @@ ANTHROPIC_API_KEY="your-anthropic-api-key" GEMINI_API_KEY="your-gemini-api-key" ELEVENLABS_API_KEY="your-elevenlabs-api-key" APP_URL="http://localhost:8080" +API_URL="http://localhost:8585" ``` 4. Launch the application: diff --git a/api/.dockerignore b/api/.dockerignore new file mode 100644 index 0000000..7db7938 --- /dev/null +++ b/api/.dockerignore @@ -0,0 +1,5 @@ +.streamlit/ +.vscode/ +app/ +tests/ +.env.* \ No newline at end of file diff --git a/api/.gcloudignore b/api/.gcloudignore new file mode 100644 index 0000000..7db7938 --- /dev/null +++ b/api/.gcloudignore @@ -0,0 +1,5 @@ +.streamlit/ +.vscode/ +app/ +tests/ +.env.* \ No newline at end of file diff --git a/api/Dockerfile b/api/Dockerfile new file mode 100644 index 0000000..4b2998c --- /dev/null +++ b/api/Dockerfile @@ -0,0 +1,34 @@ +# Use the official lightweight Python image. +# https://hub.docker.com/_/python +FROM python:3.12-slim + +# Allow statements and log messages to immediately appear in the Knative logs +ENV PYTHONUNBUFFERED True +ENV PYTHONDONTWRITEBYTECODE 1 + +WORKDIR /app + +# Install FFmpeg, Cairo, and any other required dependencies +RUN apt-get -yqq update && apt-get -yqq install \ + build-essential \ + ffmpeg \ + libcairo2-dev \ + pkg-config \ + python3-dev \ + && rm -rf /var/lib/apt/lists/* + +COPY . ./ +# RUN rm -rf ./app + +# Install production dependencies. +RUN pip install --no-cache-dir -r requirements.txt +RUN pip install pycairo + +ENV HOST '0.0.0.0' +ENV WORKER 4 +ENV THREADS 16 +ENV TIMEOUT 0 + +EXPOSE 8080 + +CMD exec gunicorn -k uvicorn.workers.UvicornWorker -b :$PORT -w $WORKER --threads $THREADS -t $TIMEOUT --preload api.src.main:app diff --git a/api/gunicorn_config.py b/api/gunicorn_config.py new file mode 100644 index 0000000..550fc52 --- /dev/null +++ b/api/gunicorn_config.py @@ -0,0 +1,3 @@ +bind = "0.0.0.0:8080" +workers = 4 +threads = 16 diff --git a/api/src/__init__.py b/api/src/__init__.py new file mode 100644 index 0000000..8c8bce3 --- /dev/null +++ b/api/src/__init__.py @@ -0,0 +1,35 @@ +import asyncio +import os +import sys +from pathlib import Path + +import uvloop +from dotenv import load_dotenv + +## Begin: Add the project root to sys.path +project_root = Path(__file__).parent.parent.resolve() +sys.path.append(str(project_root)) + +asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) +print("running uvloop as event loop policy for asyncio") + +load_dotenv() + + +def init_shared_packages(paths: list[str]): + # Add the shared module directory to the Python path + for path in paths: + pkg_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", path)) + sys.path.append(pkg_path) + + +def print_project_meta(): + print(f"Project root: {project_root}") + print(f"Python version: {sys.version}") + print(f"Current working directory: {os.getcwd()}") + + +init_shared_packages(["services", "utils_pkg"]) + + +print_project_meta() diff --git a/api/src/main.py b/api/src/main.py new file mode 100644 index 0000000..610bd5d --- /dev/null +++ b/api/src/main.py @@ -0,0 +1,89 @@ +from time import time +from typing import Any, Callable, Generator + +from fastapi import BackgroundTasks, FastAPI, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi_utilities import add_timer_middleware + +from src.utils.chat_request import chat_request +from src.utils.generate_audiocast import ( + GenerateAudioCastRequest, + GenerateAudioCastResponse, + generate_audiocast, +) +from src.utils.get_audiocast import get_audiocast +from shared_utils_pkg.chat_utils import ( + SessionChatMessage, + SessionChatRequest, +) +from shared_utils_pkg.session_manager import SessionManager + +app = FastAPI(title="Audiora", version="1.0.0") + +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + expose_headers=["*"], +) + +add_timer_middleware(app, show_avg=True) + + +@app.middleware("http") +async def inject_exec_time_header(request: Request, call_next: Callable): + """add request execution time header""" + start_time = time() + response = await call_next(request) + response.headers["X-Execution-Time"] = f"{(time() - start_time):.2f}s" + return response + + +@app.middleware("http") +async def log_request_headers(request: Request, call_next: Callable): + """log request headers""" + print("Request headers: %s", request.headers) + return await call_next(request) + + +@app.get("/") +async def root(): + return {"message": "Hello World"} + + +@app.post("/chat/{session_id}", response_model=Generator[str, Any, None]) +async def chat_endpoint(session_id: str, request: SessionChatRequest, background_tasks: BackgroundTasks): + content_category = request.content_category + db = SessionManager(session_id) + db._add_chat(request.message) + + def on_finish(text: str): + background_tasks.add_task( + db._add_chat, + SessionChatMessage(role="assistant", content=text), + ) + + response = chat_request( + content_category=content_category, + previous_messages=db._get_chats(), + on_finish=on_finish, + ) + + return response + + +@app.post("/audiocast/generate", response_model=GenerateAudioCastResponse) +async def generate_audiocast_endpoint( + request: GenerateAudioCastRequest, + background_tasks: BackgroundTasks, +): + result = await generate_audiocast(request, background_tasks) + return result + + +@app.get("/audiocast/{session_id}", response_model=GenerateAudioCastResponse) +async def get_audiocast_endpoint(session_id: str): + result = get_audiocast(session_id) + return result diff --git a/src/services/__init__.py b/api/src/utils/__init__.py similarity index 100% rename from src/services/__init__.py rename to api/src/utils/__init__.py diff --git a/src/utils/audio_manager.py b/api/src/utils/audio_manager.py similarity index 76% rename from src/utils/audio_manager.py rename to api/src/utils/audio_manager.py index 848b162..9b048bf 100644 --- a/src/utils/audio_manager.py +++ b/api/src/utils/audio_manager.py @@ -22,11 +22,7 @@ class AudioManager(AudioManagerSpeechGenerator, ContentSplitter): def __init__(self, custom_config: Optional[AudioManagerConfig] = None): super().__init__() - self.config = ( - AudioManagerConfig(**custom_config.__dict__) - if custom_config - else AudioManagerConfig() - ) + self.config = AudioManagerConfig(**custom_config.__dict__) if custom_config else AudioManagerConfig() self.config.ensure_directories() def _get_tags(self, audio_script: str) -> List[str]: @@ -72,34 +68,22 @@ async def text_to_speech(self, audio_script: str, output_file: str): await self.__finalize(audio_files, output_file) logger.info(f"Audio saved to {output_file}") - async def __text_to_speech_openai( - self, nway_content: List[Tuple[str, str]], tags: List[str] - ) -> List[str]: + async def __text_to_speech_openai(self, nway_content: List[Tuple[str, str]], tags: List[str]) -> List[str]: try: - jobs = self._prepare_speech_jobs( - nway_content, tags, openai_voices, self.config.temp_audio_dir - ) + jobs = self._prepare_speech_jobs(nway_content, tags, openai_voices, self.config.temp_audio_dir) return await self._process_speech_jobs(jobs, provider="openai") except Exception as e: raise Exception(f"Error converting text to speech with OpenAI: {str(e)}") - async def __text_to_speech_elevenlabs( - self, nway_content: List[Tuple[str, str]], tags: List[str] - ) -> List[str]: + async def __text_to_speech_elevenlabs(self, nway_content: List[Tuple[str, str]], tags: List[str]) -> List[str]: try: - jobs = self._prepare_speech_jobs( - nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir - ) + jobs = self._prepare_speech_jobs(nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir) return await self._process_speech_jobs(jobs, provider="elevenlabs") except Exception as e: - raise Exception( - f"Error converting text to speech with Elevenlabs: {str(e)}" - ) + raise Exception(f"Error converting text to speech with Elevenlabs: {str(e)}") - async def __finalize( - self, audio_files: List[str], output_file: str, enhance_audio=False - ) -> None: + async def __finalize(self, audio_files: List[str], output_file: str, enhance_audio=False) -> None: """ Merge and enhance audio files and save the final output. - Run audio processing in thread pool to avoid blocking @@ -111,9 +95,7 @@ async def __finalize( synthesizer = AudioSynthesizer() await asyncio.get_event_loop().run_in_executor( self.executor, - lambda: synthesizer.merge_audio_files( - self.config.temp_audio_dir, output_file - ), + lambda: synthesizer.merge_audio_files(self.config.temp_audio_dir, output_file), ) if enhance_audio: await asyncio.get_event_loop().run_in_executor( diff --git a/src/utils/audio_manager_utils.py b/api/src/utils/audio_manager_utils.py similarity index 85% rename from src/utils/audio_manager_utils.py rename to api/src/utils/audio_manager_utils.py index e0e9be0..9480003 100644 --- a/src/utils/audio_manager_utils.py +++ b/api/src/utils/audio_manager_utils.py @@ -20,8 +20,8 @@ @dataclass class AudioManagerConfig: tts_provider: Optional[TTSProvider] = "openai" - temp_audio_dir: str = field(default_factory=lambda: "/tmp/audiocast") - outdir_base: str = field(default_factory=lambda: "/tmp/audiocast/output") + temp_audio_dir: str = field(default_factory=lambda: "/tmp/audiora") + outdir_base: str = field(default_factory=lambda: "/tmp/audiora/output") def ensure_directories(self) -> None: """Ensure all required directories exist""" @@ -67,16 +67,9 @@ def _prepare_speech_jobs( return jobs - async def _process_speech_jobs( - self, jobs: List[SpeechJob], provider: TTSProvider - ) -> List[str]: + async def _process_speech_jobs(self, jobs: List[SpeechJob], provider: TTSProvider) -> List[str]: loop = asyncio.get_event_loop() - tasks = [ - loop.run_in_executor( - self.executor, partial(GenerateSpeech(provider).run, job) - ) - for job in jobs - ] + tasks = [loop.run_in_executor(self.executor, partial(GenerateSpeech(provider).run, job)) for job in jobs] results = await asyncio.gather(*tasks) audio_files = [f for f in results if f and os.path.exists(f)] @@ -98,10 +91,7 @@ def split_content(self, content: str, tags: List[str]) -> List[Tuple[str, str]]: # Regular expression pattern to match Tag0, Tag1, ..., TagN speaker dialogues matches = re.findall(r"<(Speaker\d+)>(.*?)", content, re.DOTALL) - return [ - (str(speaker), " ".join(content_part.split()).strip()) - for speaker, content_part in matches - ] + return [(str(speaker), " ".join(content_part.split()).strip()) for speaker, content_part in matches] @staticmethod def validate_content(content: str, tags: List[str]) -> bool: @@ -118,10 +108,7 @@ def validate_content(content: str, tags: List[str]) -> bool: opening_count = content.count(f"<{tag}>") closing_count = content.count(f"") if opening_count != closing_count: - print( - f"Mismatched tags for {tag}: " - f"{opening_count} opening, {closing_count} closing" - ) + print(f"Mismatched tags for {tag}: " f"{opening_count} opening, {closing_count} closing") return False if opening_count == 0: diff --git a/src/utils/audio_synthesizer.py b/api/src/utils/audio_synthesizer.py similarity index 100% rename from src/utils/audio_synthesizer.py rename to api/src/utils/audio_synthesizer.py diff --git a/src/utils/audio_to_video.py b/api/src/utils/audio_to_video.py similarity index 100% rename from src/utils/audio_to_video.py rename to api/src/utils/audio_to_video.py diff --git a/src/utils/audiocast_request.py b/api/src/utils/audiocast_request.py similarity index 84% rename from src/utils/audiocast_request.py rename to api/src/utils/audiocast_request.py index f6eb3fc..9ad52c9 100644 --- a/src/utils/audiocast_request.py +++ b/api/src/utils/audiocast_request.py @@ -1,13 +1,13 @@ import re from typing import Literal -from src.services.anthropic_client import get_anthropic_sync -from src.services.gemini_client import GeminiConfig, generate_content -from src.services.openai_client import get_openai -from src.utils.chat_utils import ContentCategory +from services.anthropic_client import get_anthropic_sync +from services.gemini_client import GeminiConfig, generate_content +from services.openai_client import get_openai from src.utils.prompt_templates.source_content_prompt import get_content_source_prompt from src.utils.prompt_templates.streamline_audio import streamline_audio_script_prompt from src.utils.prompt_templates.tts_prompt import Metadata, TTSPromptMaker +from shared_utils_pkg.chat_utils import ContentCategory def generate_source_content(category: ContentCategory, summary: str): @@ -20,9 +20,7 @@ def generate_source_content(category: ContentCategory, summary: str): Returns: str: The audiocast source content """ - refined_summary = re.sub( - "You want", "a user who wants", summary, flags=re.IGNORECASE - ) + refined_summary = re.sub("You want", "a user who wants", summary, flags=re.IGNORECASE) refined_summary = re.sub("You", "a user", refined_summary, flags=re.IGNORECASE) response = get_openai().chat.completions.create( @@ -71,11 +69,7 @@ def create(self, provider: AudioScriptProvider = "openai"): prompt_maker = TTSPromptMaker(self.category, Metadata()) system_prompt = prompt_maker.get_system_prompt(self.source_content) - audio_script = ( - self.__use_openai(system_prompt) - if provider == "openai" - else self.__use_anthropic(system_prompt) - ) + audio_script = self.__use_openai(system_prompt) if provider == "openai" else self.__use_anthropic(system_prompt) print(f"Audio script generated successfully: {audio_script}") if not audio_script: @@ -83,9 +77,7 @@ def create(self, provider: AudioScriptProvider = "openai"): print("Streamlining the audio script...") - streamlined_script = self.streamline_audio_script( - instruction=system_prompt, audio_script=audio_script - ) + streamlined_script = self.streamline_audio_script(instruction=system_prompt, audio_script=audio_script) return str(streamlined_script) @@ -132,9 +124,7 @@ def streamline_audio_script(self, instruction: str, audio_script: str): str: The streamlined audio script """ response = generate_content( - prompt=[ - "Now streamline the audio script to match the specified TTS requirements." - ], + prompt=["Now streamline the audio script to match the specified TTS requirements."], config=GeminiConfig( model_name="gemini-1.5-flash-002", system_prompt=streamline_audio_script_prompt(instruction, audio_script), diff --git a/src/utils/chat_request.py b/api/src/utils/chat_request.py similarity index 95% rename from src/utils/chat_request.py rename to api/src/utils/chat_request.py index ef4a87b..5aea5cf 100644 --- a/src/utils/chat_request.py +++ b/api/src/utils/chat_request.py @@ -1,7 +1,7 @@ from typing import Any, Callable, List, Optional -from src.services.openai_client import get_openai -from src.utils.chat_utils import ContentCategory, SessionChatMessage +from services.openai_client import get_openai +from shared_utils_pkg.chat_utils import ContentCategory, SessionChatMessage def get_system_message(content_category: ContentCategory): @@ -57,3 +57,4 @@ def generator(): on_finish(text) return generator() + diff --git a/src/utils/clean_tss_markup.py b/api/src/utils/clean_tss_markup.py similarity index 100% rename from src/utils/clean_tss_markup.py rename to api/src/utils/clean_tss_markup.py diff --git a/api/src/utils/generate_audiocast.py b/api/src/utils/generate_audiocast.py new file mode 100644 index 0000000..98ba363 --- /dev/null +++ b/api/src/utils/generate_audiocast.py @@ -0,0 +1,79 @@ +from datetime import datetime + +from fastapi import BackgroundTasks, HTTPException + +from services.storage import StorageManager +from shared_utils_pkg.audiocast_utils import ( + GenerateAudioCastRequest, + GenerateAudioCastResponse, +) +from shared_utils_pkg.session_manager import SessionManager +from shared_utils_pkg.waveform_utils import WaveformUtils +from src.utils.audio_manager import AudioManager, AudioManagerConfig +from src.utils.audiocast_request import AudioScriptMaker, generate_source_content + + +async def generate_audiocast(request: GenerateAudioCastRequest, background_tasks: BackgroundTasks): + """## Generate audiocast based on a summary of user's request + + ### Steps: + 1. Generate source content + 2. Generate audio script + 3. Generate audio + 4a. Store audio + 4b. TODO: Store the audio waveform on GCS + 5. Update session + """ + summary = request.summary + category = request.category + session_id = request.sessionId + + db = SessionManager(session_id) + + def update_session_info(info: str): + background_tasks.add_task(db._update_info, info) + + update_session_info("Generating source content...") + + source_content = generate_source_content(category, summary) + if not source_content: + raise HTTPException(status_code=500, detail="Failed to generate source content") + + # Generate audio script + update_session_info("Generating audio script...") + audio_script_maker = AudioScriptMaker(category, source_content) + audio_script = audio_script_maker.create(provider="anthropic") + if not audio_script: + raise HTTPException(status_code=500, detail="Failed to generate audio script") + + # Generate audio + update_session_info("Generating audio...") + audio_path = await AudioManager(custom_config=AudioManagerConfig(tts_provider="elevenlabs")).generate_speech( + audio_script + ) + + def _run_on_background(): + try: + # Store audio + storage_manager = StorageManager() + storage_manager.upload_audio_to_gcs(audio_path, session_id) + + # Update session metadata + db._update_source(source_content) + db._update_transcript(audio_script) + # TODO: add one to update title + + # Generate and save audio waveform as mp4 + waveform_utils = WaveformUtils(session_id, audio_path) + waveform_utils.run_all() + except Exception as e: + print(f"Error in generate_audiocast background_tasks: {str(e)}") + + background_tasks.add_task(_run_on_background) + + return GenerateAudioCastResponse( + url=audio_path, + script=audio_script, + source_content=source_content, + created_at=datetime.now().strftime("%Y-%m-%d %H:%M"), + ) diff --git a/src/utils/generate_speech_utils.py b/api/src/utils/generate_speech_utils.py similarity index 87% rename from src/utils/generate_speech_utils.py rename to api/src/utils/generate_speech_utils.py index 6a35589..430ac05 100644 --- a/src/utils/generate_speech_utils.py +++ b/api/src/utils/generate_speech_utils.py @@ -2,18 +2,16 @@ from io import BytesIO from typing import Dict, List, Literal -from src.services.elevenlabs_client import get_elevenlabs_client -from src.services.openai_client import get_openai -from src.utils.decorators import process_time +from services.elevenlabs_client import get_elevenlabs_client +from services.openai_client import get_openai +from shared_utils_pkg.decorators import process_time TTSProvider = Literal["openai", "elevenlabs"] OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"] openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"] -ElevenLabsVoice = Literal[ - "Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam" -] +ElevenLabsVoice = Literal["Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"] elevenlabs_voices: List[ElevenLabsVoice] = [ "Adam", "Sarah", @@ -72,9 +70,7 @@ def __use_openai(self, job: SpeechJob): if job.voice not in openai_voices: raise ValueError("Wrong voice specification for openai tts") - response = get_openai().audio.speech.create( - input=job.content, model="tts-1-hd", voice=job.voice - ) + response = get_openai().audio.speech.create(input=job.content, model="tts-1-hd", voice=job.voice) return response.content @process_time() diff --git a/api/src/utils/get_audiocast.py b/api/src/utils/get_audiocast.py new file mode 100644 index 0000000..246827e --- /dev/null +++ b/api/src/utils/get_audiocast.py @@ -0,0 +1,41 @@ +from datetime import datetime + +from fastapi import HTTPException + +from services.storage import StorageManager +from src.utils.generate_audiocast import ( + GenerateAudioCastResponse, +) +from shared_utils_pkg.session_manager import SessionManager + + +def get_audiocast(session_id: str): + """ + Get audiocast based on session id + """ + storage_manager = StorageManager() + filepath = storage_manager.download_from_gcs(session_id) + + session_data = SessionManager(session_id).data() + if not session_data: + raise HTTPException( + status_code=404, + detail=f"Audiocast not found for session_id: {session_id}", + ) + + metadata = session_data.metadata + source = metadata.source if metadata else "" + transcript = metadata.transcript if metadata else "" + + created_at = None + if session_data.created_at: + created_at = datetime.fromisoformat(session_data.created_at).strftime( + "%Y-%m-%d %H:%M" + ) + + return GenerateAudioCastResponse( + url=filepath, + script=transcript, + source_content=source, + created_at=created_at, + ) diff --git a/src/uis/__init__.py b/api/src/utils/prompt_templates/__init__.py similarity index 100% rename from src/uis/__init__.py rename to api/src/utils/prompt_templates/__init__.py diff --git a/src/utils/prompt_templates/source_content_prompt.py b/api/src/utils/prompt_templates/source_content_prompt.py similarity index 95% rename from src/utils/prompt_templates/source_content_prompt.py rename to api/src/utils/prompt_templates/source_content_prompt.py index 33b2fad..6c431aa 100644 --- a/src/utils/prompt_templates/source_content_prompt.py +++ b/api/src/utils/prompt_templates/source_content_prompt.py @@ -1,4 +1,4 @@ -from src.utils.chat_utils import ContentCategory, category_qualifiers +from shared_utils_pkg.chat_utils import ContentCategory, category_qualifiers def get_content_source_prompt(category: ContentCategory, summary: str): diff --git a/src/utils/prompt_templates/streamline_audio.py b/api/src/utils/prompt_templates/streamline_audio.py similarity index 100% rename from src/utils/prompt_templates/streamline_audio.py rename to api/src/utils/prompt_templates/streamline_audio.py diff --git a/src/utils/prompt_templates/tts_prompt.py b/api/src/utils/prompt_templates/tts_prompt.py similarity index 98% rename from src/utils/prompt_templates/tts_prompt.py rename to api/src/utils/prompt_templates/tts_prompt.py index 98642d2..161aa40 100644 --- a/src/utils/prompt_templates/tts_prompt.py +++ b/api/src/utils/prompt_templates/tts_prompt.py @@ -1,6 +1,6 @@ from dataclasses import dataclass -from src.utils.chat_utils import ContentCategory, category_qualifiers +from shared_utils_pkg.chat_utils import ContentCategory, category_qualifiers @dataclass diff --git a/src/__init__.py b/api/tests/__init__.py similarity index 56% rename from src/__init__.py rename to api/tests/__init__.py index f83e1fa..03938a2 100644 --- a/src/__init__.py +++ b/api/tests/__init__.py @@ -1,6 +1,6 @@ from dotenv import load_dotenv -from src.services.admin_sdk import init_admin_sdk +from services.admin_sdk import init_admin_sdk load_dotenv() diff --git a/tests/test_speech_gen.py b/api/tests/test_speech_gen.py similarity index 100% rename from tests/test_speech_gen.py rename to api/tests/test_speech_gen.py diff --git a/app/.dockerignore b/app/.dockerignore new file mode 100644 index 0000000..72ef747 --- /dev/null +++ b/app/.dockerignore @@ -0,0 +1,5 @@ +.streamlit/ +.vscode/ +api/ +tests/ +.env.* \ No newline at end of file diff --git a/app/.gcloudignore b/app/.gcloudignore new file mode 100644 index 0000000..72ef747 --- /dev/null +++ b/app/.gcloudignore @@ -0,0 +1,5 @@ +.streamlit/ +.vscode/ +api/ +tests/ +.env.* \ No newline at end of file diff --git a/app/.streamlit/config.toml b/app/.streamlit/config.toml new file mode 100644 index 0000000..144fd39 --- /dev/null +++ b/app/.streamlit/config.toml @@ -0,0 +1,6 @@ +[browser] +gatherUsageStats = false + + +[client] +showSidebarNavigation = false diff --git a/Dockerfile b/app/Dockerfile similarity index 88% rename from Dockerfile rename to app/Dockerfile index cdd55d3..56f3664 100644 --- a/Dockerfile +++ b/app/Dockerfile @@ -19,6 +19,7 @@ RUN apt-get -yqq update && apt-get -yqq install \ && rm -rf /var/lib/apt/lists/* COPY . ./ +# RUN rm -rf ./api # Install production dependencies. RUN pip install --upgrade pip \ @@ -28,4 +29,4 @@ ENV HOST '0.0.0.0' EXPOSE $PORT HEALTHCHECK CMD curl --fail http://$HOST:$PORT/_stcore/health -CMD exec streamlit run app.py --server.port=$PORT --server.address=$HOST +CMD exec streamlit run app/index.py --server.port=$PORT --server.address=$HOST diff --git a/src/utils/prompt_templates/__init__.py b/app/__init__.py similarity index 100% rename from src/utils/prompt_templates/__init__.py rename to app/__init__.py diff --git a/app/_init_project.py b/app/_init_project.py new file mode 100644 index 0000000..c179f3e --- /dev/null +++ b/app/_init_project.py @@ -0,0 +1,30 @@ +import asyncio +import os +import sys +from pathlib import Path + +import uvloop +from dotenv import load_dotenv + +## Begin: Add the project root to sys.path +project_root = Path(__file__).parent.parent.resolve() +sys.path.append(str(project_root)) + +asyncio.set_event_loop_policy(uvloop.EventLoopPolicy()) +print("running uvloop as event loop policy for asyncio") + +load_dotenv() + + +def init_shared_packages(paths: list[str]): + # Add the shared module directory to the Python path + for path in paths: + pkg_path = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", path)) + sys.path.append(pkg_path) + + +init_shared_packages(["services", "utils_pkg"]) + + +def print_project_meta(): + print(f"Project root: {project_root}") diff --git a/app.py b/app/index.py similarity index 64% rename from app.py rename to app/index.py index d8b0108..88e8858 100644 --- a/app.py +++ b/app/index.py @@ -1,10 +1,12 @@ import asyncio import streamlit as st - -from src.uis.audioui import audioui -from src.uis.chatui import chatui +from _init_project import print_project_meta from src.utils.session_state import init_session_state +from uis.audioui import audioui +from uis.chatui import chatui + +print_project_meta() async def main(): @@ -14,19 +16,15 @@ async def main(): st.subheader("Listen to anything, anytime, leveraging AI") st.sidebar.info("A VeedoAI project. (c) 2024") - # Sidebar for content type selection - st.sidebar.title("Audiocast Info") - session_id = init_session_state() if st.session_state.content_category: - st.sidebar.subheader( - f"Content Category: {st.session_state.content_category.capitalize()}" - ) + # Sidebar for content type selection + st.sidebar.title("Audiocast Info") + + st.sidebar.subheader(f"Content Category: {st.session_state.content_category.capitalize()}") else: - st.sidebar.markdown( - "> Your preferences and audiocast metadata will appear here" - ) + st.sidebar.markdown("> Your preferences and audiocast metadata will appear here") # Declare chat interface container uichat = st.empty() diff --git a/pages/audiocast.py b/app/pages/audiocast.py similarity index 80% rename from pages/audiocast.py rename to app/pages/audiocast.py index 5754bd4..e12d1b4 100644 --- a/pages/audiocast.py +++ b/app/pages/audiocast.py @@ -1,33 +1,33 @@ import asyncio -from typing import cast import streamlit as st - +from _init_project import print_project_meta from src.utils.custom_components import copy_button -from src.utils.main_utils import get_audiocast from src.utils.render_audiocast_utils import ( - GenerateAudiocastDict, + get_audiocast, navigate_to_home, render_audiocast_handler, ) +print_project_meta() + async def render_audiocast_page(): - st.set_page_config(page_title="Audiora | Share Page", page_icon="🎧") + st.set_page_config(page_title="Audiora | Share Page", page_icon="🎧", layout="wide") + + # Display audiocast content + st.title("🎧 Audiora") + st.subheader("Share Page ") + st.sidebar.info("A VeedoAI project. (c) 2024") session_id = st.query_params.get("session_id") if session_id: - # Display audiocast content - st.title("🎧 Audiora") - st.subheader("Share Page ") st.markdown(f"##### Viewing audiocast: _{session_id}_") - st.sidebar.info("A VeedoAI project. (c) 2024") - try: with st.spinner("Loading audiocast..."): - audiocast = cast(GenerateAudiocastDict, get_audiocast(session_id)) + audiocast = get_audiocast(session_id) if audiocast["created_at"]: st.markdown(f"> Created: {audiocast["created_at"]}") diff --git a/tests/__init__.py b/app/src/__init__.py similarity index 100% rename from tests/__init__.py rename to app/src/__init__.py diff --git a/app/src/utils/__init__.py b/app/src/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/chat_thread.py b/app/src/utils/chat_thread.py similarity index 82% rename from src/utils/chat_thread.py rename to app/src/utils/chat_thread.py index 78b3561..3ce5b73 100644 --- a/src/utils/chat_thread.py +++ b/app/src/utils/chat_thread.py @@ -1,14 +1,17 @@ import re +from typing import Any, Generator +import httpx import streamlit as st +from src.utils.render_audiocast_utils import generate_audiocast +from src.utils.session_state import reset_session -from src.utils.chat_utils import ( +from env_var import API_URL +from shared_utils_pkg.chat_utils import ( ContentCategory, SessionChatMessage, SessionChatRequest, ) -from src.utils.main_utils import GenerateAudioCastRequest, chat, generate_audiocast -from src.utils.session_state import reset_session termination_prefix = "Ok, thanks for clarifying!" termination_suffix = "Please click the button below to start generating the audiocast." @@ -18,17 +21,20 @@ def generate_stream_response( session_id: str, prompt: str, content_category: ContentCategory, -): +) -> Generator[str, Any, None]: with st.spinner("Generating response..."): - response_generator = chat( - session_id, - SessionChatRequest( - message=SessionChatMessage(role="user", content=prompt), - content_category=content_category, - ), + session_chat = SessionChatRequest( + message=SessionChatMessage(role="user", content=prompt), + content_category=content_category, ) - return response_generator + response = httpx.post( + f"{API_URL}/chat/{session_id}", + json={**session_chat.model_dump()}, + timeout=None, + ) + response.raise_for_status() + return response.json() def handle_example_prompt( @@ -78,7 +84,7 @@ def handle_user_prompt( return ai_message -async def evaluate_final_response(ai_message: str, content_category: ContentCategory): +async def evaluate_final_response(ai_message: str): """ Evaluate if the ai_message is the final response from the ai model """ @@ -97,6 +103,7 @@ async def evaluate_final_response(ai_message: str, content_category: ContentCate unsafe_allow_html=True, ) + # Check if the chat session should end end_chat_session = termination_suffix.lower() in ai_message.lower() if not end_chat_session: return st.rerun() @@ -142,19 +149,18 @@ async def use_audiocast_request( try: with st.spinner("Generating your audiocast..."): audiocast_response = await generate_audiocast( - GenerateAudioCastRequest( - sessionId=session_id, - summary=summary, - category=content_category, - ) + session_id, + summary, + content_category, ) + print(f"Generate AudioCast Response: {audiocast_response}") st.session_state.current_audiocast = audiocast_response st.session_state.messages = [] # Clear messages st.rerun() except Exception as e: - st.info("Error while generating your audiocast. Please start afresh!") + st.warning("Error while generating your audiocast. Please start afresh!") st.error(f"Error generating audiocast: {str(e)}") st.session_state.user_specification = None diff --git a/src/utils/custom_components.py b/app/src/utils/custom_components.py similarity index 100% rename from src/utils/custom_components.py rename to app/src/utils/custom_components.py diff --git a/app/src/utils/display_example_cards.py b/app/src/utils/display_example_cards.py new file mode 100644 index 0000000..3021423 --- /dev/null +++ b/app/src/utils/display_example_cards.py @@ -0,0 +1,43 @@ +import streamlit as st + +from shared_utils_pkg.chat_utils import content_examples + + +def display_example_cards(): + """Display example content cards if there are no messages""" + st.markdown("##### You can start with one of the following") + + # CSS for fixed-height buttons and responsive columns + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) + + # Display example content cards + col1, col2 = st.columns(2) + for content_category, example in content_examples.items(): + with col1 if content_category in [ + "podcast", + "soundbite", + "sermon", + "audiodrama", + ] else col2: + if st.button(example, use_container_width=True): + st.session_state.messages.append({"role": "user", "content": example}) + st.session_state.example_prompt = example + st.session_state.content_category = content_category + + st.rerun() diff --git a/app/src/utils/metadata_subscription.py b/app/src/utils/metadata_subscription.py new file mode 100644 index 0000000..333c54a --- /dev/null +++ b/app/src/utils/metadata_subscription.py @@ -0,0 +1,29 @@ +from queue import Queue + +import streamlit as st + +from shared_utils_pkg.session_manager import SessionManager + + +def subscribe_to_audio_generation(session_id: str): + """Subscribe to audio generation metadata""" + q = Queue() + + def handler(info: str | None): + if info: + q.put(info, block=False) + + db = SessionManager(session_id) + doc_watch = db.subscribe_to_metadata_info(handler) + + with st.empty(): + while True: + try: + info = q.get(timeout=2) + if not info: + break + st.info(info) + except Exception: + break + + return doc_watch diff --git a/src/utils/render_audiocast.py b/app/src/utils/render_audiocast.py similarity index 100% rename from src/utils/render_audiocast.py rename to app/src/utils/render_audiocast.py diff --git a/app/src/utils/render_audiocast_utils.py b/app/src/utils/render_audiocast_utils.py new file mode 100644 index 0000000..04b4f99 --- /dev/null +++ b/app/src/utils/render_audiocast_utils.py @@ -0,0 +1,78 @@ +import re +from pathlib import Path +from typing import cast + +import httpx +import streamlit as st +from src.utils.metadata_subscription import subscribe_to_audio_generation +from src.utils.render_waveform import render_waveform + +from env_var import API_URL, APP_URL +from shared_utils_pkg.audiocast_utils import GenerateAudioCastRequest, GenerateAudiocastDict +from shared_utils_pkg.chat_utils import ContentCategory + + +def navigate_to_home(): + main_script = str(Path(__file__).parent.parent.parent / "index.py") + st.switch_page(main_script) + + +def parse_ai_script(ai_script: str): + matches = re.findall(r"<(Speaker\d+)>(.*?)", ai_script, re.DOTALL) + return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches]) + + +def get_audiocast(session_id: str): + response = httpx.get(f"{API_URL}/audiocast/{session_id}", timeout=None) + response.raise_for_status() + return cast(GenerateAudiocastDict, response.json()) + + +async def generate_audiocast( + session_id: str, + summary: str, + content_category: ContentCategory, +): + doc_watch = subscribe_to_audio_generation(session_id) + + audiocast_req = GenerateAudioCastRequest( + sessionId=session_id, + summary=summary, + category=content_category, + ) + response = httpx.post( + f"{API_URL}/audiocast/generate", + json=audiocast_req.model_dump(), + timeout=None, + ) + response.raise_for_status() + doc_watch.unsubscribe() + + return cast(GenerateAudiocastDict, response.json()) + + +def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict): + # Audio player + st.audio(audiocast["url"]) + + # Voice waveform + with st.expander("Show Audio Waveform"): + try: + render_waveform(session_id, audiocast["url"], False) + except Exception as e: + st.error(f"Error rendering waveform: {str(e)}") + + # Transcript + with st.expander("Show Transcript"): + st.markdown(parse_ai_script(audiocast["script"])) + + st.markdown("---") + + # Metadata + st.sidebar.subheader("Audiocast Source") + st.sidebar.markdown(audiocast["source_content"]) + + share_url = f"{APP_URL}/audiocast?session_id={session_id}" + st.text_input("Share this audiocast:", share_url) + + return share_url diff --git a/src/utils/render_chat.py b/app/src/utils/render_chat.py similarity index 95% rename from src/utils/render_chat.py rename to app/src/utils/render_chat.py index b891136..d02b634 100644 --- a/src/utils/render_chat.py +++ b/app/src/utils/render_chat.py @@ -1,6 +1,5 @@ import streamlit as st - -from src.utils.chat_utils import content_categories +from shared_utils_pkg.chat_utils import content_categories def set_content_category(): diff --git a/src/utils/waveform_utils.py b/app/src/utils/render_waveform.py similarity index 54% rename from src/utils/waveform_utils.py rename to app/src/utils/render_waveform.py index 3026701..0e23f79 100644 --- a/src/utils/waveform_utils.py +++ b/app/src/utils/render_waveform.py @@ -1,43 +1,16 @@ import os -import tempfile -from pathlib import Path import streamlit as st from pydub import AudioSegment -from seewav import visualize -from src.services.storage import BLOB_BASE_URI, StorageManager - - -def save_waveform_video_to_gcs(session_id: str, video_path: str): - """Ingest waveform visualization to GCS.""" - full_path = StorageManager().upload_video_to_gcs(video_path, f"{session_id}.mp4") - return full_path - - -def generate_waveform_video(output_path: Path, audio_path: str) -> Path: - """Generate waveform video from audio file using SeeWav.""" - with tempfile.TemporaryDirectory() as temp_dir: - visualize( - audio=Path(audio_path), - tmp=Path(temp_dir), - out=output_path, - bars=60, - speed=4, - time=0.4, - rate=60, - size=(120, 68), - fg_color=(0.0, 1.0, 0.6), # Bright green. Try 0.2 0.2 0.2 for dark green - bg_color=(0.05, 0.05, 0.05), # Near black - ) - return output_path +from services.storage import BLOB_BASE_URI, StorageManager +from shared_utils_pkg.waveform_utils import WaveformUtils def render_waveform(session_id: str, audio_path: str, autoplay=False): """Render waveform visualization from audio file.""" - tmp_directory = Path("/tmp/audiora/waveforms") - tmp_directory.mkdir(parents=True, exist_ok=True) - tmp_vid_path = tmp_directory / f"{session_id}.mp4" + waveform_utils = WaveformUtils(session_id, audio_path) + tmp_vid_path = waveform_utils.get_tmp_video_path() video_path = None if os.path.exists(tmp_vid_path): @@ -56,8 +29,8 @@ def render_waveform(session_id: str, audio_path: str, autoplay=False): try: if not video_path: with st.spinner("Generating waveform visualization..."): - video_path = generate_waveform_video(tmp_vid_path, audio_path) - save_waveform_video_to_gcs(session_id, str(video_path)) + video_path = waveform_utils.generate_waveform_video(tmp_vid_path) + waveform_utils.save_waveform_video_to_gcs(str(video_path)) with open(video_path, "rb") as video_file: video_bytes = video_file.read() diff --git a/src/utils/session_state.py b/app/src/utils/session_state.py similarity index 96% rename from src/utils/session_state.py rename to app/src/utils/session_state.py index d19164d..4267b46 100644 --- a/src/utils/session_state.py +++ b/app/src/utils/session_state.py @@ -2,8 +2,7 @@ from typing import List, Literal, TypedDict, cast import streamlit as st - -from src.utils.chat_utils import ContentCategory +from shared_utils_pkg.chat_utils import ContentCategory MessageRole = Literal["user", "assistant", "ai", "human"] diff --git a/app/uis/__init__.py b/app/uis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/uis/audioui.py b/app/uis/audioui.py similarity index 99% rename from src/uis/audioui.py rename to app/uis/audioui.py index 1680323..254566e 100644 --- a/src/uis/audioui.py +++ b/app/uis/audioui.py @@ -1,8 +1,7 @@ import streamlit as st -from streamlit.delta_generator import DeltaGenerator - from src.utils.chat_thread import use_audiocast_request from src.utils.render_audiocast import render_audiocast +from streamlit.delta_generator import DeltaGenerator async def audioui(session_id: str, uichat: DeltaGenerator): diff --git a/src/uis/chatui.py b/app/uis/chatui.py similarity index 91% rename from src/uis/chatui.py rename to app/uis/chatui.py index 02dcfe1..072e7b0 100644 --- a/src/uis/chatui.py +++ b/app/uis/chatui.py @@ -1,13 +1,12 @@ import streamlit as st -from streamlit.delta_generator import DeltaGenerator - from src.utils.chat_thread import ( evaluate_final_response, handle_example_prompt, handle_user_prompt, ) -from src.utils.chat_utils import display_example_cards +from src.utils.display_example_cards import display_example_cards from src.utils.render_chat import render_chat_history +from streamlit.delta_generator import DeltaGenerator async def chatui(session_id: str, uichat: DeltaGenerator): @@ -36,7 +35,7 @@ async def chatui(session_id: str, uichat: DeltaGenerator): ai_message = handle_user_prompt(session_id, prompt, content_category) if isinstance(ai_message, str): - await evaluate_final_response(ai_message, content_category) + await evaluate_final_response(ai_message) # Chat input for custom prompts if prompt := uichat.chat_input("What would you like to listen to?"): diff --git a/src/env_var.py b/env_var.py similarity index 86% rename from src/env_var.py rename to env_var.py index c7f4a5b..42f7ce6 100644 --- a/src/env_var.py +++ b/env_var.py @@ -7,10 +7,12 @@ else: load_dotenv() +BUCKET_NAME = environ["BUCKET_NAME"] + OPENAI_API_KEY = environ["OPENAI_API_KEY"] ANTHROPIC_API_KEY = environ["ANTHROPIC_API_KEY"] GEMINI_API_KEY = environ["GEMINI_API_KEY"] ELEVENLABS_API_KEY = environ["ELEVENLABS_API_KEY"] -BUCKET_NAME = environ.get("BUCKET_NAME") APP_URL = environ.get("APP_URL", "http://localhost:8501") +API_URL = environ["API_URL"] diff --git a/pyproject.toml b/pyproject.toml index b72f9af..2fa7cfb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,4 @@ [project] -# Support Python 3.10+. requires-python = ">=3.10" [tool.ruff] diff --git a/requirements.txt b/requirements.txt index 4296b70..8491f24 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,10 @@ +fastapi pydantic +fastapi-utilities + +uvicorn +gunicorn +uvloop streamlit httpx @@ -23,5 +29,6 @@ google-cloud-storage google-api-python-client google-generativeai +ruff watchdog -ruff \ No newline at end of file +setuptools diff --git a/services/__init__.py b/services/__init__.py new file mode 100644 index 0000000..ceb7a4e --- /dev/null +++ b/services/__init__.py @@ -0,0 +1,3 @@ +from services.admin_sdk import init_admin_sdk + +init_admin_sdk() diff --git a/services/admin_sdk.py b/services/admin_sdk.py new file mode 100644 index 0000000..38cb90e --- /dev/null +++ b/services/admin_sdk.py @@ -0,0 +1,10 @@ +import firebase_admin + + +def init_admin_sdk(): + try: + app = firebase_admin.get_app() + print(f"Firebase Admin SDK already initialized ~> {app.project_id}") + except ValueError: + app = firebase_admin.initialize_app() + print(f"Firebase Admin SDK initialized ~> {app.project_id}") diff --git a/src/services/anthropic_client.py b/services/anthropic_client.py similarity index 86% rename from src/services/anthropic_client.py rename to services/anthropic_client.py index 3622daf..d191d8d 100644 --- a/src/services/anthropic_client.py +++ b/services/anthropic_client.py @@ -1,6 +1,6 @@ from anthropic import Anthropic, AsyncAnthropic -from src.env_var import ANTHROPIC_API_KEY +from env_var import ANTHROPIC_API_KEY def get_anthropic(): diff --git a/src/services/elevenlabs_client.py b/services/elevenlabs_client.py similarity index 77% rename from src/services/elevenlabs_client.py rename to services/elevenlabs_client.py index 0aa9559..d3f8468 100644 --- a/src/services/elevenlabs_client.py +++ b/services/elevenlabs_client.py @@ -1,6 +1,6 @@ from elevenlabs.client import ElevenLabs -from src.env_var import ELEVENLABS_API_KEY +from env_var import ELEVENLABS_API_KEY client = ElevenLabs( api_key=ELEVENLABS_API_KEY, diff --git a/src/services/firestore_sdk.py b/services/firestore_sdk.py similarity index 76% rename from src/services/firestore_sdk.py rename to services/firestore_sdk.py index 4ca0e7b..cef9ee2 100644 --- a/src/services/firestore_sdk.py +++ b/services/firestore_sdk.py @@ -1,4 +1,3 @@ -import logging from typing import Dict, Literal from firebase_admin.firestore import client, firestore @@ -19,11 +18,8 @@ class DBManager: - def __init__(self, scope: str): - self.logger = logging.getLogger(scope) - @property - def timestamp(self): + def _timestamp(self): return server_timestamp def _get_collection(self, collection: Collection): @@ -31,22 +27,18 @@ def _get_collection(self, collection: Collection): def _create_document(self, collection: Collection, data: Dict): return self._get_collection(collection).add( - {**data, "created_at": self.timestamp, "updated_at": self.timestamp} + {**data, "created_at": self._timestamp, "updated_at": self._timestamp} ) def _set_document(self, collection: Collection, doc_id: str, data: Dict): return ( self._get_collection(collection) .document(doc_id) - .set({**data, "created_at": self.timestamp, "updated_at": self.timestamp}) + .set({**data, "created_at": self._timestamp, "updated_at": self._timestamp}) ) def _update_document(self, collection: Collection, doc_id: str, data: Dict): - return ( - self._get_collection(collection) - .document(doc_id) - .update({**data, "updated_at": self.timestamp}) - ) + return self._get_collection(collection).document(doc_id).update({**data, "updated_at": self._timestamp}) def _delete_document(self, collection: Collection, doc_id: str): return self._get_collection(collection).document(doc_id).delete() diff --git a/src/services/gemini_client.py b/services/gemini_client.py similarity index 96% rename from src/services/gemini_client.py rename to services/gemini_client.py index b0038a0..f90ecd7 100644 --- a/src/services/gemini_client.py +++ b/services/gemini_client.py @@ -3,7 +3,7 @@ import google.generativeai as genai -from src.env_var import GEMINI_API_KEY +from env_var import GEMINI_API_KEY def get_gemini(): diff --git a/src/services/openai_client.py b/services/openai_client.py similarity index 69% rename from src/services/openai_client.py rename to services/openai_client.py index 79df20d..fc545f6 100644 --- a/src/services/openai_client.py +++ b/services/openai_client.py @@ -1,6 +1,6 @@ from openai import Client -from src.env_var import OPENAI_API_KEY +from env_var import OPENAI_API_KEY def get_openai(): diff --git a/services/setup.py b/services/setup.py new file mode 100644 index 0000000..e4d8ae6 --- /dev/null +++ b/services/setup.py @@ -0,0 +1,10 @@ +from setuptools import find_packages, setup + +setup( + name="services", + version="1.0.0", + packages=find_packages(), + install_requires=[ + # List any dependencies here + ], +) diff --git a/src/services/storage.py b/services/storage.py similarity index 98% rename from src/services/storage.py rename to services/storage.py index 8710062..c2411c0 100644 --- a/src/services/storage.py +++ b/services/storage.py @@ -8,7 +8,7 @@ from google.cloud import storage from pydub import AudioSegment -from src.env_var import BUCKET_NAME +from env_var import BUCKET_NAME storage_client = storage.Client() bucket = storage_client.bucket(BUCKET_NAME) diff --git a/src/utils/__init__.py b/shared_utils_pkg/__init__.py similarity index 100% rename from src/utils/__init__.py rename to shared_utils_pkg/__init__.py diff --git a/shared_utils_pkg/audiocast_utils.py b/shared_utils_pkg/audiocast_utils.py new file mode 100644 index 0000000..77a4f52 --- /dev/null +++ b/shared_utils_pkg/audiocast_utils.py @@ -0,0 +1,25 @@ +from typing import TypedDict + +from pydantic import BaseModel + +from shared_utils_pkg.chat_utils import ContentCategory + + +class GenerateAudioCastRequest(BaseModel): + sessionId: str + summary: str + category: ContentCategory + + +class GenerateAudioCastResponse(BaseModel): + url: str + script: str + source_content: str + created_at: str | None + + +class GenerateAudiocastDict(TypedDict): + url: str + script: str + source_content: str + created_at: str | None diff --git a/src/utils/chat_utils.py b/shared_utils_pkg/chat_utils.py similarity index 63% rename from src/utils/chat_utils.py rename to shared_utils_pkg/chat_utils.py index e627b98..c94d9f3 100644 --- a/src/utils/chat_utils.py +++ b/shared_utils_pkg/chat_utils.py @@ -1,7 +1,6 @@ import uuid from typing import Dict, List, Literal -import streamlit as st from pydantic import BaseModel, Field ContentCategory = Literal[ @@ -58,43 +57,3 @@ class SessionChatMessage(BaseModel): class SessionChatRequest(BaseModel): content_category: ContentCategory message: SessionChatMessage - - -def display_example_cards(): - """Display example content cards if there are no messages""" - st.markdown("##### You can start with one of the following") - - # CSS for fixed-height buttons and responsive columns - st.markdown( - """ - - """, - unsafe_allow_html=True, - ) - - # Display example content cards - col1, col2 = st.columns(2) - for content_category, example in content_examples.items(): - with col1 if content_category in [ - "podcast", - "soundbite", - "sermon", - "audiodrama", - ] else col2: - if st.button(example, use_container_width=True): - st.session_state.messages.append({"role": "user", "content": example}) - st.session_state.example_prompt = example - st.session_state.content_category = content_category - - st.rerun() diff --git a/src/utils/decorators.py b/shared_utils_pkg/decorators.py similarity index 100% rename from src/utils/decorators.py rename to shared_utils_pkg/decorators.py diff --git a/src/utils/session_manager.py b/shared_utils_pkg/session_manager.py similarity index 73% rename from src/utils/session_manager.py rename to shared_utils_pkg/session_manager.py index 3d9fc94..4fc7251 100644 --- a/src/utils/session_manager.py +++ b/shared_utils_pkg/session_manager.py @@ -1,20 +1,22 @@ from dataclasses import dataclass -from typing import Dict, List, Optional, cast +from typing import Callable, Dict, List, Optional, cast -from src.services.firestore_sdk import ( +from services.firestore_sdk import ( Collection, DBManager, arrayRemove, arrayUnion, collections, ) -from src.utils.chat_utils import SessionChatMessage +from shared_utils_pkg.chat_utils import SessionChatMessage @dataclass class ChatMetadata: source: str transcript: str + info: Optional[str] = None + title: Optional[str] = None @dataclass @@ -29,7 +31,7 @@ class SessionManager(DBManager): collection: Collection = collections["audiora_sessions"] def __init__(self, session_id: str): - super().__init__(scope="ChatManager") + super().__init__() self.doc_id = session_id session_doc = self._get_document(self.collection, self.doc_id) @@ -42,6 +44,7 @@ def _update(self, data: Dict): return self._update_document(self.collection, self.doc_id, data) def data(self) -> SessionModel | None: + """Get session data""" doc = self._get_document(self.collection, self.doc_id) data = doc.to_dict() @@ -66,10 +69,14 @@ def _update_source(self, source: str): def _update_transcript(self, transcript: str): return self._update({"metadata.transcript": transcript}) + def _update_info(self, info: str): + return self._update({"metadata.info": info}) + + def _update_title(self, title: str): + return self._update({"metadata.title": title}) + def _add_chat(self, chat: SessionChatMessage): - return self._update_document( - self.collection, self.doc_id, {"chats": arrayUnion([chat.__dict__])} - ) + return self._update_document(self.collection, self.doc_id, {"chats": arrayUnion([chat.__dict__])}) def _delete_chat(self, chat_id: str): doc = self._get_document(self.collection, self.doc_id) @@ -110,3 +117,16 @@ def _get_chats(self) -> List[SessionChatMessage]: ) for chat in chats ] + + def subscribe_to_metadata_info(self, callback: Callable): + """Subscribe to metadata.info""" + doc_ref = self._get_collection(self.collection).document(self.doc_id) + + def on_snapshot(doc_snapshot, _changes, _read_time): + for doc in doc_snapshot: + if doc.exists and doc.id == self.doc_id: + data = doc.to_dict() + info = (data.get("metadata", {}) or {}).get("info") + callback(info) + + return doc_ref.on_snapshot(on_snapshot) diff --git a/shared_utils_pkg/setup.py b/shared_utils_pkg/setup.py new file mode 100644 index 0000000..8d7a076 --- /dev/null +++ b/shared_utils_pkg/setup.py @@ -0,0 +1,10 @@ +from setuptools import find_packages, setup + +setup( + name="shared_utils_pkg", + version="1.0.0", + packages=find_packages(), + install_requires=[ + # List any dependencies here + ], +) diff --git a/shared_utils_pkg/waveform_utils.py b/shared_utils_pkg/waveform_utils.py new file mode 100644 index 0000000..df8c6ed --- /dev/null +++ b/shared_utils_pkg/waveform_utils.py @@ -0,0 +1,57 @@ +import tempfile +from pathlib import Path + +from seewav import visualize + +from services.storage import StorageManager + + +class WaveformUtils: + def __init__(self, session_id: str, audio_path: str): + self.session_id = session_id + self.audio_path = audio_path + + def run_all(self): + """ + 1. Generate a waveform video from the audio file + 2. Upload it to Google Cloud Storage. + """ + tmp_path = self.get_tmp_video_path() + self.generate_waveform_video(tmp_path) + self.save_waveform_video_to_gcs(str(tmp_path)) + + def get_tmp_video_path(self): + """ + Get temporary video path for waveform visualization. + """ + tmp_directory = Path("/tmp/audiora/waveforms") + tmp_directory.mkdir(parents=True, exist_ok=True) + tmp_vid_path = tmp_directory / f"{self.session_id}.mp4" + + return tmp_vid_path + + def save_waveform_video_to_gcs(self, video_path: str): + """Ingest waveform visualization to GCS.""" + full_path = StorageManager().upload_video_to_gcs(video_path, f"{self.session_id}.mp4") + return full_path + + def generate_waveform_video(self, output_path: Path) -> Path: + """Generate waveform video from audio file using SeeWav.""" + with tempfile.TemporaryDirectory() as temp_dir: + visualize( + audio=Path(self.audio_path), + tmp=Path(temp_dir), + out=output_path, + bars=60, + speed=4, + time=0.4, + # rate=60, + size=(120, 68), + fg_color=( + 0.0, + 1.0, + 0.6, + ), # Bright green. Try 0.2 0.2 0.2 for dark green + bg_color=(0.05, 0.05, 0.05), # Near black + ) + return output_path diff --git a/src/services/admin_sdk.py b/src/services/admin_sdk.py deleted file mode 100644 index 26c6c6b..0000000 --- a/src/services/admin_sdk.py +++ /dev/null @@ -1,10 +0,0 @@ -import firebase_admin - - -def init_admin_sdk(): - try: - app = firebase_admin.get_app() - print(f"Firebase Admin SDK already initialized ~> {app.name}") - except ValueError: - firebase_admin.initialize_app() - print("Firebase Admin SDK initialized") diff --git a/src/utils/main_utils.py b/src/utils/main_utils.py deleted file mode 100644 index 8268538..0000000 --- a/src/utils/main_utils.py +++ /dev/null @@ -1,140 +0,0 @@ -from datetime import datetime - -import streamlit as st -from pydantic import BaseModel - -from src.services.storage import StorageManager -from src.utils.audio_manager import AudioManager, AudioManagerConfig -from src.utils.audiocast_request import AudioScriptMaker, generate_source_content -from src.utils.chat_request import chat_request -from src.utils.chat_utils import ( - SessionChatMessage, - SessionChatRequest, - content_categories, -) -from src.utils.session_manager import SessionManager - - -class GenerateAudioCastRequest(BaseModel): - sessionId: str - summary: str - category: str - - -class GenerateAudioCastResponse(BaseModel): - url: str - script: str - source_content: str - created_at: str | None - - -def chat(session_id: str, request: SessionChatRequest): - content_category = request.content_category - db = SessionManager(session_id) - db._add_chat(request.message) - - def on_finish(text: str): - db._add_chat(SessionChatMessage(role="assistant", content=text)) - - return chat_request( - content_category=content_category, - previous_messages=db._get_chats(), - on_finish=on_finish, - ) - - -async def generate_audiocast(request: GenerateAudioCastRequest): - """ - Generate an audiocast based on a summary of user's request - """ - session_id = request.sessionId - summary = request.summary - category = request.category - - if category not in content_categories: - raise Exception("Invalid content category") - - container = st.empty() - - # TODO: We can keep the process for generating source content and audio content separate - # STEP 1: Generate source content - with container.container(): - container.info("Generating source content...") - - source_content = generate_source_content(category, summary) - print(f"audiocast source content: {source_content}") - if not source_content: - raise Exception("Failed to develop audiocast source content") - - # STEP 2: Generate audio script - with container.container(): - container.info("Generating audio script...") - - audio_script_maker = AudioScriptMaker(category, source_content) - audio_script = audio_script_maker.create(provider="anthropic") - print(f"streamlined audio_script: {audio_script}") - if not audio_script: - raise Exception("Error while generating audio script") - - # STEP 3: Generate audio from the audio script - with container.container(): - container.info("Generating audio...") - output_file = await AudioManager( - custom_config=AudioManagerConfig(tts_provider="elevenlabs") - ).generate_speech(audio_script) - - print(f"output_file: {output_file}") - - # TODO: Use a background service - # STEP 4: Ingest audio file to a storage service (e.g., GCS, S3) - with container.container(): - try: - container.info("Storing a copy of your audiocast...") - storage_manager = StorageManager() - storage_manager.upload_audio_to_gcs(output_file, session_id) - except Exception as e: - print(f"Error while storing audiocast: {str(e)}") - - db = SessionManager(session_id) - db._update_source(source_content) - db._update_transcript(audio_script) - - response = GenerateAudioCastResponse( - url=output_file, - script=audio_script, - source_content=source_content, - created_at=datetime.now().strftime("%Y-%m-%d %H:%M"), - ) - - return response.model_dump() - - -def get_audiocast(session_id: str): - """ - Get the URI for the audiocast - """ - storage_manager = StorageManager() - filepath = storage_manager.download_from_gcs(session_id) - - session_data = SessionManager(session_id).data() - if not session_data: - raise Exception(f"Audiocast not found for session_id: {session_id}") - - metadata = session_data.metadata - source = metadata.source if metadata else "" - transcript = metadata.transcript if metadata else "" - - created_at: str | None = None - if session_data.created_at: - created_at = datetime.fromisoformat(session_data.created_at).strftime( - "%Y-%m-%d %H:%M" - ) - - response = GenerateAudioCastResponse( - url=filepath, - script=transcript, - source_content=source, - created_at=created_at, - ) - - return response.model_dump() diff --git a/src/utils/render_audiocast_utils.py b/src/utils/render_audiocast_utils.py deleted file mode 100644 index e2de1ac..0000000 --- a/src/utils/render_audiocast_utils.py +++ /dev/null @@ -1,52 +0,0 @@ -import re -from pathlib import Path -from typing import TypedDict - -import streamlit as st - -from src.env_var import APP_URL -from src.utils.waveform_utils import render_waveform - - -def navigate_to_home(): - main_script = str(Path(__file__).parent.parent.parent / "app.py") - st.switch_page(main_script) - - -def parse_ai_script(ai_script: str): - matches = re.findall(r"<(Speaker\d+)>(.*?)", ai_script, re.DOTALL) - return "\n\n".join([f"**{speaker}**: {content}" for speaker, content in matches]) - - -class GenerateAudiocastDict(TypedDict): - url: str - script: str - source_content: str - created_at: str | None - - -def render_audiocast_handler(session_id: str, audiocast: GenerateAudiocastDict): - # Audio player - st.audio(audiocast["url"]) - - # Voice waveform - with st.expander("Show Audio Waveform"): - try: - render_waveform(session_id, audiocast["url"], False) - except Exception as e: - st.error(f"Error rendering waveform: {str(e)}") - - # Transcript - with st.expander("Show Transcript"): - st.markdown(parse_ai_script(audiocast["script"])) - - st.markdown("---") - - # Metadata - st.sidebar.subheader("Audiocast Source") - st.sidebar.markdown(audiocast["source_content"]) - - share_url = f"{APP_URL}/audiocast?session_id={session_id}" - st.text_input("Share this audiocast:", share_url) - - return share_url