From 2ccca1bc15a37df5866872aeb6089aeeb4967078 Mon Sep 17 00:00:00 2001 From: Chukwuma Nwaugha Date: Wed, 30 Oct 2024 21:16:33 +0000 Subject: [PATCH] create a storage service; define bucket_name env variable --- .env.example | 1 + .github/workflows/deploy.yml | 24 +-------- .gitignore | 2 + app.py | 2 +- src/env_var.py | 1 + src/services/storage.py | 100 +++++++++++++++++++++++++++++++++++ 6 files changed, 107 insertions(+), 23 deletions(-) create mode 100644 src/services/storage.py diff --git a/.env.example b/.env.example index 20de934..acfa63c 100644 --- a/.env.example +++ b/.env.example @@ -6,4 +6,5 @@ ANTHROPIC_API_KEY="your-anthropic-api-key" GEMINI_API_KEY="your-gemini-api-key" ELEVENLABS_API_KEY="your-elevenlabs-api-key" +BUCKET_NAME="your-bucket-name" APP_URL=http://localhost:8501 \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index cf48de9..317f1f2 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -26,16 +26,13 @@ env: GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} ELEVENLABS_API_KEY: ${{ secrets.ELEVENLABS_API_KEY }} + BUCKET_NAME: ${{ secrets.BUCKET_NAME }} jobs: prepare: runs-on: ubuntu-latest outputs: VERSION: ${{ github.event_name == 'pull_request' && format('pr-{0}', github.event.number) || format('main-{0}', steps.prepare_env.outputs.SHORT_SHA) }} -<<<<<<< HEAD -======= - MAIN_OR_TAGGED: ${{ fromJSON(env.MAIN) || (fromJSON(steps.prepare_env.outputs.TAGGED) && steps.prepare_env.outputs.TAG_BRANCH_NAME == 'main') }} ->>>>>>> origin/main steps: - uses: actions/checkout@v4 with: @@ -43,17 +40,7 @@ jobs: - id: prepare_env run: | echo "TAGGED=${{ startsWith(github.ref, 'refs/tags/api') }}" >> $GITHUB_OUTPUT -<<<<<<< HEAD echo "SHORT_SHA=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT -======= - - SHORT_SHA=$(git rev-parse --short HEAD) - echo "SHORT_SHA=$SHORT_SHA" >> $GITHUB_OUTPUT - - RAW=$(git branch -r --contains 
$SHORT_SHA) - TAG_BRANCH_NAME="${RAW##*/}" - echo "TAG_BRANCH_NAME=$TAG_BRANCH_NAME" >> $GITHUB_OUTPUT ->>>>>>> origin/main lint: runs-on: ubuntu-latest @@ -110,21 +97,14 @@ jobs: GEMINI_API_KEY=${{ secrets.GEMINI_API_KEY }} ANTHROPIC_API_KEY=${{ secrets.ANTHROPIC_API_KEY }} ELEVENLABS_API_KEY=${{ secrets.ELEVENLABS_API_KEY }} + BUCKET_NAME=${{ secrets.BUCKET_NAME }} flags: "--allow-unauthenticated --memory=32Gi --cpu=8 --execution-environment=gen2 --concurrency=80 --max-instances=10" -<<<<<<< HEAD - run: curl -f "${{ steps.deploy.outputs.url }}" - uses: marocchino/sticky-pull-request-comment@v2 with: header: app -======= - - name: health-check - run: curl -f "${{ steps.deploy.outputs.url }}" - - uses: marocchino/sticky-pull-request-comment@v2 - with: - header: api ->>>>>>> origin/main message: | app: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }}) diff --git a/.gitignore b/.gitignore index 104a111..8619962 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ __pycache__ !.env.example reference_code + +keys/ \ No newline at end of file diff --git a/app.py b/app.py index 2e5649d..7bfdf36 100644 --- a/app.py +++ b/app.py @@ -12,7 +12,7 @@ async def main(): st.title("🎧 Audiora") st.subheader("Listen to anything, anytime, leveraging AI") - st.sidebar.info('A VeedoAI project. (c) 2024 ') + st.sidebar.info("A VeedoAI project. 
(c) 2024") # Sidebar for content type selection st.sidebar.title("Audiocast Info") diff --git a/src/env_var.py b/src/env_var.py index c8eedfe..c7f4a5b 100644 --- a/src/env_var.py +++ b/src/env_var.py @@ -12,4 +12,5 @@ GEMINI_API_KEY = environ["GEMINI_API_KEY"] ELEVENLABS_API_KEY = environ["ELEVENLABS_API_KEY"] +BUCKET_NAME = environ.get("BUCKET_NAME") APP_URL = environ.get("APP_URL", "http://localhost:8501") diff --git a/src/services/storage.py b/src/services/storage.py new file mode 100644 index 0000000..eb855c9 --- /dev/null +++ b/src/services/storage.py @@ -0,0 +1,100 @@ +import re +from dataclasses import dataclass +from io import BytesIO +from typing import Any, Dict +from uuid import uuid4 + +from google.cloud import storage + +from src.env_var import BUCKET_NAME + +# Instantiates a client +storage_client = storage.Client() +bucket = storage_client.bucket(BUCKET_NAME) + + +def listBlobs(prefix): + blobs = bucket.list_blobs(prefix=prefix) + return list(blobs) + + +def download_audio_file(root_cloud_path: str, tmp_path: str): + """download the first audio blob found under root_cloud_path to tmp_path; return tmp_path, or None if there is no audio blob""" + blobs = listBlobs(prefix=root_cloud_path) + pattern = re.compile("audio/mpeg|audio/mp3|audio/wav|audio/ogg|audio/aac") + audio_blobs = [blob for blob in blobs if pattern.match(blob.content_type or "")] + + if not audio_blobs: + return None + + audio_file = audio_blobs[0] + audio_file.download_to_filename(f"{tmp_path}") + + return tmp_path + + +def check_file_exists(root_path: str, filename: str): + """check if a file exists in the bucket""" + blobname = f"{root_path}/{filename}" + blobs = listBlobs(prefix=root_path) + return any(blob.name == blobname for blob in blobs) + + +@dataclass +class UploadItemParams: + content_type: str + cache_control: str = "public, max-age=31536000" + metadata: Dict[str, Any] | None = None + + +def upload_string_to_gcs(content: str, blobname: str, params: UploadItemParams): + """upload string content to GCS; returns the gs:// URI of the uploaded blob""" + blob = bucket.blob(blobname) + 
blob.content_type = params.content_type + blob.cache_control = params.cache_control + + if params.metadata: + blob.metadata = {**(blob.metadata or dict()), **params.metadata} + + blob.upload_from_string(content) + + return f"gs://{BUCKET_NAME}/{blob.name}" + + +def upload_file_to_gcs(tmp_path: str, blobname: str, params: UploadItemParams): + """upload file to GCS""" + blob = bucket.blob(blobname) + blob.content_type = params.content_type + blob.cache_control = params.cache_control + + if params.metadata: + blob.metadata = {**(blob.metadata or dict()), **params.metadata} + + blob.upload_from_filename(tmp_path) + + return f"gs://{BUCKET_NAME}/{blob.name}" + + +def upload_bytes_to_gcs(bytes: BytesIO, blobname: str, params: UploadItemParams): + """upload bytes to GCS""" + blob = bucket.blob(blobname) + blob.content_type = params.content_type + blob.cache_control = params.cache_control + + if params.metadata: + blob.metadata = {**(blob.metadata or dict()), **params.metadata} + + blob.upload_from_file(bytes) + + return f"gs://{BUCKET_NAME}/{blobname}" + + +def download_file_from_gcs(blobname: str): + """ + Download any item on GCS to disk + """ + blob = bucket.blob(blobname) + tmp_file_path = f"/tmp/{str(uuid4())}" + blob.download_to_filename(tmp_file_path) + + return tmp_file_path