Use elevenlabs #5

Merged · 17 commits · Oct 31, 2024

Changes from all commits
.github/workflows/deploy.yml (9 additions, 4 deletions)
@@ -48,8 +48,9 @@ jobs:
     timeout-minutes: 5
     steps:
       - uses: actions/checkout@v4
-      - id: setup-python
-        uses: actions/setup-python@v5
+        with:
+          fetch-depth: 0
+      - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
           cache: "pip" # caching pip dependencies
@@ -67,8 +68,12 @@ jobs:
     runs-on: ubuntu-latest
     needs: [prepare, lint]
     timeout-minutes: 10
+    permissions:
+      pull-requests: write
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
       - uses: actions/setup-python@v5
         with:
           python-version: "3.12"
@@ -104,9 +109,9 @@ jobs:
       - run: curl -f "${{ steps.deploy.outputs.url }}"
       - uses: marocchino/sticky-pull-request-comment@v2
         with:
-          header: app
+          header: audiora
           message: |
-            app: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }})
+            audiora: ${{ steps.deploy.outputs.url }} (${{ github.event.pull_request.head.sha }})
 
   promote:
     runs-on: ubuntu-latest
.github/workflows/ruff.yml (1 addition, 1 deletion)
@@ -7,7 +7,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12"]
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
pages/audiocast.py (2 additions, 1 deletion)
@@ -6,6 +6,7 @@

 from src.env_var import APP_URL
 from src.utils.main_utils import get_audiocast
+from src.utils.render_audiocast import parse_ai_script
 
 
 def navigate_to_home():
@@ -34,7 +35,7 @@ async def render_audiocast_page():

     # Transcript
     with st.expander("Show Transcript"):
-        st.write(audiocast["script"])
+        st.markdown(parse_ai_script(audiocast["script"]))
 
     # Metadata
     st.sidebar.subheader("Audiocast Source")
requirements.txt (1 addition)
@@ -4,6 +4,7 @@ asyncio

 openai
 anthropic
+elevenlabs
 
 pyperclip
 python-multipart
src/services/elevenlabs_client.py (new file, 11 additions)
@@ -0,0 +1,11 @@
from elevenlabs.client import ElevenLabs

from src.env_var import ELEVENLABS_API_KEY

client = ElevenLabs(
    api_key=ELEVENLABS_API_KEY,
)


def get_elevenlabs_client():
    return client
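
For context, a brief usage sketch of the new client module (not part of this PR). The text, voice, and output path below are placeholder values; the generate() call mirrors the one added in src/utils/generate_speech_utils.py.

from src.services.elevenlabs_client import get_elevenlabs_client

# Placeholder prompt, voice, and output path, for illustration only.
audio_stream = get_elevenlabs_client().generate(
    model="eleven_multilingual_v2",
    text="Hello from Audiora.",
    voice="Adam",
)

# The client returns an iterable of audio chunks; write them out as an mp3.
with open("/tmp/sample.mp3", "wb") as f:
    for chunk in audio_stream:
        if chunk:
            f.write(chunk)
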
src/utils/audio_manager.py (30 additions, 16 deletions)
@@ -4,16 +4,16 @@
 import re
 import uuid
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Tuple
 
 from src.utils.audio_manager_utils import (
     AudioManagerConfig,
     AudioManagerSpeechGenerator,
     ContentSplitter,
-    openai_voices,
 )
 from src.utils.audio_synthesizer import AudioSynthesizer
 from src.utils.clean_tss_markup import clean_tss_markup
+from src.utils.generate_speech_utils import elevenlabs_voices, openai_voices
 
 logger = logging.getLogger(__name__)
 
@@ -42,7 +42,6 @@ async def generate_speech(self, audio_script: str):
"""
output_file = f"{self.config.outdir_base}/{str(uuid.uuid4())}.mp3"
await self.text_to_speech(audio_script, output_file)

return output_file

async def text_to_speech(self, audio_script: str, output_file: str):
@@ -56,33 +55,48 @@ async def text_to_speech(self, audio_script: str, output_file: str):
"""
tags = self._get_tags(audio_script)
audio_script = clean_tss_markup(audio_script, tags)
nway_content = self.split_content(audio_script, tags)

print(f"nway_content: {nway_content}")

if self.config.tts_provider == "openai":
return await self.__text_to_speech_openai(audio_script, output_file, tags)
audio_files = await self.__text_to_speech_openai(nway_content, tags)
elif self.config.tts_provider == "elevenlabs":
audio_files = await self.__text_to_speech_elevenlabs(nway_content, tags)
else:
raise Exception("Invalid TTS model specified")

if not audio_files:
raise Exception("No audio files were generated")

await self.__finalize(audio_files, output_file)
logger.info(f"Audio saved to {output_file}")

async def __text_to_speech_openai(
self, audio_script: str, output_file: str, tags: List[str]
):
self, nway_content: List[Tuple[str, str]], tags: List[str]
) -> List[str]:
try:
nway_content = self.split_content(audio_script, tags)
print(f"nway_content: {nway_content}")

jobs = self._prepare_speech_jobs(
nway_content, tags, openai_voices, self.config.temp_audio_dir
)

audio_files = await self._process_speech_jobs(jobs)
if not audio_files:
raise Exception("No audio files were generated")

await self.__finalize(audio_files, output_file)
logger.info(f"Audio saved to {output_file}")

return await self._process_speech_jobs(jobs, provider="openai")
except Exception as e:
raise Exception(f"Error converting text to speech with OpenAI: {str(e)}")

async def __text_to_speech_elevenlabs(
self, nway_content: List[Tuple[str, str]], tags: List[str]
) -> List[str]:
try:
jobs = self._prepare_speech_jobs(
nway_content, tags, elevenlabs_voices, self.config.temp_audio_dir
)
return await self._process_speech_jobs(jobs, provider="elevenlabs")
except Exception as e:
raise Exception(
f"Error converting text to speech with Elevenlabs: {str(e)}"
)

async def __finalize(
self, audio_files: List[str], output_file: str, enhance_audio=False
) -> None:
src/utils/audio_manager_utils.py (16 additions, 35 deletions)
@@ -6,26 +6,20 @@
 from functools import partial
 from itertools import cycle, islice
 from pathlib import Path
-from typing import Any, List, Literal, Optional, Tuple
+from typing import Any, List, Optional, Tuple
 
-from src.services.openai_client import get_openai
-
-OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"]
-openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"]
-
-
-@dataclass
-class SpeechJob:
-    content: str
-    voice: OpenaiVoice
-    output_file: str
-    tag: str
-    index: int
+from src.utils.generate_speech_utils import (
+    ElevenLabsVoice,
+    GenerateSpeech,
+    OpenaiVoice,
+    SpeechJob,
+    TTSProvider,
+)
 
 
 @dataclass
 class AudioManagerConfig:
-    tts_provider: Optional[Literal["openai"]] = "openai"
+    tts_provider: Optional[TTSProvider] = "openai"
     temp_audio_dir: str = field(default_factory=lambda: "/tmp/audiocast")
     outdir_base: str = field(default_factory=lambda: "/tmp/audiocast/output")
 
@@ -48,7 +42,7 @@ def _prepare_speech_jobs(
         self,
         nway_content: List[Tuple[str, str]],
         tags: List[str],
-        voices: List[Any],
+        voices: List[OpenaiVoice] | List[ElevenLabsVoice],
         temp_audio_dir: str,
     ):
         jobs: List[SpeechJob] = []
@@ -73,27 +67,14 @@ def _prepare_speech_jobs(

         return jobs
 
-    def _generate_speech(self, job: SpeechJob) -> str:
-        try:
-            response = get_openai().audio.speech.create(
-                input=job.content,
-                model="tts-1-hd",
-                voice=job.voice,
-            )
-
-            with open(job.output_file, "wb") as file:
-                file.write(response.content)
-
-            print(f"Generated speech for tag {job.tag} at index {job.index}")
-            return job.output_file
-        except Exception as e:
-            print(f"Failed to generate speech for tag {job.tag}: {str(e)}")
-            return ""
-
-    async def _process_speech_jobs(self, jobs: List[SpeechJob]) -> List[str]:
+    async def _process_speech_jobs(
+        self, jobs: List[SpeechJob], provider: TTSProvider
+    ) -> List[str]:
         loop = asyncio.get_event_loop()
         tasks = [
-            loop.run_in_executor(self.executor, partial(self._generate_speech, job))
+            loop.run_in_executor(
+                self.executor, partial(GenerateSpeech(provider).run, job)
+            )
             for job in jobs
         ]
 
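
A minimal sketch (not part of the diff) of switching providers through AudioManagerConfig: since _process_speech_jobs now routes every job through GenerateSpeech(provider).run, the config value is the only switch needed.

from src.utils.audio_manager_utils import AudioManagerConfig

# "elevenlabs" is now a valid TTSProvider value alongside the default "openai".
config = AudioManagerConfig(tts_provider="elevenlabs")
print(config.tts_provider)    # elevenlabs
print(config.temp_audio_dir)  # /tmp/audiocast
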
src/utils/decorators.py (new file, 36 additions)
@@ -0,0 +1,36 @@
import asyncio
from functools import wraps
from time import time


def process_time():
    """Print process execution time for a given function"""

    def decorator(func):
        if asyncio.iscoroutinefunction(func):

            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                start_time = time()
                response = await func(*args, **kwargs)

                time_diff = f"{(time() - start_time):.2f}s"
                print(f"Execution time for {func.__name__}: {time_diff}")

                return response

            return async_wrapper

        @wraps(func)
        def wrapper(*args, **kwargs):
            start_time = time()
            response = func(*args, **kwargs)

            time_diff = f"{(time() - start_time):.2f}s"
            print(f"Execution time for {func.__name__}: {time_diff}")

            return response

        return wrapper

    return decorator
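
A small usage sketch of the new decorator (not part of the diff); slow_sum is a hypothetical function and the printed timing is illustrative.

from src.utils.decorators import process_time


@process_time()
def slow_sum(n: int) -> int:
    # Hypothetical example; async callables are timed the same way.
    return sum(range(n))


slow_sum(1_000_000)
# Prints something like: Execution time for slow_sum: 0.03s
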
src/utils/generate_speech_utils.py (new file, 102 additions)
@@ -0,0 +1,102 @@
from dataclasses import dataclass
from io import BytesIO
from typing import Dict, List, Literal

from src.services.elevenlabs_client import get_elevenlabs_client
from src.services.openai_client import get_openai
from src.utils.decorators import process_time

TTSProvider = Literal["openai", "elevenlabs"]

OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"]
openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"]

ElevenLabsVoice = Literal[
    "Adam", "Sarah", "Laura", "Charlie", "George", "Charlotte", "Liam"
]
elevenlabs_voices: List[ElevenLabsVoice] = [
    "Adam",
    "Sarah",
    "Laura",
    "Charlie",
    "George",
    "Charlotte",
    "Liam",
]

elevenlabs_voice_to_id: Dict[ElevenLabsVoice, str] = {
    "Adam": "pNInz6obpgDQGcFmaJgB",
    "Sarah": "EXAVITQu4vr4xnSDxMaL",
    "Laura": "FGY2WhTYpPnrIDTdsKH5",
    "Charlie": "IKne3meq5aSn9XLyUdCD",
    "George": "JBFqnCBsd6RMkjVDRZzb",
    "Charlotte": "XB0fDUnXU5powFXDhCwa",
    "Liam": "TX3LPaxmHKxFdv7VOQHJ",
}


@dataclass
class SpeechJob:
    content: str
    voice: OpenaiVoice | ElevenLabsVoice
    output_file: str
    tag: str
    index: int


class GenerateSpeech:
    provider: TTSProvider

    def __init__(self, provider: TTSProvider):
        self.provider = provider

    def run(self, job: SpeechJob):
        """Generate speech using the specified provider"""
        try:
            if self.provider == "elevenlabs":
                content = self.__use_elevenlabs(job)
            else:
                content = self.__use_openai(job)

            with open(job.output_file, "wb") as file:
                file.write(content)

            print(f"Generated speech for tag {job.tag} at index {job.index}")
            return job.output_file
        except Exception as e:
            print(f"Failed to generate speech for tag: {job.tag}. Error: {str(e)}")
            return ""

    @process_time()
    def __use_openai(self, job: SpeechJob):
        if job.voice not in openai_voices:
            raise ValueError("Wrong voice specification for openai tts")

        response = get_openai().audio.speech.create(
            input=job.content, model="tts-1-hd", voice=job.voice
        )
        return response.content

    @process_time()
    def __use_elevenlabs(self, job: SpeechJob):
        if job.voice not in elevenlabs_voices:
            raise ValueError("Wrong voice specification for elevenlabs tts")
        # response = get_elevenlabs_client().text_to_speech.convert(
        #     model_id="eleven_turbo_v2_5",  # use the turbo model for low latency
        #     text=job.content,
        #     voice_id=elevenlabs_voice_to_id[job.voice],
        #     output_format="mp3_22050_32",
        # )
        response = get_elevenlabs_client().generate(
            model="eleven_multilingual_v2",
            text=job.content,
            voice=job.voice,
        )

        buffer = BytesIO()
        for chunk in response:
            if chunk:
                buffer.write(chunk)

        buffer.seek(0)
        return buffer.getvalue()
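
A minimal end-to-end sketch of the new GenerateSpeech class (not part of the diff), using only names defined in this file; the job fields are placeholder values, since real jobs come from AudioManagerSpeechGenerator._prepare_speech_jobs.

from src.utils.generate_speech_utils import GenerateSpeech, SpeechJob

# Placeholder job; in the app, content, tag, and index come from _prepare_speech_jobs.
job = SpeechJob(
    content="Welcome to the show.",
    voice="Sarah",
    output_file="/tmp/audiocast/segment_0.mp3",
    tag="Speaker1",
    index=0,
)

# Returns the output file path on success, or "" if generation failed.
result = GenerateSpeech(provider="elevenlabs").run(job)
print(result)
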