-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add firestore_sdk and session_manager * save user chats on firestore * pass down session_id for a deterministic workflow * handle conversion of chat object to/fro a dict * remove references to langchain * reuse a previously downloaded audiofile if it's processable * render audiocast metadata on share page * cleanup * temp remove audio_enchancement * sanitize audiocast transcript * add elevenlabs client * add __text_to_speech_elevenlabs; cleanup * use dry in text_to_speech * only lint on python versions 3.11 and 3.12 * add write permission to deploy job for marocchino/sticky-pull-request-comment * use eleven_multilingual_v2 model for improved stability, accuracy and quality
- Loading branch information
1 parent
82db371
commit e8b0f4f
Showing
11 changed files
with
219 additions
and
64 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ asyncio | |
|
||
openai | ||
anthropic | ||
elevenlabs | ||
|
||
pyperclip | ||
python-multipart | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
from elevenlabs.client import ElevenLabs | ||
|
||
from src.env_var import ELEVENLABS_API_KEY | ||
|
||
# Built once at import time; get_elevenlabs_client() hands out this instance.
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
|
||
|
||
def get_elevenlabs_client():
    """Return the module-level ``client`` instance created at import time."""
    return client
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import asyncio | ||
from functools import wraps | ||
from time import time | ||
|
||
|
||
def process_time():
    """Build a decorator that prints how long each call to a function takes.

    The returned decorator supports both plain functions and coroutine
    functions. The wrapped callable's return value is passed through
    unchanged, and its metadata (name, docstring, ...) is preserved via
    ``functools.wraps``. The timing line is printed only when the call
    returns normally; if the wrapped callable raises, the exception
    propagates and nothing is printed.

    Returns:
        A decorator taking the function to time and returning a wrapper.
    """
    # Imported here so the block is self-contained. perf_counter() is a
    # monotonic, high-resolution clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time() differences can.
    from time import perf_counter

    def _report(name, started):
        """Print the time elapsed since ``started`` for the function ``name``."""
        elapsed = f"{(perf_counter() - started):.2f}s"
        print(f"Execution time for {name}: {elapsed}")

    def decorator(func):
        # Coroutine functions need an async wrapper so the awaited work is
        # what gets timed, not merely the creation of the coroutine object.
        if asyncio.iscoroutinefunction(func):

            @wraps(func)
            async def async_wrapper(*args, **kwargs):
                started = perf_counter()
                response = await func(*args, **kwargs)
                _report(func.__name__, started)
                return response

            return async_wrapper

        @wraps(func)
        def wrapper(*args, **kwargs):
            started = perf_counter()
            response = func(*args, **kwargs)
            _report(func.__name__, started)
            return response

        return wrapper

    return decorator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
from dataclasses import dataclass | ||
from io import BytesIO | ||
from typing import Dict, List, Literal | ||
|
||
from src.services.elevenlabs_client import get_elevenlabs_client | ||
from src.services.openai_client import get_openai | ||
from src.utils.decorators import process_time | ||
|
||
# Providers that GenerateSpeech knows how to dispatch to.
TTSProvider = Literal["openai", "elevenlabs"]

# Voice names accepted when synthesizing with OpenAI.
OpenaiVoice = Literal["onyx", "shimmer", "echo", "nova", "alloy"]
openai_voices: List[OpenaiVoice] = ["onyx", "shimmer", "echo", "nova", "alloy"]

# Voice names accepted when synthesizing with ElevenLabs.
ElevenLabsVoice = Literal[
    "Adam",
    "Sarah",
    "Laura",
    "Charlie",
    "George",
    "Charlotte",
    "Liam",
]

# ElevenLabs voice name -> voice id.
elevenlabs_voice_to_id: Dict[ElevenLabsVoice, str] = {
    "Adam": "pNInz6obpgDQGcFmaJgB",
    "Sarah": "EXAVITQu4vr4xnSDxMaL",
    "Laura": "FGY2WhTYpPnrIDTdsKH5",
    "Charlie": "IKne3meq5aSn9XLyUdCD",
    "George": "JBFqnCBsd6RMkjVDRZzb",
    "Charlotte": "XB0fDUnXU5powFXDhCwa",
    "Liam": "TX3LPaxmHKxFdv7VOQHJ",
}

# Same names, same order as the mapping above (dicts keep insertion order).
elevenlabs_voices: List[ElevenLabsVoice] = list(elevenlabs_voice_to_id)
|
||
|
||
@dataclass
class SpeechJob:
    """One text-to-speech request, as consumed by ``GenerateSpeech.run``."""

    # Text sent to the provider for synthesis.
    content: str
    # Voice name; checked against the selected provider's voice list.
    voice: OpenaiVoice | ElevenLabsVoice
    # Path the generated audio bytes are written to.
    output_file: str
    # Label included in the success and failure log lines.
    tag: str
    # Position reported in the success log line.
    index: int
|
||
|
||
class GenerateSpeech:
    """Synthesize a ``SpeechJob`` to an audio file with a chosen TTS provider."""

    provider: TTSProvider

    def __init__(self, provider: TTSProvider):
        self.provider = provider

    def run(self, job: SpeechJob):
        """Generate audio for ``job`` and write it to ``job.output_file``.

        Any provider other than ``"elevenlabs"`` is routed to OpenAI.

        Returns:
            ``job.output_file`` on success. On any exception the failure is
            printed and an empty string is returned instead of raising.
        """
        try:
            synthesize = (
                self.__use_elevenlabs
                if self.provider == "elevenlabs"
                else self.__use_openai
            )
            audio = synthesize(job)

            with open(job.output_file, "wb") as out:
                out.write(audio)

            print(f"Generated speech for tag {job.tag} at index {job.index}")
            return job.output_file
        except Exception as err:
            print(f"Failed to generate speech for tag: {job.tag}. Error: {err!s}")
            return ""

    @process_time()
    def __use_openai(self, job: SpeechJob):
        """Return the audio bytes OpenAI produces for ``job``.

        Raises:
            ValueError: if ``job.voice`` is not listed in ``openai_voices``.
        """
        if job.voice not in openai_voices:
            raise ValueError("Wrong voice specification for openai tts")

        speech = get_openai().audio.speech.create(
            model="tts-1-hd",
            voice=job.voice,
            input=job.content,
        )
        return speech.content

    @process_time()
    def __use_elevenlabs(self, job: SpeechJob):
        """Return the audio bytes ElevenLabs produces for ``job``.

        Raises:
            ValueError: if ``job.voice`` is not listed in ``elevenlabs_voices``.
        """
        if job.voice not in elevenlabs_voices:
            raise ValueError("Wrong voice specification for elevenlabs tts")

        # NOTE: the voice is passed by name here. An earlier variant called
        # text_to_speech.convert() with model_id="eleven_turbo_v2_5" (lower
        # latency), voice_id=elevenlabs_voice_to_id[job.voice] and
        # output_format="mp3_22050_32".
        stream = get_elevenlabs_client().generate(
            text=job.content,
            voice=job.voice,
            model="eleven_multilingual_v2",
        )

        # The result is consumed chunk by chunk; empty chunks are skipped.
        return b"".join(piece for piece in stream if piece)
Oops, something went wrong.