Skip to content

Commit

Permalink
SDK Regeneration
Browse files Browse the repository at this point in the history
  • Loading branch information
fern-bot committed Mar 12, 2024
1 parent a3eaeee commit 81925c1
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 156 deletions.
2 changes: 1 addition & 1 deletion .fernignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

src/elevenlabs/client.py
src/elevenlabs/play.py
src/elevenlabs/tts.py
src/elevenlabs/realitme_tts.py

.github/workflows/ci.yml

Expand Down
68 changes: 35 additions & 33 deletions src/elevenlabs/client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import typing
import json
import re
import os
import httpx

from typing import Iterator, Optional, Union, \
Optional, AsyncIterator
Expand All @@ -10,6 +12,8 @@
from .core import RequestOptions, ApiError
from .types import Voice, VoiceSettings, \
PronunciationDictionaryVersionLocator, Model
from .environment import ElevenLabsEnvironment
from .realitme_tts import RealtimeTextToSpeechClient


DEFAULT_VOICE = Voice(
Expand Down Expand Up @@ -58,6 +62,23 @@ class ElevenLabs(BaseElevenLabs):
api_key="YOUR_API_KEY",
)
"""
def __init__(
    self,
    *,
    base_url: typing.Optional[str] = None,
    environment: ElevenLabsEnvironment = ElevenLabsEnvironment.PRODUCTION,
    api_key: typing.Optional[str] = os.getenv("ELEVEN_API_KEY"),
    timeout: typing.Optional[float] = 60,
    httpx_client: typing.Optional[httpx.Client] = None
):
    """Construct the client, then swap in the realtime text-to-speech client.

    All parameters are forwarded unchanged to ``BaseElevenLabs.__init__``:

    - base_url: explicit API base URL (takes precedence over ``environment``
      in the generated base client — TODO confirm against BaseElevenLabs).
    - environment: predefined API environment; defaults to production.
    - api_key: API key. NOTE: the ``ELEVEN_API_KEY`` fallback is a default
      argument value, so the environment variable is read once at import
      time, not per call.
    - timeout: request timeout in seconds; defaults to 60.
    - httpx_client: optional pre-configured ``httpx.Client`` to reuse.
    """
    super().__init__(
        base_url=base_url,
        environment=environment,
        api_key=api_key,
        timeout=timeout,
        httpx_client=httpx_client
    )
    # Replace the generated text_to_speech sub-client with the realtime
    # subclass so callers (e.g. `generate`) gain `convert_realtime` for
    # websocket input streaming while keeping the plain HTTP methods.
    self.text_to_speech = RealtimeTextToSpeechClient(client_wrapper=self._client_wrapper)

def clone(
self,
Expand Down Expand Up @@ -187,16 +208,12 @@ def generate(
model_id=model_id
)
elif isinstance(text, Iterator):
# TODO(fern): Update to WebSocket
return self.text_to_speech.convert_as_stream(
return self.text_to_speech.convert_realtime( # type: ignore
voice_id=voice_id,
model_id=model_id,
voice_settings=voice_settings,
optimize_streaming_latency=optimize_streaming_latency,
output_format=output_format,
text=text,
request_options=request_options,
pronunciation_dictionary_locators=pronunciation_dictionary_locators
model_id=model_id
)
else:
raise ApiError(body="Text is neither a string nor an iterator.")
Expand Down Expand Up @@ -280,7 +297,7 @@ async def clone(
async def generate(
self,
*,
text: Union[str, Iterator[str]],
text: str,
voice: Union[VoiceId, VoiceName, Voice] = DEFAULT_VOICE,
voice_settings: typing.Optional[VoiceSettings] = DEFAULT_VOICE.settings,
model: Union[ModelId, Model] = "eleven_monolingual_v1",
Expand All @@ -300,7 +317,7 @@ async def generate(
calls to the `text_to_speech.convert` and`text_to_speech.convert_as_stream`
functions.
- text: Union[str, Iterator[str]]. The string or stream of strings that will get converted into speech.
- text: str. The string that will get converted into speech. The Async client does not support streaming.
- voice: str. A voice id, name, or voice response. Defaults to the Rachel voice.
Expand Down Expand Up @@ -363,31 +380,16 @@ async def generate(
model_id = model.model_id

if stream:
if isinstance(text, str):
return self.text_to_speech.convert_as_stream(
voice_id=voice_id,
model_id=model_id,
voice_settings=voice_settings,
optimize_streaming_latency=optimize_streaming_latency,
output_format=output_format,
text=text,
request_options=request_options,
pronunciation_dictionary_locators=pronunciation_dictionary_locators
)
elif isinstance(text, Iterator):
# TODO(fern): Update to WebSocket
return self.text_to_speech.convert_as_stream(
voice_id=voice_id,
model_id=model_id,
voice_settings=voice_settings,
optimize_streaming_latency=optimize_streaming_latency,
output_format=output_format,
text=text,
request_options=request_options,
pronunciation_dictionary_locators=pronunciation_dictionary_locators
)
else:
raise ApiError(body="Text is neither a string nor an iterator.")
return self.text_to_speech.convert_as_stream(
voice_id=voice_id,
model_id=model_id,
voice_settings=voice_settings,
optimize_streaming_latency=optimize_streaming_latency,
output_format=output_format,
text=text,
request_options=request_options,
pronunciation_dictionary_locators=pronunciation_dictionary_locators
)
else:
if not isinstance(text, str):
raise ApiError(body="Text must be a string when stream is False.")
Expand Down
132 changes: 132 additions & 0 deletions src/elevenlabs/realitme_tts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# This file was auto-generated by Fern from our API Definition.

import typing
import urllib.parse
import json
import base64
import websockets

from websockets.sync.client import connect

from .core.api_error import ApiError
from .core.jsonable_encoder import jsonable_encoder
from .core.remove_none_from_dict import remove_none_from_dict
from .core.request_options import RequestOptions
from .types.voice_settings import VoiceSettings
from .text_to_speech.client import TextToSpeechClient

# this is used as the default value for optional parameters
OMIT = typing.cast(typing.Any, ...)


def text_chunker(chunks: typing.Iterator[str]) -> typing.Iterator[str]:
    """Re-chunk an input text stream at natural break characters.

    Accumulates incoming fragments and emits them at punctuation/space
    boundaries so each emitted chunk ends with a space (the websocket
    endpoint expects space-terminated text messages). The final flush
    always appends one trailing space to whatever remains.
    """
    break_chars = (".", ",", "?", "!", ";", ":", "—", "-", "(", ")", "[", "]", "}", " ")

    def spaced(piece: str) -> str:
        # Guarantee a single trailing space on an emitted chunk.
        return piece if piece.endswith(" ") else piece + " "

    pending = ""
    for piece in chunks:
        if pending.endswith(break_chars):
            # Buffered text already ends on a boundary: flush it whole.
            yield spaced(pending)
            pending = piece
        elif piece.startswith(break_chars):
            # Incoming fragment opens with a boundary: flush buffer plus
            # that boundary character, keep the remainder buffered.
            yield spaced(pending + piece[0])
            pending = piece[1:]
        else:
            pending = pending + piece
    if pending != "":
        # Final flush appends a space unconditionally (matches the
        # endpoint's expectation of space-terminated messages).
        yield pending + " "


class RealtimeTextToSpeechClient(TextToSpeechClient):
    """Text-to-speech client extended with realtime input streaming.

    Adds `convert_realtime`, which streams text to the
    ``/v1/text-to-speech/{voice_id}/stream-input`` websocket endpoint and
    yields audio chunks as they are produced.
    """

    def convert_realtime(
        self,
        voice_id: str,
        *,
        text: typing.Iterator[str],
        model_id: typing.Optional[str] = OMIT,
        voice_settings: typing.Optional[VoiceSettings] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[bytes]:
        """
        Converts a stream of text into speech using a voice of your choice and yields audio chunks.
        Parameters:
        - voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices.
        - text: typing.Iterator[str]. The text that will get converted into speech.
        - model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property.
        - voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored settings for the given voice. They are applied only on the given request.
        - request_options: typing.Optional[RequestOptions]. Request-specific configuration.
        ---
        from elevenlabs import VoiceSettings
        from elevenlabs.client import ElevenLabs
        def get_text() -> typing.Iterator[str]:
            yield "Hello, how are you?"
            yield "I am fine, thank you."
        client = ElevenLabs(
            api_key="YOUR_API_KEY",
        )
        client.text_to_speech.convert_realtime(
            voice_id="string",
            text=get_text(),
            model_id="string",
            voice_settings=VoiceSettings(
                stability=1.1,
                similarity_boost=1.1,
                style=1.1,
                use_speaker_boost=True,
            ),
        )
        """
        url = (
            "wss://api.elevenlabs.io/"
            f"v1/text-to-speech/{jsonable_encoder(voice_id)}/stream-input"
        )
        # BUGFIX: model_id defaults to the OMIT sentinel (Ellipsis); the old
        # f-string rendered "?model_id=Ellipsis" when the caller omitted it.
        if model_id is not None and model_id is not OMIT:
            url += f"?model_id={model_id}"

        with connect(
            url,
            additional_headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            )
        ) as socket:
            # Prime the stream: the protocol expects an initial single-space
            # message carrying voice settings and generation config.
            # BUGFIX: OMIT (Ellipsis) is truthy, so the old
            # `voice_settings.dict() if voice_settings else None` raised
            # AttributeError when voice_settings was omitted.
            has_settings = voice_settings is not None and voice_settings is not OMIT
            socket.send(json.dumps(
                dict(
                    text=" ",
                    try_trigger_generation=True,
                    voice_settings=voice_settings.dict() if has_settings else None,
                    generation_config=dict(
                        chunk_length_schedule=[50],
                    ),
                )
            ))

            # BUGFIX: initialize so the ConnectionClosed handler below cannot
            # hit a NameError when `text` yields nothing.
            data: typing.Dict[str, typing.Any] = {}

            for text_chunk in text_chunker(text):
                socket.send(json.dumps(dict(text=text_chunk, try_trigger_generation=True)))
                # Opportunistically drain audio that is already available;
                # the near-zero timeout keeps sending non-blocking.
                try:
                    data = json.loads(socket.recv(1e-4))
                    if "audio" in data and data["audio"]:
                        yield base64.b64decode(data["audio"])  # type: ignore
                except TimeoutError:
                    pass

            # An empty text message signals end-of-input to the server.
            socket.send(json.dumps(dict(text="")))

            # Drain remaining audio until the server closes the connection.
            while True:
                try:
                    data = json.loads(socket.recv())
                    if "audio" in data and data["audio"]:
                        yield base64.b64decode(data["audio"])  # type: ignore
                except websockets.exceptions.ConnectionClosed:
                    # A "message" field on the last payload carries a
                    # server-side error description.
                    if "message" in data:
                        raise ApiError(body=data)
                    break

122 changes: 0 additions & 122 deletions src/elevenlabs/tts.py

This file was deleted.

0 comments on commit 81925c1

Please sign in to comment.