Showing 4 changed files with 168 additions and 156 deletions.
@@ -0,0 +1,132 @@
# This file was auto-generated by Fern from our API Definition.

import typing
import urllib.parse
import json
import base64
import websockets

from websockets.sync.client import connect

from .core.api_error import ApiError
from .core.jsonable_encoder import jsonable_encoder
from .core.remove_none_from_dict import remove_none_from_dict
from .core.request_options import RequestOptions
from .types.voice_settings import VoiceSettings
from .text_to_speech.client import TextToSpeechClient

# this is used as the default value for optional parameters
OMIT = typing.cast(typing.Any, ...)
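

# Illustrative sketch (not part of the generated client): the OMIT sentinel lets the
# client tell "argument not passed" apart from an explicit None. The helper below is
# hypothetical and only demonstrates the pattern.
def _omit_demo(voice_settings: typing.Optional[dict] = OMIT) -> typing.Dict[str, typing.Any]:
    payload: typing.Dict[str, typing.Any] = {"text": " "}
    if voice_settings is not OMIT:
        # Only include the key when the caller actually passed something, so an
        # explicit None can still be sent to clear a stored value.
        payload["voice_settings"] = voice_settings
    return payload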


def text_chunker(chunks: typing.Iterator[str]) -> typing.Iterator[str]:
    """Used during input streaming to chunk text blocks and set last char to space"""
    splitters = (".", ",", "?", "!", ";", ":", "—", "-", "(", ")", "[", "]", "}", " ")
    buffer = ""
    for text in chunks:
        if buffer.endswith(splitters):
            yield buffer if buffer.endswith(" ") else buffer + " "
            buffer = text
        elif text.startswith(splitters):
            output = buffer + text[0]
            yield output if output.endswith(" ") else output + " "
            buffer = text[1:]
        else:
            buffer += text
    if buffer != "":
        yield buffer + " "
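

# Illustrative sketch (not part of the generated client): feeding text_chunker a small
# token stream shows how it re-buffers input so every yielded chunk ends in a space,
# with breaks falling at punctuation or whitespace.
def _text_chunker_demo() -> None:
    tokens = iter(["Hello, how", " are you?", " I am fine."])
    for chunk in text_chunker(tokens):
        print(repr(chunk))  # 'Hello, how ', 'are you? ', ' I am fine. '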


class RealtimeTextToSpeechClient(TextToSpeechClient):

    def convert_realtime(
        self,
        voice_id: str,
        *,
        text: typing.Iterator[str],
        model_id: typing.Optional[str] = OMIT,
        voice_settings: typing.Optional[VoiceSettings] = OMIT,
        request_options: typing.Optional[RequestOptions] = None,
    ) -> typing.Iterator[bytes]:
""" | ||
Converts text into speech using a voice of your choice and returns audio. | ||
Parameters: | ||
- voice_id: str. Voice ID to be used, you can use https://api.elevenlabs.io/v1/voices to list all the available voices. | ||
- text: typing.Iterator[str]. The text that will get converted into speech. | ||
- model_id: typing.Optional[str]. Identifier of the model that will be used, you can query them using GET /v1/models. The model needs to have support for text to speech, you can check this using the can_do_text_to_speech property. | ||
- voice_settings: typing.Optional[VoiceSettings]. Voice settings overriding stored setttings for the given voice. They are applied only on the given request. | ||
- request_options: typing.Optional[RequestOptions]. Request-specific configuration. | ||
--- | ||
from elevenlabs import PronunciationDictionaryVersionLocator, VoiceSettings | ||
from elevenlabs.client import ElevenLabs | ||
def get_text() -> typing.Iterator[str]: | ||
yield "Hello, how are you?" | ||
yield "I am fine, thank you." | ||
client = ElevenLabs( | ||
api_key="YOUR_API_KEY", | ||
) | ||
client.text_to_speech.convert_realtime( | ||
voice_id="string", | ||
text=get_text(), | ||
model_id="string", | ||
voice_settings=VoiceSettings( | ||
stability=1.1, | ||
similarity_boost=1.1, | ||
style=1.1, | ||
use_speaker_boost=True, | ||
), | ||
) | ||
""" | ||
        with connect(
            urllib.parse.urljoin(
                "wss://api.elevenlabs.io/",
                # Only append model_id when the caller provided one; the OMIT sentinel
                # would otherwise serialize as "Ellipsis" in the query string.
                f"v1/text-to-speech/{jsonable_encoder(voice_id)}/stream-input"
                + (f"?model_id={model_id}" if model_id is not OMIT else ""),
            ),
            additional_headers=jsonable_encoder(
                remove_none_from_dict(
                    {
                        **self._client_wrapper.get_headers(),
                        **(request_options.get("additional_headers", {}) if request_options is not None else {}),
                    }
                )
            ),
        ) as socket:
            # The first message opens the stream with a single leading space and carries
            # the voice settings and generation config for the session.
            socket.send(
                json.dumps(
                    dict(
                        text=" ",
                        try_trigger_generation=True,
                        # OMIT (argument not passed) and None both fall back to the
                        # voice's stored settings.
                        voice_settings=voice_settings.dict()
                        if voice_settings is not OMIT and voice_settings
                        else None,
                        generation_config=dict(
                            chunk_length_schedule=[50],
                        ),
                    )
                )
            )

            for text_chunk in text_chunker(text):
                data = dict(text=text_chunk, try_trigger_generation=True)
                socket.send(json.dumps(data))
                try:
                    # Poll for any audio that is already available; the tiny timeout
                    # keeps this from blocking the outgoing text stream.
                    data = json.loads(socket.recv(1e-4))
                    if "audio" in data and data["audio"]:
                        yield base64.b64decode(data["audio"])  # type: ignore
                except TimeoutError:
                    pass

            # An empty string tells the server that no more text will be sent.
            socket.send(json.dumps(dict(text="")))

            # Drain the remaining audio until the server closes the connection; if the
            # last message received carried an error payload ("message"), surface it.
            while True:
                try:
                    data = json.loads(socket.recv())
                    if "audio" in data and data["audio"]:
                        yield base64.b64decode(data["audio"])  # type: ignore
                except websockets.exceptions.ConnectionClosed:
                    if "message" in data:
                        raise ApiError(body=data)
                    break
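

# Illustrative usage sketch (not part of the generated client): end-to-end consumption
# of convert_realtime, assuming the client wiring shown in the docstring example above.
# The voice id, model id and output path are placeholders.
def _convert_realtime_demo() -> None:
    # Local import so this sketch does not affect module import order.
    from elevenlabs.client import ElevenLabs

    client = ElevenLabs(api_key="YOUR_API_KEY")
    audio_stream = client.text_to_speech.convert_realtime(
        voice_id="YOUR_VOICE_ID",
        text=iter(["Hello, how are you? ", "I am fine, thank you."]),
        model_id="YOUR_MODEL_ID",
    )
    # convert_realtime yields raw audio bytes as they arrive over the websocket.
    with open("output_audio", "wb") as f:
        for chunk in audio_stream:
            f.write(chunk)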