This repository has been archived by the owner on Aug 6, 2024. It is now read-only.
forked from Rubiksman78/MonikA.I
-
Notifications
You must be signed in to change notification settings - Fork 0
/
tts_api.py
153 lines (133 loc) · 6.96 KB
/
tts_api.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
from pathlib import Path
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
class TTS:
    """Python interface for loading and running released 🐸TTS models.

    ``self.manager`` downloads models; ``self.synthesizer`` stays ``None`` until
    ``load_model_by_name`` has been called (directly or through ``__init__``).

    TODO: Add voice conversion and Capacitron support.
    """

    def __init__(self, model_name: str = None, progress_bar: bool = True, gpu: bool = False):
        """🐸TTS python interface that allows to load and use the released models.

        Example with a multi-speaker model:
            >>> from TTS.api import TTS
            >>> tts = TTS(TTS.list_models()[0])
            >>> wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
            >>> tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")

        Example with a single-speaker model:
            >>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
            >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav")

        Args:
            model_name (str, optional): Model name to load. You can list models by ``tts.models``. Defaults to None,
                in which case no model is loaded and ``self.synthesizer`` stays ``None``.
            progress_bar (bool, optional): Whether to print a progress bar while downloading a model. Defaults to True.
            gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False.
        """
        self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False)
        self.synthesizer = None
        if model_name:
            self.load_model_by_name(model_name, gpu)

    def _model_attr(self, name: str):
        """Return attribute ``name`` of the loaded TTS model, or ``None``.

        ``None`` is returned both when no model has been loaded yet and when the
        loaded model does not define the attribute, so callers need one check only.
        """
        if self.synthesizer is None:
            return None
        return getattr(self.synthesizer.tts_model, name, None)

    @property
    def models(self):
        """Models reported by ``self.manager.list_tts_models()``."""
        return self.manager.list_tts_models()

    @property
    def is_multi_speaker(self) -> bool:
        """Whether the loaded model has more than one speaker.

        Returns ``False`` when no model is loaded or the model has no (truthy)
        ``speaker_manager``.
        """
        speaker_manager = self._model_attr("speaker_manager")
        if speaker_manager:
            return speaker_manager.num_speakers > 1
        return False

    @property
    def is_multi_lingual(self) -> bool:
        """Whether the loaded model has more than one language.

        Returns ``False`` when no model is loaded or the model has no (truthy)
        ``language_manager``.
        """
        language_manager = self._model_attr("language_manager")
        if language_manager:
            return language_manager.num_languages > 1
        return False

    @property
    def speakers(self):
        """Speaker names of a multi-speaker model, otherwise ``None``."""
        if not self.is_multi_speaker:
            return None
        return self.synthesizer.tts_model.speaker_manager.speaker_names

    @property
    def languages(self):
        """Language names of a multi-lingual model, otherwise ``None``."""
        if not self.is_multi_lingual:
            return None
        return self.synthesizer.tts_model.language_manager.language_names

    @staticmethod
    def get_models_file_path() -> Path:
        """Return the path of ``.models.json`` located next to this module."""
        return Path(__file__).parent / ".models.json"

    @staticmethod
    def list_models():
        """List the TTS models without needing a ``TTS`` instance."""
        manager = ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False)
        return manager.list_tts_models()

    def download_model_by_name(self, model_name: str):
        """Download a model and, if it declares one, its default vocoder.

        Args:
            model_name (str): Name passed to ``self.manager.download_model``.

        Returns:
            tuple: ``(model_path, config_path, vocoder_path, vocoder_config_path)``.
            Both vocoder entries are ``None`` when the model entry has no
            ``"default_vocoder"`` key or its value is ``None``.
        """
        model_path, config_path, model_item = self.manager.download_model(model_name)
        # A missing key is treated like an explicit None instead of raising KeyError.
        # NOTE(review): assumes model_item is a dict-like object supporting .get().
        vocoder_name = model_item.get("default_vocoder")
        if vocoder_name is None:
            return model_path, config_path, None, None
        vocoder_path, vocoder_config_path, _ = self.manager.download_model(vocoder_name)
        return model_path, config_path, vocoder_path, vocoder_config_path

    def load_model_by_name(self, model_name: str, gpu: bool = False):
        """Download ``model_name`` and build ``self.synthesizer`` from it.

        Args:
            model_name (str): Model name to download and load.
            gpu (bool, optional): Passed to the synthesizer as ``use_cuda``. Defaults to False.
        """
        model_path, config_path, vocoder_path, vocoder_config_path = self.download_model_by_name(model_name)
        # init synthesizer
        # None values are fetched from the model
        self.synthesizer = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=config_path,
            tts_speakers_file=None,
            tts_languages_file=None,
            vocoder_checkpoint=vocoder_path,
            vocoder_config=vocoder_config_path,
            encoder_checkpoint=None,
            encoder_config=None,
            use_cuda=gpu,
        )

    def tts(self, text: str, speaker: str = None, language: str = None):
        """Convert text to speech.

        A model must have been loaded first; ``self.synthesizer`` is ``None`` otherwise.

        Args:
            text (str):
                Input text to synthesize.
            speaker (str, optional):
                Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
                `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
            language (str, optional):
                Language code for multi-lingual models. You can check whether loaded model is multi-lingual
                `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.

        Returns:
            The synthesized audio exactly as returned by ``self.synthesizer.tts``.
        """
        wav = self.synthesizer.tts(
            text=text,
            speaker_name=speaker,
            language_name=language,
            speaker_wav=None,
            reference_wav=None,
            style_wav=None,
            style_text=None,
            reference_speaker_name=None,
        )
        return wav

    def tts_to_file(self, text: str, speaker: str = None, language: str = None, file_path: str = "output.wav"):
        """Convert text to speech and save the result to a file.

        Args:
            text (str):
                Input text to synthesize.
            speaker (str, optional):
                Speaker name for multi-speaker. You can check whether loaded model is multi-speaker by
                `tts.is_multi_speaker` and list speakers by `tts.speakers`. Defaults to None.
            language (str, optional):
                Language code for multi-lingual models. You can check whether loaded model is multi-lingual
                `tts.is_multi_lingual` and list available languages by `tts.languages`. Defaults to None.
            file_path (str, optional):
                Output file path. Defaults to "output.wav".
        """
        wav = self.tts(text=text, speaker=speaker, language=language)
        self.synthesizer.save_wav(wav=wav, path=file_path)
class my_TTS(TTS):
    """``TTS`` subclass whose synthesis methods accept reference/style inputs.

    ``tts`` and ``tts_to_file`` take ``speaker_wav``, ``reference_wav``,
    ``style_wav``, ``style_text`` and ``reference_speaker_name`` from the caller
    and hand them to ``self.synthesizer.tts`` unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Forward all positional and keyword arguments to ``TTS.__init__``."""
        super().__init__(*args, **kwargs)

    def tts(
        self,
        text: str,
        speaker: str = None,
        language: str = None,
        speaker_wav: str = None,
        reference_wav: str = None,
        style_wav: str = None,
        style_text: str = None,
        reference_speaker_name: str = None,
    ):
        """Synthesize text to speech.

        Args:
            text (str): Input text to synthesize.
            speaker (str, optional): Passed on as ``speaker_name``. Defaults to None.
            language (str, optional): Passed on as ``language_name``. Defaults to None.
            speaker_wav, reference_wav, style_wav, style_text, reference_speaker_name
                (str, optional): Passed on under the same names; their meaning is
                defined by ``self.synthesizer.tts``. All default to None.

        Returns:
            Whatever ``self.synthesizer.tts`` returns for these arguments.
        """
        synthesis_kwargs = {
            "text": text,
            "speaker_name": speaker,
            "language_name": language,
            "speaker_wav": speaker_wav,
            "reference_wav": reference_wav,
            "style_wav": style_wav,
            "style_text": style_text,
            "reference_speaker_name": reference_speaker_name,
        }
        return self.synthesizer.tts(**synthesis_kwargs)

    def tts_to_file(
        self,
        text: str,
        speaker: str = None,
        language: str = None,
        file_path: str = "output.wav",
        speaker_wav: str = None,
        reference_wav: str = None,
        style_wav: str = None,
        style_text: str = None,
        reference_speaker_name: str = None,
    ):
        """Synthesize text via ``self.tts`` and save the audio to ``file_path``.

        Accepts the same synthesis arguments as ``tts`` plus ``file_path``
        (defaults to "output.wav"), which is given to ``self.synthesizer.save_wav``.
        """
        audio = self.tts(
            text=text,
            speaker=speaker,
            language=language,
            speaker_wav=speaker_wav,
            reference_wav=reference_wav,
            style_wav=style_wav,
            style_text=style_text,
            reference_speaker_name=reference_speaker_name,
        )
        self.synthesizer.save_wav(wav=audio, path=file_path)