-
Notifications
You must be signed in to change notification settings - Fork 1
/
soundgenerator.py
36 lines (30 loc) · 1.36 KB
/
soundgenerator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import librosa
from ethan_preprocess import MinMaxNormaliser
class SoundGenerator:
    """Generate audio signals from spectrograms reconstructed by a VAE.

    Spectrograms are passed through ``vae.reconstruct``, the min-max
    normalisation is undone, the values are converted from decibels to
    linear amplitude, and a waveform is estimated with Griffin-Lim.
    """

    def __init__(self, vae, hop_length):
        """Store the model and the hop length used for spectrogram inversion.

        Args:
            vae: Object exposing ``reconstruct(spectrograms)`` which returns
                a ``(generated_spectrograms, latent_representations)`` pair.
            hop_length: Hop length forwarded to librosa when inverting the
                spectrograms. Presumably the same value that was used when
                the spectrograms were extracted -- TODO confirm against the
                preprocessing pipeline.
        """
        self.vae = vae
        self.hop_length = hop_length
        # NOTE(review): (0, 1) is presumably the range the training data was
        # normalised to -- confirm against the preprocessing configuration.
        self._min_max_normaliser = MinMaxNormaliser(0, 1)

    def generate(self, spectrograms, min_max_values):
        """Reconstruct spectrograms with the VAE and convert them to audio.

        Args:
            spectrograms: Batch handed unchanged to ``vae.reconstruct``.
            min_max_values: One mapping per spectrogram with the ``"min"``
                and ``"max"`` entries needed for denormalisation.

        Returns:
            A ``(signals, latent_representations)`` tuple, where ``signals``
            is the list built by ``convert_spectrograms_to_audio`` and
            ``latent_representations`` is whatever ``vae.reconstruct``
            returned as its second value.
        """
        generated_spectrograms, latent_representations = \
            self.vae.reconstruct(spectrograms)
        signals = self.convert_spectrograms_to_audio(
            generated_spectrograms, min_max_values)
        return signals, latent_representations

    def convert_spectrograms_to_audio(self, spectrograms, min_max_values):
        """Convert normalised log spectrograms into audio signals.

        Args:
            spectrograms: Iterable of arrays indexable as ``[:, :, 0]``
                (three axes; presumably a trailing single-channel axis --
                TODO confirm).
            min_max_values: Iterable of mappings with ``"min"`` and ``"max"``
                entries, paired positionally with ``spectrograms``.

        Returns:
            List with one signal per processed spectrogram. The inputs are
            paired with ``zip``, so if the two iterables differ in length the
            surplus items of the longer one are ignored.
        """
        signals = []
        for spectrogram, min_max_value in zip(spectrograms, min_max_values):
            # keep only index 0 of the last axis -> 2-D log spectrogram
            log_spectrogram = spectrogram[:, :, 0]
            # apply denormalisation
            denorm_log_spec = self._min_max_normaliser.denormalise(
                log_spectrogram, min_max_value["min"], min_max_value["max"])
            # log spectrogram (dB) -> linear-amplitude spectrogram
            spec = librosa.db_to_amplitude(denorm_log_spec)
            # Apply Griffin-Lim. Only magnitudes are available here, so the
            # phase has to be estimated; a plain inverse STFT on the
            # magnitudes would treat them as a zero-phase signal.
            # Requires a librosa release that provides ``griffinlim``.
            signal = librosa.griffinlim(spec, hop_length=self.hop_length)
            # append signal to "signals"
            signals.append(signal)
        return signals