From 60c65be81f8cf81b6b1e714b1fe9333ffd05dd9c Mon Sep 17 00:00:00 2001 From: MarkusHammer <107761433+MarkusHammer@users.noreply.github.com> Date: Sun, 11 Jun 2023 21:13:41 -0500 Subject: [PATCH] Housekeeping and small error fixes I noticed that the only thing holding me back from running this with certain versions of the modules used here was some argument-related issues! I did my best to track all of them down here. I also appended *.pt files to the .gitignore to ensure no models are accidentally uploaded --- .gitignore | 1 + encoder/audio.py | 4 ++-- synthesizer/audio.py | 2 +- synthesizer/inference.py | 2 +- synthesizer/preprocess.py | 4 ++-- toolbox/ui.py | 2 +- vocoder/audio.py | 4 ++-- 7 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 9401d2ebb..88dfb2be4 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ encoder/saved_models/* synthesizer/saved_models/* vocoder/saved_models/* +*.pt diff --git a/encoder/audio.py b/encoder/audio.py index 799aa8354..0bc28eed4 100644 --- a/encoder/audio.py +++ b/encoder/audio.py @@ -56,8 +56,8 @@ def wav_to_mel_spectrogram(wav): Note: this not a log-mel spectrogram. 
""" frames = librosa.feature.melspectrogram( - wav, - sampling_rate, + y = wav, + sr = sampling_rate, n_fft=int(sampling_rate * mel_window_length / 1000), hop_length=int(sampling_rate * mel_window_step / 1000), n_mels=mel_n_channels diff --git a/synthesizer/audio.py b/synthesizer/audio.py index 83dc96c63..f05a80ccb 100644 --- a/synthesizer/audio.py +++ b/synthesizer/audio.py @@ -167,7 +167,7 @@ def _mel_to_linear(mel_spectrogram, hparams): def _build_mel_basis(hparams): assert hparams.fmax <= hparams.sample_rate // 2 - return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels, + return librosa.filters.mel(sr = hparams.sample_rate, n_fft = hparams.n_fft, n_mels=hparams.num_mels, fmin=hparams.fmin, fmax=hparams.fmax) def _amp_to_db(x, hparams): diff --git a/synthesizer/inference.py b/synthesizer/inference.py index 340bb1fa1..ef245fb59 100644 --- a/synthesizer/inference.py +++ b/synthesizer/inference.py @@ -133,7 +133,7 @@ def load_preprocess_wav(fpath): Loads and preprocesses an audio file under the same conditions the audio files were used to train the synthesizer. 
""" - wav = librosa.load(str(fpath), hparams.sample_rate)[0] + wav = librosa.load(str(fpath), sr = hparams.sample_rate)[0] if hparams.rescale: wav = wav / np.abs(wav).max() * hparams.rescaling_max return wav diff --git a/synthesizer/preprocess.py b/synthesizer/preprocess.py index 08c58c40f..9a6babc5d 100644 --- a/synthesizer/preprocess.py +++ b/synthesizer/preprocess.py @@ -62,7 +62,7 @@ def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams, for wav_fpath in wav_fpaths: # Load the audio waveform - wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate) + wav, _ = librosa.load(str(wav_fpath), sr = hparams.sample_rate) if hparams.rescale: wav = wav / np.abs(wav).max() * hparams.rescaling_max @@ -111,7 +111,7 @@ def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams, def split_on_silences(wav_fpath, words, end_times, hparams): # Load the audio waveform - wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate) + wav, _ = librosa.load(str(wav_fpath), sr = hparams.sample_rate) if hparams.rescale: wav = wav / np.abs(wav).max() * hparams.rescaling_max diff --git a/toolbox/ui.py b/toolbox/ui.py index e33998ba9..a8813ebc4 100644 --- a/toolbox/ui.py +++ b/toolbox/ui.py @@ -381,7 +381,7 @@ def log(self, line, mode="newline"): self.app.processEvents() def set_loading(self, value, maximum=1): - self.loading_bar.setValue(value * 100) + self.loading_bar.setValue(int(value * 100)) self.loading_bar.setMaximum(maximum * 100) self.loading_bar.setTextVisible(value != 0) self.app.processEvents() diff --git a/vocoder/audio.py b/vocoder/audio.py index 116396261..7be7627a1 100644 --- a/vocoder/audio.py +++ b/vocoder/audio.py @@ -17,7 +17,7 @@ def float_2_label(x, bits) : def load_wav(path) : - return librosa.load(str(path), sr=hp.sample_rate)[0] + return librosa.load(str(path), sr = hp.sample_rate)[0] def save_wav(x, path) : @@ -50,7 +50,7 @@ def linear_to_mel(spectrogram): def build_mel_basis(): - return 
librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin) + return librosa.filters.mel(sr = hp.sample_rate, n_fft = hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin) def normalize(S):