From 60c65be81f8cf81b6b1e714b1fe9333ffd05dd9c Mon Sep 17 00:00:00 2001
From: MarkusHammer <107761433+MarkusHammer@users.noreply.github.com>
Date: Sun, 11 Jun 2023 21:13:41 -0500
Subject: [PATCH] Housekeeping and small error fixes

I noticed that the only things holding me back from running this with certain versions of the modules used here were some argument-related issues! I did my best to track all of them down here.

I also added the `*.pt` pattern to the .gitignore to ensure no models are accidentally uploaded.
---
 .gitignore                | 1 +
 encoder/audio.py          | 4 ++--
 synthesizer/audio.py      | 2 +-
 synthesizer/inference.py  | 2 +-
 synthesizer/preprocess.py | 4 ++--
 toolbox/ui.py             | 2 +-
 vocoder/audio.py          | 4 ++--
 7 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9401d2ebb..88dfb2be4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@
 encoder/saved_models/*
 synthesizer/saved_models/*
 vocoder/saved_models/*
+*.pt
diff --git a/encoder/audio.py b/encoder/audio.py
index 799aa8354..0bc28eed4 100644
--- a/encoder/audio.py
+++ b/encoder/audio.py
@@ -56,8 +56,8 @@ def wav_to_mel_spectrogram(wav):
     Note: this not a log-mel spectrogram.
     """
     frames = librosa.feature.melspectrogram(
-        wav,
-        sampling_rate,
+        y = wav,
+        sr = sampling_rate,
         n_fft=int(sampling_rate * mel_window_length / 1000),
         hop_length=int(sampling_rate * mel_window_step / 1000),
         n_mels=mel_n_channels
diff --git a/synthesizer/audio.py b/synthesizer/audio.py
index 83dc96c63..f05a80ccb 100644
--- a/synthesizer/audio.py
+++ b/synthesizer/audio.py
@@ -167,7 +167,7 @@ def _mel_to_linear(mel_spectrogram, hparams):
 
 def _build_mel_basis(hparams):
     assert hparams.fmax <= hparams.sample_rate // 2
-    return librosa.filters.mel(hparams.sample_rate, hparams.n_fft, n_mels=hparams.num_mels,
+    return librosa.filters.mel(sr = hparams.sample_rate, n_fft = hparams.n_fft, n_mels=hparams.num_mels,
                                fmin=hparams.fmin, fmax=hparams.fmax)
 
 def _amp_to_db(x, hparams):
diff --git a/synthesizer/inference.py b/synthesizer/inference.py
index 340bb1fa1..ef245fb59 100644
--- a/synthesizer/inference.py
+++ b/synthesizer/inference.py
@@ -133,7 +133,7 @@ def load_preprocess_wav(fpath):
         Loads and preprocesses an audio file under the same conditions the audio files were used to
         train the synthesizer.
         """
-        wav = librosa.load(str(fpath), hparams.sample_rate)[0]
+        wav = librosa.load(str(fpath), sr = hparams.sample_rate)[0]
         if hparams.rescale:
             wav = wav / np.abs(wav).max() * hparams.rescaling_max
         return wav
diff --git a/synthesizer/preprocess.py b/synthesizer/preprocess.py
index 08c58c40f..9a6babc5d 100644
--- a/synthesizer/preprocess.py
+++ b/synthesizer/preprocess.py
@@ -62,7 +62,7 @@ def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams,
 
                 for wav_fpath in wav_fpaths:
                     # Load the audio waveform
-                    wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate)
+                    wav, _ = librosa.load(str(wav_fpath), sr = hparams.sample_rate)
                     if hparams.rescale:
                         wav = wav / np.abs(wav).max() * hparams.rescaling_max
 
@@ -111,7 +111,7 @@ def preprocess_speaker(speaker_dir, out_dir: Path, skip_existing: bool, hparams,
 
 def split_on_silences(wav_fpath, words, end_times, hparams):
     # Load the audio waveform
-    wav, _ = librosa.load(str(wav_fpath), hparams.sample_rate)
+    wav, _ = librosa.load(str(wav_fpath), sr = hparams.sample_rate)
     if hparams.rescale:
         wav = wav / np.abs(wav).max() * hparams.rescaling_max
 
diff --git a/toolbox/ui.py b/toolbox/ui.py
index e33998ba9..a8813ebc4 100644
--- a/toolbox/ui.py
+++ b/toolbox/ui.py
@@ -381,7 +381,7 @@ def log(self, line, mode="newline"):
         self.app.processEvents()
 
     def set_loading(self, value, maximum=1):
-        self.loading_bar.setValue(value * 100)
+        self.loading_bar.setValue(int(value * 100))
         self.loading_bar.setMaximum(maximum * 100)
         self.loading_bar.setTextVisible(value != 0)
         self.app.processEvents()
diff --git a/vocoder/audio.py b/vocoder/audio.py
index 116396261..7be7627a1 100644
--- a/vocoder/audio.py
+++ b/vocoder/audio.py
@@ -17,7 +17,7 @@ def float_2_label(x, bits) :
 
 
 def load_wav(path) :
-    return librosa.load(str(path), sr=hp.sample_rate)[0]
+    return librosa.load(str(path), sr = hp.sample_rate)[0]
 
 
 def save_wav(x, path) :
@@ -50,7 +50,7 @@ def linear_to_mel(spectrogram):
 
 
 def build_mel_basis():
-    return librosa.filters.mel(hp.sample_rate, hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin)
+    return librosa.filters.mel(sr = hp.sample_rate, n_fft = hp.n_fft, n_mels=hp.num_mels, fmin=hp.fmin)
 
 
 def normalize(S):