From 545761a3596b66c8afdbaa2915282e59ca4bad8f Mon Sep 17 00:00:00 2001 From: jhj0517 <97279763+jhj0517@users.noreply.github.com> Date: Thu, 12 Sep 2024 20:00:46 +0900 Subject: [PATCH 1/6] Add tab --- app.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/app.py b/app.py index 69a8dc9..c204495 100644 --- a/app.py +++ b/app.py @@ -197,6 +197,7 @@ def launch(self): translation_params = self.default_params["translation"] deepl_params = translation_params["deepl"] nllb_params = translation_params["nllb"] + uvr_params = self.default_params["bgm_separation"] with self.app: with gr.Row(): @@ -341,6 +342,29 @@ def launch(self): btn_openfolder.click(fn=lambda: self.open_folder(os.path.join(self.args.output_dir, "translations")), inputs=None, outputs=None) + with gr.TabItem("BGM Separation"): + files_audio = gr.Files(type="filepath", label="Upload Audio Files to separate background music") + dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device, + choices=self.whisper_inf.music_separator.available_devices) + dd_uvr_model_size = gr.Dropdown(label="Model", value=uvr_params["model_size"], + choices=self.whisper_inf.music_separator.available_models) + nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0) + cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", + value=uvr_params["save_file"]) + btn_run = gr.Button("SEPARATE BACKGROUND MUSIC", variant="primary") + with gr.Row(): + with gr.Column(scale=8): + ad_instrumental = gr.Audio(label="Instrumental") + ad_vocals = gr.Audio(label="Vocals") + with gr.Column(scale=1): + btn_openfolder = gr.Button('📂', scale=1) + + btn_run.click(fn=self.whisper_inf.music_separator.separate, + inputs=[files_audio, dd_uvr_device, dd_uvr_model_size, nb_uvr_segment_size, cb_uvr_save_file], + outputs=[ad_instrumental, ad_vocals]) + btn_openfolder.click(inputs=None, + outputs=None, + fn=lambda: self.open_folder(os.path.join(self.args.output_dir, "uvr"))) # Launch the app with optional gradio settings args = self.args From eab33e770f663a56d99345f7c0080b039a556f74 Mon Sep 17 00:00:00 2001 From: jhj0517 <97279763+jhj0517@users.noreply.github.com> Date: Fri, 13 Sep 2024 19:29:58 +0900 Subject: [PATCH 2/6] Update default `save_file` value to False --- configs/default_parameters.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/default_parameters.yaml b/configs/default_parameters.yaml index 85fd093..34244e3 100644 --- a/configs/default_parameters.yaml +++ b/configs/default_parameters.yaml @@ -48,7 +48,7 @@ bgm_separation: is_separate_bgm: false model_size: "UVR-MDX-NET-Inst_HQ_4" segment_size: 256 - save_file: true + save_file: false translation: deepl: From 8c8001e178f589ec1a95632e2cae63030a2e8580 Mon Sep 17 00:00:00 2001 From: jhj0517 <97279763+jhj0517@users.noreply.github.com> Date: Fri, 13 Sep 2024 22:03:33 +0900 Subject: [PATCH 3/6] Add dedicated bgm separation app --- app.py | 37 ++++++++++++++--------- modules/uvr/music_separator.py | 52 +++++++++++++++++++++++++++++---- modules/whisper/whisper_base.py | 2 +- 3 files changed, 72 insertions(+), 19 deletions(-) diff --git a/app.py b/app.py index 54a2812..1b6bc4b 100644 --- a/app.py +++ b/app.py @@ -343,6 +343,7 @@ def launch(self): btn_openfolder.click(fn=lambda: self.open_folder(os.path.join(self.args.output_dir, "translations")), inputs=None, outputs=None) + with gr.TabItem("BGM Separation"): files_audio = gr.Files(type="filepath", label="Upload Audio Files to separate background music") dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device, @@ -351,21 +352,30 @@ def launch(self): choices=self.whisper_inf.music_separator.available_models) nb_uvr_segment_size = gr.Number(label="Segment Size", value=uvr_params["segment_size"], precision=0) cb_uvr_save_file = gr.Checkbox(label="Save separated files to output", - value=uvr_params["save_file"]) + value=True, visible=False) btn_run = gr.Button("SEPARATE BACKGROUND MUSIC", variant="primary") - with gr.Row(): - with gr.Column(scale=8): - ad_instrumental = gr.Audio(label="Instrumental") - ad_vocals = gr.Audio(label="Vocals") - with gr.Column(scale=1): - btn_openfolder = gr.Button('📂', scale=1) + with gr.Column(): + with gr.Row(): + ad_instrumental = gr.Audio(label="Instrumental", scale=8) + btn_open_instrumental_folder = gr.Button('📂', scale=1) + with gr.Row(): + ad_vocals = gr.Audio(label="Vocals", scale=8) + btn_open_vocals_folder = gr.Button('📂', scale=1) - btn_run.click(fn=self.whisper_inf.music_separator.separate, - inputs=[files_audio, dd_uvr_device, dd_uvr_model_size, nb_uvr_segment_size, cb_uvr_save_file], + btn_run.click(fn=self.whisper_inf.music_separator.separate_files, + inputs=[files_audio, dd_uvr_model_size, dd_uvr_device, nb_uvr_segment_size, + cb_uvr_save_file], outputs=[ad_instrumental, ad_vocals]) - btn_openfolder.click(inputs=None, - outputs=None, - fn=lambda: self.open_folder(os.path.join(self.args.output_dir, "uvr"))) + btn_open_instrumental_folder.click(inputs=None, + outputs=None, + fn=lambda: self.open_folder(os.path.join( + self.args.output_dir, "UVR", "instrumental" + ))) + btn_open_vocals_folder.click(inputs=None, + outputs=None, + fn=lambda: self.open_folder(os.path.join( + self.args.output_dir, "UVR", "vocals" + ))) # Launch the app with optional gradio settings args = self.args @@ -386,7 +396,8 @@ def open_folder(folder_path: str): if os.path.exists(folder_path): os.system(f"start {folder_path}") else: - print(f"The folder {folder_path} does not exist.") + os.makedirs(folder_path, exist_ok=True) + print(f"The directory path {folder_path} has newly created.") @staticmethod def on_change_models(model_size: str): diff --git a/modules/uvr/music_separator.py b/modules/uvr/music_separator.py index b90dcb2..f41d468 100644 --- a/modules/uvr/music_separator.py +++ b/modules/uvr/music_separator.py @@ -1,4 +1,4 @@ -from typing import Optional, Union +from typing import Optional, Union, List, Dict import numpy as np import torchaudio import soundfile as sf @@ -9,6 +9,8 @@ from datetime import datetime from uvr.models import MDX, Demucs, VrNetwork, MDXC +from modules.utils.paths import DEFAULT_PARAMETERS_CONFIG_PATH +from modules.utils.files_manager import load_yaml, save_yaml class MusicSeparator: @@ -61,7 +63,7 @@ def separate(self, device: Optional[str] = None, segment_size: int = 256, save_file: bool = False, - progress: gr.Progress = gr.Progress()) -> tuple[np.ndarray, np.ndarray]: + progress: gr.Progress = gr.Progress()) -> tuple[np.ndarray, np.ndarray, List]: """ Separate the background music from the audio. @@ -74,7 +76,10 @@ def separate(self, progress (gr.Progress): Gradio progress indicator. Returns: - tuple[np.ndarray, np.ndarray]: Instrumental and vocals numpy arrays. + A Tuple of + np.ndarray: Instrumental numpy arrays. + np.ndarray: Vocals numpy arrays. + file_paths: List of file paths where the separated audio is saved. Return empty when save_file is False. """ if isinstance(audio, str): self.audio_info = torchaudio.info(audio) @@ -108,13 +113,37 @@ def separate(self, result = self.model(audio) instrumental, vocals = result["instrumental"].T, result["vocals"].T + file_paths = [] if save_file: instrumental_output_path = os.path.join(self.output_dir, "instrumental", f"{output_filename}-instrumental{ext}") vocals_output_path = os.path.join(self.output_dir, "vocals", f"{output_filename}-vocals{ext}") sf.write(instrumental_output_path, instrumental, sample_rate, format="WAV") sf.write(vocals_output_path, vocals, sample_rate, format="WAV") - - return instrumental, vocals + file_paths += [instrumental_output_path, vocals_output_path] + + return instrumental, vocals, file_paths + + def separate_files(self, + files: List, + model_name: str, + device: Optional[str] = None, + segment_size: int = 256, + save_file: bool = True, + progress: gr.Progress = gr.Progress()) -> List[str]: + """Separate the background music from the audio files. Returns only last Instrumental and vocals file paths + to display into gr.Audio()""" + self.cache_parameters(model_size=model_name, segment_size=segment_size) + + for file_path in files: + instrumental, vocals, file_paths = self.separate( + audio=file_path, + model_name=model_name, + device=device, + segment_size=segment_size, + save_file=save_file, + progress=progress + ) + return file_paths @staticmethod def get_device(): @@ -130,3 +159,16 @@ def offload(self): torch.cuda.empty_cache() gc.collect() self.audio_info = None + + @staticmethod + def cache_parameters(model_size: str, + segment_size: int): + cached_params = load_yaml(DEFAULT_PARAMETERS_CONFIG_PATH) + cached_uvr_params = cached_params["bgm_separation"] + uvr_params_to_cache = { + "model_size": model_size, + "segment_size": segment_size + } + cached_uvr_params = {**cached_uvr_params, **uvr_params_to_cache} + cached_params = {**cached_params, **cached_uvr_params} + save_yaml(cached_params, DEFAULT_PARAMETERS_CONFIG_PATH) diff --git a/modules/whisper/whisper_base.py b/modules/whisper/whisper_base.py index a8ce0d6..ec7b8c9 100644 --- a/modules/whisper/whisper_base.py +++ b/modules/whisper/whisper_base.py @@ -111,7 +111,7 @@ def run(self, params.lang = language_code_dict[params.lang] if params.is_bgm_separate: - music, audio = self.music_separator.separate( + music, audio, _ = self.music_separator.separate( audio=audio, model_name=params.uvr_model_size, device=params.uvr_device, From 0dced67c1167e32be3cea87f538e6841df30b065 Mon Sep 17 00:00:00 2001 From: jhj0517 <97279763+jhj0517@users.noreply.github.com> Date: Sat, 14 Sep 2024 13:44:12 +0900 Subject: [PATCH 4/6] Fix caching keys --- modules/uvr/music_separator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/uvr/music_separator.py b/modules/uvr/music_separator.py index db2ed0f..c920d73 100644 --- a/modules/uvr/music_separator.py +++ b/modules/uvr/music_separator.py @@ -174,5 +174,5 @@ def cache_parameters(model_size: str, "segment_size": segment_size } cached_uvr_params = {**cached_uvr_params, **uvr_params_to_cache} - cached_params = {**cached_params, **cached_uvr_params} + cached_params["bgm_separation"] = cached_uvr_params save_yaml(cached_params, DEFAULT_PARAMETERS_CONFIG_PATH) From 6e5f6d9675cfd7950475c3e210f36d41bbeacd31 Mon Sep 17 00:00:00 2001 From: jhj0517 <97279763+jhj0517@users.noreply.github.com> Date: Sat, 14 Sep 2024 14:59:52 +0900 Subject: [PATCH 5/6] Fix output filename --- modules/uvr/music_separator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/uvr/music_separator.py b/modules/uvr/music_separator.py index c920d73..2f4766b 100644 --- a/modules/uvr/music_separator.py +++ b/modules/uvr/music_separator.py @@ -83,6 +83,7 @@ def separate(self, """ if isinstance(audio, str): output_filename, ext = os.path.basename(audio), ".wav" + output_filename, orig_ext = os.path.splitext(output_filename) if is_video(audio): audio = load_audio(audio) From a8c9eff4fa3c1e525984d6e484baeb680fbebd38 Mon Sep 17 00:00:00 2001 From: jhj0517 <97279763+jhj0517@users.noreply.github.com> Date: Sat, 14 Sep 2024 15:00:57 +0900 Subject: [PATCH 6/6] Update label --- app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app.py b/app.py index 1b6bc4b..aeb7229 100644 --- a/app.py +++ b/app.py @@ -131,7 +131,7 @@ def create_whisper_parameters(self): nb_batch_size = gr.Number(label="Batch Size", value=whisper_params["batch_size"], precision=0) with gr.Accordion("BGM Separation", open=False): - cb_bgm_separation = gr.Checkbox(label="Enable BGM separation", value=uvr_params["is_separate_bgm"], + cb_bgm_separation = gr.Checkbox(label="Enable BGM Separation Filter", value=uvr_params["is_separate_bgm"], interactive=True) dd_uvr_device = gr.Dropdown(label="Device", value=self.whisper_inf.music_separator.device, choices=self.whisper_inf.music_separator.available_devices)