From 29babf079a2d1ed974b0ecca0ae7690d5fe4b1c9 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Wed, 13 Oct 2021 07:08:35 +0000 Subject: [PATCH] select device when synthesize --- docs/src/demo.rst | 11 ++++++++- .../baker/synthesize_from_wav.py | 3 +++ .../fastspeech2/aishell3/synthesize_e2e.py | 3 +++ examples/fastspeech2/baker/synthesize_e2e.py | 3 +++ .../fastspeech2/ljspeech/synthesize_e2e.py | 3 +++ examples/fastspeech2/synthesize.py | 3 +++ examples/speedyspeech/baker/conf/default.yaml | 24 +++++++++---------- examples/speedyspeech/baker/synthesize_e2e.py | 3 +++ examples/speedyspeech/synthesize.py | 3 +++ .../ljspeech/synthesize_e2e.py | 3 +++ examples/transformer_tts/synthesize.py | 3 +++ 11 files changed, 49 insertions(+), 13 deletions(-) diff --git a/docs/src/demo.rst b/docs/src/demo.rst index d7375130..a6f18f88 100644 --- a/docs/src/demo.rst +++ b/docs/src/demo.rst @@ -27,7 +27,10 @@ Analysis/synthesis Audio samples generated from ground-truth spectrograms with a vocoder. .. raw:: html - + + LJSpeech(English) +
+
@@ -101,6 +104,12 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
GT
+ +
+
+ CSMSC(Chinese) +
+
diff --git a/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py b/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py index 948a2870..f20f0a72 100644 --- a/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py +++ b/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py @@ -105,6 +105,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + config = get_cfg_default() if args.config: config.merge_from_file(args.config) diff --git a/examples/fastspeech2/aishell3/synthesize_e2e.py b/examples/fastspeech2/aishell3/synthesize_e2e.py index 7137a74e..13f59bfd 100644 --- a/examples/fastspeech2/aishell3/synthesize_e2e.py +++ b/examples/fastspeech2/aishell3/synthesize_e2e.py @@ -149,6 +149,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + with open(args.fastspeech2_config) as f: fastspeech2_config = CfgNode(yaml.safe_load(f)) with open(args.pwg_config) as f: diff --git a/examples/fastspeech2/baker/synthesize_e2e.py b/examples/fastspeech2/baker/synthesize_e2e.py index ec52c00d..75e06edf 100644 --- a/examples/fastspeech2/baker/synthesize_e2e.py +++ b/examples/fastspeech2/baker/synthesize_e2e.py @@ -140,6 +140,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + with open(args.fastspeech2_config) as f: fastspeech2_config = CfgNode(yaml.safe_load(f)) with open(args.pwg_config) as f: diff --git a/examples/fastspeech2/ljspeech/synthesize_e2e.py b/examples/fastspeech2/ljspeech/synthesize_e2e.py index 93ed91c0..01ccbbc4 100644 --- a/examples/fastspeech2/ljspeech/synthesize_e2e.py +++ b/examples/fastspeech2/ljspeech/synthesize_e2e.py @@ -148,6 +148,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + with open(args.fastspeech2_config) as f: fastspeech2_config = CfgNode(yaml.safe_load(f)) with open(args.pwg_config) as f: diff --git a/examples/fastspeech2/synthesize.py b/examples/fastspeech2/synthesize.py index aee7bcee..c1329f8d 100644 --- a/examples/fastspeech2/synthesize.py +++ b/examples/fastspeech2/synthesize.py @@ -149,6 +149,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + with open(args.fastspeech2_config) as f: fastspeech2_config = CfgNode(yaml.safe_load(f)) with open(args.pwg_config) as f: diff --git a/examples/speedyspeech/baker/conf/default.yaml b/examples/speedyspeech/baker/conf/default.yaml index 8be96aaa..333a6d35 100644 --- a/examples/speedyspeech/baker/conf/default.yaml +++ b/examples/speedyspeech/baker/conf/default.yaml @@ -1,20 +1,20 @@ ########################################################### # FEATURE EXTRACTION SETTING # ########################################################### -fs: 24000 # Sampling rate. -n_fft: 2048 # FFT size. -n_shift: 300 # Hop size. -win_length: 1200 # Window length. - # If set to null, it will be the same as fft_size. -window: "hann" # Window function. -n_mels: 80 # Number of mel basis. -fmin: 80 # Minimum freq in mel basis calculation. -fmax: 7600 # Maximum frequency in mel basis calculation. +fs: 24000 # Sampling rate. +n_fft: 2048 # FFT size. +n_shift: 300 # Hop size. +win_length: 1200 # Window length. + # If set to null, it will be the same as fft_size. +window: "hann" # Window function. +n_mels: 80 # Number of mel basis. +fmin: 80 # Minimum freq in mel basis calculation. +fmax: 7600 # Maximum frequency in mel basis calculation. ########################################################### # DATA SETTING # ########################################################### -batch_size: 256 +batch_size: 32 num_workers: 4 ########################################################### @@ -35,13 +35,13 @@ model: ########################################################### optimizer: optim: adam # optimizer type - learning_rate: 0.008 # learning rate + learning_rate: 0.001 # learning rate max_grad_norm: 5.0 ########################################################### # TRAINING SETTING # ########################################################### -max_epoch: 600 +max_epoch: 300 num_snapshots: 5 ########################################################### diff --git a/examples/speedyspeech/baker/synthesize_e2e.py b/examples/speedyspeech/baker/synthesize_e2e.py index 8e8dad30..6dd3abd1 100644 --- a/examples/speedyspeech/baker/synthesize_e2e.py +++ b/examples/speedyspeech/baker/synthesize_e2e.py @@ -175,6 +175,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose") args, _ = parser.parse_known_args() + + paddle.set_device(args.device) + with open(args.speedyspeech_config) as f: speedyspeech_config = CfgNode(yaml.safe_load(f)) with open(args.pwg_config) as f: diff --git a/examples/speedyspeech/synthesize.py b/examples/speedyspeech/synthesize.py index 550f9405..4225071e 100644 --- a/examples/speedyspeech/synthesize.py +++ b/examples/speedyspeech/synthesize.py @@ -159,6 +159,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose") args, _ = parser.parse_known_args() + + paddle.set_device(args.device) + with open(args.speedyspeech_config) as f: speedyspeech_config = CfgNode(yaml.safe_load(f)) with open(args.pwg_config) as f: diff --git a/examples/transformer_tts/ljspeech/synthesize_e2e.py b/examples/transformer_tts/ljspeech/synthesize_e2e.py index a5566e4b..7ca75a8f 100644 --- a/examples/transformer_tts/ljspeech/synthesize_e2e.py +++ b/examples/transformer_tts/ljspeech/synthesize_e2e.py @@ -140,6 +140,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + with open(args.transformer_tts_config) as f: transformer_tts_config = CfgNode(yaml.safe_load(f)) with open(args.waveflow_config) as f: diff --git a/examples/transformer_tts/synthesize.py b/examples/transformer_tts/synthesize.py index 7c2210a6..21614c53 100644 --- a/examples/transformer_tts/synthesize.py +++ b/examples/transformer_tts/synthesize.py @@ -121,6 +121,9 @@ def main(): parser.add_argument("--verbose", type=int, default=1, help="verbose.") args = parser.parse_args() + + paddle.set_device(args.device) + with open(args.transformer_tts_config) as f: transformer_tts_config = CfgNode(yaml.safe_load(f)) with open(args.waveflow_config) as f: