diff --git a/docs/src/demo.rst b/docs/src/demo.rst
index d7375130..a6f18f88 100644
--- a/docs/src/demo.rst
+++ b/docs/src/demo.rst
@@ -27,7 +27,10 @@ Analysis/synthesis
Audio samples generated from ground-truth spectrograms with a vocoder.
.. raw:: html
-
+
+ LJSpeech(English)
+
+
GT |
@@ -101,6 +104,12 @@ Audio samples generated from ground-truth spectrograms with a vocoder.
+
+
+
+ CSMSC(Chinese)
+
+
diff --git a/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py b/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py
index 948a2870..f20f0a72 100644
--- a/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py
+++ b/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py
@@ -105,6 +105,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
config = get_cfg_default()
if args.config:
config.merge_from_file(args.config)
diff --git a/examples/fastspeech2/aishell3/synthesize_e2e.py b/examples/fastspeech2/aishell3/synthesize_e2e.py
index 7137a74e..13f59bfd 100644
--- a/examples/fastspeech2/aishell3/synthesize_e2e.py
+++ b/examples/fastspeech2/aishell3/synthesize_e2e.py
@@ -149,6 +149,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
with open(args.fastspeech2_config) as f:
fastspeech2_config = CfgNode(yaml.safe_load(f))
with open(args.pwg_config) as f:
diff --git a/examples/fastspeech2/baker/synthesize_e2e.py b/examples/fastspeech2/baker/synthesize_e2e.py
index ec52c00d..75e06edf 100644
--- a/examples/fastspeech2/baker/synthesize_e2e.py
+++ b/examples/fastspeech2/baker/synthesize_e2e.py
@@ -140,6 +140,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
with open(args.fastspeech2_config) as f:
fastspeech2_config = CfgNode(yaml.safe_load(f))
with open(args.pwg_config) as f:
diff --git a/examples/fastspeech2/ljspeech/synthesize_e2e.py b/examples/fastspeech2/ljspeech/synthesize_e2e.py
index 93ed91c0..01ccbbc4 100644
--- a/examples/fastspeech2/ljspeech/synthesize_e2e.py
+++ b/examples/fastspeech2/ljspeech/synthesize_e2e.py
@@ -148,6 +148,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
with open(args.fastspeech2_config) as f:
fastspeech2_config = CfgNode(yaml.safe_load(f))
with open(args.pwg_config) as f:
diff --git a/examples/fastspeech2/synthesize.py b/examples/fastspeech2/synthesize.py
index aee7bcee..c1329f8d 100644
--- a/examples/fastspeech2/synthesize.py
+++ b/examples/fastspeech2/synthesize.py
@@ -149,6 +149,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
with open(args.fastspeech2_config) as f:
fastspeech2_config = CfgNode(yaml.safe_load(f))
with open(args.pwg_config) as f:
diff --git a/examples/speedyspeech/baker/conf/default.yaml b/examples/speedyspeech/baker/conf/default.yaml
index 8be96aaa..333a6d35 100644
--- a/examples/speedyspeech/baker/conf/default.yaml
+++ b/examples/speedyspeech/baker/conf/default.yaml
@@ -1,20 +1,20 @@
###########################################################
# FEATURE EXTRACTION SETTING #
###########################################################
-fs: 24000 # Sampling rate.
-n_fft: 2048 # FFT size.
-n_shift: 300 # Hop size.
-win_length: 1200 # Window length.
- # If set to null, it will be the same as fft_size.
-window: "hann" # Window function.
-n_mels: 80 # Number of mel basis.
-fmin: 80 # Minimum freq in mel basis calculation.
-fmax: 7600 # Maximum frequency in mel basis calculation.
+fs: 24000 # Sampling rate.
+n_fft: 2048 # FFT size.
+n_shift: 300 # Hop size.
+win_length: 1200 # Window length.
+ # If set to null, it will be the same as n_fft.
+window: "hann" # Window function.
+n_mels: 80 # Number of mel basis.
+fmin: 80 # Minimum freq in mel basis calculation.
+fmax: 7600 # Maximum frequency in mel basis calculation.
###########################################################
# DATA SETTING #
###########################################################
-batch_size: 256
+batch_size: 32
num_workers: 4
###########################################################
@@ -35,13 +35,13 @@ model:
###########################################################
optimizer:
optim: adam # optimizer type
- learning_rate: 0.008 # learning rate
+ learning_rate: 0.001 # learning rate
max_grad_norm: 5.0
###########################################################
# TRAINING SETTING #
###########################################################
-max_epoch: 600
+max_epoch: 300
num_snapshots: 5
###########################################################
diff --git a/examples/speedyspeech/baker/synthesize_e2e.py b/examples/speedyspeech/baker/synthesize_e2e.py
index 8e8dad30..6dd3abd1 100644
--- a/examples/speedyspeech/baker/synthesize_e2e.py
+++ b/examples/speedyspeech/baker/synthesize_e2e.py
@@ -175,6 +175,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose")
args, _ = parser.parse_known_args()
+
+ paddle.set_device(args.device)
+
with open(args.speedyspeech_config) as f:
speedyspeech_config = CfgNode(yaml.safe_load(f))
with open(args.pwg_config) as f:
diff --git a/examples/speedyspeech/synthesize.py b/examples/speedyspeech/synthesize.py
index 550f9405..4225071e 100644
--- a/examples/speedyspeech/synthesize.py
+++ b/examples/speedyspeech/synthesize.py
@@ -159,6 +159,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose")
args, _ = parser.parse_known_args()
+
+ paddle.set_device(args.device)
+
with open(args.speedyspeech_config) as f:
speedyspeech_config = CfgNode(yaml.safe_load(f))
with open(args.pwg_config) as f:
diff --git a/examples/transformer_tts/ljspeech/synthesize_e2e.py b/examples/transformer_tts/ljspeech/synthesize_e2e.py
index a5566e4b..7ca75a8f 100644
--- a/examples/transformer_tts/ljspeech/synthesize_e2e.py
+++ b/examples/transformer_tts/ljspeech/synthesize_e2e.py
@@ -140,6 +140,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
with open(args.transformer_tts_config) as f:
transformer_tts_config = CfgNode(yaml.safe_load(f))
with open(args.waveflow_config) as f:
diff --git a/examples/transformer_tts/synthesize.py b/examples/transformer_tts/synthesize.py
index 7c2210a6..21614c53 100644
--- a/examples/transformer_tts/synthesize.py
+++ b/examples/transformer_tts/synthesize.py
@@ -121,6 +121,9 @@ def main():
parser.add_argument("--verbose", type=int, default=1, help="verbose.")
args = parser.parse_args()
+
+ paddle.set_device(args.device)
+
with open(args.transformer_tts_config) as f:
transformer_tts_config = CfgNode(yaml.safe_load(f))
with open(args.waveflow_config) as f: