From aacf81c3de8b7901e30b98cedd1351405773f2f7 Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 12 Oct 2021 11:13:21 +0000 Subject: [PATCH] add with no grad when inference --- .../parallelwave_gan/baker/synthesize_from_wav.py | 3 ++- examples/GANVocoder/parallelwave_gan/synthesize.py | 3 ++- examples/fastspeech2/aishell3/synthesize_e2e.py | 10 +++++----- examples/fastspeech2/baker/synthesize_e2e.py | 10 +++++----- examples/speedyspeech/baker/synthesize_e2e.py | 10 +++++----- examples/transformer_tts/ljspeech/synthesize_e2e.py | 11 ++++++----- examples/transformer_tts/synthesize.py | 1 - 7 files changed, 25 insertions(+), 23 deletions(-) diff --git a/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py b/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py index 16db38b4..948a2870 100644 --- a/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py +++ b/examples/GANVocoder/parallelwave_gan/baker/synthesize_from_wav.py @@ -76,7 +76,8 @@ def evaluate(args, config): # extract mel feats mel = mel_extractor.get_log_mel_fbank(wav) mel = paddle.to_tensor(mel) - gen_wav = pwg_inference(mel) + with paddle.no_grad(): + gen_wav = pwg_inference(mel) sf.write( str(output_dir / ("gen_" + utt_name)), gen_wav.numpy(), diff --git a/examples/GANVocoder/parallelwave_gan/synthesize.py b/examples/GANVocoder/parallelwave_gan/synthesize.py index 7c37e340..e57ddf88 100644 --- a/examples/GANVocoder/parallelwave_gan/synthesize.py +++ b/examples/GANVocoder/parallelwave_gan/synthesize.py @@ -80,7 +80,8 @@ def main(): mel = example['feats'] mel = paddle.to_tensor(mel) # (T, C) with timer() as t: - wav = generator.inference(c=mel) + with paddle.no_grad(): + wav = generator.inference(c=mel) wav = wav.numpy() N += wav.size T += t.elapse diff --git a/examples/fastspeech2/aishell3/synthesize_e2e.py b/examples/fastspeech2/aishell3/synthesize_e2e.py index bc7c2f24..7137a74e 100644 --- a/examples/fastspeech2/aishell3/synthesize_e2e.py +++ b/examples/fastspeech2/aishell3/synthesize_e2e.py @@ -97,11 +97,11 @@ def evaluate(args, fastspeech2_config, pwg_config): mel = fastspeech2_inference( part_phone_ids, spk_id=paddle.to_tensor(spk_id)) temp_wav = pwg_inference(mel) - if flags == 0: - wav = temp_wav - flags = 1 - else: - wav = paddle.concat([wav, temp_wav]) + if flags == 0: + wav = temp_wav + flags = 1 + else: + wav = paddle.concat([wav, temp_wav]) sf.write( str(output_dir / (str(spk_id) + "_" + utt_id + ".wav")), wav.numpy(), diff --git a/examples/fastspeech2/baker/synthesize_e2e.py b/examples/fastspeech2/baker/synthesize_e2e.py index 7b39ab6a..ec52c00d 100644 --- a/examples/fastspeech2/baker/synthesize_e2e.py +++ b/examples/fastspeech2/baker/synthesize_e2e.py @@ -87,11 +87,11 @@ def evaluate(args, fastspeech2_config, pwg_config): with paddle.no_grad(): mel = fastspeech2_inference(part_phone_ids) temp_wav = pwg_inference(mel) - if flags == 0: - wav = temp_wav - flags = 1 - else: - wav = paddle.concat([wav, temp_wav]) + if flags == 0: + wav = temp_wav + flags = 1 + else: + wav = paddle.concat([wav, temp_wav]) sf.write( str(output_dir / (utt_id + ".wav")), wav.numpy(), diff --git a/examples/speedyspeech/baker/synthesize_e2e.py b/examples/speedyspeech/baker/synthesize_e2e.py index f633aeab..8e8dad30 100644 --- a/examples/speedyspeech/baker/synthesize_e2e.py +++ b/examples/speedyspeech/baker/synthesize_e2e.py @@ -121,11 +121,11 @@ def evaluate(args, speedyspeech_config, pwg_config): with paddle.no_grad(): mel = speedyspeech_inference(part_phone_ids, part_tone_ids) temp_wav = pwg_inference(mel) - if flags == 0: - wav = temp_wav - flags = 1 - else: - wav = paddle.concat([wav, temp_wav]) + if flags == 0: + wav = temp_wav + flags = 1 + else: + wav = paddle.concat([wav, temp_wav]) sf.write( output_dir / (utt_id + ".wav"), wav.numpy(), diff --git a/examples/transformer_tts/ljspeech/synthesize_e2e.py b/examples/transformer_tts/ljspeech/synthesize_e2e.py index 534b6aa0..a5566e4b 100644 --- a/examples/transformer_tts/ljspeech/synthesize_e2e.py +++ b/examples/transformer_tts/ljspeech/synthesize_e2e.py @@ -89,11 +89,12 @@ def evaluate(args, acoustic_model_config, vocoder_config): phones = [phn for phn in phones if not phn.isspace()] phones = [phn if phn in phone_id_map else "," for phn in phones] phone_ids = [phone_id_map[phn] for phn in phones] - mel = transformer_tts_inference(paddle.to_tensor(phone_ids)) - # mel shape is (T, feats) and waveflow's input shape is (batch, feats, T) - mel = mel.unsqueeze(0).transpose([0, 2, 1]) - # wavflow's output shape is (B, T) - wav = vocoder.infer(mel)[0] + with paddle.no_grad(): + mel = transformer_tts_inference(paddle.to_tensor(phone_ids)) + # mel shape is (T, feats) and waveflow's input shape is (batch, feats, T) + mel = mel.unsqueeze(0).transpose([0, 2, 1]) + # wavflow's output shape is (B, T) + wav = vocoder.infer(mel)[0] sf.write( str(output_dir / (utt_id + ".wav")), diff --git a/examples/transformer_tts/synthesize.py b/examples/transformer_tts/synthesize.py index c71b4065..7c2210a6 100644 --- a/examples/transformer_tts/synthesize.py +++ b/examples/transformer_tts/synthesize.py @@ -81,7 +81,6 @@ def evaluate(args, acoustic_model_config, vocoder_config): mel = mel.unsqueeze(0).transpose([0, 2, 1]) # wavflow's output shape is (B, T) wav = vocoder.infer(mel)[0] - print("wav:", wav) sf.write( str(output_dir / (utt_id + ".wav")),