Skip to content
This repository has been archived by the owner on Sep 11, 2022. It is now read-only.

Commit

Permalink
fix the bug of duration
Browse files Browse the repository at this point in the history
  • Loading branch information
Jackwaterveg committed Sep 30, 2021
1 parent 092e108 commit 240f3e7
Show file tree
Hide file tree
Showing 11 changed files with 140 additions and 8 deletions.
4 changes: 3 additions & 1 deletion parakeet/models/speedyspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,9 @@ def inference(self, text, tones=None):
k = paddle.full([1], 0, dtype=paddle.int64)
for j in range(t_enc):
d = durations_to_expand[0, j]
M[0, k:k + d, j] = 1
# If d == 0, the slice k:k + d is empty; assigning to it is meaningless and not supported
if d >= 1:
M[0, k:k + d, j] = 1
k += d

encodings = paddle.matmul(M, encodings)
Expand Down
16 changes: 16 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
Run the tests from the `tests` directory using one of the following scripts: `lite_train_infer.sh`, `whole_train_infer.sh`, or `infer.sh`.

For lite_train_infer
```
bash lite_train_infer.sh
```

For whole_train_infer
```
bash whole_train_infer.sh
```

For infer
```
bash infer.sh
```
2 changes: 1 addition & 1 deletion tests/infer.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
bash prepare.sh infer
bash test.sh speedyspeech_params_lite.txt infer
bash test.sh speedyspeech_params_lite_single_gpu.txt infer
7 changes: 6 additions & 1 deletion tests/lite_train_infer.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
rm exp -rf
rm e2e -rf
bash prepare.sh lite_train_infer
bash test.sh speedyspeech_params_lite.txt lite_train_infer
bash test.sh speedyspeech_params_lite_single_gpu.txt lite_train_infer
rm exp -rf
rm e2e -rf
bash test.sh speedyspeech_params_lite_multi_gpu.txt lite_train_infer
3 changes: 2 additions & 1 deletion tests/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ if [ ${MODE} = "lite_train_infer" ];then
wget -nc -P ./pretrain_models/ https://paddlespeech.bj.bcebos.com/Parakeet/pwg_baker_ckpt_0.4.zip
(cd ./pretrain_models && unzip speedyspeech_baker_ckpt_0.4.zip && unzip pwg_baker_ckpt_0.4.zip)
# generate a config patch
echo 'max_epoch: 30' > lite_train_infer.yaml
echo 'max_epoch: 10' > lite_train_infer.yaml
echo 'num_snapshots: 10' >> lite_train_infer.yaml
# download data
rm -rf ./train_data/mini_BZNSYP
wget -nc -P ./train_data/ https://paddlespeech.bj.bcebos.com/datasets/CE/speedyspeech/mini_BZNSYP.tar.gz
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
===========================train_params===========================
model_name:speedyspeech
python:python3.7
gpu_list:1|0,1
gpu_list:2,3
null:null
null:null
null:null
Expand All @@ -21,7 +21,7 @@ null:null
null:null
##
===========================eval_params===========================
eval:../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_90.pdz --speedyspeech-stat=pretrain_models/speedyspeech_baker_ckpt_0.4/speedy_speech_stats.npy --pwg-config=../examples/parallelwave_gan/baker/conf/default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../examples/speedyspeech/baker/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=../examples/speedyspeech/baker/phones.txt --tones-dict=../examples/speedyspeech/baker/tones.txt
eval:../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_10.pdz --speedyspeech-stat=pretrain_models/speedyspeech_baker_ckpt_0.4/speedy_speech_stats.npy --pwg-config=../examples/parallelwave_gan/baker/conf/default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../examples/speedyspeech/baker/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=../examples/speedyspeech/baker/phones.txt --tones-dict=../examples/speedyspeech/baker/tones.txt
null:null
##
===========================infer_params===========================
Expand Down
51 changes: 51 additions & 0 deletions tests/speedyspeech_params_lite_single_gpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
===========================train_params===========================
model_name:speedyspeech
python:python3.7
gpu_list:2
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
##
trainer:norm_train
norm_train:../examples/speedyspeech/baker/train.py --train-metadata=train_data/mini_BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/mini_BZNSYP/dev/norm/metadata.jsonl --config=lite_train_infer.yaml --output-dir=exp/default
null:null
null:null
null:null
null:null
null:null
##
===========================eval_params===========================
eval:../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=exp/default/checkpoints/snapshot_iter_30.pdz --speedyspeech-stat=pretrain_models/speedyspeech_baker_ckpt_0.4/speedy_speech_stats.npy --pwg-config=../examples/parallelwave_gan/baker/conf/default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../examples/speedyspeech/baker/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=../examples/speedyspeech/baker/phones.txt --tones-dict=../examples/speedyspeech/baker/tones.txt
null:null
##
===========================infer_params===========================
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
##
null:null
null:null
null:null
inference:../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.4 --text=../examples/speedyspeech/baker/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=../examples/speedyspeech/baker/phones.txt --tones-dict=../examples/speedyspeech/baker/tones.txt --output-dir=e2e --inference-dir=inference
--use_gpu:True
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ null:null
null:null
##
trainer:norm_train
norm_train:../examples/speedyspeech/baker/train.py --train-metadata=train_data/BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/BZNSYP/dev/norm/metadata.jsonl --output-dir=exp/lite
norm_train:../examples/speedyspeech/baker/train.py --train-metadata=train_data/BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/BZNSYP/dev/norm/metadata.jsonl --output-dir=exp/whole
null:null
null:null
null:null
Expand Down
51 changes: 51 additions & 0 deletions tests/speedyspeech_params_whole_single_gpu.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
===========================train_params===========================
model_name:speedyspeech
python:python3.7
gpu_list:1
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
##
trainer:norm_train
norm_train:../examples/speedyspeech/baker/train.py --train-metadata=train_data/BZNSYP/train/norm/metadata.jsonl --dev-metadata=train_data/BZNSYP/dev/norm/metadata.jsonl --output-dir=exp/whole
null:null
null:null
null:null
null:null
null:null
##
===========================eval_params===========================
eval:../examples/speedyspeech/baker/synthesize_e2e.py --speedyspeech-config=../examples/speedyspeech/baker/conf/default.yaml --speedyspeech-checkpoint=pretrain_models/speedyspeech_baker_ckpt_0.4/speedyspeech_snapshot_iter_91800.pdz --speedyspeech-stat=pretrain_models/speedyspeech_baker_ckpt_0.4/speedy_speech_stats.npy --pwg-config=../examples/parallelwave_gan/baker/conf/default.yaml --pwg-checkpoint=pretrain_models/pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz --pwg-stat=pretrain_models/pwg_baker_ckpt_0.4/pwg_stats.npy --text=../examples/speedyspeech/baker/sentences.txt --output-dir=e2e --inference-dir=inference --device="gpu" --phones-dict=../examples/speedyspeech/baker/phones.txt --tones-dict=../examples/speedyspeech/baker/tones.txt
null:null
##
===========================infer_params===========================
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
##
null:null
null:null
null:null
inference:../examples/speedyspeech/baker/inference.py --inference-dir=pretrain_models/speedyspeech_pwg_inference_0.4 --text=../examples/speedyspeech/baker/sentences.txt --output-dir=inference_out --enable-auto-log --phones-dict=../examples/speedyspeech/baker/phones.txt --tones-dict=../examples/speedyspeech/baker/tones.txt --output-dir=e2e --inference-dir=inference
--use_gpu:True
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
1 change: 1 addition & 0 deletions tests/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ else
elif [ ${#gpu} -le 15 ];then # train with multi-gpu
gsu=${gpu//,/ }
nump=`echo $gsu | wc -w`
CUDA_VISIBLE_DEVICES=${gpu}
cmd="${python} ${run_train} --nprocs=$nump"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1}"
Expand Down
7 changes: 6 additions & 1 deletion tests/whole_train_infer.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
rm exp -rf
rm e2e -rf
bash prepare.sh whole_train_infer
bash test.sh speedyspeech_params_whole.txt whole_train_infer
bash test.sh speedyspeech_params_whole_single_gpu.txt whole_train_infer
rm exp -rf
rm e2e -rf
bash test.sh speedyspeech_params_whole_multi_gpu.txt whole_train_infer

0 comments on commit 240f3e7

Please sign in to comment.