Skip to content

Commit

Permalink
fix scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
pengzhendong committed Sep 12, 2023
1 parent 319673b commit 753ed8b
Show file tree
Hide file tree
Showing 15 changed files with 181 additions and 194 deletions.
2 changes: 1 addition & 1 deletion examples/aishell-3/configs/v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
"data": {
"max_wav_value": 32768.0,
"sampling_rate": 22050,
"sampling_rate": 44100,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
Expand Down
2 changes: 1 addition & 1 deletion examples/aishell-3/local/download_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ fi

cd $dir
if ! tar -xvzf data_aishell3.tgz; then
echo "$0: error un-tarring archive $dir/$data_aishell3.tgz"
echo "$0: error un-tarring archive $dir/data_aishell3.tgz"
exit 1;
fi
25 changes: 13 additions & 12 deletions examples/aishell-3/local/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,33 +18,34 @@
import sys

if len(sys.argv) != 4:
print('Usage: prepare_data.py lexicon in_data_dir out_data')
print("Usage: prepare_data.py lexicon in_data_dir out_data")
sys.exit(-1)

lexicon = {}
with open(sys.argv[1], 'r', encoding='utf8') as fin:
with open(sys.argv[1], "r", encoding="utf8") as fin:
for line in fin:
arr = line.strip().split()
lexicon[arr[0]] = arr[1:]

train_set_label_file = os.path.join(sys.argv[2], 'train',
'label_train-set.txt')
with open(train_set_label_file, encoding='utf8') as fin, \
open(sys.argv[3], 'w', encoding='utf8') as fout:
train_set_label_file = os.path.join(sys.argv[2], "train", "label_train-set.txt")
with open(train_set_label_file, encoding="utf8") as fin, open(
sys.argv[3], "w", encoding="utf8"
) as fout:
# skip the first five lines in label_train-set.txt
lines = [x.strip() for x in fin.readlines()][5:]
for line in lines:
key, text, _ = line.split('|')
key, text, _ = line.split("|")
speaker = key[:-4]
wav_path = os.path.join(sys.argv[2], 'train', 'wav', speaker,
'{}.wav'.format(key))
wav_path = os.path.join(
sys.argv[2], "train", "wav", speaker, "{}.wav".format(key)
)
phones = []
for x in text.split():
if x == '%' or x == '$':
if x == "%" or x == "$":
phones.append(x)
elif x in lexicon:
phones.extend(lexicon[x])
else:
print('{} OOV {}'.format(key, x))
print("{} OOV {}".format(key, x))
sys.exit(-1)
fout.write('{}|{}|{}\n'.format(wav_path, speaker, ' '.join(phones)))
fout.write("{}|{}|sil {} sil\n".format(wav_path, speaker, " ".join(phones)))
61 changes: 28 additions & 33 deletions examples/aishell-3/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,16 @@
export CUDA_VISIBLE_DEVICES="0,1,2,3"

stage=0 # start from -1 if you need to download data
stop_stage=1
stop_stage=3

dataset_url=https://openslr.magicdatatech.com/resources/93/data_aishell3.tgz
dataset_dir=. # path to dataset directory

config=configs/v1.json
dir=exp/v1 # training dir
test_audio=test_audio
dir=exp/v3 # training dir
config=configs/v3.json

data=data
use_onnx=false
test_audio=test_audio

. tools/parse_options.sh || exit 1;

Expand All @@ -45,56 +44,52 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
sort | uniq | awk '{print $0, NR}' > $data/speaker.txt
echo 'sil 0' > $data/phones.txt
cat $data/all.txt | awk -F '|' '{print $3}' | \
awk '{ for (i=1;i<=NF;i++) print $i}' | \
sort | uniq | awk '{print $0, NR}' >> $data/phones.txt
awk '{for (i=1;i<=NF;i++) print $i}' | sort | uniq | \
grep -v 'sil' | awk '{print $0, NR}' >> $data/phones.txt

# Split train/validation/test
cat $data/all.txt | shuf --random-source=<(yes 777) | head -n 110 | \
awk -F '|' '{print $1}' > $data/val.key
cat $data/all.txt | grep -f $data/val.key > $data/val.txt
head -10 $data/val.txt > $data/test.txt
sed -i '1,10d' $data/val.txt
cat $data/all.txt | grep -v -f $data/val.key > $data/train.txt
shuf --random-source=<(yes 777) $data/all.txt > $data/train.txt
head -n 100 $data/train.txt > $data/val.txt
sed -i '1,100d' $data/train.txt
head -n 10 $data/train.txt > $data/test.txt
sed -i '1,10d' $data/train.txt
fi


if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
export MASTER_ADDR=localhost
export MASTER_PORT=10087
export MASTER_PORT=10086
python vits/train.py -c $config -m $dir \
--train_data $data/train.txt \
--val_data $data/val.txt \
--phone_table $data/phones.txt \
--speaker_table $data/speaker.txt
--speaker_table $data/speaker.txt \
--phone_table $data/phones.txt
fi


if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
mkdir -p $test_audio
python vits/inference.py \
--checkpoint $dir/G_950000.pth --cfg $config \
--outdir $test_audio \
--phone_table $data/phones.txt \
python vits/inference.py --cfg $config \
--speaker_table $data/speaker.txt \
--test_file $data/test.txt
--phone_table $data/phones.txt \
--checkpoint $dir/G_90000.pth \
--test_file $data/test.txt \
--outdir $test_audio
fi


if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
mkdir -p $test_audio
python vits/export_onnx.py \
--checkpoint $dir/G_950000.pth \
--cfg $config \
--onnx_model $dir/G_950000.onnx \
python vits/export_onnx.py --cfg $config \
--speaker_table $data/speaker.txt \
--phone_table $data/phones.txt \
--speaker_table $data/speaker.txt
--checkpoint $dir/G_90000.pth \
--onnx_model $dir/G_90000.onnx

python vits/inference_onnx.py \
--onnx_model $dir/G_950000.onnx \
--cfg $config \
--outdir $test_audio \
--phone_table $data/phones.txt \
python vits/inference_onnx.py --cfg $config \
--speaker_table $data/speaker.txt \
--test_file $data/test.txt
--phone_table $data/phones.txt \
--onnx_model $dir/G_90000.onnx \
--test_file $data/test.txt \
--outdir $test_audio
fi

25 changes: 13 additions & 12 deletions examples/baker/local/prepare_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,41 @@

lexicon = {}

with open(sys.argv[1], 'r', encoding='utf8') as fin:
with open(sys.argv[1], "r", encoding="utf8") as fin:
for line in fin:
arr = line.strip().split()
lexicon[arr[0]] = arr[1:]

with open(sys.argv[2], 'r', encoding='utf8') as fin:
with open(sys.argv[2], "r", encoding="utf8") as fin:
lines = fin.readlines()
for i in range(0, len(lines), 2):
key = lines[i][:6]
content = lines[i][7:].strip()
content = re.sub('[。,、“”?:……!( )—;]', '', content)
if 'P' in content: # ignore utt 002365
content = re.sub("[。,、“”?:……!( )—;]", "", content)
if "P" in content: # ignore utt 002365
continue
chars = []
prosody = {}

j = 0
while j < len(content):
if content[j] == '#':
prosody[len(chars) - 1] = content[j:j + 2]
if content[j] == "#":
prosody[len(chars) - 1] = content[j : j + 2]
j += 2
else:
chars.append(content[j])
j += 1
if key == '005107':
lines[i + 1] = lines[i + 1].replace(' ng1', ' en1')
if key == "005107":
lines[i + 1] = lines[i + 1].replace(" ng1", " en1")
syllable = lines[i + 1].strip().split()
s_index = 0
phones = []
for k, char in enumerate(chars):
# Erhua (儿化音) handling: flag a "儿" that has no "er" syllable of its own
er_flag = False
if char == '儿' and (s_index == len(syllable)
or syllable[s_index][0:2] != 'er'):
if char == "儿" and (
s_index == len(syllable) or syllable[s_index][0:2] != "er"
):
er_flag = True
else:
phones.extend(lexicon[syllable[s_index]])
Expand All @@ -47,5 +48,5 @@
else:
phones.append(prosody[k])
else:
phones.append('#0')
print('{}/{}.wav|sil {} sil'.format(sys.argv[3], key, ' '.join(phones)))
phones.append("#0")
print("{}/{}.wav|baker|sil {} sil".format(sys.argv[3], key, " ".join(phones)))
75 changes: 37 additions & 38 deletions examples/baker/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,14 @@ export CUDA_VISIBLE_DEVICES="0,1,2,3" # specify your gpu id for training
stage=0 # start from -1 if you need to download data
stop_stage=3

config=configs/v1.json #
dir=exp/v1 # training dir
test_audio=test_audio
dir=exp/v3 # training dir
config=configs/v3.json

# Please download data from https://www.data-baker.com/data/index/TNtts/, and
# Please download data from https://www.data-baker.com/data/index/TNtts, and
# set `raw_data_dir` to the extracted directory (it must contain ProsodyLabeling/ and Wave/).
raw_data_dir=/mnt/mnt-data-1/binbin.zhang/data/BZNSYP
data=data
use_onnx=false
test_audio=test_audio

. tools/parse_options.sh || exit 1;

Expand All @@ -33,19 +32,19 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
$raw_data_dir/ProsodyLabeling/000001-010000.txt \
$raw_data_dir/Wave > $data/all.txt

echo 'baker 0' > $data/speaker.txt
echo 'sil 0' > $data/phones.txt
cat $data/all.txt | awk -F '|' '{print $2}' | \
awk '{ for (i=1;i<=NF;i++) print $i}' | \
sort | uniq | awk '{print $0, NR}' >> $data/phones.txt
sort | uniq | awk '{print $0, NR}' > $data/speaker.txt
echo 'sil 0' > $data/phones.txt
cat $data/all.txt | awk -F '|' '{print $3}' | \
awk '{for (i=1;i<=NF;i++) print $i}' | sort | uniq | \
grep -v 'sil' | awk '{print $0, NR}' >> $data/phones.txt

# Split train/validation/test
cat $data/all.txt | shuf --random-source=<(yes 777) | head -n 110 | \
awk -F '|' '{print $1}' > $data/val.key
cat $data/all.txt | grep -f $data/val.key > $data/val.txt
head -10 $data/val.txt > $data/test.txt
sed -i '1,10d' $data/val.txt
cat $data/all.txt | grep -v -f $data/val.key > $data/train.txt
shuf --random-source=<(yes 777) $data/all.txt > $data/train.txt
head -n 100 $data/train.txt > $data/val.txt
sed -i '1,100d' $data/train.txt
head -n 10 $data/train.txt > $data/test.txt
sed -i '1,10d' $data/train.txt
fi


Expand All @@ -55,34 +54,34 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
python vits/train.py -c $config -m $dir \
--train_data $data/train.txt \
--val_data $data/val.txt \
--phone_table $data/phones.txt \
--speaker_table $data/speaker.txt
--speaker_table $data/speaker.txt \
--phone_table $data/phones.txt
fi


if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
python vits/export_onnx.py \
mkdir -p $test_audio
python vits/inference.py --cfg $config \
--speaker_table $data/speaker.txt \
--phone_table $data/phones.txt \
--checkpoint $dir/G_90000.pth \
--cfg configs/base.json \
--onnx_model $dir/G_90000.onnx \
--phone_table data/phones.txt \
--speaker_table $data/speaker.txt
--test_file $data/test.txt \
--outdir $test_audio
fi


if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
[ ! -d ${test_audio} ] && mkdir ${test_audio}
if $use_onnx; then
python vits/inference_onnx.py \
--onnx_model $dir/G_90000.onnx --cfg $config \
--outdir $test_audio \
--phone_table $data/phones.txt \
--test_file $data/test.txt \
--speaker_table $data/speaker.txt
else
python vits/inference.py \
--checkpoint $dir/G_90000.pth --cfg $config \
--outdir $test_audio \
--phone_table $data/phones.txt \
--test_file $data/test.txt \
--speaker_table $data/speaker.txt
fi
mkdir -p $test_audio
python vits/export_onnx.py --cfg $config \
--speaker_table $data/speaker.txt \
--phone_table $data/phones.txt \
--checkpoint $dir/G_90000.pth \
--onnx_model $dir/G_90000.onnx

python vits/inference_onnx.py --cfg $config \
--speaker_table $data/speaker.txt \
--phone_table $data/phones.txt \
--onnx_model $dir/G_90000.onnx \
--test_file $data/test.txt \
--outdir $test_audio
fi
1 change: 1 addition & 0 deletions examples/chinese_prosody_polyphone/frontend
Loading

0 comments on commit 753ed8b

Please sign in to comment.