diff --git a/examples/aishell3/tts3/README.md b/examples/aishell3/tts3/README.md index 49801c4c3527b98b743c36686fa4abce7fa1570c..c33d665c86eeef292193e8690832c2c787a2a61d 100644 --- a/examples/aishell3/tts3/README.md +++ b/examples/aishell3/tts3/README.md @@ -241,7 +241,7 @@ fastspeech2_aishell3_ckpt_1.1.0 ├── speaker_id_map.txt # speaker id map file when training a multi-speaker fastspeech2 └── speech_stats.npy # statistics used to normalize spectrogram when training fastspeech2 ``` -You can use the following scripts to synthesize for `${BIN_DIR}/../sentences.txt` using pretrained fastspeech2 and parallel wavegan models. +You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences.txt` using pretrained fastspeech2 and parallel wavegan models. ```bash source path.sh @@ -257,7 +257,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=exp/default/test_e2e \ --phones_dict=fastspeech2_aishell3_ckpt_1.1.0/phone_id_map.txt \ --speaker_dict=fastspeech2_aishell3_ckpt_1.1.0/speaker_id_map.txt \ diff --git a/examples/aishell3/tts3/local/inference.sh b/examples/aishell3/tts3/local/inference.sh index dc05ec59218533631fe9fa16f5ec4ca1c4f4b3db..2d096bdcdcd2bbe4b62f1b48cb46a583a3880a18 100755 --- a/examples/aishell3/tts3/local/inference.sh +++ b/examples/aishell3/tts3/local/inference.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_aishell3 \ --voc=pwgan_aishell3 \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -22,7 +22,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_aishell3 \ --voc=hifigan_aishell3 \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/aishell3/tts3/local/lite_predict.sh b/examples/aishell3/tts3/local/lite_predict.sh index e77e8b6c27071407fc37bb25fc8513e1073340e3..2534b460b9e618b59ad1314f34a572d3229aba46 100755 --- a/examples/aishell3/tts3/local/lite_predict.sh +++ b/examples/aishell3/tts3/local/lite_predict.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_aishell3 \ --voc=pwgan_aishell3 \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -24,7 +24,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_aishell3 \ --voc=hifigan_aishell3 \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/aishell3/tts3/local/ort_predict.sh b/examples/aishell3/tts3/local/ort_predict.sh index 24e66f689603519e894fecceae22c644000719f9..9c41dee38ca5993ade0681d00185cc2f00eff212 100755 --- a/examples/aishell3/tts3/local/ort_predict.sh +++ b/examples/aishell3/tts3/local/ort_predict.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_aishell3 \ --voc=pwgan_aishell3 \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ @@ -24,7 +24,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_aishell3 \ --voc=hifigan_aishell3 \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ diff --git a/examples/aishell3/tts3/local/synthesize_e2e.sh b/examples/aishell3/tts3/local/synthesize_e2e.sh index 158350ae4337db6ebb8ec70294fee1e4a9a948db..2cc22ede2a2eaa0cd2ba35ecab665a6adff08dc3 100755 --- a/examples/aishell3/tts3/local/synthesize_e2e.sh +++ b/examples/aishell3/tts3/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -44,7 +44,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/aishell3/vits/README.md b/examples/aishell3/vits/README.md index dc80e18bc62167070be7ae351ebcd8b5785d7cba..8c19e29fd6c89a99ac1f24a4a8b74504a1709e0c 100644 --- a/examples/aishell3/vits/README.md +++ b/examples/aishell3/vits/README.md @@ -196,7 +196,7 @@ python3 ${BIN_DIR}/synthesize_e2e.py \ --phones_dict=vits_aishell3_ckpt_1.1.0/phone_id_map.txt \ --speaker_dict=vits_aishell3_ckpt_1.1.0/speaker_id_map.txt \ --output_dir=exp/default/test_e2e \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --add-blank=${add_blank} ``` --> diff --git a/examples/aishell3/vits/local/synthesize_e2e.sh b/examples/aishell3/vits/local/synthesize_e2e.sh index 1bd58549a84b0d998417475dc308ee8cedfd3f07..5369cbf9169567d3cb374670db899623f011fd69 100755 --- a/examples/aishell3/vits/local/synthesize_e2e.sh +++ b/examples/aishell3/vits/local/synthesize_e2e.sh @@ -20,6 +20,6 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --speaker_dict=dump/speaker_id_map.txt \ --spk_id=0 \ --output_dir=${train_output_path}/test_e2e \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --add-blank=${add_blank} fi diff --git a/examples/canton/tts3/README.md b/examples/canton/tts3/README.md index f46949d2f8002c40bf024b1fb3413a66d3757fd2..87ef40907855be1c1d2a3556fe5acd9dad4d6a56 100644 --- a/examples/canton/tts3/README.md +++ b/examples/canton/tts3/README.md @@ -102,7 +102,7 @@ Download the pretrained parallel wavegan model from [pwg_aishell3_ckpt_0.5.zip]( unzip pwg_aishell3_ckpt_0.5.zip ``` -You can use the following scripts to synthesize for `${BIN_DIR}/../sentences_canton.txt` using pretrained fastspeech2 and parallel wavegan models. +You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences_canton.txt` using pretrained fastspeech2 and parallel wavegan models. ```bash source path.sh @@ -118,7 +118,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ --lang=canton \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=exp/default/test_e2e \ --phones_dict=fastspeech2_canton_ckpt_1.4.0/phone_id_map.txt \ --speaker_dict=fastspeech2_canton_ckpt_1.4.0/speaker_id_map.txt \ diff --git a/examples/canton/tts3/local/inference.sh b/examples/canton/tts3/local/inference.sh index caf0b4380785509d7f6016a3d89851ba828a950d..ad3af2d02ddf2cb7722ec752f49e7929d43291ca 100755 --- a/examples/canton/tts3/local/inference.sh +++ b/examples/canton/tts3/local/inference.sh @@ -12,7 +12,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_canton \ --voc=pwgan_aishell3 \ --spk_id=10 \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -27,7 +27,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_canton \ --voc=mb_melgan_csmsc \ --spk_id=10 \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -41,7 +41,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=fastspeech2_canton \ --voc=hifigan_csmsc \ --spk_id=10 \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -55,7 +55,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --am=fastspeech2_canton \ --voc=wavernn_csmsc \ --spk_id=10 \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/canton/tts3/local/ort_predict.sh b/examples/canton/tts3/local/ort_predict.sh index d95e49f97b64458ff34cdb56847f21a464ce93eb..edbe040660d0c4d95d28c8a42ad698c62445f0b8 100755 --- a/examples/canton/tts3/local/ort_predict.sh +++ b/examples/canton/tts3/local/ort_predict.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc=pwgan_aishell3 \ --spk_id=10 \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ --lang=canton \ @@ -26,7 +26,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc=mb_melgan_csmsc \ --spk_id=10 \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ --lang=canton \ @@ -40,7 +40,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=fastspeech2_canton \ --voc=hifigan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ --lang=canton \ diff --git a/examples/canton/tts3/local/synthesize_e2e.sh b/examples/canton/tts3/local/synthesize_e2e.sh index 8cf7eb22bb5b5967616cd63416fb208fff5d2499..38b7e1af0389839163dc7cc9099ceeccf7915ed4 100755 --- a/examples/canton/tts3/local/synthesize_e2e.sh +++ b/examples/canton/tts3/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ --lang=canton \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -44,7 +44,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ --lang=canton \ - --text=${BIN_DIR}/../sentences_canton.txt \ + --text=${BIN_DIR}/../../assets/sentences_canton.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/csmsc/jets/local/inference.sh b/examples/csmsc/jets/local/inference.sh index 30941caa88a232cf4d86fbdbf46ca3e66a1bcc0e..987f4cea105fa16465cae4e4c8fdae00a33ac7b8 100755 --- a/examples/csmsc/jets/local/inference.sh +++ b/examples/csmsc/jets/local/inference.sh @@ -9,7 +9,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then python3 ${BIN_DIR}/inference.py \ --inference_dir=${train_output_path}/inference \ --am=jets_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi diff --git a/examples/csmsc/jets/local/synthesize_e2e.sh b/examples/csmsc/jets/local/synthesize_e2e.sh index 67ae14fabe8a1d6dd18e3646fa9040b735267bbf..c95354d86f02c6cc84266fba32fbf5f3b726c9e4 100755 --- a/examples/csmsc/jets/local/synthesize_e2e.sh +++ b/examples/csmsc/jets/local/synthesize_e2e.sh @@ -17,6 +17,6 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --ckpt=${train_output_path}/checkpoints/${ckpt_name} \ --phones_dict=dump/phone_id_map.txt \ --output_dir=${train_output_path}/test_e2e \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --inference_dir=${train_output_path}/inference fi diff --git a/examples/csmsc/tts0/README.md b/examples/csmsc/tts0/README.md index bc7769d1572166b6e27185f282b8ca16b998f40f..ce682495e9732088725fc08d9cdd5cfcb8a53d2c 100644 --- a/examples/csmsc/tts0/README.md +++ b/examples/csmsc/tts0/README.md @@ -226,7 +226,7 @@ tacotron2_csmsc_ckpt_0.2.0 ├── snapshot_iter_30600.pdz # model parameters and optimizer states └── speech_stats.npy # statistics used to normalize spectrogram when training Tacotron2 ``` -You can use the following scripts to synthesize for `${BIN_DIR}/../sentences.txt` using pretrained Tacotron2 and parallel wavegan models. +You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences.txt` using pretrained Tacotron2 and parallel wavegan models. ```bash source path.sh @@ -242,7 +242,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=exp/default/test_e2e \ --inference_dir=exp/default/inference \ --phones_dict=tacotron2_csmsc_ckpt_0.2.0/phone_id_map.txt diff --git a/examples/csmsc/tts0/local/inference.sh b/examples/csmsc/tts0/local/inference.sh index d2960441016d81bfb2ec328404949a4e4afdeb11..6ea2e4b6c3e48a551260dd5bb55f60a37b078991 100755 --- a/examples/csmsc/tts0/local/inference.sh +++ b/examples/csmsc/tts0/local/inference.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=tacotron2_csmsc \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -22,7 +22,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=tacotron2_csmsc \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -33,7 +33,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --inference_dir=${train_output_path}/inference \ --am=tacotron2_csmsc \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi \ No newline at end of file diff --git a/examples/csmsc/tts0/local/synthesize_e2e.sh b/examples/csmsc/tts0/local/synthesize_e2e.sh index 4c3b08dc1f52bd20e250ba025e33c91f5f5eb290..40b49aa1e4b78628d4f0aefd8002dc8a160f29be 100755 --- a/examples/csmsc/tts0/local/synthesize_e2e.sh +++ b/examples/csmsc/tts0/local/synthesize_e2e.sh @@ -22,7 +22,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference @@ -44,7 +44,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference @@ -66,7 +66,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt # --inference_dir=${train_output_path}/inference @@ -87,7 +87,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference @@ -108,7 +108,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference diff --git a/examples/csmsc/tts2/README.md b/examples/csmsc/tts2/README.md index ec88959d1e95a9d54e88e4195cfae60f121fc746..96956776244f1d249900afe60ba54af24b72d30a 100644 --- a/examples/csmsc/tts2/README.md +++ b/examples/csmsc/tts2/README.md @@ -248,7 +248,7 @@ speedyspeech_csmsc_ckpt_0.2.0 ├── snapshot_iter_30600.pdz # model parameters and optimizer states └── tone_id_map.txt # tone vocabulary file when training speedyspeech ``` -You can use the following scripts to synthesize for `${BIN_DIR}/../sentences.txt` using pretrained speedyspeech and parallel wavegan models. +You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences.txt` using pretrained speedyspeech and parallel wavegan models. ```bash source path.sh @@ -264,7 +264,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=exp/default/test_e2e \ --inference_dir=exp/default/inference \ --phones_dict=speedyspeech_csmsc_ckpt_0.2.0/phone_id_map.txt \ diff --git a/examples/csmsc/tts2/local/inference.sh b/examples/csmsc/tts2/local/inference.sh index ed92136cde13d70034de93e7e583ef79bfa1bdda..9a677edcb364acf0ba9734cb31e6111c8537e185 100755 --- a/examples/csmsc/tts2/local/inference.sh +++ b/examples/csmsc/tts2/local/inference.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=speedyspeech_csmsc \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt @@ -24,7 +24,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=speedyspeech_csmsc \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt @@ -36,7 +36,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --inference_dir=${train_output_path}/inference \ --am=speedyspeech_csmsc \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt diff --git a/examples/csmsc/tts2/local/lite_predict.sh b/examples/csmsc/tts2/local/lite_predict.sh index d0c6c05840997993026eb7089fb25875fe111700..9bb33cdf44c2fdb3964439181d948c06aa3f18c0 100755 --- a/examples/csmsc/tts2/local/lite_predict.sh +++ b/examples/csmsc/tts2/local/lite_predict.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/pdlite \ --am=speedyspeech_csmsc \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt @@ -24,7 +24,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/pdlite \ --am=speedyspeech_csmsc \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt @@ -36,7 +36,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --inference_dir=${train_output_path}/pdlite \ --am=speedyspeech_csmsc \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt diff --git a/examples/csmsc/tts2/local/ort_predict.sh b/examples/csmsc/tts2/local/ort_predict.sh index 8ca4c0e9bd06ea5ff836ff0ceb61bae28554ca07..36f88667a68e1294ee1991e2e40affb625c02557 100755 --- a/examples/csmsc/tts2/local/ort_predict.sh +++ b/examples/csmsc/tts2/local/ort_predict.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=speedyspeech_csmsc \ --voc=pwgan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ --device=cpu \ @@ -23,7 +23,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=speedyspeech_csmsc \ --voc=mb_melgan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ --device=cpu \ @@ -36,7 +36,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=speedyspeech_csmsc \ --voc=hifigan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ --device=cpu \ diff --git a/examples/csmsc/tts2/local/synthesize_e2e.sh b/examples/csmsc/tts2/local/synthesize_e2e.sh index 553b45543faea1c0eca1b4fcaaa89cffe447334f..2b27872959eac471d426d51aac282394d6f5d4d2 100755 --- a/examples/csmsc/tts2/local/synthesize_e2e.sh +++ b/examples/csmsc/tts2/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ @@ -43,7 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ @@ -66,7 +66,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt @@ -87,7 +87,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ @@ -109,7 +109,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --tones_dict=dump/tone_id_map.txt \ diff --git a/examples/csmsc/tts3/README.md b/examples/csmsc/tts3/README.md index 39926259dbf0b4d0ec49287d8c41d3ca6ab94ab0..5a0975376c6c701d86fc257a9b6d56f2ee91ddf9 100644 --- a/examples/csmsc/tts3/README.md +++ b/examples/csmsc/tts3/README.md @@ -258,7 +258,7 @@ fastspeech2_nosil_baker_ckpt_0.4 ├── snapshot_iter_76000.pdz # model parameters and optimizer states └── speech_stats.npy # statistics used to normalize spectrogram when training fastspeech2 ``` -You can use the following scripts to synthesize for `${BIN_DIR}/../sentences.txt` using pretrained fastspeech2 and parallel wavegan models. +You can use the following scripts to synthesize for `${BIN_DIR}/../../assets/sentences.txt` using pretrained fastspeech2 and parallel wavegan models. If you want to use fastspeech2_conformer, you must delete this line `--inference_dir=exp/default/inference \` to skip the step of dygraph to static graph, cause we haven't tested dygraph to static graph for fastspeech2_conformer till now. ```bash @@ -276,7 +276,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=exp/default/test_e2e \ --inference_dir=exp/default/inference \ --phones_dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt diff --git a/examples/csmsc/tts3/README_cn.md b/examples/csmsc/tts3/README_cn.md index 1829b77063e0ec08f5bd8dcb41405ea815d3ec0e..3f2783a9780304d123d7a8e7bd47343fc82560a5 100644 --- a/examples/csmsc/tts3/README_cn.md +++ b/examples/csmsc/tts3/README_cn.md @@ -248,7 +248,7 @@ fastspeech2_nosil_baker_ckpt_0.4 ├── snapshot_iter_76000.pdz # 模型参数和优化器状态 └── speech_stats.npy # 训练 fastspeech2 时用于规范化频谱图的统计数据 ``` -您可以使用以下脚本通过使用预训练的 fastspeech2 和 parallel wavegan 模型为 `${BIN_DIR}/../sentences.txt` 合成句子 +您可以使用以下脚本通过使用预训练的 fastspeech2 和 parallel wavegan 模型为 `${BIN_DIR}/../../assets/sentences.txt` 合成句子 ```bash source path.sh @@ -264,7 +264,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=exp/default/test_e2e \ --inference_dir=exp/default/inference \ --phones_dict=fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt diff --git a/examples/csmsc/tts3/local/inference.sh b/examples/csmsc/tts3/local/inference.sh index b43fd2861754097c10713de054e24d4ad96def49..5b143cdd05ad6f7f072f81ad7232d1ffc0196d96 100755 --- a/examples/csmsc/tts3/local/inference.sh +++ b/examples/csmsc/tts3/local/inference.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_csmsc \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -23,7 +23,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_csmsc \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -34,7 +34,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_csmsc \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -45,7 +45,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_csmsc \ --voc=wavernn_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt fi \ No newline at end of file diff --git a/examples/csmsc/tts3/local/inference_streaming.sh b/examples/csmsc/tts3/local/inference_streaming.sh index 719f46c620aee6e8eb3496a7aaa4e40abf7e9817..5ad50aa599b6847fb391df1552504e2164e2c7df 100755 --- a/examples/csmsc/tts3/local/inference_streaming.sh +++ b/examples/csmsc/tts3/local/inference_streaming.sh @@ -12,7 +12,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_csmsc \ --am_stat=dump/train/speech_stats.npy \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True @@ -26,7 +26,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_csmsc \ --am_stat=dump/train/speech_stats.npy \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True @@ -39,7 +39,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=fastspeech2_csmsc \ --am_stat=dump/train/speech_stats.npy \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True diff --git a/examples/csmsc/tts3/local/lite_predict.sh b/examples/csmsc/tts3/local/lite_predict.sh index 1ed2f108d205aee5f02d8b17fe8e9ebf760e10aa..9af17899f4607ab1d9dd1a5996612c84325b3711 100755 --- a/examples/csmsc/tts3/local/lite_predict.sh +++ b/examples/csmsc/tts3/local/lite_predict.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_csmsc \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -23,7 +23,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_csmsc \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt fi @@ -34,7 +34,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_csmsc \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt fi diff --git a/examples/csmsc/tts3/local/lite_predict_streaming.sh b/examples/csmsc/tts3/local/lite_predict_streaming.sh index 4570cb4eb73f9bfb8dfc1086a71a1193b4ac6dc1..19fdde41db0b616395dccc247be86d8324bde6c2 100755 --- a/examples/csmsc/tts3/local/lite_predict_streaming.sh +++ b/examples/csmsc/tts3/local/lite_predict_streaming.sh @@ -12,7 +12,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_csmsc \ --am_stat=dump/train/speech_stats.npy \ --voc=pwgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True @@ -26,7 +26,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_csmsc \ --am_stat=dump/train/speech_stats.npy \ --voc=mb_melgan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True @@ -39,7 +39,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=fastspeech2_csmsc \ --am_stat=dump/train/speech_stats.npy \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True diff --git a/examples/csmsc/tts3/local/ort_predict.sh b/examples/csmsc/tts3/local/ort_predict.sh index e16c7bd0533436c53a076e3c5a0e434481926fda..99955665f3c03f5611ea8b38a1e6747363528809 100755 --- a/examples/csmsc/tts3/local/ort_predict.sh +++ b/examples/csmsc/tts3/local/ort_predict.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_csmsc \ --voc=pwgan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 @@ -22,7 +22,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_csmsc \ --voc=mb_melgan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 @@ -34,7 +34,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=fastspeech2_csmsc \ --voc=hifigan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 diff --git a/examples/csmsc/tts3/local/ort_predict_streaming.sh b/examples/csmsc/tts3/local/ort_predict_streaming.sh index 743935816509b4eeab386df254b67432cc34d2e6..e2c7e8525e9eaf9816f5e362eb0eee334b58061f 100755 --- a/examples/csmsc/tts3/local/ort_predict_streaming.sh +++ b/examples/csmsc/tts3/local/ort_predict_streaming.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am_stat=dump/train/speech_stats.npy \ --voc=pwgan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_streaming \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ @@ -25,7 +25,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am_stat=dump/train/speech_stats.npy \ --voc=mb_melgan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_streaming \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ @@ -39,7 +39,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am_stat=dump/train/speech_stats.npy \ --voc=hifigan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_streaming \ - --text=${BIN_DIR}/../csmsc_test.txt \ + --text=${BIN_DIR}/../../assets/csmsc_test.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ diff --git a/examples/csmsc/tts3/local/synthesize_e2e.sh b/examples/csmsc/tts3/local/synthesize_e2e.sh index 512e062b7792a7c363640f48bb0a665f1be96ab6..35a5598a8d034ce675843b4300df609e7bc2b774 100755 --- a/examples/csmsc/tts3/local/synthesize_e2e.sh +++ b/examples/csmsc/tts3/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference @@ -42,7 +42,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference @@ -64,7 +64,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt # --inference_dir=${train_output_path}/inference @@ -85,7 +85,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference @@ -107,7 +107,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference diff --git a/examples/csmsc/tts3/local/synthesize_streaming.sh b/examples/csmsc/tts3/local/synthesize_streaming.sh index 366a88db969950c76efc9a53362d4e35e1eb8602..f4e783d4c3008240026ea11dc99546a755b259e4 100755 --- a/examples/csmsc/tts3/local/synthesize_streaming.sh +++ b/examples/csmsc/tts3/local/synthesize_streaming.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True \ @@ -43,7 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True \ @@ -66,7 +66,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True @@ -87,7 +87,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e_streaming \ --phones_dict=dump/phone_id_map.txt \ --am_streaming=True \ diff --git a/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh b/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh index 8f5d801045dc46de7698a85aa78935c1b0346fa0..bf7229e13c6b5d6713afd369458b7921fc8d86ab 100755 --- a/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh +++ b/examples/csmsc/tts3_rhy/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_baker_ckpt_0.4/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_baker_ckpt_0.4/pwg_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference \ @@ -43,7 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=mb_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1000000.pdz\ --voc_stat=mb_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference \ @@ -66,7 +66,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --voc_ckpt=style_melgan_csmsc_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=style_melgan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --use_rhy=True @@ -88,7 +88,7 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference \ @@ -111,7 +111,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then --voc_ckpt=wavernn_csmsc_ckpt_0.2.0/snapshot_iter_400000.pdz \ --voc_stat=wavernn_csmsc_ckpt_0.2.0/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --inference_dir=${train_output_path}/inference \ diff --git a/examples/csmsc/vits/README.md b/examples/csmsc/vits/README.md index 50d703b2d7ff19825df0f719091258d27c5257b7..83871277baa0a75d12b1428f63012c127a43ca38 100644 --- a/examples/csmsc/vits/README.md +++ b/examples/csmsc/vits/README.md @@ -172,6 +172,6 @@ python3 ${BIN_DIR}/synthesize_e2e.py \ --ckpt=vits_csmsc_ckpt_1.4.0/snapshot_iter_150000.pdz \ --phones_dict=vits_csmsc_ckpt_1.4.0/phone_id_map.txt \ --output_dir=exp/default/test_e2e \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --add-blank=${add_blank} ``` diff --git a/examples/csmsc/vits/local/inference.sh b/examples/csmsc/vits/local/inference.sh index 0a79c255ca35edda43e846c995c0cc7d7c8685fd..d26b7f71d59a9fb739d299f230e8928a6423d584 100755 --- a/examples/csmsc/vits/local/inference.sh +++ b/examples/csmsc/vits/local/inference.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then python3 ${BIN_DIR}/inference.py \ --inference_dir=${train_output_path}/inference \ --am=vits_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --add-blank=${add_blank} diff --git a/examples/csmsc/vits/local/lite_predict.sh b/examples/csmsc/vits/local/lite_predict.sh index e12f5349300ba672df977c79bb6b088beb49ccdd..d20d7a578ea7b4d9cfd89bec9ec2f5f57b304c81 100755 --- a/examples/csmsc/vits/local/lite_predict.sh +++ b/examples/csmsc/vits/local/lite_predict.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then python3 ${BIN_DIR}/lite_predict.py \ --inference_dir=${train_output_path}/pdlite \ --am=vits_csmsc \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --add-blank=${add_blank} diff --git a/examples/csmsc/vits/local/synthesize_e2e.sh b/examples/csmsc/vits/local/synthesize_e2e.sh index 6a69b366bdd0187d64f2723f06a053d950d29a6a..f3c067e445c9d6345313bf8b97b58c1d133ec1b6 100755 --- a/examples/csmsc/vits/local/synthesize_e2e.sh +++ b/examples/csmsc/vits/local/synthesize_e2e.sh @@ -18,7 +18,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --ckpt=${train_output_path}/checkpoints/${ckpt_name} \ --phones_dict=dump/phone_id_map.txt \ --output_dir=${train_output_path}/test_e2e \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --add-blank=${add_blank} #\ # --inference_dir=${train_output_path}/inference fi diff --git a/examples/ljspeech/tts0/README.md b/examples/ljspeech/tts0/README.md index 85d9e448b8422e32d9058a18d1ac413aff69cce7..fa986c85b256b1fa67b65ffd141419118a39d70e 100644 --- a/examples/ljspeech/tts0/README.md +++ b/examples/ljspeech/tts0/README.md @@ -239,7 +239,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=exp/default/test_e2e \ --phones_dict=tacotron2_ljspeech_ckpt_0.2.0/phone_id_map.txt ``` diff --git a/examples/ljspeech/tts0/local/synthesize_e2e.sh b/examples/ljspeech/tts0/local/synthesize_e2e.sh index 73dfff603e279f211a8178182b10369428df2e88..903ebb4720f391cd8f5226169487f4fe2bde828f 100755 --- a/examples/ljspeech/tts0/local/synthesize_e2e.sh +++ b/examples/ljspeech/tts0/local/synthesize_e2e.sh @@ -16,7 +16,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ # --inference_dir=${train_output_path}/inference \ No newline at end of file diff --git a/examples/ljspeech/tts1/README.md b/examples/ljspeech/tts1/README.md index 85621653f761b05ac382efe83e5f1daa694a06e6..7f0571a1b34de41fbc3e9c70617ced23e0b06bdb 100644 --- a/examples/ljspeech/tts1/README.md +++ b/examples/ljspeech/tts1/README.md @@ -191,7 +191,7 @@ python3 ${BIN_DIR}/synthesize_e2e.py \ --transformer-tts-stat=transformer_tts_ljspeech_ckpt_0.4/speech_stats.npy \ --waveflow-config=waveflow_ljspeech_ckpt_0.3/config.yaml \ --waveflow-checkpoint=waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output-dir=exp/default/test_e2e \ --phones-dict=transformer_tts_ljspeech_ckpt_0.4/phone_id_map.txt ``` diff --git a/examples/ljspeech/tts1/local/synthesize_e2e.sh b/examples/ljspeech/tts1/local/synthesize_e2e.sh index 25a862f9007499d6e1703ec54831884fd86274d4..d6ff9cae3fa70f8d9d82d8d93dab85c4c797b049 100755 --- a/examples/ljspeech/tts1/local/synthesize_e2e.sh +++ b/examples/ljspeech/tts1/local/synthesize_e2e.sh @@ -12,6 +12,6 @@ python3 ${BIN_DIR}/synthesize_e2e.py \ --transformer-tts-stat=dump/train/speech_stats.npy \ --waveflow-config=waveflow_ljspeech_ckpt_0.3/config.yaml \ --waveflow-checkpoint=waveflow_ljspeech_ckpt_0.3/step-2000000.pdparams \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output-dir=${train_output_path}/test_e2e \ --phones-dict=dump/phone_id_map.txt diff --git a/examples/ljspeech/tts3/README.md b/examples/ljspeech/tts3/README.md index 23b433d4e4616578365c5bbe52e873d41e0be427..f1ed111a0492e32d25a807a2e7058e0004137b1b 100644 --- a/examples/ljspeech/tts3/README.md +++ b/examples/ljspeech/tts3/README.md @@ -254,7 +254,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=exp/default/test_e2e \ --inference_dir=exp/default/inference \ --phones_dict=fastspeech2_nosil_ljspeech_ckpt_0.5/phone_id_map.txt diff --git a/examples/ljspeech/tts3/local/inference.sh b/examples/ljspeech/tts3/local/inference.sh index ff192f3e3c7571699cdf18a39c257d927ca2e11c..94d6b371cb67a025bf4b3a9fa8732643d547ad9d 100755 --- a/examples/ljspeech/tts3/local/inference.sh +++ b/examples/ljspeech/tts3/local/inference.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_ljspeech \ --voc=pwgan_ljspeech \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --lang=en @@ -23,7 +23,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_ljspeech \ --voc=hifigan_ljspeech \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --lang=en diff --git a/examples/ljspeech/tts3/local/lite_predict.sh b/examples/ljspeech/tts3/local/lite_predict.sh index 75db6a0eaff4fe82f542f4a3dc4d6233590c129e..9cf1d8d7fd69250e964c241fde81d68b54d40aa3 100755 --- a/examples/ljspeech/tts3/local/lite_predict.sh +++ b/examples/ljspeech/tts3/local/lite_predict.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_ljspeech \ --voc=pwgan_ljspeech \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --lang=en @@ -23,7 +23,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_ljspeech \ --voc=hifigan_ljspeech \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --lang=en diff --git a/examples/ljspeech/tts3/local/ort_predict.sh b/examples/ljspeech/tts3/local/ort_predict.sh index b4716f70e92fb24199312bfb89e300a6f3ffbee3..b82ec15f49ee3fdfa724d7730a0f6362e1c0fe43 100755 --- a/examples/ljspeech/tts3/local/ort_predict.sh +++ b/examples/ljspeech/tts3/local/ort_predict.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_ljspeech \ --voc=pwgan_ljspeech\ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ @@ -24,7 +24,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_ljspeech \ --voc=hifigan_ljspeech \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ diff --git a/examples/ljspeech/tts3/local/synthesize_e2e.sh b/examples/ljspeech/tts3/local/synthesize_e2e.sh index 36865f7f169d12f9767819f1a8912e7349065df1..3f2340808d763531e2fcdee220e032dc9622ae25 100755 --- a/examples/ljspeech/tts3/local/synthesize_e2e.sh +++ b/examples/ljspeech/tts3/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_ljspeech_ckpt_0.5/pwg_snapshot_iter_400000.pdz \ --voc_stat=pwg_ljspeech_ckpt_0.5/pwg_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/test_e2e \ --inference_dir=${train_output_path}/inference \ --phones_dict=dump/phone_id_map.txt @@ -41,7 +41,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=hifigan_ljspeech_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_ljspeech_ckpt_0.2.0/feats_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/test_e2e \ --inference_dir=${train_output_path}/inference \ --phones_dict=dump/phone_id_map.txt diff --git a/examples/opencpop/svs1/README.md b/examples/opencpop/svs1/README.md index 1600d0c761fb253157889d8888aa47e6a5a95eaa..43cc6e86efa92cd674465742cb6b94d28e0c2544 100644 --- a/examples/opencpop/svs1/README.md +++ b/examples/opencpop/svs1/README.md @@ -267,7 +267,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwgan_opencpop_ckpt_1.4.0/snapshot_iter_100000.pdz \ --voc_stat=pwgan_opencpop_ckpt_1.4.0/feats_stats.npy \ --lang=sing \ - --text=${BIN_DIR}/../sentences_sing.txt \ + --text=${BIN_DIR}/../../assets/sentences_sing.txt \ --output_dir=exp/default/test_e2e \ --phones_dict=diffsinger_opencpop_ckpt_1.4.0/phone_id_map.txt \ --pinyin_phone=diffsinger_opencpop_ckpt_1.4.0/pinyin_to_phone.txt \ diff --git a/examples/opencpop/svs1/README_cn.md b/examples/opencpop/svs1/README_cn.md index 1435b42ecb5fee6db18d805ed685a0dc0340df63..cf65c97f8384b2c54e558eccaec9cab824efaca3 100644 --- a/examples/opencpop/svs1/README_cn.md +++ b/examples/opencpop/svs1/README_cn.md @@ -271,7 +271,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwgan_opencpop_ckpt_1.4.0/snapshot_iter_100000.pdz \ --voc_stat=pwgan_opencpop_ckpt_1.4.0/feats_stats.npy \ --lang=sing \ - --text=${BIN_DIR}/../sentences_sing.txt \ + --text=${BIN_DIR}/../../assets/sentences_sing.txt \ --output_dir=exp/default/test_e2e \ --phones_dict=diffsinger_opencpop_ckpt_1.4.0/phone_id_map.txt \ --pinyin_phone=diffsinger_opencpop_ckpt_1.4.0/pinyin_to_phone.txt \ diff --git a/examples/opencpop/svs1/local/synthesize_e2e.sh b/examples/opencpop/svs1/local/synthesize_e2e.sh index b3dc29b11143302709783e625166785f81d0b080..e8d0cc45afb277ccc7dc7a26bc9b8683e75943eb 100755 --- a/examples/opencpop/svs1/local/synthesize_e2e.sh +++ b/examples/opencpop/svs1/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwgan_opencpop_ckpt_1.4.0/snapshot_iter_100000.pdz \ --voc_stat=pwgan_opencpop_ckpt_1.4.0/feats_stats.npy \ --lang=sing \ - --text=${BIN_DIR}/../sentences_sing.txt \ + --text=${BIN_DIR}/../../assets/sentences_sing.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speech_stretchs=dump/train/speech_stretchs.npy \ @@ -44,7 +44,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=hifigan_opencpop_ckpt_1.4.0/snapshot_iter_625000.pdz \ --voc_stat=hifigan_opencpop_ckpt_1.4.0/feats_stats.npy \ --lang=sing \ - --text=${BIN_DIR}/../sentences_sing.txt \ + --text=${BIN_DIR}/../../assets/sentences_sing.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speech_stretchs=dump/train/speech_stretchs.npy \ diff --git a/examples/other/tts_finetune/tts3/run.sh b/examples/other/tts_finetune/tts3/run.sh index cc25d8f674e65fc2035fed6cf379bc667d6797e0..f5a65e6bd842c00a3f572ceb00619c281bab82f4 100755 --- a/examples/other/tts_finetune/tts3/run.sh +++ b/examples/other/tts_finetune/tts3/run.sh @@ -99,7 +99,7 @@ if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then --voc_ckpt=pretrained_models/hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=pretrained_models/hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ --lang=zh \ - --text=${BIN_DIR}/../sentences.txt \ + --text=${BIN_DIR}/../../assets/sentences.txt \ --output_dir=./test_e2e/ \ --phones_dict=${dump_dir}/phone_id_map.txt \ --speaker_dict=${dump_dir}/speaker_id_map.txt \ diff --git a/examples/other/tts_finetune/tts3/run_en.sh b/examples/other/tts_finetune/tts3/run_en.sh index 537214863cc67bbdd32dedcc50e93f475202fb1a..86c58afaed046c91ff50020840857d87f64864e6 100755 --- a/examples/other/tts_finetune/tts3/run_en.sh +++ b/examples/other/tts_finetune/tts3/run_en.sh @@ -98,7 +98,7 @@ if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then --voc_ckpt=pretrained_models/hifigan_vctk_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=pretrained_models/hifigan_vctk_ckpt_0.2.0/feats_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=./test_e2e/ \ --phones_dict=${dump_dir}/phone_id_map.txt \ --speaker_dict=${dump_dir}/speaker_id_map.txt \ diff --git a/examples/other/tts_finetune/tts3/run_mix.sh b/examples/other/tts_finetune/tts3/run_mix.sh index 7630022b386da0f52be99aea46e168abd73db321..210f03141144f19c31dab58be827bc71fac21a7f 100755 --- a/examples/other/tts_finetune/tts3/run_mix.sh +++ b/examples/other/tts_finetune/tts3/run_mix.sh @@ -100,7 +100,7 @@ if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then --voc_ckpt=pretrained_models/hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=pretrained_models/hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ --lang=mix \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=./test_e2e/ \ --phones_dict=${dump_dir}/phone_id_map.txt \ --speaker_dict=${dump_dir}/speaker_id_map.txt \ diff --git a/examples/vctk/tts3/README.md b/examples/vctk/tts3/README.md index 0bf2037f5bb06b2b50168dea64f45775bf722990..3a6f3e1b904fab71b395695dca46ca5e5dbc6f40 100644 --- a/examples/vctk/tts3/README.md +++ b/examples/vctk/tts3/README.md @@ -254,7 +254,7 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --voc_ckpt=pwg_vctk_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=pwg_vctk_ckpt_0.1.1/feats_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=exp/default/test_e2e \ --phones_dict=fastspeech2_vctk_ckpt_1.2.0/phone_id_map.txt \ --speaker_dict=fastspeech2_vctk_ckpt_1.2.0/speaker_id_map.txt \ diff --git a/examples/vctk/tts3/local/inference.sh b/examples/vctk/tts3/local/inference.sh index 9c4426146ff5fd1bd75eaa4921920feaf106f478..ef23d951cea8ac342df5c3425d287a7d0173944a 100755 --- a/examples/vctk/tts3/local/inference.sh +++ b/examples/vctk/tts3/local/inference.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_vctk \ --voc=pwgan_vctk \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -23,7 +23,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_vctk \ --voc=hifigan_vctk \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/vctk/tts3/local/lite_predict.sh b/examples/vctk/tts3/local/lite_predict.sh index eb608535b5217217546c19bd3ebf9388024add2d..53141b5f14fd95000725a34ea06728a5e07a9186 100755 --- a/examples/vctk/tts3/local/lite_predict.sh +++ b/examples/vctk/tts3/local/lite_predict.sh @@ -11,7 +11,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_vctk \ --voc=pwgan_vctk \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -25,7 +25,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/pdlite \ --am=fastspeech2_vctk \ --voc=hifigan_vctk \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/lite_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/vctk/tts3/local/ort_predict.sh b/examples/vctk/tts3/local/ort_predict.sh index 4019e17fa935c3955e13b5d63c5fa8414661f4f8..f376ee75534351861b082f35fcec90b6f051af7c 100755 --- a/examples/vctk/tts3/local/ort_predict.sh +++ b/examples/vctk/tts3/local/ort_predict.sh @@ -10,7 +10,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_vctk \ --voc=pwgan_vctk \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ @@ -25,7 +25,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_vctk \ --voc=hifigan_vctk \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=2 \ diff --git a/examples/vctk/tts3/local/synthesize_e2e.sh b/examples/vctk/tts3/local/synthesize_e2e.sh index a89f42b50dae9f91538005c6a24e22e37eda7904..971c8385340f7c1a2a797e836657a655ce001925 100755 --- a/examples/vctk/tts3/local/synthesize_e2e.sh +++ b/examples/vctk/tts3/local/synthesize_e2e.sh @@ -21,7 +21,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_vctk_ckpt_0.1.1/snapshot_iter_1500000.pdz \ --voc_stat=pwg_vctk_ckpt_0.1.1/feats_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -43,7 +43,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=hifigan_vctk_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_vctk_ckpt_0.2.0/feats_stats.npy \ --lang=en \ - --text=${BIN_DIR}/../sentences_en.txt \ + --text=${BIN_DIR}/../../assets/sentences_en.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/zh_en_tts/tts3/README.md b/examples/zh_en_tts/tts3/README.md index 1f04d41e7e9b165d93ecccebd3157fa50b44a341..15de3f487272b06cfb8fd5553c959d3c7bb79f1e 100644 --- a/examples/zh_en_tts/tts3/README.md +++ b/examples/zh_en_tts/tts3/README.md @@ -252,8 +252,10 @@ optional arguments: ## Pretrained Model + Pretrained FastSpeech2 model with no silence in the edge of audios: - [fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip) +- [pwg_aishell3_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip) The static model can be downloaded here: - [fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip) @@ -285,18 +287,18 @@ FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ python3 ${BIN_DIR}/../synthesize_e2e.py \ --am=fastspeech2_mix \ - --am_config=fastspeech2_mix_ckpt_1.2.0/default.yaml \ - --am_ckpt=fastspeech2_mix_ckpt_1.2.0/snapshot_iter_99200.pdz \ - --am_stat=fastspeech2_mix_ckpt_1.2.0/speech_stats.npy \ - --phones_dict=fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \ - --speaker_dict=fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \ + --am_config=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/default.yaml \ + --am_ckpt=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/snapshot_iter_99200.pdz \ + --am_stat=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/speech_stats.npy \ + --phones_dict=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \ + --speaker_dict=exp/pretrain/fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \ --spk_id=174 \ --voc=pwgan_aishell3 \ - --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \ - --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ - --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ + --voc_config=exp/pretrain/pwg_aishell3_ckpt_0.5/default.yaml \ + --voc_ckpt=exp/pretrain/pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ + --voc_stat=exp/pretrain/pwg_aishell3_ckpt_0.5/feats_stats.npy \ --lang=mix \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=exp/default/test_e2e \ --inference_dir=exp/default/inference ``` diff --git a/examples/zh_en_tts/tts3/local/inference.sh b/examples/zh_en_tts/tts3/local/inference.sh index 16499ed0168d2c0aa96d9ecde908f2647cda0521..e4168fd04c2e05b15f824138e838eb0491b2fc47 100755 --- a/examples/zh_en_tts/tts3/local/inference.sh +++ b/examples/zh_en_tts/tts3/local/inference.sh @@ -13,7 +13,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_mix \ --voc=pwgan_aishell3 \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -30,7 +30,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_mix \ --voc=hifigan_aishell3 \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -45,7 +45,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --inference_dir=${train_output_path}/inference \ --am=fastspeech2_mix \ --voc=hifigan_csmsc \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=${train_output_path}/pd_infer_out \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/examples/zh_en_tts/tts3/local/model_download.sh b/examples/zh_en_tts/tts3/local/model_download.sh index 20a830b7427fa1707c05177873738554a0720904..21a218a836eac9994e54d7009d574ab9fdde6690 100755 --- a/examples/zh_en_tts/tts3/local/model_download.sh +++ b/examples/zh_en_tts/tts3/local/model_download.sh @@ -8,6 +8,7 @@ mkdir -p $pretrain pushd $pretrain wget -c https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip & +wget -c https://paddlespeech.bj.bcebos.com/Parakeet/released_models/pwgan/pwg_aishell3_ckpt_0.5.zip & wait popd diff --git a/examples/zh_en_tts/tts3/local/ort_predict.sh b/examples/zh_en_tts/tts3/local/ort_predict.sh index d80da9c91b2e3d4c803ca7f96b50accc0726d487..0d5ac675ca713af67b7b20b1275bb1dd8cd2b5da 100755 --- a/examples/zh_en_tts/tts3/local/ort_predict.sh +++ b/examples/zh_en_tts/tts3/local/ort_predict.sh @@ -13,7 +13,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --am=fastspeech2_mix \ --voc=pwgan_aishell3 \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=4 \ @@ -31,7 +31,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_mix \ --voc=hifigan_aishell3 \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=4 \ @@ -45,7 +45,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --am=fastspeech2_mix \ --voc=hifigan_csmsc \ --output_dir=${train_output_path}/onnx_infer_out_e2e \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --phones_dict=dump/phone_id_map.txt \ --device=cpu \ --cpu_threads=4 \ diff --git a/examples/zh_en_tts/tts3/local/synthesize_e2e.sh b/examples/zh_en_tts/tts3/local/synthesize_e2e.sh index f6ee04aefecb728f1ffb13a33af3f07c49bf4862..daad7180c2a42ae10bb71a9114f967f705dffe83 100755 --- a/examples/zh_en_tts/tts3/local/synthesize_e2e.sh +++ b/examples/zh_en_tts/tts3/local/synthesize_e2e.sh @@ -23,7 +23,7 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ --voc_stat=pwg_aishell3_ckpt_0.5/feats_stats.npy \ --lang=mix \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -48,7 +48,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_aishell3_ckpt_0.2.0/feats_stats.npy \ --lang=mix \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ @@ -73,7 +73,7 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then --voc_ckpt=hifigan_csmsc_ckpt_0.1.1/snapshot_iter_2500000.pdz \ --voc_stat=hifigan_csmsc_ckpt_0.1.1/feats_stats.npy \ --lang=mix \ - --text=${BIN_DIR}/../sentences_mix.txt \ + --text=${BIN_DIR}/../../assets/sentences_mix.txt \ --output_dir=${train_output_path}/test_e2e \ --phones_dict=dump/phone_id_map.txt \ --speaker_dict=dump/speaker_id_map.txt \ diff --git a/paddlespeech/t2s/exps/csmsc_test.txt b/paddlespeech/t2s/assets/csmsc_test.txt similarity index 100% rename from paddlespeech/t2s/exps/csmsc_test.txt rename to paddlespeech/t2s/assets/csmsc_test.txt diff --git a/paddlespeech/t2s/exps/sentences.txt b/paddlespeech/t2s/assets/sentences.txt similarity index 100% rename from paddlespeech/t2s/exps/sentences.txt rename to paddlespeech/t2s/assets/sentences.txt diff --git a/paddlespeech/t2s/exps/sentences_canton.txt b/paddlespeech/t2s/assets/sentences_canton.txt similarity index 100% rename from paddlespeech/t2s/exps/sentences_canton.txt rename to paddlespeech/t2s/assets/sentences_canton.txt diff --git a/paddlespeech/t2s/exps/sentences_en.txt b/paddlespeech/t2s/assets/sentences_en.txt similarity index 100% rename from paddlespeech/t2s/exps/sentences_en.txt rename to paddlespeech/t2s/assets/sentences_en.txt diff --git a/paddlespeech/t2s/exps/sentences_mix.txt b/paddlespeech/t2s/assets/sentences_mix.txt similarity index 100% rename from paddlespeech/t2s/exps/sentences_mix.txt rename to paddlespeech/t2s/assets/sentences_mix.txt diff --git a/paddlespeech/t2s/exps/sentences_sing.txt b/paddlespeech/t2s/assets/sentences_sing.txt similarity index 100% rename from paddlespeech/t2s/exps/sentences_sing.txt rename to paddlespeech/t2s/assets/sentences_sing.txt diff --git a/paddlespeech/t2s/exps/sentences_ssml.txt b/paddlespeech/t2s/assets/sentences_ssml.txt similarity index 100% rename from paddlespeech/t2s/exps/sentences_ssml.txt rename to paddlespeech/t2s/assets/sentences_ssml.txt