From 6469568d2afa9fa9ba5c37f75989510630787c9c Mon Sep 17 00:00:00 2001 From: TianYuan Date: Tue, 22 Mar 2022 07:26:35 +0000 Subject: [PATCH] update readme for vctk hifigan, test=tts --- docs/source/released_model.md | 1 + examples/vctk/tts3/local/synthesize_e2e.sh | 2 +- examples/vctk/voc5/README.md | 14 ++++++++++++ paddlespeech/cli/tts/infer.py | 26 ++++++++++++++++++++++ paddlespeech/t2s/exps/synthesize_e2e.py | 2 +- tests/unit/cli/test_cli.sh | 2 ++ 6 files changed, 45 insertions(+), 2 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 52b386da..62986da0 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -55,6 +55,7 @@ Parallel WaveGAN| VCTK |[PWGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeec Style MelGAN | CSMSC |[Style MelGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc4)|[style_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/style_melgan/style_melgan_csmsc_ckpt_0.1.1.zip)| | | HiFiGAN | CSMSC |[HiFiGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc5)|[hifigan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip)|[hifigan_csmsc_static_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_0.1.1.zip)|50MB| HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)||| +HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)||| WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB| diff --git a/examples/vctk/tts3/local/synthesize_e2e.sh b/examples/vctk/tts3/local/synthesize_e2e.sh index 16b0d62b..a89f42b5 100755 --- a/examples/vctk/tts3/local/synthesize_e2e.sh +++ b/examples/vctk/tts3/local/synthesize_e2e.sh @@ -49,4 +49,4 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --speaker_dict=dump/speaker_id_map.txt \ --spk_id=0 \ --inference_dir=${train_output_path}/inference -fi \ No newline at end of file +fi diff --git a/examples/vctk/voc5/README.md b/examples/vctk/voc5/README.md index 06623ec5..b4be341c 100644 --- a/examples/vctk/voc5/README.md +++ b/examples/vctk/voc5/README.md @@ -133,7 +133,21 @@ optional arguments: 5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu. ## Pretrained Model +The pretrained model can be downloaded here [hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip). +Model | Step | eval/generator_loss | eval/mel_loss| eval/feature_matching_loss +:-------------:| :------------:| :-----: | :-----: | :--------: +default| 1(gpu) x 2500000|58.092|0.1234|24.384 + +HiFiGAN checkpoint contains files listed below. + +```text +hifigan_vctk_ckpt_0.2.0 +├── default.yaml # default config used to train hifigan +├── feats_stats.npy # statistics used to normalize spectrogram when training hifigan +└── snapshot_iter_2500000.pdz # generator parameters of hifigan +``` + ## Acknowledgement We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN. diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 8423dfa8..78eae769 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -237,6 +237,30 @@ pretrained_models = { 'speech_stats': 'feats_stats.npy', }, + "hifigan_aishell3-zh": { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip', + 'md5': + '3bb49bc75032ed12f79c00c8cc79a09a', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_2500000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, + "hifigan_vctk-en": { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip', + 'md5': + '7da8f88359bca2457e705d924cf27bd4', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_2500000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, # wavernn "wavernn_csmsc-zh": { @@ -365,6 +389,8 @@ class TTSExecutor(BaseExecutor): 'mb_melgan_csmsc', 'style_melgan_csmsc', 'hifigan_csmsc', + 'hifigan_aishell3', + 'hifigan_vctk', 'wavernn_csmsc', ], help='Choose vocoder type of tts task.') diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py index f5214d4a..10b33c60 100644 --- a/paddlespeech/t2s/exps/synthesize_e2e.py +++ b/paddlespeech/t2s/exps/synthesize_e2e.py @@ -63,7 +63,7 @@ def evaluate(args): output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) - merge_sentences = True + merge_sentences = False # Avoid not stopping at the end of a sub sentence when tacotron2_ljspeech dygraph to static graph # but still not stopping in the end (NOTE by yuantian01 Feb 9 2022) if am_name == 'tacotron2': diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index 7dabe197..9852b069 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -19,8 +19,10 @@ paddlespeech tts --voc mb_melgan_csmsc --input "你好,欢迎使用百度飞 paddlespeech tts --voc style_melgan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --voc hifigan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好,欢迎使用百度飞桨深度学习框架!" --spk_id 0 +paddlespeech tts --am fastspeech2_aishell3 --voc hifigan_aishell3 --input "你好,欢迎使用百度飞桨深度学习框架!" --spk_id 0 paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 +paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 paddlespeech tts --am tacotron2_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." -- GitLab