diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 52b386daf3e43698b7fa74080425f28770489353..62986da03d10cc29dd8ba76565feab0555cf3ba7 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -55,6 +55,7 @@ Parallel WaveGAN| VCTK |[PWGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeec Style MelGAN | CSMSC |[Style MelGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc4)|[style_melgan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/style_melgan/style_melgan_csmsc_ckpt_0.1.1.zip)| | | HiFiGAN | CSMSC |[HiFiGAN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc5)|[hifigan_csmsc_ckpt_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_ckpt_0.1.1.zip)|[hifigan_csmsc_static_0.1.1.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_csmsc_static_0.1.1.zip)|50MB| HiFiGAN | AISHELL-3 |[HiFiGAN-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)||| +HiFiGAN | VCTK |[HiFiGAN-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/voc5)|[hifigan_aishell3_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip)||| WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/voc6)|[wavernn_csmsc_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_ckpt_0.2.0.zip)|[wavernn_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/wavernn/wavernn_csmsc_static_0.2.0.zip)|18MB| diff --git a/examples/vctk/tts3/local/synthesize_e2e.sh b/examples/vctk/tts3/local/synthesize_e2e.sh index 16b0d62ba39270d37d952fa0a40ddcf0ad5b79fd..a89f42b50dae9f91538005c6a24e22e37eda7904 100755 --- a/examples/vctk/tts3/local/synthesize_e2e.sh +++ b/examples/vctk/tts3/local/synthesize_e2e.sh @@ -49,4 +49,4 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --speaker_dict=dump/speaker_id_map.txt \ --spk_id=0 \ --inference_dir=${train_output_path}/inference -fi \ No newline at end of file +fi diff --git a/examples/vctk/voc5/README.md b/examples/vctk/voc5/README.md index 06623ec5f827886990e8a428b8219bb49c98188b..b4be341c0e52a74bbf475e22d39fd72bfe737e47 100644 --- a/examples/vctk/voc5/README.md +++ b/examples/vctk/voc5/README.md @@ -133,7 +133,21 @@ optional arguments: 5. `--ngpu` is the number of gpus to use, if ngpu == 0, use cpu. ## Pretrained Model +The pretrained model can be downloaded here [hifigan_vctk_ckpt_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip). +Model | Step | eval/generator_loss | eval/mel_loss| eval/feature_matching_loss +:-------------:| :------------:| :-----: | :-----: | :--------: +default| 1(gpu) x 2500000|58.092|0.1234|24.384 + +HiFiGAN checkpoint contains files listed below. + +```text +hifigan_vctk_ckpt_0.2.0 +├── default.yaml # default config used to train hifigan +├── feats_stats.npy # statistics used to normalize spectrogram when training hifigan +└── snapshot_iter_2500000.pdz # generator parameters of hifigan +``` + ## Acknowledgement We adapted some code from https://github.com/kan-bayashi/ParallelWaveGAN. diff --git a/paddlespeech/cli/tts/infer.py b/paddlespeech/cli/tts/infer.py index 8423dfa8d1cbf7fc651ff5e538d0ec0993ca2e9f..78eae769bee752dd72d6ec377e0be64a2c0f7fc0 100644 --- a/paddlespeech/cli/tts/infer.py +++ b/paddlespeech/cli/tts/infer.py @@ -237,6 +237,30 @@ pretrained_models = { 'speech_stats': 'feats_stats.npy', }, + "hifigan_aishell3-zh": { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_aishell3_ckpt_0.2.0.zip', + 'md5': + '3bb49bc75032ed12f79c00c8cc79a09a', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_2500000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, + "hifigan_vctk-en": { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/hifigan/hifigan_vctk_ckpt_0.2.0.zip', + 'md5': + '7da8f88359bca2457e705d924cf27bd4', + 'config': + 'default.yaml', + 'ckpt': + 'snapshot_iter_2500000.pdz', + 'speech_stats': + 'feats_stats.npy', + }, # wavernn "wavernn_csmsc-zh": { @@ -365,6 +389,8 @@ class TTSExecutor(BaseExecutor): 'mb_melgan_csmsc', 'style_melgan_csmsc', 'hifigan_csmsc', + 'hifigan_aishell3', + 'hifigan_vctk', 'wavernn_csmsc', ], help='Choose vocoder type of tts task.') diff --git a/paddlespeech/t2s/exps/synthesize_e2e.py b/paddlespeech/t2s/exps/synthesize_e2e.py index f5214d4a48b1bc395cccc627208f672d1f8df0ca..10b33c60acfafa93cbf6c8400cc3ba1b733c8a30 100644 --- a/paddlespeech/t2s/exps/synthesize_e2e.py +++ b/paddlespeech/t2s/exps/synthesize_e2e.py @@ -63,7 +63,7 @@ def evaluate(args): output_dir = Path(args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) - merge_sentences = True + merge_sentences = False # Avoid not stopping at the end of a sub sentence when tacotron2_ljspeech dygraph to static graph # but still not stopping in the end (NOTE by yuantian01 Feb 9 2022) if am_name == 'tacotron2': diff --git a/tests/unit/cli/test_cli.sh b/tests/unit/cli/test_cli.sh index 7dabe1976a6104c19f2d7851d5b29759755db10b..9852b0695e5dabcec48e5510ff19274b81669670 100755 --- a/tests/unit/cli/test_cli.sh +++ b/tests/unit/cli/test_cli.sh @@ -19,8 +19,10 @@ paddlespeech tts --voc mb_melgan_csmsc --input "你好,欢迎使用百度飞 paddlespeech tts --voc style_melgan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --voc hifigan_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am fastspeech2_aishell3 --voc pwgan_aishell3 --input "你好,欢迎使用百度飞桨深度学习框架!" --spk_id 0 +paddlespeech tts --am fastspeech2_aishell3 --voc hifigan_aishell3 --input "你好,欢迎使用百度飞桨深度学习框架!" --spk_id 0 paddlespeech tts --am fastspeech2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get." paddlespeech tts --am fastspeech2_vctk --voc pwgan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 +paddlespeech tts --am fastspeech2_vctk --voc hifigan_vctk --input "Life was like a box of chocolates, you never know what you're gonna get." --lang en --spk_id 0 paddlespeech tts --am tacotron2_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_csmsc --voc wavernn_csmsc --input "你好,欢迎使用百度飞桨深度学习框架!" paddlespeech tts --am tacotron2_ljspeech --voc pwgan_ljspeech --lang en --input "Life was like a box of chocolates, you never know what you're gonna get."