Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
00e9853f
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
00e9853f
编写于
7月 29, 2022
作者:
小湉湉
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add mix tts cli, test=tts
上级
1f128a08
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
34 addition
and
14 deletion
+34
-14
paddlespeech/cli/tts/infer.py
paddlespeech/cli/tts/infer.py
+16
-14
paddlespeech/resource/pretrained_models.py
paddlespeech/resource/pretrained_models.py
+18
-0
未找到文件。
paddlespeech/cli/tts/infer.py
浏览文件 @
00e9853f
...
@@ -29,8 +29,7 @@ from yacs.config import CfgNode
...
@@ -29,8 +29,7 @@ from yacs.config import CfgNode
from
..executor
import
BaseExecutor
from
..executor
import
BaseExecutor
from
..log
import
logger
from
..log
import
logger
from
..utils
import
stats_wrapper
from
..utils
import
stats_wrapper
from
paddlespeech.t2s.frontend
import
English
from
paddlespeech.t2s.exps.syn_utils
import
get_frontend
from
paddlespeech.t2s.frontend.zh_frontend
import
Frontend
from
paddlespeech.t2s.modules.normalizer
import
ZScore
from
paddlespeech.t2s.modules.normalizer
import
ZScore
__all__
=
[
'TTSExecutor'
]
__all__
=
[
'TTSExecutor'
]
...
@@ -54,6 +53,7 @@ class TTSExecutor(BaseExecutor):
...
@@ -54,6 +53,7 @@ class TTSExecutor(BaseExecutor):
'fastspeech2_ljspeech'
,
'fastspeech2_ljspeech'
,
'fastspeech2_aishell3'
,
'fastspeech2_aishell3'
,
'fastspeech2_vctk'
,
'fastspeech2_vctk'
,
'fastspeech2_mix'
,
'tacotron2_csmsc'
,
'tacotron2_csmsc'
,
'tacotron2_ljspeech'
,
'tacotron2_ljspeech'
,
],
],
...
@@ -135,7 +135,7 @@ class TTSExecutor(BaseExecutor):
...
@@ -135,7 +135,7 @@ class TTSExecutor(BaseExecutor):
'--lang'
,
'--lang'
,
type
=
str
,
type
=
str
,
default
=
'zh'
,
default
=
'zh'
,
help
=
'Choose model language. zh or en'
)
help
=
'Choose model language. zh or en
or mix
'
)
self
.
parser
.
add_argument
(
self
.
parser
.
add_argument
(
'--device'
,
'--device'
,
type
=
str
,
type
=
str
,
...
@@ -231,8 +231,11 @@ class TTSExecutor(BaseExecutor):
...
@@ -231,8 +231,11 @@ class TTSExecutor(BaseExecutor):
use_pretrained_voc
=
True
use_pretrained_voc
=
True
else
:
else
:
use_pretrained_voc
=
False
use_pretrained_voc
=
False
voc_lang
=
lang
voc_tag
=
voc
+
'-'
+
lang
# we must use ljspeech's voc for mix am now!
if
lang
==
'mix'
:
voc_lang
=
'en'
voc_tag
=
voc
+
'-'
+
voc_lang
self
.
task_resource
.
set_task_model
(
self
.
task_resource
.
set_task_model
(
model_tag
=
voc_tag
,
model_tag
=
voc_tag
,
model_type
=
1
,
# vocoder
model_type
=
1
,
# vocoder
...
@@ -281,13 +284,8 @@ class TTSExecutor(BaseExecutor):
...
@@ -281,13 +284,8 @@ class TTSExecutor(BaseExecutor):
spk_num
=
len
(
spk_id
)
spk_num
=
len
(
spk_id
)
# frontend
# frontend
if
lang
==
'zh'
:
self
.
frontend
=
get_frontend
(
self
.
frontend
=
Frontend
(
lang
=
lang
,
phones_dict
=
self
.
phones_dict
,
tones_dict
=
self
.
tones_dict
)
phone_vocab_path
=
self
.
phones_dict
,
tone_vocab_path
=
self
.
tones_dict
)
elif
lang
==
'en'
:
self
.
frontend
=
English
(
phone_vocab_path
=
self
.
phones_dict
)
# acoustic model
# acoustic model
odim
=
self
.
am_config
.
n_mels
odim
=
self
.
am_config
.
n_mels
...
@@ -381,8 +379,12 @@ class TTSExecutor(BaseExecutor):
...
@@ -381,8 +379,12 @@ class TTSExecutor(BaseExecutor):
input_ids
=
self
.
frontend
.
get_input_ids
(
input_ids
=
self
.
frontend
.
get_input_ids
(
text
,
merge_sentences
=
merge_sentences
)
text
,
merge_sentences
=
merge_sentences
)
phone_ids
=
input_ids
[
"phone_ids"
]
phone_ids
=
input_ids
[
"phone_ids"
]
elif
lang
==
'mix'
:
input_ids
=
self
.
frontend
.
get_input_ids
(
text
,
merge_sentences
=
merge_sentences
)
phone_ids
=
input_ids
[
"phone_ids"
]
else
:
else
:
logger
.
error
(
"lang should in {'zh', 'en'}!"
)
logger
.
error
(
"lang should in {'zh', 'en'
, 'mix'
}!"
)
self
.
frontend_time
=
time
.
time
()
-
frontend_st
self
.
frontend_time
=
time
.
time
()
-
frontend_st
self
.
am_time
=
0
self
.
am_time
=
0
...
@@ -398,7 +400,7 @@ class TTSExecutor(BaseExecutor):
...
@@ -398,7 +400,7 @@ class TTSExecutor(BaseExecutor):
# fastspeech2
# fastspeech2
else
:
else
:
# multi speaker
# multi speaker
if
am_dataset
in
{
"aishell3"
,
"vctk"
}:
if
am_dataset
in
{
'aishell3'
,
'vctk'
,
'mix'
}:
mel
=
self
.
am_inference
(
mel
=
self
.
am_inference
(
part_phone_ids
,
spk_id
=
paddle
.
to_tensor
(
spk_id
))
part_phone_ids
,
spk_id
=
paddle
.
to_tensor
(
spk_id
))
else
:
else
:
...
...
paddlespeech/resource/pretrained_models.py
浏览文件 @
00e9853f
...
@@ -655,6 +655,24 @@ tts_dynamic_pretrained_models = {
...
@@ -655,6 +655,24 @@ tts_dynamic_pretrained_models = {
'phone_id_map.txt'
,
'phone_id_map.txt'
,
},
},
},
},
"fastspeech2_mix-mix"
:
{
'1.0'
:
{
'url'
:
'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip'
,
'md5'
:
'77d9d4b5a79ed6203339ead7ef6c74f9'
,
'config'
:
'default.yaml'
,
'ckpt'
:
'snapshot_iter_94000.pdz'
,
'speech_stats'
:
'speech_stats.npy'
,
'phones_dict'
:
'phone_id_map.txt'
,
'speaker_dict'
:
'speaker_id_map.txt'
,
},
},
# tacotron2
# tacotron2
"tacotron2_csmsc-zh"
:
{
"tacotron2_csmsc-zh"
:
{
'1.0'
:
{
'1.0'
:
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录