Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
e354848c
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e354848c
编写于
2月 10, 2022
作者:
L
lym0302
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'tts-server3' of
https://github.com/lym0302/PaddleSpeech
into tts-server3
上级
830e91ca
20149e89
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
18 addition
and
6 deletion
+18
-6
speechserving/speechserving/conf/tts/tts_pd.yaml
speechserving/speechserving/conf/tts/tts_pd.yaml
+2
-1
speechserving/speechserving/engine/tts/paddleinference/tts_engine.py
...ng/speechserving/engine/tts/paddleinference/tts_engine.py
+16
-5
未找到文件。
speechserving/speechserving/conf/tts/tts_pd.yaml
浏览文件 @
e354848c
...
@@ -14,7 +14,7 @@ port: 8692
...
@@ -14,7 +14,7 @@ port: 8692
am
:
'
fastspeech2_csmsc'
am
:
'
fastspeech2_csmsc'
am_model
:
# the pdmodel file of am static model
am_model
:
# the pdmodel file of am static model
am_params
:
# the pdiparams file of am static model
am_params
:
# the pdiparams file of am static model
sample_rate
:
24000
am_
sample_rate
:
24000
phones_dict
:
phones_dict
:
tones_dict
:
tones_dict
:
speaker_dict
:
speaker_dict
:
...
@@ -33,6 +33,7 @@ am_predictor_conf:
...
@@ -33,6 +33,7 @@ am_predictor_conf:
voc
:
'
pwgan_csmsc'
voc
:
'
pwgan_csmsc'
voc_model
:
# the pdmodel file of vocoder static model
voc_model
:
# the pdmodel file of vocoder static model
voc_params
:
# the pdiparams file of vocoder static model
voc_params
:
# the pdiparams file of vocoder static model
voc_sample_rate
:
24000
voc_predictor_conf
:
voc_predictor_conf
:
use_gpu
:
True
use_gpu
:
True
...
...
speechserving/speechserving/engine/tts/paddleinference/tts_engine.py
浏览文件 @
e354848c
...
@@ -83,6 +83,8 @@ pretrained_models = {
...
@@ -83,6 +83,8 @@ pretrained_models = {
'pwgan_csmsc.pdmodel'
,
'pwgan_csmsc.pdmodel'
,
'params'
:
'params'
:
'pwgan_csmsc.pdiparams'
,
'pwgan_csmsc.pdiparams'
,
'sample_rate'
:
24000
,
},
},
# mb_melgan
# mb_melgan
"mb_melgan_csmsc-zh"
:
{
"mb_melgan_csmsc-zh"
:
{
...
@@ -94,6 +96,8 @@ pretrained_models = {
...
@@ -94,6 +96,8 @@ pretrained_models = {
'mb_melgan_csmsc.pdmodel'
,
'mb_melgan_csmsc.pdmodel'
,
'params'
:
'params'
:
'mb_melgan_csmsc.pdiparams'
,
'mb_melgan_csmsc.pdiparams'
,
'sample_rate'
:
24000
,
},
},
# hifigan
# hifigan
"hifigan_csmsc-zh"
:
{
"hifigan_csmsc-zh"
:
{
...
@@ -105,6 +109,8 @@ pretrained_models = {
...
@@ -105,6 +109,8 @@ pretrained_models = {
'hifigan_csmsc.pdmodel'
,
'hifigan_csmsc.pdmodel'
,
'params'
:
'params'
:
'hifigan_csmsc.pdiparams'
,
'hifigan_csmsc.pdiparams'
,
'sample_rate'
:
24000
,
},
},
}
}
...
@@ -141,13 +147,14 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -141,13 +147,14 @@ class TTSServerExecutor(TTSExecutor):
am
:
str
=
'fastspeech2_csmsc'
,
am
:
str
=
'fastspeech2_csmsc'
,
am_model
:
Optional
[
os
.
PathLike
]
=
None
,
am_model
:
Optional
[
os
.
PathLike
]
=
None
,
am_params
:
Optional
[
os
.
PathLike
]
=
None
,
am_params
:
Optional
[
os
.
PathLike
]
=
None
,
sample_rate
:
int
=
24000
,
am_
sample_rate
:
int
=
24000
,
phones_dict
:
Optional
[
os
.
PathLike
]
=
None
,
phones_dict
:
Optional
[
os
.
PathLike
]
=
None
,
tones_dict
:
Optional
[
os
.
PathLike
]
=
None
,
tones_dict
:
Optional
[
os
.
PathLike
]
=
None
,
speaker_dict
:
Optional
[
os
.
PathLike
]
=
None
,
speaker_dict
:
Optional
[
os
.
PathLike
]
=
None
,
voc
:
str
=
'pwgan_csmsc'
,
voc
:
str
=
'pwgan_csmsc'
,
voc_model
:
Optional
[
os
.
PathLike
]
=
None
,
voc_model
:
Optional
[
os
.
PathLike
]
=
None
,
voc_params
:
Optional
[
os
.
PathLike
]
=
None
,
voc_params
:
Optional
[
os
.
PathLike
]
=
None
,
voc_sample_rate
:
int
=
24000
,
lang
:
str
=
'zh'
,
lang
:
str
=
'zh'
,
am_predictor_conf
:
dict
=
None
,
am_predictor_conf
:
dict
=
None
,
voc_predictor_conf
:
dict
=
None
,
):
voc_predictor_conf
:
dict
=
None
,
):
...
@@ -169,7 +176,7 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -169,7 +176,7 @@ class TTSServerExecutor(TTSExecutor):
# must have phones_dict in acoustic
# must have phones_dict in acoustic
self
.
phones_dict
=
os
.
path
.
join
(
self
.
phones_dict
=
os
.
path
.
join
(
am_res_path
,
pretrained_models
[
am_tag
][
'phones_dict'
])
am_res_path
,
pretrained_models
[
am_tag
][
'phones_dict'
])
self
.
sample_rate
=
pretrained_models
[
am_tag
][
'sample_rate'
]
self
.
am_
sample_rate
=
pretrained_models
[
am_tag
][
'sample_rate'
]
logger
.
info
(
am_res_path
)
logger
.
info
(
am_res_path
)
logger
.
info
(
self
.
am_model
)
logger
.
info
(
self
.
am_model
)
...
@@ -178,7 +185,7 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -178,7 +185,7 @@ class TTSServerExecutor(TTSExecutor):
self
.
am_model
=
os
.
path
.
abspath
(
am_model
)
self
.
am_model
=
os
.
path
.
abspath
(
am_model
)
self
.
am_params
=
os
.
path
.
abspath
(
am_params
)
self
.
am_params
=
os
.
path
.
abspath
(
am_params
)
self
.
phones_dict
=
os
.
path
.
abspath
(
phones_dict
)
self
.
phones_dict
=
os
.
path
.
abspath
(
phones_dict
)
self
.
sample_rate
=
sample_rate
self
.
am_sample_rate
=
am_
sample_rate
self
.
am_res_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
self
.
am_model
))
self
.
am_res_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
self
.
am_model
))
print
(
"self.phones_dict:"
,
self
.
phones_dict
)
print
(
"self.phones_dict:"
,
self
.
phones_dict
)
...
@@ -207,14 +214,17 @@ class TTSServerExecutor(TTSExecutor):
...
@@ -207,14 +214,17 @@ class TTSServerExecutor(TTSExecutor):
pretrained_models
[
voc_tag
][
'model'
])
pretrained_models
[
voc_tag
][
'model'
])
self
.
voc_params
=
os
.
path
.
join
(
voc_res_path
,
self
.
voc_params
=
os
.
path
.
join
(
voc_res_path
,
pretrained_models
[
voc_tag
][
'params'
])
pretrained_models
[
voc_tag
][
'params'
])
self
.
voc_sample_rate
=
pretrained_models
[
voc_tag
][
'sample_rate'
]
logger
.
info
(
voc_res_path
)
logger
.
info
(
voc_res_path
)
logger
.
info
(
self
.
voc_model
)
logger
.
info
(
self
.
voc_model
)
logger
.
info
(
self
.
voc_params
)
logger
.
info
(
self
.
voc_params
)
else
:
else
:
self
.
voc_model
=
os
.
path
.
abspath
(
voc_model
)
self
.
voc_model
=
os
.
path
.
abspath
(
voc_model
)
self
.
voc_params
=
os
.
path
.
abspath
(
voc_params
)
self
.
voc_params
=
os
.
path
.
abspath
(
voc_params
)
self
.
voc_sample_rate
=
voc_sample_rate
self
.
voc_res_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
self
.
voc_model
))
self
.
voc_res_path
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
self
.
voc_model
))
assert
(
self
.
voc_sample_rate
==
self
.
am_sample_rate
)
# Init body.
# Init body.
with
open
(
self
.
phones_dict
,
"r"
)
as
f
:
with
open
(
self
.
phones_dict
,
"r"
)
as
f
:
phn_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
phn_id
=
[
line
.
strip
().
split
()
for
line
in
f
.
readlines
()]
...
@@ -343,13 +353,14 @@ class TTSEngine(BaseEngine):
...
@@ -343,13 +353,14 @@ class TTSEngine(BaseEngine):
am
=
self
.
conf_dict
[
"am"
],
am
=
self
.
conf_dict
[
"am"
],
am_model
=
self
.
conf_dict
[
"am_model"
],
am_model
=
self
.
conf_dict
[
"am_model"
],
am_params
=
self
.
conf_dict
[
"am_params"
],
am_params
=
self
.
conf_dict
[
"am_params"
],
sample_rate
=
self
.
conf_dict
[
"
sample_rate"
],
am_sample_rate
=
self
.
conf_dict
[
"am_
sample_rate"
],
phones_dict
=
self
.
conf_dict
[
"phones_dict"
],
phones_dict
=
self
.
conf_dict
[
"phones_dict"
],
tones_dict
=
self
.
conf_dict
[
"tones_dict"
],
tones_dict
=
self
.
conf_dict
[
"tones_dict"
],
speaker_dict
=
self
.
conf_dict
[
"speaker_dict"
],
speaker_dict
=
self
.
conf_dict
[
"speaker_dict"
],
voc
=
self
.
conf_dict
[
"voc"
],
voc
=
self
.
conf_dict
[
"voc"
],
voc_model
=
self
.
conf_dict
[
"voc_model"
],
voc_model
=
self
.
conf_dict
[
"voc_model"
],
voc_params
=
self
.
conf_dict
[
"voc_params"
],
voc_params
=
self
.
conf_dict
[
"voc_params"
],
voc_sample_rate
=
self
.
conf_dict
[
"voc_sample_rate"
],
lang
=
self
.
conf_dict
[
"lang"
],
lang
=
self
.
conf_dict
[
"lang"
],
am_predictor_conf
=
self
.
conf_dict
[
"am_predictor_conf"
],
am_predictor_conf
=
self
.
conf_dict
[
"am_predictor_conf"
],
voc_predictor_conf
=
self
.
conf_dict
[
"voc_predictor_conf"
],
)
voc_predictor_conf
=
self
.
conf_dict
[
"voc_predictor_conf"
],
)
...
@@ -451,7 +462,7 @@ class TTSEngine(BaseEngine):
...
@@ -451,7 +462,7 @@ class TTSEngine(BaseEngine):
try
:
try
:
target_sample_rate
,
wav_base64
=
self
.
postprocess
(
target_sample_rate
,
wav_base64
=
self
.
postprocess
(
wav
=
self
.
executor
.
_outputs
[
'wav'
].
numpy
(),
wav
=
self
.
executor
.
_outputs
[
'wav'
].
numpy
(),
original_fs
=
self
.
executor
.
sample_rate
,
original_fs
=
self
.
executor
.
am_
sample_rate
,
target_fs
=
sample_rate
,
target_fs
=
sample_rate
,
volume
=
volume
,
volume
=
volume
,
speed
=
speed
,
speed
=
speed
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录