Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
DeepSpeech
提交
2071774d
D
DeepSpeech
项目概览
PaddlePaddle
/
DeepSpeech
大约 2 年 前同步成功
通知
210
Star
8425
Fork
1598
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
245
列表
看板
标记
里程碑
合并请求
3
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
D
DeepSpeech
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
245
Issue
245
列表
看板
标记
里程碑
合并请求
3
合并请求
3
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2071774d
编写于
1月 25, 2022
作者:
小湉湉
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add wavernn in synthesize_e2e, test=tts
上级
1cc7905d
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
61 additions
and
5 deletions
+61
-5
examples/csmsc/tts3/local/synthesize_e2e.sh
examples/csmsc/tts3/local/synthesize_e2e.sh
+21
-0
paddlespeech/t2s/exps/synthesize_e2e.py
paddlespeech/t2s/exps/synthesize_e2e.py
+16
-5
paddlespeech/t2s/models/wavernn/wavernn.py
paddlespeech/t2s/models/wavernn/wavernn.py
+24
-0
未找到文件。
examples/csmsc/tts3/local/synthesize_e2e.sh
浏览文件 @
2071774d
...
@@ -89,3 +89,24 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
...
@@ -89,3 +89,24 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
--inference_dir
=
${
train_output_path
}
/inference
\
--inference_dir
=
${
train_output_path
}
/inference
\
--phones_dict
=
dump/phone_id_map.txt
--phones_dict
=
dump/phone_id_map.txt
fi
fi
# wavernn
# Stage 4: end-to-end synthesis with the fastspeech2_csmsc acoustic model and
# the wavernn_csmsc vocoder. Runs only when stage <= 4 <= stop_stage.
# All variable expansions are quoted so that paths containing spaces or glob
# characters are passed to python3 as single, literal arguments.
# NOTE(review): the vocoder config/checkpoint/stat paths under wavernn_test/
# are hard-coded relative paths; confirm they exist in the working directory.
if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    echo "in wavernn syn_e2e"
    FLAGS_allocator_strategy=naive_best_fit \
    FLAGS_fraction_of_gpu_memory_to_use=0.01 \
    python3 "${BIN_DIR}/../synthesize_e2e.py" \
        --am=fastspeech2_csmsc \
        --am_config="${config_path}" \
        --am_ckpt="${train_output_path}/checkpoints/${ckpt_name}" \
        --am_stat=dump/train/speech_stats.npy \
        --voc=wavernn_csmsc \
        --voc_config=wavernn_test/default.yaml \
        --voc_ckpt=wavernn_test/snapshot_iter_5000.pdz \
        --voc_stat=wavernn_test/feats_stats.npy \
        --lang=zh \
        --text="${BIN_DIR}/../sentences.txt" \
        --output_dir="${train_output_path}/test_e2e" \
        --phones_dict=dump/phone_id_map.txt
fi
paddlespeech/t2s/exps/synthesize_e2e.py
浏览文件 @
2071774d
...
@@ -59,6 +59,10 @@ model_alias = {
...
@@ -59,6 +59,10 @@ model_alias = {
"paddlespeech.t2s.models.hifigan:HiFiGANGenerator"
,
"paddlespeech.t2s.models.hifigan:HiFiGANGenerator"
,
"hifigan_inference"
:
"hifigan_inference"
:
"paddlespeech.t2s.models.hifigan:HiFiGANInference"
,
"paddlespeech.t2s.models.hifigan:HiFiGANInference"
,
"wavernn"
:
"paddlespeech.t2s.models.wavernn:WaveRNN"
,
"wavernn_inference"
:
"paddlespeech.t2s.models.wavernn:WaveRNNInference"
,
}
}
...
@@ -148,10 +152,16 @@ def evaluate(args):
...
@@ -148,10 +152,16 @@ def evaluate(args):
voc_name
=
args
.
voc
[:
args
.
voc
.
rindex
(
'_'
)]
voc_name
=
args
.
voc
[:
args
.
voc
.
rindex
(
'_'
)]
voc_class
=
dynamic_import
(
voc_name
,
model_alias
)
voc_class
=
dynamic_import
(
voc_name
,
model_alias
)
voc_inference_class
=
dynamic_import
(
voc_name
+
'_inference'
,
model_alias
)
voc_inference_class
=
dynamic_import
(
voc_name
+
'_inference'
,
model_alias
)
voc
=
voc_class
(
**
voc_config
[
"generator_params"
])
if
voc_name
!=
'wavernn'
:
voc
.
set_state_dict
(
paddle
.
load
(
args
.
voc_ckpt
)[
"generator_params"
])
voc
=
voc_class
(
**
voc_config
[
"generator_params"
])
voc
.
remove_weight_norm
()
voc
.
set_state_dict
(
paddle
.
load
(
args
.
voc_ckpt
)[
"generator_params"
])
voc
.
eval
()
voc
.
remove_weight_norm
()
voc
.
eval
()
else
:
voc
=
voc_class
(
**
voc_config
[
"model"
])
voc
.
set_state_dict
(
paddle
.
load
(
args
.
voc_ckpt
)[
"main_params"
])
voc
.
eval
()
voc_mu
,
voc_std
=
np
.
load
(
args
.
voc_stat
)
voc_mu
,
voc_std
=
np
.
load
(
args
.
voc_stat
)
voc_mu
=
paddle
.
to_tensor
(
voc_mu
)
voc_mu
=
paddle
.
to_tensor
(
voc_mu
)
voc_std
=
paddle
.
to_tensor
(
voc_std
)
voc_std
=
paddle
.
to_tensor
(
voc_std
)
...
@@ -307,7 +317,8 @@ def main():
...
@@ -307,7 +317,8 @@ def main():
default
=
'pwgan_csmsc'
,
default
=
'pwgan_csmsc'
,
choices
=
[
choices
=
[
'pwgan_csmsc'
,
'pwgan_ljspeech'
,
'pwgan_aishell3'
,
'pwgan_vctk'
,
'pwgan_csmsc'
,
'pwgan_ljspeech'
,
'pwgan_aishell3'
,
'pwgan_vctk'
,
'mb_melgan_csmsc'
,
'style_melgan_csmsc'
,
'hifigan_csmsc'
'mb_melgan_csmsc'
,
'style_melgan_csmsc'
,
'hifigan_csmsc'
,
'wavernn_csmsc'
],
],
help
=
'Choose vocoder type of tts task.'
)
help
=
'Choose vocoder type of tts task.'
)
...
...
paddlespeech/t2s/models/wavernn/wavernn.py
浏览文件 @
2071774d
...
@@ -590,3 +590,27 @@ class WaveRNN(nn.Layer):
...
@@ -590,3 +590,27 @@ class WaveRNN(nn.Layer):
for
i
in
range
(
size
):
for
i
in
range
(
size
):
bar
+=
'█'
if
i
<=
done
else
'░'
bar
+=
'█'
if
i
<=
done
else
'░'
return
bar
return
bar
class WaveRNNInference(nn.Layer):
    """Inference wrapper that chains a normalizer with a WaveRNN generator.

    The wrapper stores the two callables it is given and, on ``forward``,
    applies ``normalizer`` to the input before handing the result to
    ``wavernn.generate``.

    Args:
        normalizer: Callable applied to ``logmel`` before generation.
        wavernn: Object exposing a ``generate`` method that accepts the
            normalized input plus the keyword options listed on ``forward``.
    """

    def __init__(self, normalizer, wavernn):
        super().__init__()
        self.normalizer = normalizer
        self.wavernn = wavernn

    def forward(self,
                logmel,
                batched: bool=True,
                target: int=12000,
                overlap: int=600,
                mu_law: bool=True,
                gen_display: bool=False):
        """Normalize ``logmel`` and return whatever ``wavernn.generate`` yields.

        Args:
            logmel: Input passed to ``self.normalizer``.
                NOTE(review): presumably a log-mel spectrogram tensor; shape
                and dtype are not visible here -- confirm against callers.
            batched: Forwarded unchanged to ``wavernn.generate``.
            target: Forwarded unchanged to ``wavernn.generate``.
            overlap: Forwarded unchanged to ``wavernn.generate``.
            mu_law: Forwarded unchanged to ``wavernn.generate``.
            gen_display: Forwarded unchanged to ``wavernn.generate``.

        Returns:
            The value returned by ``self.wavernn.generate``.
        """
        # Normalize first so the call order matches the generator's contract.
        mel = self.normalizer(logmel)
        generate_options = dict(
            batched=batched,
            target=target,
            overlap=overlap,
            mu_law=mu_law,
            gen_display=gen_display, )
        return self.wavernn.generate(mel, **generate_options)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录