Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
ad4b248a
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
8
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ad4b248a
编写于
4月 07, 2020
作者:
L
lifuchen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some bug of mask in fastspeech
上级
75d46422
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
18 addition
and
5 deletion
+18
-5
examples/fastspeech/synthesis.py
examples/fastspeech/synthesis.py
+13
-3
examples/fastspeech/synthesis.sh
examples/fastspeech/synthesis.sh
+1
-1
parakeet/models/fastspeech/decoder.py
parakeet/models/fastspeech/decoder.py
+2
-1
parakeet/models/fastspeech/fastspeech.py
parakeet/models/fastspeech/fastspeech.py
+1
-0
parakeet/models/transformer_tts/decoder.py
parakeet/models/transformer_tts/decoder.py
+1
-0
未找到文件。
examples/fastspeech/synthesis.py
浏览文件 @
ad4b248a
...
...
@@ -18,6 +18,7 @@ import argparse
from
parse
import
add_config_options_to_parser
from
pprint
import
pprint
from
ruamel
import
yaml
from
matplotlib
import
cm
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid.dygraph
as
dg
...
...
@@ -64,8 +65,7 @@ def synthesis(text_input, args):
pos_text
=
np
.
arange
(
1
,
text
.
shape
[
1
]
+
1
)
pos_text
=
np
.
expand_dims
(
pos_text
,
axis
=
0
)
enc_non_pad_mask
=
get_non_pad_mask
(
pos_text
).
astype
(
np
.
float32
)
enc_slf_attn_mask
=
get_attn_key_pad_mask
(
pos_text
,
text
).
astype
(
np
.
float32
)
enc_slf_attn_mask
=
get_attn_key_pad_mask
(
pos_text
).
astype
(
np
.
float32
)
text
=
dg
.
to_variable
(
text
)
pos_text
=
dg
.
to_variable
(
pos_text
)
...
...
@@ -101,8 +101,17 @@ def synthesis(text_input, args):
do_trim_silence
=
False
,
sound_norm
=
False
)
np
.
save
(
'mel_output'
,
mel_output_postnet
.
numpy
())
mel_output_postnet
=
fluid
.
layers
.
transpose
(
fluid
.
layers
.
squeeze
(
mel_output_postnet
,
[
0
]),
[
1
,
0
])
x
=
np
.
uint8
(
cm
.
viridis
(
mel_output_postnet
.
numpy
())
*
255
)
writer
.
add_image
(
'mel_0_0'
,
x
,
0
,
dataformats
=
"HWC"
)
ground_truth
=
_ljspeech_processor
.
load_wav
(
str
(
'/paddle/Parakeet/dataset/LJSpeech-1.1/wavs/LJ001-0175.wav'
))
ground_truth
=
_ljspeech_processor
.
melspectrogram
(
ground_truth
).
astype
(
np
.
float32
)
x
=
np
.
uint8
(
cm
.
viridis
(
ground_truth
)
*
255
)
writer
.
add_image
(
'mel_gt_0'
,
x
,
0
,
dataformats
=
"HWC"
)
wav
=
_ljspeech_processor
.
inv_melspectrogram
(
mel_output_postnet
.
numpy
(
))
writer
.
add_audio
(
text_input
,
wav
,
0
,
cfg
[
'audio'
][
'sr'
])
...
...
@@ -114,4 +123,5 @@ if __name__ == '__main__':
parser
=
argparse
.
ArgumentParser
(
description
=
"Train Fastspeech model"
)
add_config_options_to_parser
(
parser
)
args
=
parser
.
parse_args
()
synthesis
(
"Transformer model is so fast!"
,
args
)
synthesis
(
"Simple as this proposition is, it is necessary to be stated,"
,
args
)
examples/fastspeech/synthesis.sh
浏览文件 @
ad4b248a
...
...
@@ -4,7 +4,7 @@ python -u synthesis.py \
--use_gpu
=
1
\
--alpha
=
1.0
\
--checkpoint_path
=
'checkpoint/'
\
--fastspeech_step
=
71
000
\
--fastspeech_step
=
89
000
\
--log_dir
=
'./log'
\
--config_path
=
'configs/synthesis.yaml'
\
...
...
parakeet/models/fastspeech/decoder.py
浏览文件 @
ad4b248a
...
...
@@ -88,7 +88,8 @@ class Decoder(dg.Layer):
dec_slf_attn_list (list[Variable]): len(n_layers), the decoder self attention list.
"""
dec_slf_attn_list
=
[]
slf_attn_mask
=
layers
.
expand
(
slf_attn_mask
,
[
self
.
n_head
,
1
,
1
])
if
slf_attn_mask
:
slf_attn_mask
=
layers
.
expand
(
slf_attn_mask
,
[
self
.
n_head
,
1
,
1
])
# -- Forward
dec_output
=
enc_seq
+
self
.
position_enc
(
enc_pos
)
...
...
parakeet/models/fastspeech/fastspeech.py
浏览文件 @
ad4b248a
...
...
@@ -142,6 +142,7 @@ class FastSpeech(dg.Layer):
encoder_output
,
alpha
=
alpha
)
slf_attn_mask
=
get_triu_tensor
(
decoder_pos
.
numpy
(),
decoder_pos
.
numpy
()).
astype
(
np
.
float32
)
slf_attn_mask
=
np
.
expand_dims
(
slf_attn_mask
,
axis
=
0
)
slf_attn_mask
=
fluid
.
layers
.
cast
(
dg
.
to_variable
(
slf_attn_mask
==
0
),
np
.
float32
)
slf_attn_mask
=
dg
.
to_variable
(
slf_attn_mask
)
...
...
parakeet/models/transformer_tts/decoder.py
浏览文件 @
ad4b248a
...
...
@@ -149,6 +149,7 @@ class Decoder(dg.Layer):
zero_mask
=
layers
.
expand
(
zero_mask
,
[
self
.
num_head
,
1
,
1
])
else
:
mask
=
layers
.
expand
(
mask
,
[
self
.
num_head
,
1
,
1
])
m_mask
,
m_self_mask
,
zero_mask
=
None
,
None
,
None
# Decoder pre-network
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录