Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
ad4b248a
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
14
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
ad4b248a
编写于
4月 07, 2020
作者:
L
lifuchen
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some bug of mask in fastspeech
上级
75d46422
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
18 additions
and
5 deletions
+18
-5
examples/fastspeech/synthesis.py
examples/fastspeech/synthesis.py
+13
-3
examples/fastspeech/synthesis.sh
examples/fastspeech/synthesis.sh
+1
-1
parakeet/models/fastspeech/decoder.py
parakeet/models/fastspeech/decoder.py
+2
-1
parakeet/models/fastspeech/fastspeech.py
parakeet/models/fastspeech/fastspeech.py
+1
-0
parakeet/models/transformer_tts/decoder.py
parakeet/models/transformer_tts/decoder.py
+1
-0
未找到文件。
examples/fastspeech/synthesis.py
浏览文件 @
ad4b248a
...
@@ -18,6 +18,7 @@ import argparse
...
@@ -18,6 +18,7 @@ import argparse
from
parse
import
add_config_options_to_parser
from
parse
import
add_config_options_to_parser
from
pprint
import
pprint
from
pprint
import
pprint
from
ruamel
import
yaml
from
ruamel
import
yaml
from
matplotlib
import
cm
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.dygraph
as
dg
import
paddle.fluid.dygraph
as
dg
...
@@ -64,8 +65,7 @@ def synthesis(text_input, args):
...
@@ -64,8 +65,7 @@ def synthesis(text_input, args):
pos_text
=
np
.
arange
(
1
,
text
.
shape
[
1
]
+
1
)
pos_text
=
np
.
arange
(
1
,
text
.
shape
[
1
]
+
1
)
pos_text
=
np
.
expand_dims
(
pos_text
,
axis
=
0
)
pos_text
=
np
.
expand_dims
(
pos_text
,
axis
=
0
)
enc_non_pad_mask
=
get_non_pad_mask
(
pos_text
).
astype
(
np
.
float32
)
enc_non_pad_mask
=
get_non_pad_mask
(
pos_text
).
astype
(
np
.
float32
)
enc_slf_attn_mask
=
get_attn_key_pad_mask
(
pos_text
,
enc_slf_attn_mask
=
get_attn_key_pad_mask
(
pos_text
).
astype
(
np
.
float32
)
text
).
astype
(
np
.
float32
)
text
=
dg
.
to_variable
(
text
)
text
=
dg
.
to_variable
(
text
)
pos_text
=
dg
.
to_variable
(
pos_text
)
pos_text
=
dg
.
to_variable
(
pos_text
)
...
@@ -101,8 +101,17 @@ def synthesis(text_input, args):
...
@@ -101,8 +101,17 @@ def synthesis(text_input, args):
do_trim_silence
=
False
,
do_trim_silence
=
False
,
sound_norm
=
False
)
sound_norm
=
False
)
np
.
save
(
'mel_output'
,
mel_output_postnet
.
numpy
())
mel_output_postnet
=
fluid
.
layers
.
transpose
(
mel_output_postnet
=
fluid
.
layers
.
transpose
(
fluid
.
layers
.
squeeze
(
mel_output_postnet
,
[
0
]),
[
1
,
0
])
fluid
.
layers
.
squeeze
(
mel_output_postnet
,
[
0
]),
[
1
,
0
])
x
=
np
.
uint8
(
cm
.
viridis
(
mel_output_postnet
.
numpy
())
*
255
)
writer
.
add_image
(
'mel_0_0'
,
x
,
0
,
dataformats
=
"HWC"
)
ground_truth
=
_ljspeech_processor
.
load_wav
(
str
(
'/paddle/Parakeet/dataset/LJSpeech-1.1/wavs/LJ001-0175.wav'
))
ground_truth
=
_ljspeech_processor
.
melspectrogram
(
ground_truth
).
astype
(
np
.
float32
)
x
=
np
.
uint8
(
cm
.
viridis
(
ground_truth
)
*
255
)
writer
.
add_image
(
'mel_gt_0'
,
x
,
0
,
dataformats
=
"HWC"
)
wav
=
_ljspeech_processor
.
inv_melspectrogram
(
mel_output_postnet
.
numpy
(
wav
=
_ljspeech_processor
.
inv_melspectrogram
(
mel_output_postnet
.
numpy
(
))
))
writer
.
add_audio
(
text_input
,
wav
,
0
,
cfg
[
'audio'
][
'sr'
])
writer
.
add_audio
(
text_input
,
wav
,
0
,
cfg
[
'audio'
][
'sr'
])
...
@@ -114,4 +123,5 @@ if __name__ == '__main__':
...
@@ -114,4 +123,5 @@ if __name__ == '__main__':
parser
=
argparse
.
ArgumentParser
(
description
=
"Train Fastspeech model"
)
parser
=
argparse
.
ArgumentParser
(
description
=
"Train Fastspeech model"
)
add_config_options_to_parser
(
parser
)
add_config_options_to_parser
(
parser
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
synthesis
(
"Transformer model is so fast!"
,
args
)
synthesis
(
"Simple as this proposition is, it is necessary to be stated,"
,
args
)
examples/fastspeech/synthesis.sh
浏览文件 @
ad4b248a
...
@@ -4,7 +4,7 @@ python -u synthesis.py \
...
@@ -4,7 +4,7 @@ python -u synthesis.py \
--use_gpu
=
1
\
--use_gpu
=
1
\
--alpha
=
1.0
\
--alpha
=
1.0
\
--checkpoint_path
=
'checkpoint/'
\
--checkpoint_path
=
'checkpoint/'
\
--fastspeech_step
=
71
000
\
--fastspeech_step
=
89
000
\
--log_dir
=
'./log'
\
--log_dir
=
'./log'
\
--config_path
=
'configs/synthesis.yaml'
\
--config_path
=
'configs/synthesis.yaml'
\
...
...
parakeet/models/fastspeech/decoder.py
浏览文件 @
ad4b248a
...
@@ -88,7 +88,8 @@ class Decoder(dg.Layer):
...
@@ -88,7 +88,8 @@ class Decoder(dg.Layer):
dec_slf_attn_list (list[Variable]): len(n_layers), the decoder self attention list.
dec_slf_attn_list (list[Variable]): len(n_layers), the decoder self attention list.
"""
"""
dec_slf_attn_list
=
[]
dec_slf_attn_list
=
[]
slf_attn_mask
=
layers
.
expand
(
slf_attn_mask
,
[
self
.
n_head
,
1
,
1
])
if
slf_attn_mask
:
slf_attn_mask
=
layers
.
expand
(
slf_attn_mask
,
[
self
.
n_head
,
1
,
1
])
# -- Forward
# -- Forward
dec_output
=
enc_seq
+
self
.
position_enc
(
enc_pos
)
dec_output
=
enc_seq
+
self
.
position_enc
(
enc_pos
)
...
...
parakeet/models/fastspeech/fastspeech.py
浏览文件 @
ad4b248a
...
@@ -142,6 +142,7 @@ class FastSpeech(dg.Layer):
...
@@ -142,6 +142,7 @@ class FastSpeech(dg.Layer):
encoder_output
,
alpha
=
alpha
)
encoder_output
,
alpha
=
alpha
)
slf_attn_mask
=
get_triu_tensor
(
slf_attn_mask
=
get_triu_tensor
(
decoder_pos
.
numpy
(),
decoder_pos
.
numpy
()).
astype
(
np
.
float32
)
decoder_pos
.
numpy
(),
decoder_pos
.
numpy
()).
astype
(
np
.
float32
)
slf_attn_mask
=
np
.
expand_dims
(
slf_attn_mask
,
axis
=
0
)
slf_attn_mask
=
fluid
.
layers
.
cast
(
slf_attn_mask
=
fluid
.
layers
.
cast
(
dg
.
to_variable
(
slf_attn_mask
==
0
),
np
.
float32
)
dg
.
to_variable
(
slf_attn_mask
==
0
),
np
.
float32
)
slf_attn_mask
=
dg
.
to_variable
(
slf_attn_mask
)
slf_attn_mask
=
dg
.
to_variable
(
slf_attn_mask
)
...
...
parakeet/models/transformer_tts/decoder.py
浏览文件 @
ad4b248a
...
@@ -149,6 +149,7 @@ class Decoder(dg.Layer):
...
@@ -149,6 +149,7 @@ class Decoder(dg.Layer):
zero_mask
=
layers
.
expand
(
zero_mask
,
[
self
.
num_head
,
1
,
1
])
zero_mask
=
layers
.
expand
(
zero_mask
,
[
self
.
num_head
,
1
,
1
])
else
:
else
:
mask
=
layers
.
expand
(
mask
,
[
self
.
num_head
,
1
,
1
])
m_mask
,
m_self_mask
,
zero_mask
=
None
,
None
,
None
m_mask
,
m_self_mask
,
zero_mask
=
None
,
None
,
None
# Decoder pre-network
# Decoder pre-network
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录