Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Parakeet
提交
f5ac04b1
P
Parakeet
项目概览
PaddlePaddle
/
Parakeet
通知
10
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Parakeet
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f5ac04b1
编写于
2月 12, 2020
作者:
L
lifuchen
提交者:
chenfeiyu
2月 12, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update Conv1D and Linear
上级
53f569a5
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
74 addition
and
79 deletion
+74
-79
examples/FastSpeech/config/fastspeech.yaml
examples/FastSpeech/config/fastspeech.yaml
+13
-13
examples/FastSpeech/train.py
examples/FastSpeech/train.py
+1
-2
examples/TransformerTTS/config/train_transformer.yaml
examples/TransformerTTS/config/train_transformer.yaml
+1
-1
parakeet/models/fastspeech/LengthRegulator.py
parakeet/models/fastspeech/LengthRegulator.py
+17
-10
parakeet/models/transformerTTS/CBHG.py
parakeet/models/transformerTTS/CBHG.py
+12
-16
parakeet/models/transformerTTS/encoderprenet.py
parakeet/models/transformerTTS/encoderprenet.py
+6
-8
parakeet/models/transformerTTS/post_convnet.py
parakeet/models/transformerTTS/post_convnet.py
+9
-12
parakeet/models/transformerTTS/vocoder.py
parakeet/models/transformerTTS/vocoder.py
+6
-8
parakeet/modules/ffn.py
parakeet/modules/ffn.py
+9
-9
未找到文件。
examples/FastSpeech/config/fastspeech.yaml
浏览文件 @
f5ac04b1
audio
:
num_mels
:
80
n_fft
:
2048
sr
:
22050
preemphasis
:
0.97
hop_length
:
256
win_length
:
1024
power
:
1.2
min_level_db
:
-100
ref_level_db
:
20
outputs_per_step
:
1
num_mels
:
80
#the number of mel bands when calculating mel spectrograms.
n_fft
:
2048
#the number of fft components.
sr
:
22050
#the sampling rate of audio data file.
preemphasis
:
0.97
#the preemphasis coefficient.
hop_length
:
256
#the number of samples to advance between frames.
win_length
:
1024
#the length (width) of the window function.
power
:
1.2
#the power to raise before griffin-lim.
min_level_db
:
-100
#the minimum level db.
ref_level_db
:
20
#the reference level db.
outputs_per_step
:
1
#the outputs per step.
encoder_n_layer
:
6
encoder_head
:
2
...
...
@@ -35,12 +35,12 @@ epochs: 10000
lr
:
0.001
save_step
:
500
use_gpu
:
True
use_data_parallel
:
Fals
e
use_data_parallel
:
Tru
e
data_path
:
../../dataset/LJSpeech-1.1
transtts_path
:
../TransformerTTS/checkpoint/
transformer_step
:
20
0000
transformer_step
:
16
0000
save_path
:
./checkpoint
log_dir
:
./log
#checkpoint_path: ./checkpoint
#ransformer_step: 97000
\ No newline at end of file
#transformer_step: 97000
examples/FastSpeech/train.py
浏览文件 @
f5ac04b1
...
...
@@ -51,7 +51,6 @@ def main(cfg):
with
fluid
.
unique_name
.
guard
():
transformerTTS
=
TransformerTTS
(
cfg
)
model_dict
,
_
=
load_checkpoint
(
str
(
cfg
.
transformer_step
),
os
.
path
.
join
(
cfg
.
transtts_path
,
"transformer"
))
transformerTTS
.
set_dict
(
model_dict
)
transformerTTS
.
eval
()
...
...
@@ -126,4 +125,4 @@ if __name__ =='__main__':
parser
=
jsonargparse
.
ArgumentParser
(
description
=
"Train Fastspeech model"
,
formatter_class
=
'default_argparse'
)
add_config_options_to_parser
(
parser
)
cfg
=
parser
.
parse_args
(
'-c config/fastspeech.yaml'
.
split
())
main
(
cfg
)
\ No newline at end of file
main
(
cfg
)
examples/TransformerTTS/config/train_transformer.yaml
浏览文件 @
f5ac04b1
...
...
@@ -23,7 +23,7 @@ lr: 0.001
save_step
:
1000
image_step
:
2000
use_gpu
:
True
use_data_parallel
:
Tru
e
use_data_parallel
:
Fals
e
stop_token
:
False
data_path
:
../../dataset/LJSpeech-1.1
...
...
parakeet/models/fastspeech/LengthRegulator.py
浏览文件 @
f5ac04b1
...
...
@@ -83,21 +83,21 @@ class DurationPredictor(dg.Layer):
self
.
dropout
=
dropout
k
=
math
.
sqrt
(
1
/
self
.
input_size
)
self
.
conv1
=
Conv1D
(
in
_channels
=
self
.
input_size
,
out_channel
s
=
self
.
out_channels
,
self
.
conv1
=
Conv1D
(
num
_channels
=
self
.
input_size
,
num_filter
s
=
self
.
out_channels
,
filter_size
=
self
.
filter_size
,
padding
=
1
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
))
,
data_format
=
'NTC'
)
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
))
)
#
data_format='NTC')
k
=
math
.
sqrt
(
1
/
self
.
out_channels
)
self
.
conv2
=
Conv1D
(
in
_channels
=
self
.
out_channels
,
out_channel
s
=
self
.
out_channels
,
self
.
conv2
=
Conv1D
(
num
_channels
=
self
.
out_channels
,
num_filter
s
=
self
.
out_channels
,
filter_size
=
self
.
filter_size
,
padding
=
1
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
))
,
data_format
=
'NTC'
)
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
))
)
#
data_format='NTC')
self
.
layer_norm1
=
dg
.
LayerNorm
(
self
.
out_channels
)
self
.
layer_norm2
=
dg
.
LayerNorm
(
self
.
out_channels
)
...
...
@@ -118,10 +118,17 @@ class DurationPredictor(dg.Layer):
out (Variable), Shape(B, T, C), the output of duration predictor.
"""
# encoder_output.shape(N, T, C)
out
=
layers
.
dropout
(
layers
.
relu
(
self
.
layer_norm1
(
self
.
conv1
(
encoder_output
))),
self
.
dropout
)
out
=
layers
.
dropout
(
layers
.
relu
(
self
.
layer_norm2
(
self
.
conv2
(
out
))),
self
.
dropout
)
out
=
layers
.
transpose
(
encoder_output
,
[
0
,
2
,
1
])
out
=
self
.
conv1
(
out
)
out
=
layers
.
transpose
(
out
,
[
0
,
2
,
1
])
out
=
layers
.
dropout
(
layers
.
relu
(
self
.
layer_norm1
(
out
)),
self
.
dropout
)
out
=
layers
.
transpose
(
out
,
[
0
,
2
,
1
])
out
=
self
.
conv2
(
out
)
out
=
layers
.
transpose
(
out
,
[
0
,
2
,
1
])
out
=
layers
.
dropout
(
layers
.
relu
(
self
.
layer_norm2
(
out
)),
self
.
dropout
)
out
=
layers
.
relu
(
self
.
linear
(
out
))
out
=
layers
.
squeeze
(
out
,
axes
=
[
-
1
])
return
out
...
...
parakeet/models/transformerTTS/CBHG.py
浏览文件 @
f5ac04b1
...
...
@@ -24,22 +24,20 @@ class CBHG(dg.Layer):
self
.
projection_size
=
projection_size
self
.
conv_list
=
[]
k
=
math
.
sqrt
(
1
/
projection_size
)
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
projection_size
,
out_channel
s
=
hidden_size
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
projection_size
,
num_filter
s
=
hidden_size
,
filter_size
=
1
,
padding
=
int
(
np
.
floor
(
1
/
2
)),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
data_format
=
"NCT"
))
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
))))
k
=
math
.
sqrt
(
1
/
hidden_size
)
for
i
in
range
(
2
,
K
+
1
):
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
hidden_size
,
out_channel
s
=
hidden_size
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
hidden_size
,
num_filter
s
=
hidden_size
,
filter_size
=
i
,
padding
=
int
(
np
.
floor
(
i
/
2
)),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
data_format
=
"NCT"
))
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
))))
for
i
,
layer
in
enumerate
(
self
.
conv_list
):
self
.
add_sublayer
(
"conv_list_{}"
.
format
(
i
),
layer
)
...
...
@@ -55,22 +53,20 @@ class CBHG(dg.Layer):
conv_outdim
=
hidden_size
*
K
k
=
math
.
sqrt
(
1
/
conv_outdim
)
self
.
conv_projection_1
=
Conv1D
(
in
_channels
=
conv_outdim
,
out_channel
s
=
hidden_size
,
self
.
conv_projection_1
=
Conv1D
(
num
_channels
=
conv_outdim
,
num_filter
s
=
hidden_size
,
filter_size
=
3
,
padding
=
int
(
np
.
floor
(
3
/
2
)),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
data_format
=
"NCT"
)
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)))
k
=
math
.
sqrt
(
1
/
hidden_size
)
self
.
conv_projection_2
=
Conv1D
(
in
_channels
=
hidden_size
,
out_channel
s
=
projection_size
,
self
.
conv_projection_2
=
Conv1D
(
num
_channels
=
hidden_size
,
num_filter
s
=
projection_size
,
filter_size
=
3
,
padding
=
int
(
np
.
floor
(
3
/
2
)),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
data_format
=
"NCT"
)
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)))
self
.
batchnorm_proj_1
=
dg
.
BatchNorm
(
hidden_size
,
data_layout
=
'NCHW'
)
...
...
parakeet/models/transformerTTS/encoderprenet.py
浏览文件 @
f5ac04b1
...
...
@@ -17,24 +17,22 @@ class EncoderPrenet(dg.Layer):
padding_idx
=
None
)
self
.
conv_list
=
[]
k
=
math
.
sqrt
(
1
/
embedding_size
)
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
embedding_size
,
out_channel
s
=
num_hidden
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
embedding_size
,
num_filter
s
=
num_hidden
,
filter_size
=
5
,
padding
=
int
(
np
.
floor
(
5
/
2
)),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NCT"
))
use_cudnn
=
use_cudnn
))
k
=
math
.
sqrt
(
1
/
num_hidden
)
for
_
in
range
(
2
):
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
num_hidden
,
out_channel
s
=
num_hidden
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
num_hidden
,
num_filter
s
=
num_hidden
,
filter_size
=
5
,
padding
=
int
(
np
.
floor
(
5
/
2
)),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NCT"
))
use_cudnn
=
use_cudnn
))
for
i
,
layer
in
enumerate
(
self
.
conv_list
):
self
.
add_sublayer
(
"conv_list_{}"
.
format
(
i
),
layer
)
...
...
parakeet/models/transformerTTS/post_convnet.py
浏览文件 @
f5ac04b1
...
...
@@ -22,34 +22,31 @@ class PostConvNet(dg.Layer):
self
.
batchnorm_last
=
batchnorm_last
self
.
conv_list
=
[]
k
=
math
.
sqrt
(
1
/
(
n_mels
*
outputs_per_step
))
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
n_mels
*
outputs_per_step
,
out_channel
s
=
num_hidden
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
n_mels
*
outputs_per_step
,
num_filter
s
=
num_hidden
,
filter_size
=
filter_size
,
padding
=
padding
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NCT"
))
use_cudnn
=
use_cudnn
))
k
=
math
.
sqrt
(
1
/
num_hidden
)
for
_
in
range
(
1
,
num_conv
-
1
):
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
num_hidden
,
out_channel
s
=
num_hidden
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
num_hidden
,
num_filter
s
=
num_hidden
,
filter_size
=
filter_size
,
padding
=
padding
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NCT"
)
)
use_cudnn
=
use_cudnn
))
self
.
conv_list
.
append
(
Conv1D
(
in
_channels
=
num_hidden
,
out_channel
s
=
n_mels
*
outputs_per_step
,
self
.
conv_list
.
append
(
Conv1D
(
num
_channels
=
num_hidden
,
num_filter
s
=
n_mels
*
outputs_per_step
,
filter_size
=
filter_size
,
padding
=
padding
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NCT"
))
use_cudnn
=
use_cudnn
))
for
i
,
layer
in
enumerate
(
self
.
conv_list
):
self
.
add_sublayer
(
"conv_list_{}"
.
format
(
i
),
layer
)
...
...
parakeet/models/transformerTTS/vocoder.py
浏览文件 @
f5ac04b1
...
...
@@ -10,15 +10,13 @@ class Vocoder(dg.Layer):
"""
def
__init__
(
self
,
config
):
super
(
Vocoder
,
self
).
__init__
()
self
.
pre_proj
=
Conv1D
(
in_channels
=
config
.
audio
.
num_mels
,
out_channels
=
config
.
hidden_size
,
filter_size
=
1
,
data_format
=
"NCT"
)
self
.
pre_proj
=
Conv1D
(
num_channels
=
config
.
audio
.
num_mels
,
num_filters
=
config
.
hidden_size
,
filter_size
=
1
)
self
.
cbhg
=
CBHG
(
config
.
hidden_size
,
config
.
batch_size
)
self
.
post_proj
=
Conv1D
(
in_channels
=
config
.
hidden_size
,
out_channels
=
(
config
.
audio
.
n_fft
//
2
)
+
1
,
filter_size
=
1
,
data_format
=
"NCT"
)
self
.
post_proj
=
Conv1D
(
num_channels
=
config
.
hidden_size
,
num_filters
=
(
config
.
audio
.
n_fft
//
2
)
+
1
,
filter_size
=
1
)
def
forward
(
self
,
mel
):
mel
=
layers
.
transpose
(
mel
,
[
0
,
2
,
1
])
...
...
parakeet/modules/ffn.py
浏览文件 @
f5ac04b1
...
...
@@ -14,23 +14,21 @@ class PositionwiseFeedForward(dg.Layer):
self
.
dropout
=
dropout
k
=
math
.
sqrt
(
1
/
d_in
)
self
.
w_1
=
Conv1D
(
in
_channels
=
d_in
,
out_channel
s
=
num_hidden
,
self
.
w_1
=
Conv1D
(
num
_channels
=
d_in
,
num_filter
s
=
num_hidden
,
filter_size
=
filter_size
,
padding
=
padding
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NTC"
)
use_cudnn
=
use_cudnn
)
k
=
math
.
sqrt
(
1
/
num_hidden
)
self
.
w_2
=
Conv1D
(
in
_channels
=
num_hidden
,
out_channel
s
=
d_in
,
self
.
w_2
=
Conv1D
(
num
_channels
=
num_hidden
,
num_filter
s
=
d_in
,
filter_size
=
filter_size
,
padding
=
padding
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
XavierInitializer
()),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
k
,
high
=
k
)),
use_cudnn
=
use_cudnn
,
data_format
=
"NTC"
)
use_cudnn
=
use_cudnn
)
self
.
layer_norm
=
dg
.
LayerNorm
(
d_in
)
def
forward
(
self
,
input
):
...
...
@@ -42,12 +40,14 @@ class PositionwiseFeedForward(dg.Layer):
Returns:
output (Variable), Shape(B, T, C), the result after FFN.
"""
x
=
layers
.
transpose
(
input
,
[
0
,
2
,
1
])
#FFN Networt
x
=
self
.
w_2
(
layers
.
relu
(
self
.
w_1
(
input
)))
x
=
self
.
w_2
(
layers
.
relu
(
self
.
w_1
(
x
)))
# dropout
x
=
layers
.
dropout
(
x
,
self
.
dropout
)
x
=
layers
.
transpose
(
x
,
[
0
,
2
,
1
])
# residual connection
x
=
x
+
input
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录