PaddlePaddle / Parakeet · Commit 5b2d2a37
Authored by lifuchen on Feb 10, 2020; committed by chenfeiyu on Feb 10, 2020.
add TransformerTTS and fastspeech
Parent: 185e25fe

Showing 7 changed files with 443 additions and 0 deletions (+443, −0):
parakeet/models/transformerTTS/CBHG.py             +173  −0
parakeet/models/transformerTTS/__init__.py           +0  −0
parakeet/models/transformerTTS/decoder.py           +98  −0
parakeet/models/transformerTTS/encoder.py           +58  −0
parakeet/models/transformerTTS/encoderprenet.py     +52  −0
parakeet/models/transformerTTS/transformerTTS.py    +33  −0
parakeet/models/transformerTTS/vocoder.py           +29  −0
parakeet/models/transformerTTS/CBHG.py (new file, 0 → 100644)
import math
from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv, Pool1D, Linear
from parakeet.modules.dynamicGRU import DynamicGRU
import numpy as np


class CBHG(dg.Layer):
    def __init__(self, hidden_size, batch_size, K=16, projection_size=256,
                 num_gru_layers=2, max_pool_kernel_size=2, is_post=False):
        """
        :param hidden_size: dimension of hidden unit
        :param batch_size: batch size
        :param K: # of convolution banks
        :param projection_size: dimension of projection unit
        :param num_gru_layers: # of layers of GRUcell
        :param max_pool_kernel_size: max pooling kernel size
        :param is_post: whether post processing or not
        """
        super(CBHG, self).__init__()
        self.hidden_size = hidden_size
        self.projection_size = projection_size
        self.conv_list = []
        self.conv_list.append(Conv(in_channels=projection_size,
                                   out_channels=hidden_size,
                                   filter_size=1,
                                   padding=int(np.floor(1 / 2)),
                                   data_format="NCT"))
        for i in range(2, K + 1):
            self.conv_list.append(Conv(in_channels=hidden_size,
                                       out_channels=hidden_size,
                                       filter_size=i,
                                       padding=int(np.floor(i / 2)),
                                       data_format="NCT"))

        for i, layer in enumerate(self.conv_list):
            self.add_sublayer("conv_list_{}".format(i), layer)

        self.batchnorm_list = []
        for i in range(K):
            self.batchnorm_list.append(dg.BatchNorm(hidden_size, data_layout='NCHW'))

        for i, layer in enumerate(self.batchnorm_list):
            self.add_sublayer("batchnorm_list_{}".format(i), layer)

        conv_outdim = hidden_size * K

        self.conv_projection_1 = Conv(in_channels=conv_outdim,
                                      out_channels=hidden_size,
                                      filter_size=3,
                                      padding=int(np.floor(3 / 2)),
                                      data_format="NCT")

        self.conv_projection_2 = Conv(in_channels=hidden_size,
                                      out_channels=projection_size,
                                      filter_size=3,
                                      padding=int(np.floor(3 / 2)),
                                      data_format="NCT")

        self.batchnorm_proj_1 = dg.BatchNorm(hidden_size, data_layout='NCHW')
        self.batchnorm_proj_2 = dg.BatchNorm(projection_size, data_layout='NCHW')
        self.max_pool = Pool1D(pool_size=max_pool_kernel_size,
                               pool_type='max',
                               pool_stride=1,
                               pool_padding=1,
                               data_format="NCT")
        self.highway = Highwaynet(self.projection_size)

        h_0 = np.zeros((batch_size, hidden_size // 2), dtype="float32")
        h_0 = dg.to_variable(h_0)
        self.fc_forward1 = Linear(hidden_size, hidden_size // 2 * 3)
        self.fc_reverse1 = Linear(hidden_size, hidden_size // 2 * 3)
        self.gru_forward1 = DynamicGRU(size=self.hidden_size // 2,
                                       is_reverse=False,
                                       origin_mode=True,
                                       h_0=h_0)
        self.gru_reverse1 = DynamicGRU(size=self.hidden_size // 2,
                                       is_reverse=True,
                                       origin_mode=True,
                                       h_0=h_0)
        self.fc_forward2 = Linear(hidden_size, hidden_size // 2 * 3)
        self.fc_reverse2 = Linear(hidden_size, hidden_size // 2 * 3)
        self.gru_forward2 = DynamicGRU(size=self.hidden_size // 2,
                                       is_reverse=False,
                                       origin_mode=True,
                                       h_0=h_0)
        self.gru_reverse2 = DynamicGRU(size=self.hidden_size // 2,
                                       is_reverse=True,
                                       origin_mode=True,
                                       h_0=h_0)

    def _conv_fit_dim(self, x, filter_size=3):
        if filter_size % 2 == 0:
            return x[:, :, :-1]
        else:
            return x

    def forward(self, input_):
        # input_.shape = [N, C, T]
        conv_list = []
        conv_input = input_

        for i, (conv, batchnorm) in enumerate(zip(self.conv_list, self.batchnorm_list)):
            conv_input = self._conv_fit_dim(conv(conv_input), i + 1)
            conv_input = layers.relu(batchnorm(conv_input))
            conv_list.append(conv_input)

        conv_cat = layers.concat(conv_list, axis=1)
        conv_pool = self.max_pool(conv_cat)[:, :, :-1]

        conv_proj = layers.relu(self.batchnorm_proj_1(
            self._conv_fit_dim(self.conv_projection_1(conv_pool))))
        conv_proj = self.batchnorm_proj_2(
            self._conv_fit_dim(self.conv_projection_2(conv_proj))) + input_

        # conv_proj.shape = [N, C, T]
        highway = layers.transpose(conv_proj, [0, 2, 1])
        highway = self.highway(highway)

        # highway.shape = [N, T, C]
        fc_forward = self.fc_forward1(highway)
        fc_reverse = self.fc_reverse1(highway)
        out_forward = self.gru_forward1(fc_forward)
        out_reverse = self.gru_reverse1(fc_reverse)
        out = layers.concat([out_forward, out_reverse], axis=-1)
        fc_forward = self.fc_forward2(out)
        fc_reverse = self.fc_reverse2(out)
        out_forward = self.gru_forward2(fc_forward)
        out_reverse = self.gru_reverse2(fc_reverse)
        out = layers.concat([out_forward, out_reverse], axis=-1)
        out = layers.transpose(out, [0, 2, 1])
        return out


class Highwaynet(dg.Layer):
    def __init__(self, num_units, num_layers=4):
        super(Highwaynet, self).__init__()
        self.num_units = num_units
        self.num_layers = num_layers

        self.gates = []
        self.linears = []
        for i in range(num_layers):
            self.linears.append(Linear(num_units, num_units))
            self.gates.append(Linear(num_units, num_units))

        for i, (linear, gate) in enumerate(zip(self.linears, self.gates)):
            self.add_sublayer("linears_{}".format(i), linear)
            self.add_sublayer("gates_{}".format(i), gate)

    def forward(self, input_):
        out = input_
        for linear, gate in zip(self.linears, self.gates):
            h = fluid.layers.relu(linear(out))
            t_ = fluid.layers.sigmoid(gate(out))

            c = 1 - t_
            out = h * t_ + out * c
        return out
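Not part of the commit: a minimal smoke-test sketch of the CBHG block, assuming this commit's parakeet package is importable and a dygraph context is active. The shape values are hypothetical; the batch dimension must match the batch_size baked into the GRU initial states, and the residual connection requires the input channel count to equal projection_size.

import numpy as np
import paddle.fluid.dygraph as dg
from parakeet.models.transformerTTS.CBHG import CBHG

with dg.guard():
    # hidden_size == projection_size keeps the highway/GRU input widths consistent
    net = CBHG(hidden_size=256, batch_size=4)
    x = dg.to_variable(np.zeros((4, 256, 100), dtype="float32"))  # [N, C, T]
    y = net(x)  # [N, C, T] again: conv bank -> pool -> projections -> highway -> BiGRU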
parakeet/models/transformerTTS/__init__.py (new file, 0 → 100644; empty)
parakeet/models/transformerTTS/decoder.py (new file, 0 → 100644)
import numpy as np
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.prenet import PreNet
from parakeet.modules.post_convnet import PostConvNet


class Decoder(dg.Layer):
    def __init__(self, num_hidden, config, num_head=4):
        super(Decoder, self).__init__()
        self.num_hidden = num_hidden
        param = fluid.ParamAttr()
        self.alpha = self.create_parameter(
            shape=(1,), attr=param, dtype='float32',
            default_initializer=fluid.initializer.ConstantInitializer(value=1.0))
        self.pos_inp = get_sinusoid_encoding_table(1024, self.num_hidden, padding_idx=0)
        self.pos_emb = dg.Embedding(size=[1024, num_hidden],
                                    padding_idx=0,
                                    param_attr=fluid.ParamAttr(
                                        initializer=fluid.initializer.NumpyArrayInitializer(self.pos_inp),
                                        trainable=False))
        self.decoder_prenet = PreNet(input_size=config.audio.num_mels,
                                     hidden_size=num_hidden * 2,
                                     output_size=num_hidden,
                                     dropout_rate=0.2)
        self.linear = Linear(num_hidden, num_hidden)

        self.selfattn_layers = [MultiheadAttention(num_hidden, num_hidden // num_head,
                                                   num_hidden // num_head) for _ in range(3)]
        for i, layer in enumerate(self.selfattn_layers):
            self.add_sublayer("self_attn_{}".format(i), layer)
        self.attn_layers = [MultiheadAttention(num_hidden, num_hidden // num_head,
                                               num_hidden // num_head) for _ in range(3)]
        for i, layer in enumerate(self.attn_layers):
            self.add_sublayer("attn_{}".format(i), layer)
        self.ffns = [PositionwiseFeedForward(num_hidden, num_hidden * num_head,
                                             filter_size=1) for _ in range(3)]
        for i, layer in enumerate(self.ffns):
            self.add_sublayer("ffns_{}".format(i), layer)
        self.mel_linear = Linear(num_hidden,
                                 config.audio.num_mels * config.audio.outputs_per_step)
        self.stop_linear = Linear(num_hidden, 1)

        self.postconvnet = PostConvNet(config.audio.num_mels, config.hidden_size,
                                       filter_size=5, padding=4, num_conv=5,
                                       outputs_per_step=config.audio.outputs_per_step,
                                       use_cudnn=config.use_gpu)

    def forward(self, key, value, query, c_mask, positional):
        # get decoder mask with triangular matrix
        if fluid.framework._dygraph_tracer()._train_mode:
            m_mask = get_non_pad_mask(positional)
            mask = get_attn_key_pad_mask((positional == 0).astype(np.float32), query)
            triu_tensor = dg.to_variable(
                get_triu_tensor(query.numpy(), query.numpy())).astype(np.float32)
            mask = mask + triu_tensor
            mask = fluid.layers.cast(mask == 0, np.float32)

            # (batch_size, decoder_len, encoder_len)
            zero_mask = get_attn_key_pad_mask(layers.squeeze(c_mask, [-1]), query)
        else:
            mask = get_triu_tensor(query.numpy(), query.numpy()).astype(np.float32)
            mask = fluid.layers.cast(dg.to_variable(mask == 0), np.float32)
            m_mask, zero_mask = None, None

        # Decoder pre-network
        query = self.decoder_prenet(query)

        # Centered position
        query = self.linear(query)

        # Get position embedding
        positional = self.pos_emb(positional)
        query = positional * self.alpha + query

        # positional dropout
        query = fluid.layers.dropout(query, 0.1)

        # Attention decoder-decoder, encoder-decoder
        selfattn_list = list()
        attn_list = list()
        for selfattn, attn, ffn in zip(self.selfattn_layers, self.attn_layers, self.ffns):
            query, attn_dec = selfattn(query, query, query, mask=mask, query_mask=m_mask)
            query, attn_dot = attn(key, value, query, mask=zero_mask, query_mask=m_mask)
            query = ffn(query)
            selfattn_list.append(attn_dec)
            attn_list.append(attn_dot)

        # Mel linear projection
        mel_out = self.mel_linear(query)
        # Post Mel Network
        out = self.postconvnet(mel_out)
        out = mel_out + out

        # Stop tokens
        stop_tokens = self.stop_linear(query)
        stop_tokens = layers.squeeze(stop_tokens, [-1])
        stop_tokens = layers.sigmoid(stop_tokens)

        return mel_out, out, attn_list, stop_tokens, selfattn_list
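In the training branch above, the decoder's self-attention mask is the union of a key-padding mask and an upper-triangular (causal) mask; the final `mask == 0` cast keeps a 1 only where neither mask fires. A pure-numpy illustration of that combination (toy shapes, not the parakeet.modules.utils helpers):

import numpy as np

T = 4
pad = np.array([1, 1, 1, 0], dtype=np.float32)             # last step is padding
causal = np.triu(np.ones((T, T), dtype=np.float32), k=1)   # 1 above the diagonal
blocked = np.maximum(causal, (pad == 0)[None, :])          # union of both masks
allowed = (blocked == 0).astype(np.float32)                # 1 where attending is legal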
parakeet/models/transformerTTS/encoder.py (new file, 0 → 100644)
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.models.transformerTTS.encoderprenet import EncoderPrenet


class Encoder(dg.Layer):
    def __init__(self, embedding_size, num_hidden, config, num_head=4):
        super(Encoder, self).__init__()
        self.num_hidden = num_hidden
        param = fluid.ParamAttr(initializer=fluid.initializer.Constant(value=1.0))
        self.alpha = self.create_parameter(shape=(1,), attr=param, dtype='float32')
        self.pos_inp = get_sinusoid_encoding_table(1024, self.num_hidden, padding_idx=0)
        self.pos_emb = dg.Embedding(size=[1024, num_hidden],
                                    padding_idx=0,
                                    param_attr=fluid.ParamAttr(
                                        initializer=fluid.initializer.NumpyArrayInitializer(self.pos_inp),
                                        trainable=False))
        self.encoder_prenet = EncoderPrenet(embedding_size=embedding_size,
                                            num_hidden=num_hidden,
                                            use_cudnn=config.use_gpu)
        self.layers = [MultiheadAttention(num_hidden, num_hidden // num_head,
                                          num_hidden // num_head) for _ in range(3)]
        for i, layer in enumerate(self.layers):
            self.add_sublayer("self_attn_{}".format(i), layer)
        self.ffns = [PositionwiseFeedForward(num_hidden, num_hidden * num_head,
                                             filter_size=1,
                                             use_cudnn=config.use_gpu) for _ in range(3)]
        for i, layer in enumerate(self.ffns):
            self.add_sublayer("ffns_{}".format(i), layer)

    def forward(self, x, positional):
        if fluid.framework._dygraph_tracer()._train_mode:
            query_mask = get_non_pad_mask(positional)
            mask = get_attn_key_pad_mask(positional, x)
        else:
            query_mask, mask = None, None

        # Encoder pre_network
        x = self.encoder_prenet(x)  # (N, T, C)

        # Get positional encoding
        positional = self.pos_emb(positional)
        x = positional * self.alpha + x  # (N, T, C)

        # Positional dropout
        x = layers.dropout(x, 0.1)

        # Self attention encoder
        attentions = list()
        for layer, ffn in zip(self.layers, self.ffns):
            x, attention = layer(x, x, x, mask=mask, query_mask=query_mask)
            x = ffn(x)
            attentions.append(attention)

        return x, query_mask, attentions
parakeet/models/transformerTTS/encoderprenet.py (new file, 0 → 100644)
import math
from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv, Linear
import numpy as np


class EncoderPrenet(dg.Layer):
    def __init__(self, embedding_size, num_hidden, use_cudnn=True):
        super(EncoderPrenet, self).__init__()
        self.embedding_size = embedding_size
        self.num_hidden = num_hidden
        self.use_cudnn = use_cudnn
        self.embedding = dg.Embedding(size=[len(symbols), embedding_size],
                                      padding_idx=None)
        self.conv_list = []
        self.conv_list.append(Conv(in_channels=embedding_size,
                                   out_channels=num_hidden,
                                   filter_size=5,
                                   padding=int(np.floor(5 / 2)),
                                   use_cudnn=use_cudnn,
                                   data_format="NCT"))
        for _ in range(2):
            self.conv_list.append(Conv(in_channels=num_hidden,
                                       out_channels=num_hidden,
                                       filter_size=5,
                                       padding=int(np.floor(5 / 2)),
                                       use_cudnn=use_cudnn,
                                       data_format="NCT"))

        for i, layer in enumerate(self.conv_list):
            self.add_sublayer("conv_list_{}".format(i), layer)

        self.batch_norm_list = [dg.BatchNorm(num_hidden,
                                             data_layout='NCHW') for _ in range(3)]
        for i, layer in enumerate(self.batch_norm_list):
            self.add_sublayer("batch_norm_list_{}".format(i), layer)

        self.projection = Linear(num_hidden, num_hidden)

    def forward(self, x):
        x = self.embedding(x)  # (batch_size, seq_len, embedding_size)
        x = layers.transpose(x, [0, 2, 1])
        for batch_norm, conv in zip(self.batch_norm_list, self.conv_list):
            x = layers.dropout(layers.relu(batch_norm(conv(x))), 0.2)
        x = layers.transpose(x, [0, 2, 1])  # (N, T, C)
        x = self.projection(x)
        return x
parakeet/models/transformerTTS/transformerTTS.py (new file, 0 → 100644)
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.models.transformerTTS.encoder import Encoder
from parakeet.models.transformerTTS.decoder import Decoder


class TransformerTTS(dg.Layer):
    def __init__(self, config):
        super(TransformerTTS, self).__init__()
        self.encoder = Encoder(config.embedding_size, config.hidden_size, config)
        self.decoder = Decoder(config.hidden_size, config)
        self.config = config

    def forward(self, characters, mel_input, pos_text, pos_mel):
        # key (batch_size, seq_len, channel)
        # c_mask (batch_size, seq_len)
        # attns_enc (channel / 2, seq_len, seq_len)
        key, c_mask, attns_enc = self.encoder(characters, pos_text)

        # mel_output/postnet_output (batch_size, mel_len, n_mel)
        # attn_probs (128, mel_len, seq_len)
        # stop_preds (batch_size, mel_len, 1)
        # attns_dec (128, mel_len, mel_len)
        mel_output, postnet_output, attn_probs, stop_preds, attns_dec = self.decoder(
            key, key, mel_input, c_mask, pos_mel)

        return mel_output, postnet_output, attn_probs, stop_preds, attns_enc, attns_dec
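Not part of the commit: a hedged end-to-end forward sketch. The `config` object and its field values are hypothetical stand-ins for the fields the model actually reads (training configs arrive in later commits); it assumes this commit's parakeet package is importable.

from types import SimpleNamespace

import numpy as np
import paddle.fluid.dygraph as dg
from parakeet.models.transformerTTS.transformerTTS import TransformerTTS

# hypothetical values; only the fields read by Encoder/Decoder are set
config = SimpleNamespace(
    embedding_size=512, hidden_size=256, use_gpu=False,
    audio=SimpleNamespace(num_mels=80, outputs_per_step=1))

with dg.guard():
    model = TransformerTTS(config)
    characters = dg.to_variable(np.zeros((2, 50), dtype="int64"))  # text symbol ids
    pos_text = dg.to_variable(np.tile(np.arange(1, 51, dtype="int64"), (2, 1)))
    mel_input = dg.to_variable(np.zeros((2, 200, 80), dtype="float32"))
    pos_mel = dg.to_variable(np.tile(np.arange(1, 201, dtype="int64"), (2, 1)))
    mel_out, postnet_out, attn_probs, stop_preds, attns_enc, attns_dec = model(
        characters, mel_input, pos_text, pos_mel)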
parakeet/models/transformerTTS/vocoder.py (new file, 0 → 100644)
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.models.transformerTTS.CBHG import CBHG


class Vocoder(dg.Layer):
    """
    CBHG Network (mel -> linear)
    """
    def __init__(self, config):
        super(Vocoder, self).__init__()
        self.pre_proj = Conv1D(in_channels=config.audio.num_mels,
                               out_channels=config.hidden_size,
                               filter_size=1,
                               data_format="NCT")
        self.cbhg = CBHG(config.hidden_size, config.batch_size)
        self.post_proj = Conv1D(in_channels=config.hidden_size,
                                out_channels=(config.audio.n_fft // 2) + 1,
                                filter_size=1,
                                data_format="NCT")

    def forward(self, mel):
        mel = layers.transpose(mel, [0, 2, 1])
        mel = self.pre_proj(mel)
        mel = self.cbhg(mel)
        mag_pred = self.post_proj(mel)
        mag_pred = layers.transpose(mag_pred, [0, 2, 1])
        return mag_pred
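Not part of the commit: a hedged sketch of the mel-to-linear path, again with a hypothetical config. The CBHG defaults assume projection_size == hidden_size == 256, and the batch must match config.batch_size because of the fixed GRU initial states.

from types import SimpleNamespace

import numpy as np
import paddle.fluid.dygraph as dg
from parakeet.models.transformerTTS.vocoder import Vocoder

config = SimpleNamespace(hidden_size=256, batch_size=2,
                         audio=SimpleNamespace(num_mels=80, n_fft=2048))

with dg.guard():
    voc = Vocoder(config)
    mel = dg.to_variable(np.zeros((2, 200, 80), dtype="float32"))  # (N, T, num_mels)
    mag = voc(mel)  # (N, T, n_fft // 2 + 1) linear-scale magnitude frames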