提交 6908ec52 编写于 作者: L lifuchen 提交者: chenfeiyu

Adjust the directory structure

上级 fc84ca2d
......@@ -35,7 +35,7 @@ epochs: 10000
lr: 0.001
save_step: 500
use_gpu: True
use_data_parallel: True
use_data_parallel: False
data_path: ../../dataset/LJSpeech-1.1
transtts_path: ../TransformerTTS/checkpoint/
......
......@@ -4,7 +4,7 @@ import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.ffn import PositionwiseFeedForward
class FFTBlock(dg.Layer):
def __init__(self, d_model, d_inner, n_head, d_k, d_v, filter_size, padding, dropout=0.2):
......
......@@ -4,7 +4,7 @@ import parakeet.models.fastspeech.utils
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
from parakeet.modules.layers import Conv, Linear
from parakeet.modules.customized import Conv1D
class LengthRegulator(dg.Layer):
def __init__(self, input_size, out_channels, filter_size, dropout=0.1):
......@@ -82,20 +82,31 @@ class DurationPredictor(dg.Layer):
self.filter_size = filter_size
self.dropout = dropout
self.conv1 = Conv(in_channels = self.input_size,
k = math.sqrt(1 / self.input_size)
self.conv1 = Conv1D(in_channels = self.input_size,
out_channels = self.out_channels,
filter_size = self.filter_size,
padding=1,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format='NTC')
self.conv2 = Conv(in_channels = self.out_channels,
k = math.sqrt(1 / self.out_channels)
self.conv2 = Conv1D(in_channels = self.out_channels,
out_channels = self.out_channels,
filter_size = self.filter_size,
padding=1,
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format='NTC')
self.layer_norm1 = dg.LayerNorm(self.out_channels)
self.layer_norm2 = dg.LayerNorm(self.out_channels)
self.linear =Linear(self.out_channels, 1)
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
k = math.sqrt(1 / self.out_channels)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.linear =dg.Linear(self.out_channels, 1, param_attr = self.weight,
bias_attr = self.bias)
def forward(self, encoder_output):
"""
......
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.g2p.text.symbols import symbols
from parakeet.modules.utils import *
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.layers import Linear
from parakeet.models.fastspeech.FFTBlock import FFTBlock
class Decoder(dg.Layer):
......
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.g2p.text.symbols import symbols
from parakeet.modules.utils import *
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.layers import Linear
from parakeet.models.fastspeech.FFTBlock import FFTBlock
class Encoder(dg.Layer):
......
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.g2p.text.symbols import symbols
from parakeet.modules.utils import *
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.layers import Linear
from parakeet.models.fastspeech.utils import *
from parakeet.models.transformerTTS.post_convnet import PostConvNet
from parakeet.models.fastspeech.LengthRegulator import LengthRegulator
from parakeet.models.fastspeech.encoder import Encoder
from parakeet.models.fastspeech.decoder import Decoder
......@@ -39,7 +37,13 @@ class FastSpeech(dg.Layer):
fft_conv1d_kernel=cfg.fft_conv1d_filter,
fft_conv1d_padding=cfg.fft_conv1d_padding,
dropout=0.1)
self.mel_linear = Linear(cfg.fs_hidden_size, cfg.audio.num_mels * cfg.audio.outputs_per_step)
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
k = math.sqrt(1 / cfg.fs_hidden_size)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.mel_linear = dg.Linear(cfg.fs_hidden_size,
cfg.audio.num_mels * cfg.audio.outputs_per_step,
param_attr = self.weight,
bias_attr = self.bias,)
self.postnet = PostConvNet(n_mels=cfg.audio.num_mels,
num_hidden=512,
filter_size=5,
......
......@@ -3,8 +3,8 @@ from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv, Pool1D, Linear
from parakeet.modules.dynamicGRU import DynamicGRU
from parakeet.modules.customized import Pool1D, Conv1D
from parakeet.modules.dynamic_gru import DynamicGRU
import numpy as np
class CBHG(dg.Layer):
......@@ -23,16 +23,22 @@ class CBHG(dg.Layer):
self.hidden_size = hidden_size
self.projection_size = projection_size
self.conv_list = []
self.conv_list.append(Conv(in_channels = projection_size,
k = math.sqrt(1 / projection_size)
self.conv_list.append(Conv1D(in_channels = projection_size,
out_channels = hidden_size,
filter_size = 1,
padding = int(np.floor(1/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT"))
k = math.sqrt(1 / hidden_size)
for i in range(2,K+1):
self.conv_list.append(Conv(in_channels = hidden_size,
self.conv_list.append(Conv1D(in_channels = hidden_size,
out_channels = hidden_size,
filter_size = i,
padding = int(np.floor(i/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT"))
for i, layer in enumerate(self.conv_list):
......@@ -48,16 +54,22 @@ class CBHG(dg.Layer):
conv_outdim = hidden_size * K
self.conv_projection_1 = Conv(in_channels = conv_outdim,
k = math.sqrt(1 / conv_outdim)
self.conv_projection_1 = Conv1D(in_channels = conv_outdim,
out_channels = hidden_size,
filter_size = 3,
padding = int(np.floor(3/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT")
self.conv_projection_2 = Conv(in_channels = hidden_size,
k = math.sqrt(1 / hidden_size)
self.conv_projection_2 = Conv1D(in_channels = hidden_size,
out_channels = projection_size,
filter_size = 3,
padding = int(np.floor(3/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
data_format = "NCT")
self.batchnorm_proj_1 = dg.BatchNorm(hidden_size,
......@@ -73,8 +85,13 @@ class CBHG(dg.Layer):
h_0 = np.zeros((batch_size, hidden_size // 2), dtype="float32")
h_0 = dg.to_variable(h_0)
self.fc_forward1 = Linear(hidden_size, hidden_size // 2 * 3)
self.fc_reverse1 = Linear(hidden_size, hidden_size // 2 * 3)
k = math.sqrt(1 / hidden_size)
self.fc_forward1 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.fc_reverse1 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.gru_forward1 = DynamicGRU(size = self.hidden_size // 2,
is_reverse = False,
origin_mode = True,
......@@ -84,8 +101,12 @@ class CBHG(dg.Layer):
origin_mode=True,
h_0 = h_0)
self.fc_forward2 = Linear(hidden_size, hidden_size // 2 * 3)
self.fc_reverse2 = Linear(hidden_size, hidden_size // 2 * 3)
self.fc_forward2 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.fc_reverse2 = dg.Linear(hidden_size, hidden_size // 2 * 3,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.gru_forward2 = DynamicGRU(size = self.hidden_size // 2,
is_reverse = False,
origin_mode = True,
......@@ -145,10 +166,14 @@ class Highwaynet(dg.Layer):
self.gates = []
self.linears = []
k = math.sqrt(1 / num_units)
for i in range(num_layers):
self.linears.append(Linear(num_units, num_units))
self.gates.append(Linear(num_units, num_units))
self.linears.append(dg.Linear(num_units, num_units,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))))
self.gates.append(dg.Linear(num_units, num_units,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))))
for i, (linear, gate) in enumerate(zip(self.linears,self.gates)):
self.add_sublayer("linears_{}".format(i), linear)
......
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.prenet import PreNet
from parakeet.modules.post_convnet import PostConvNet
from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformerTTS.prenet import PreNet
from parakeet.models.transformerTTS.post_convnet import PostConvNet
class Decoder(dg.Layer):
def __init__(self, num_hidden, config, num_head=4):
......@@ -24,7 +24,10 @@ class Decoder(dg.Layer):
hidden_size = num_hidden * 2,
output_size = num_hidden,
dropout_rate=0.2)
self.linear = Linear(num_hidden, num_hidden)
k = math.sqrt(1 / num_hidden)
self.linear = dg.Linear(num_hidden, num_hidden,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.selfattn_layers = [MultiheadAttention(num_hidden, num_hidden//num_head, num_hidden//num_head) for _ in range(3)]
for i, layer in enumerate(self.selfattn_layers):
......@@ -35,8 +38,12 @@ class Decoder(dg.Layer):
self.ffns = [PositionwiseFeedForward(num_hidden, num_hidden*num_head, filter_size=1) for _ in range(3)]
for i, layer in enumerate(self.ffns):
self.add_sublayer("ffns_{}".format(i), layer)
self.mel_linear = Linear(num_hidden, config.audio.num_mels * config.audio.outputs_per_step)
self.stop_linear = Linear(num_hidden, 1)
self.mel_linear = dg.Linear(num_hidden, config.audio.num_mels * config.audio.outputs_per_step,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.stop_linear = dg.Linear(num_hidden, 1,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
self.postconvnet = PostConvNet(config.audio.num_mels, config.hidden_size,
filter_size = 5, padding = 4, num_conv=5,
......
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.feed_forward import PositionwiseFeedForward
from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformerTTS.encoderprenet import EncoderPrenet
class Encoder(dg.Layer):
......
......@@ -3,7 +3,7 @@ from parakeet.g2p.text.symbols import symbols
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv, Linear
from parakeet.modules.customized import Conv1D
import numpy as np
......@@ -16,17 +16,23 @@ class EncoderPrenet(dg.Layer):
self.embedding = dg.Embedding( size = [len(symbols), embedding_size],
padding_idx = None)
self.conv_list = []
self.conv_list.append(Conv(in_channels = embedding_size,
k = math.sqrt(1 / embedding_size)
self.conv_list.append(Conv1D(in_channels = embedding_size,
out_channels = num_hidden,
filter_size = 5,
padding = int(np.floor(5/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn,
data_format = "NCT"))
k = math.sqrt(1 / num_hidden)
for _ in range(2):
self.conv_list.append(Conv(in_channels = num_hidden,
self.conv_list.append(Conv1D(in_channels = num_hidden,
out_channels = num_hidden,
filter_size = 5,
padding = int(np.floor(5/2)),
param_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer()),
bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k)),
use_cudnn = use_cudnn,
data_format = "NCT"))
......@@ -39,7 +45,10 @@ class EncoderPrenet(dg.Layer):
for i, layer in enumerate(self.batch_norm_list):
self.add_sublayer("batch_norm_list_{}".format(i), layer)
self.projection = Linear(num_hidden, num_hidden)
k = math.sqrt(1 / num_hidden)
self.projection = dg.Linear(num_hidden, num_hidden,
param_attr=fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer()),
bias_attr=fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k)))
def forward(self, x):
x = self.embedding(x) #(batch_size, seq_len, embending_size)
......
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
from parakeet.modules.layers import Conv1D, Linear
from parakeet.modules.customized import Conv1D
from parakeet.modules.utils import *
from parakeet.models.transformerTTS.CBHG import CBHG
......
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
class DynamicGRU(dg.Layer):
def __init__(self,
size,
param_attr=None,
bias_attr=None,
is_reverse=False,
gate_activation='sigmoid',
candidate_activation='tanh',
h_0=None,
origin_mode=False,
init_size=None):
super(DynamicGRU, self).__init__()
self.gru_unit = dg.GRUUnit(
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
activation=candidate_activation,
gate_activation=gate_activation,
origin_mode=origin_mode)
self.size = size
self.h_0 = h_0
self.is_reverse = is_reverse
def forward(self, inputs):
"""
Dynamic GRU block.
Args:
input (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
output (Variable), Shape(B, T, C), the result compute by GRU.
"""
hidden = self.h_0
res = []
for i in range(inputs.shape[1]):
if self.is_reverse:
i = inputs.shape[1] - 1 - i
input_ = inputs[:, i:i + 1, :]
input_ = layers.reshape(
input_, [-1, input_.shape[2]], inplace=False)
hidden, reset, gate = self.gru_unit(input_, hidden)
hidden_ = layers.reshape(
hidden, [-1, 1, hidden.shape[1]], inplace=False)
res.append(hidden_)
if self.is_reverse:
res = res[::-1]
res = layers.concat(res, axis=1)
return res
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
import paddle.fluid as fluid
import math
from parakeet.modules.layers import Conv
class PositionwiseFeedForward(dg.Layer):
''' A two-feed-forward-layer module '''
def __init__(self, d_in, num_hidden, filter_size, padding=0, use_cudnn=True, dropout=0.1):
super(PositionwiseFeedForward, self).__init__()
self.num_hidden = num_hidden
self.use_cudnn = use_cudnn
self.dropout = dropout
self.w_1 = Conv(in_channels = d_in,
out_channels = num_hidden,
filter_size = filter_size,
padding=padding,
use_cudnn = use_cudnn,
data_format = "NTC")
self.w_2 = Conv(in_channels = num_hidden,
out_channels = d_in,
filter_size = filter_size,
padding=padding,
use_cudnn = use_cudnn,
data_format = "NTC")
self.layer_norm = dg.LayerNorm(d_in)
def forward(self, input):
"""
Feed Forward Network.
Args:
input (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
output (Variable), Shape(B, T, C), the result after FFN.
"""
#FFN Networt
x = self.w_2(layers.relu(self.w_1(input)))
# dropout
x = layers.dropout(x, self.dropout)
# residual connection
x = x + input
#layer normalization
output = self.layer_norm(x)
return output
\ No newline at end of file
import math
import numpy as np
import paddle
from paddle import fluid
import paddle.fluid.dygraph as dg
class Linear(dg.Layer):
def __init__(self, in_features, out_features, is_bias=True, dtype="float32"):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.dtype = dtype
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.bias = is_bias
if is_bias is not False:
k = math.sqrt(1 / in_features)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.linear = dg.Linear(in_features, out_features, param_attr = self.weight,
bias_attr = self.bias,)
def forward(self, x):
x = self.linear(x)
return x
class Conv(dg.Layer):
def __init__(self, in_channels, out_channels, filter_size=1,
padding=0, dilation=1, stride=1, use_cudnn=True,
data_format="NCT", is_bias=True):
super(Conv, self).__init__()
self.in_channels = in_channels
self.out_channels = out_channels
self.filter_size = filter_size
self.padding = padding
self.dilation = dilation
self.stride = stride
self.use_cudnn = use_cudnn
self.data_format = data_format
self.is_bias = is_bias
self.weight_attr = fluid.ParamAttr(initializer=fluid.initializer.XavierInitializer())
self.bias_attr = None
if is_bias is not False:
k = math.sqrt(1 / in_channels)
self.bias_attr = fluid.ParamAttr(initializer=fluid.initializer.Uniform(low=-k, high=k))
self.conv = Conv1D( in_channels = in_channels,
out_channels = out_channels,
filter_size = filter_size,
padding = padding,
dilation = dilation,
stride = stride,
param_attr = self.weight_attr,
bias_attr = self.bias_attr,
use_cudnn = use_cudnn,
data_format = data_format)
def forward(self, x):
x = self.conv(x)
return x
class Conv1D(dg.Layer):
"""
A convolution 1D block implemented with Conv2D. Form simplicity and
ensuring the output has the same length as the input, it does not allow
stride > 1.
"""
def __init__(self,
in_channels,
out_channels,
filter_size=3,
padding=0,
dilation=1,
stride=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None,
data_format='NCT',
dtype="float32"):
super(Conv1D, self).__init__(dtype=dtype)
self.padding = padding
self.in_channels = in_channels
self.num_filters = out_channels
self.filter_size = filter_size
self.stride = stride
self.dilation = dilation
self.padding = padding
self.act = act
self.data_format = data_format
self.conv = dg.Conv2D(
num_channels=in_channels,
num_filters=out_channels,
filter_size=(1, filter_size),
stride=(1, stride),
dilation=(1, dilation),
padding=(0, padding),
groups=groups,
param_attr=param_attr,
bias_attr=bias_attr,
use_cudnn=use_cudnn,
act=act,
dtype=dtype)
def forward(self, x):
"""
Args:
x (Variable): Shape(B, C_in, 1, T), the input, where C_in means
input channels.
Returns:
x (Variable): Shape(B, C_out, 1, T), the outputs, where C_out means
output channels (num_filters).
"""
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
x = fluid.layers.unsqueeze(x, [2])
x = self.conv(x)
x = fluid.layers.squeeze(x, [2])
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
return x
class Pool1D(dg.Layer):
"""
A Pool 1D block implemented with Pool2D.
"""
def __init__(self,
pool_size=-1,
pool_type='max',
pool_stride=1,
pool_padding=0,
global_pooling=False,
use_cudnn=True,
ceil_mode=False,
exclusive=True,
data_format='NCT'):
super(Pool1D, self).__init__()
self.pool_size = pool_size
self.pool_type = pool_type
self.pool_stride = pool_stride
self.pool_padding = pool_padding
self.global_pooling = global_pooling
self.use_cudnn = use_cudnn
self.ceil_mode = ceil_mode
self.exclusive = exclusive
self.data_format = data_format
self.pool2d = dg.Pool2D([1,pool_size], pool_type = pool_type,
pool_stride = [1,pool_stride], pool_padding = [0, pool_padding],
global_pooling = global_pooling, use_cudnn = use_cudnn,
ceil_mode = ceil_mode, exclusive = exclusive)
def forward(self, x):
"""
Args:
x (Variable): Shape(B, C_in, 1, T), the input, where C_in means
input channels.
Returns:
x (Variable): Shape(B, C_out, 1, T), the outputs, where C_out means
output channels (num_filters).
"""
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
x = fluid.layers.unsqueeze(x, [2])
x = self.pool2d(x)
x = fluid.layers.squeeze(x, [2])
if self.data_format == 'NTC':
x = fluid.layers.transpose(x, [0, 2, 1])
return x
import math
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
from parakeet.modules.layers import Linear
class Linear(dg.Layer):
def __init__(self, in_features, out_features, is_bias=True, dtype="float32"):
super(Linear, self).__init__()
self.in_features = in_features
self.out_features = out_features
self.dtype = dtype
self.weight = fluid.ParamAttr(initializer = fluid.initializer.XavierInitializer())
self.bias = is_bias
if is_bias is not False:
k = math.sqrt(1 / in_features)
self.bias = fluid.ParamAttr(initializer = fluid.initializer.Uniform(low=-k, high=k))
self.linear = dg.Linear(in_features, out_features, param_attr = self.weight,
bias_attr = self.bias,)
def forward(self, x):
x = self.linear(x)
return x
class ScaledDotProductAttention(dg.Layer):
def __init__(self, d_key):
......
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.layers import Conv
class PostConvNet(dg.Layer):
def __init__(self,
n_mels=80,
num_hidden=512,
filter_size=5,
padding=0,
num_conv=5,
outputs_per_step=1,
use_cudnn=True,
dropout=0.1,
batchnorm_last=False):
super(PostConvNet, self).__init__()
self.dropout = dropout
self.num_conv = num_conv
self.batchnorm_last = batchnorm_last
self.conv_list = []
self.conv_list.append(Conv(in_channels = n_mels * outputs_per_step,
out_channels = num_hidden,
filter_size = filter_size,
padding = padding,
use_cudnn = use_cudnn,
data_format = "NCT"))
for _ in range(1, num_conv-1):
self.conv_list.append(Conv(in_channels = num_hidden,
out_channels = num_hidden,
filter_size = filter_size,
padding = padding,
use_cudnn = use_cudnn,
data_format = "NCT") )
self.conv_list.append(Conv(in_channels = num_hidden,
out_channels = n_mels * outputs_per_step,
filter_size = filter_size,
padding = padding,
use_cudnn = use_cudnn,
data_format = "NCT"))
for i, layer in enumerate(self.conv_list):
self.add_sublayer("conv_list_{}".format(i), layer)
self.batch_norm_list = [dg.BatchNorm(num_hidden,
data_layout='NCHW') for _ in range(num_conv-1)]
if self.batchnorm_last:
self.batch_norm_list.append(dg.BatchNorm(n_mels * outputs_per_step,
data_layout='NCHW'))
for i, layer in enumerate(self.batch_norm_list):
self.add_sublayer("batch_norm_list_{}".format(i), layer)
def forward(self, input):
"""
Post Conv Net.
Args:
input (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
output (Variable), Shape(B, T, C), the result after postconvnet.
"""
input = layers.transpose(input, [0,2,1])
len = input.shape[-1]
for i in range(self.num_conv-1):
batch_norm = self.batch_norm_list[i]
conv = self.conv_list[i]
input = layers.dropout(layers.tanh(batch_norm(conv(input)[:,:,:len])), self.dropout)
conv = self.conv_list[self.num_conv-1]
input = conv(input)[:,:,:len]
if self.batchnorm_last:
batch_norm = self.batch_norm_list[self.num_conv-1]
input = layers.dropout(batch_norm(input), self.dropout)
output = layers.transpose(input, [0,2,1])
return output
\ No newline at end of file
import paddle.fluid.dygraph as dg
import paddle.fluid.layers as layers
from parakeet.modules.layers import Linear
class PreNet(dg.Layer):
def __init__(self, input_size, hidden_size, output_size, dropout_rate=0.2):
"""
:param input_size: dimension of input
:param hidden_size: dimension of hidden unit
:param output_size: dimension of output
"""
super(PreNet, self).__init__()
self.input_size = input_size
self.hidden_size = hidden_size
self.output_size = output_size
self.dropout_rate = dropout_rate
self.linear1 = Linear(input_size, hidden_size)
self.linear2 = Linear(hidden_size, output_size)
def forward(self, x):
"""
Pre Net before passing through the network.
Args:
x (Variable): Shape(B, T, C), dtype: float32. The input value.
Returns:
x (Variable), Shape(B, T, C), the result after pernet.
"""
x = layers.dropout(layers.relu(self.linear1(x)), self.dropout_rate)
x = layers.dropout(layers.relu(self.linear2(x)), self.dropout_rate)
return x
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册