post_convnet.py
import math
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.modules.customized import Conv1D

class PostConvNet(dg.Layer):
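    """A post-net in the Tacotron 2 style: a stack of 1-D convolutions that
    refines the coarse mel-spectrogram predicted by the decoder.
    """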
    def __init__(self, 
                 n_mels=80,
                 num_hidden=512,
                 filter_size=5,
                 padding=0,
                 num_conv=5,
                 outputs_per_step=1,
                 use_cudnn=True,
                 dropout=0.1,
                 batchnorm_last=False):
        super(PostConvNet, self).__init__()
        
        self.dropout = dropout
        self.num_conv = num_conv
        self.batchnorm_last = batchnorm_last
        self.conv_list = []
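        # Bias-init bound: the usual Uniform(-1/sqrt(fan_in), 1/sqrt(fan_in))
        # scheme, with fan_in taken as the number of input channels.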
        k = math.sqrt(1 / (n_mels * outputs_per_step))
        # First conv: project the (possibly multi-frame) mel input up to num_hidden.
        self.conv_list.append(Conv1D(
            num_channels=n_mels * outputs_per_step,
            num_filters=num_hidden,
            filter_size=filter_size,
            padding=padding,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)),
            use_cudnn=use_cudnn))

        # Middle convs: num_hidden -> num_hidden; the bias bound now uses
        # fan_in = num_hidden.
        k = math.sqrt(1 / num_hidden)
        for _ in range(1, num_conv - 1):
            self.conv_list.append(Conv1D(
                num_channels=num_hidden,
                num_filters=num_hidden,
                filter_size=filter_size,
                padding=padding,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.XavierInitializer()),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(low=-k, high=k)),
                use_cudnn=use_cudnn))

        # Last conv: project back down to the mel dimension.
        self.conv_list.append(Conv1D(
            num_channels=num_hidden,
            num_filters=n_mels * outputs_per_step,
            filter_size=filter_size,
            padding=padding,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.XavierInitializer()),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(low=-k, high=k)),
            use_cudnn=use_cudnn))

        for i, layer in enumerate(self.conv_list):
            self.add_sublayer("conv_list_{}".format(i), layer)

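        # One BatchNorm per conv except the last; the last conv gets one only
        # when batchnorm_last is set.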
        self.batch_norm_list = [dg.BatchNorm(num_hidden, 
                            data_layout='NCHW') for _ in range(num_conv-1)]
        if self.batchnorm_last:
            self.batch_norm_list.append(dg.BatchNorm(n_mels * outputs_per_step, 
                                data_layout='NCHW'))
        for i, layer in enumerate(self.batch_norm_list):
            self.add_sublayer("batch_norm_list_{}".format(i), layer)
        

    def forward(self, input):
        """
        Post Conv Net.

        Args:
            input (Variable): shape (B, T, C), dtype float32, the coarse
                mel-spectrogram predicted by the decoder.
        Returns:
            output (Variable): shape (B, T, C), the refined mel-spectrogram.
        """
        input = layers.transpose(input, [0, 2, 1])  # (B, T, C) -> (B, C, T)
        length = input.shape[-1]  # original time length; do not shadow builtin `len`
        for i in range(self.num_conv - 1):
            batch_norm = self.batch_norm_list[i]
            conv = self.conv_list[i]
            # Conv (trimmed to at most the original length) -> BN -> tanh -> dropout.
            input = layers.dropout(
                layers.tanh(batch_norm(conv(input)[:, :, :length])),
                self.dropout)
        # The last conv has no tanh; it gets BN and dropout only when
        # batchnorm_last is set.
        conv = self.conv_list[self.num_conv - 1]
        input = conv(input)[:, :, :length]
        if self.batchnorm_last:
            batch_norm = self.batch_norm_list[self.num_conv - 1]
            input = layers.dropout(batch_norm(input), self.dropout)
        output = layers.transpose(input, [0, 2, 1])  # back to (B, T, C)
        return output
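

# Minimal usage sketch (not part of the original file); the hyper-parameters
# and shapes below are illustrative assumptions. With padding = filter_size - 1
# each conv produces extra frames that forward() trims off, so the time
# dimension is preserved end to end.
if __name__ == "__main__":
    import numpy as np

    with dg.guard():
        net = PostConvNet(n_mels=80, num_hidden=512, filter_size=5,
                          padding=4, num_conv=5, outputs_per_step=1)
        mel = dg.to_variable(np.random.randn(2, 100, 80).astype("float32"))
        out = net(mel)  # same shape in, same shape out
        print(out.shape)  # expected: [2, 100, 80]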