# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers

from parakeet.modules.customized import Conv1D


class PostConvNet(dg.Layer):
    def __init__(self,
                 n_mels=80,
                 num_hidden=512,
                 filter_size=5,
                 padding=0,
                 num_conv=5,
                 outputs_per_step=1,
                 use_cudnn=True,
                 dropout=0.1,
                 batchnorm_last=False):
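        """A stack of 1-D convolutions that refines spectrogram frames.

        Args:
            n_mels (int): number of mel bands per frame.
            num_hidden (int): channel width of the hidden convolutions.
            filter_size (int): kernel size of every convolution.
            padding (int): zero padding added along the time axis.
            num_conv (int): total number of convolution layers.
            outputs_per_step (int): frames predicted per decoder step.
            use_cudnn (bool): whether to use cuDNN convolution kernels.
            dropout (float): dropout probability after each activation.
            batchnorm_last (bool): also apply batch norm (and dropout) after
                the last convolution.
        """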
        super(PostConvNet, self).__init__()

        self.dropout = dropout
        self.num_conv = num_conv
        self.batchnorm_last = batchnorm_last
        self.conv_list = []
        # Bound for the uniform bias initializer: 1 / sqrt(input channels).
        k = math.sqrt(1 / (n_mels * outputs_per_step))
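        # Input projection: n_mels * outputs_per_step channels -> num_hidden.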
        self.conv_list.append(
            Conv1D(
                num_channels=n_mels * outputs_per_step,
                num_filters=num_hidden,
                filter_size=filter_size,
                padding=padding,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.XavierInitializer()),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-k, high=k)),
                use_cudnn=use_cudnn))

        k = math.sqrt(1 / num_hidden)
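        # Hidden stack: num_conv - 2 convolutions at constant width.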
        for _ in range(1, num_conv - 1):
            self.conv_list.append(
                Conv1D(
                    num_channels=num_hidden,
                    num_filters=num_hidden,
                    filter_size=filter_size,
                    padding=padding,
                    param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.XavierInitializer()),
                    bias_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.Uniform(
                            low=-k, high=k)),
                    use_cudnn=use_cudnn))

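        # Output projection: num_hidden -> n_mels * outputs_per_step channels.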
        self.conv_list.append(
            Conv1D(
                num_channels=num_hidden,
                num_filters=n_mels * outputs_per_step,
                filter_size=filter_size,
                padding=padding,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.XavierInitializer()),
                bias_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.Uniform(
                        low=-k, high=k)),
                use_cudnn=use_cudnn))

        # Register each Conv1D as a sublayer; parameters held in a plain
        # Python list would otherwise be invisible to the dygraph framework.
        for i, layer in enumerate(self.conv_list):
            self.add_sublayer("conv_list_{}".format(i), layer)

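        # One BatchNorm per activated convolution; the last convolution gets
        # one only when batchnorm_last is set (appended below).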
        self.batch_norm_list = [
            dg.BatchNorm(
                num_hidden, data_layout='NCHW') for _ in range(num_conv - 1)
        ]
        if self.batchnorm_last:
            self.batch_norm_list.append(
                dg.BatchNorm(
                    n_mels * outputs_per_step, data_layout='NCHW'))
        for i, layer in enumerate(self.batch_norm_list):
            self.add_sublayer("batch_norm_list_{}".format(i), layer)

    def forward(self, input):
        """
        Refine the input spectrogram with the post conv net.

        Args:
            input (Variable): shape (B, T, C), dtype float32, the input
                spectrogram frames.
        Returns:
            output (Variable): shape (B, T, C), the refined spectrogram.
        """

        input = layers.transpose(input, [0, 2, 1])  # (B, T, C) -> (B, C, T)
        # Avoid shadowing the builtin `len`; conv outputs are truncated back
        # to this length so padded convolutions preserve the time axis.
        time_steps = input.shape[-1]
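        # All but the last layer: conv -> batch norm -> tanh -> dropout.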
        for i in range(self.num_conv - 1):
            batch_norm = self.batch_norm_list[i]
            conv = self.conv_list[i]

            input = layers.dropout(
                layers.tanh(batch_norm(conv(input)[:, :, :time_steps])),
                self.dropout,
                dropout_implementation='upscale_in_train')

        # The last convolution has no tanh activation.
        conv = self.conv_list[self.num_conv - 1]
        input = conv(input)[:, :, :time_steps]
        if self.batchnorm_last:
            batch_norm = self.batch_norm_list[self.num_conv - 1]
            input = layers.dropout(
                batch_norm(input),
                self.dropout,
                dropout_implementation='upscale_in_train')
        output = layers.transpose(input, [0, 2, 1])  # back to (B, T, C)
        return output
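

# A minimal usage sketch (illustration only, not part of the original module):
# refine a batch of spectrogram frames in dygraph mode. With filter_size=5 and
# padding=4 the convolutions lengthen the time axis and forward() truncates it
# back, so input and output shapes match.
if __name__ == "__main__":
    import numpy as np

    with dg.guard():
        net = PostConvNet(n_mels=80, num_hidden=256, filter_size=5, padding=4)
        mel = dg.to_variable(np.random.randn(2, 100, 80).astype("float32"))
        out = net(mel)
        print(out.shape)  # [2, 100, 80]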