# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid.dygraph as dg
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from parakeet.models.transformer_tts.utils import *
from parakeet.modules.multihead_attention import MultiheadAttention
from parakeet.modules.ffn import PositionwiseFeedForward
from parakeet.models.transformer_tts.encoderprenet import EncoderPrenet


class Encoder(dg.Layer):
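    """Transformer TTS encoder.

    Passes the input text through an encoder prenet, adds a scaled sinusoid
    positional encoding, and applies a stack of self-attention layers, each
    followed by a position-wise feed-forward network.
    """
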
    def __init__(self, embedding_size, num_hidden, num_head=4):
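        """
        Args:
            embedding_size (int): size of the text embedding used by the prenet.
            num_hidden (int): hidden size of the encoder.
            num_head (int): number of attention heads. Defaults to 4.
        """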
        super(Encoder, self).__init__()
        self.num_hidden = num_hidden
        self.num_head = num_head
        # Learnable scale for the positional encoding, initialized to 1.0.
        param = fluid.ParamAttr(initializer=fluid.initializer.Constant(
            value=1.0))
        self.alpha = self.create_parameter(
            shape=(1, ), attr=param, dtype='float32')
        # Fixed (non-trainable) sinusoid positional encoding table.
        self.pos_inp = get_sinusoid_encoding_table(
            1024, self.num_hidden, padding_idx=0)
        self.pos_emb = dg.Embedding(
            size=[1024, num_hidden],
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.NumpyArrayInitializer(
                    self.pos_inp),
                trainable=False))
        # Encoder prenet maps the input text to num_hidden-dim features.
        self.encoder_prenet = EncoderPrenet(
            embedding_size=embedding_size,
            num_hidden=num_hidden,
            use_cudnn=True)
        # Three self-attention layers; they are registered with add_sublayer
        # below because plain Python lists are not tracked by dg.Layer.
        self.layers = [
            MultiheadAttention(num_hidden, num_hidden // num_head,
                               num_hidden // num_head) for _ in range(3)
        ]
        for i, layer in enumerate(self.layers):
            self.add_sublayer("self_attn_{}".format(i), layer)
        # One position-wise feed-forward network after each attention layer.
        self.ffns = [
            PositionwiseFeedForward(
                num_hidden,
                num_hidden * num_head,
                filter_size=1,
                use_cudnn=True) for _ in range(3)
        ]
        for i, layer in enumerate(self.ffns):
            self.add_sublayer("ffns_{}".format(i), layer)

    def forward(self, x, positional, mask=None, query_mask=None):
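        """Encode the input text sequence.

        Args:
            x: input text sequence, consumed by the encoder prenet.
            positional: positional indices used to look up the sinusoid table.
            mask: attention mask; only used in training mode.
            query_mask: query mask; only used in training mode.

        Returns:
            x: encoder output of shape (N, T, C).
            attentions: list of attention weights from each self-attention layer.
        """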

        # Tile the masks for multi-head attention (training mode only).
        if fluid.framework._dygraph_tracer()._train_mode:
            seq_len_key = x.shape[1]
            query_mask = layers.expand(query_mask,
                                       [self.num_head, 1, seq_len_key])
            mask = layers.expand(mask, [self.num_head, 1, 1])
        else:
            query_mask, mask = None, None

        # Encoder prenet
        x = self.encoder_prenet(x)  # (N, T, C)

        # Get positional encoding
        positional = self.pos_emb(positional)

        x = positional * self.alpha + x  # (N, T, C)

        # Positional dropout
        x = layers.dropout(x, 0.1, dropout_implementation='upscale_in_train')

        # Self-attention encoder
        attentions = list()
        for layer, ffn in zip(self.layers, self.ffns):
            x, attention = layer(x, x, x, mask=mask, query_mask=query_mask)
            x = ffn(x)
            attentions.append(attention)

        return x, attentions
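
# Example usage (a sketch; the sizes below are placeholder values, and `text`,
# `positional`, `mask`, `query_mask` are assumed to be dygraph variables
# prepared by the caller, e.g. via dg.to_variable):
#
#     with dg.guard():
#         encoder = Encoder(embedding_size=512, num_hidden=256, num_head=4)
#         out, attns = encoder(text, positional, mask=mask, query_mask=query_mask)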