extension.py 3.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
__all__ = ['RowConv']
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49

from ...fluid.dygraph import layers
from .. import functional as F


class RowConv(layers.Layer):
    """
    **Row-convolution operator**

    The row convolution is called lookahead convolution.  This operator was 
    introduced in the following paper for 
    `DeepSpeech2 <http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf>`_.

    The main motivation is that a bidirectional RNN, useful in DeepSpeech like 
    speech models, learns representation for a sequence by performing a
    forward and a backward pass through the entire sequence. However, unlike
    unidirectional RNNs, bidirectional RNNs are challenging to deploy in an online
    and low-latency setting. The lookahead convolution incorporates information
    from future subsequences in a computationally efficient manner to improve
    unidirectional recurrent neural networks. The row convolution operator is
    different from the 1D sequence convolution, and is computed as follows:

    Given an input sequence X of length t and input dimension D, and a filter 
    (W) of size context * D.

    More details about row_conv please refer to the design document 
    `<https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645>`_ .

    Parameters:
        num_channels (int): input data's feature size.
        future_context_size (int): Future context size. Please note, the shape
            of convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of parameters, including
            name, initializer etc. Default: None.
50
        act (str): Non-linear activation to be applied to output tensor. Default: None.
51 52 53 54 55 56 57 58 59 60 61 62
        dtype (str, optional): Data type, it can be "float32". Default: "float32".

    Attributes:
        weight (Parameter): shape [future_context_size + 1, D], the learnable 
            weight (convolution kernel) of this layer.

    Returns:
        None

    Examples:
        .. code-block:: python

63
          from paddle import nn
64 65 66 67 68 69 70 71 72 73
          import paddle.nn.functional as F
          import numpy as np

          batch_size = 4
          time_steps = 8
          feature_size = 6
          context_size = 4

          x = np.random.randn(batch_size, time_steps, feature_size).astype(np.float32)

74 75 76 77
          x = paddle.to_tensor(x)
          conv = nn.RowConv(feature_size, context_size)
          y = conv(x)
          print(y.shape)
78

79
          # [4, 8, 6]
80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
    """

    def __init__(self,
                 num_channels,
                 future_context_size,
                 param_attr=None,
                 act=None,
                 dtype="float32"):
        super(RowConv, self).__init__()
        self._dtype = dtype
        self._param_attr = param_attr
        self._act = act

        filter_shape = [future_context_size + 1, num_channels]
        self.weight = self.create_parameter(
            filter_shape, attr=param_attr, dtype=dtype)

    def forward(self, input):
98
        out = F.extension.row_conv(input, self.weight, act=self._act)
99
        return out