# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ["RowConv"]

from ...fluid.dygraph import layers
from .. import functional as F


class RowConv(layers.Layer):
    """
    **Row-convolution operator**

    Row convolution, also called lookahead convolution, was introduced in
    the DeepSpeech2 paper.

    Motivation: a bidirectional RNN, useful in DeepSpeech-like speech
    models, learns a representation of a sequence by performing a forward
    and a backward pass through the entire sequence. Unlike unidirectional
    RNNs, however, bidirectional RNNs are challenging to deploy in an online
    and low-latency setting. The lookahead convolution incorporates
    information from future subsequences in a computationally efficient
    manner to improve unidirectional recurrent neural networks.

    The row convolution operator is different from the 1D sequence
    convolution. It operates on an input sequence X of length t with input
    dimension D, using a filter W of size context * D.

    More details about row_conv can be found in its design document.

    Parameters:
        num_channels (int): Feature size (D) of the input data.
        future_context_size (int): Future context size. Note that the shape
            of the convolution kernel is [future_context_size + 1, D].
        param_attr (ParamAttr): Attributes of the parameter, including name,
            initializer etc. Default: None.
        act (str): Non-linear activation to be applied to the output
            variable. Default: None.
        dtype (str, optional): Data type, it can be "float32".
            Default: "float32".

    Attributes:
        weight (Parameter): shape [future_context_size + 1, D], the
            learnable weight (convolution kernel) of this layer.

    Examples:
        .. code-block:: python

            from paddle import fluid, nn
            import paddle.fluid.dygraph as dg
            import paddle.nn.functional as F
            import numpy as np

            batch_size = 4
            time_steps = 8
            feature_size = 6
            context_size = 4

            x = np.random.randn(batch_size, time_steps, feature_size).astype(np.float32)

            place = fluid.CPUPlace()
            with dg.guard(place):
                x_var = dg.to_variable(x)
                conv = nn.RowConv(feature_size, context_size)
                y_var = conv(x_var)
                y_np = y_var.numpy()
            print(y_np.shape)

            # (4, 8, 6)
    """

    def __init__(self,
                 num_channels,
                 future_context_size,
                 param_attr=None,
                 act=None,
                 dtype="float32"):
        """Store the layer configuration and create the kernel parameter."""
        super(RowConv, self).__init__()

        # Keep the configuration on the instance; forward() reads ``_act``.
        self._act = act
        self._param_attr = param_attr
        self._dtype = dtype

        # One kernel row for the current step plus one per future step.
        kernel_rows = future_context_size + 1
        kernel_shape = [kernel_rows, num_channels]
        self.weight = self.create_parameter(
            kernel_shape, attr=param_attr, dtype=dtype)

    def forward(self, input):
        """Apply ``F.row_conv`` to ``input`` with this layer's weight and activation.

        In the class-level example ``input`` has shape
        [batch_size, time_steps, num_channels] and the result has the same
        shape.
        """
        return F.row_conv(input, self.weight, act=self._act)