【API 2.0】Add conv1d API (#26350)

e2b82e04 · whs · GitHub · e6675f4f · e2b82e04 · e2b82e04
6 changed files
--- a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py
+++ b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle
+from paddle import fluid, nn
+import paddle.fluid.dygraph as dg
+import paddle.nn.functional as F
+import paddle.fluid.initializer as I
+import unittest
+
+
+class Conv1dTestCase(unittest.TestCase):
+    """Compare static-graph ``F.conv1d`` against dygraph ``nn.Conv1d``.
+
+    Each instance holds one convolution configuration. ``runTest`` feeds the
+    same random input, weight and bias to both APIs and asserts that the two
+    outputs are almost equal, on CPU and (when available) on CUDA.
+    """
+
+    def __init__(self,
+                 methodName='runTest',
+                 batch_size=4,
+                 spartial_shape=(16, ),
+                 num_channels=6,
+                 num_filters=8,
+                 filter_size=3,
+                 padding=0,
+                 padding_mode="zeros",
+                 stride=1,
+                 dilation=1,
+                 groups=1,
+                 no_bias=False,
+                 dtype="float32",
+                 data_format="NCL"):
+        """Store the convolution configuration exercised by this case."""
+        super(Conv1dTestCase, self).__init__(methodName)
+        self.batch_size = batch_size
+        self.num_channels = num_channels
+        self.num_filters = num_filters
+        self.spartial_shape = spartial_shape
+        self.filter_size = filter_size
+        self.data_format = data_format
+        # "NLC" is the channel-last layout for 1-D data. Comparing against the
+        # 2-D name "NHWC" can never match a conv1d format, which made every
+        # NLC case silently build channel-first inputs.
+        self.channel_last = (self.data_format == "NLC")
+
+        self.padding = padding
+        self.padding_mode = padding_mode
+        self.stride = stride
+        self.dilation = dilation
+        self.groups = groups
+        self.no_bias = no_bias
+        self.dtype = dtype
+
+    def setUp(self):
+        """Create the random input, weight and (optional) bias arrays."""
+        if self.channel_last:
+            # [batch, length, channels]
+            input_shape = (self.batch_size, ) + self.spartial_shape + (
+                self.num_channels, )
+        else:
+            # [batch, channels, length]
+            input_shape = (self.batch_size,
+                           self.num_channels) + self.spartial_shape
+        self.input = np.random.randn(*input_shape).astype(self.dtype)
+
+        if isinstance(self.filter_size, int):
+            filter_size = [self.filter_size]
+        else:
+            filter_size = self.filter_size
+        # Weight layout is [num_filters, num_channels / groups, kernel_size].
+        self.weight_shape = weight_shape = (self.num_filters, self.num_channels
+                                            // self.groups) + tuple(filter_size)
+        self.weight = np.random.uniform(
+            -1, 1, size=weight_shape).astype(self.dtype)
+        if not self.no_bias:
+            self.bias = np.random.uniform(
+                -1, 1, size=(self.num_filters, )).astype(self.dtype)
+        else:
+            self.bias = None
+
+    def functional(self, place):
+        """Run ``F.conv1d`` in a static program on ``place``; return ndarray."""
+        main = fluid.Program()
+        start = fluid.Program()
+        with fluid.unique_name.guard():
+            with fluid.program_guard(main, start):
+                # Batch and length are left dynamic (-1); only the channel
+                # dimension is fixed.
+                input_shape = (-1, self.num_channels,
+                               -1) if not self.channel_last else (
+                                   -1, -1, self.num_channels)
+                x_var = fluid.data("input", input_shape, dtype=self.dtype)
+                w_var = fluid.data(
+                    "weight", self.weight_shape, dtype=self.dtype)
+                b_var = fluid.data(
+                    "bias", (self.num_filters, ), dtype=self.dtype)
+                y_var = F.conv1d(
+                    x_var,
+                    w_var,
+                    b_var if not self.no_bias else None,
+                    padding=self.padding,
+                    stride=self.stride,
+                    dilation=self.dilation,
+                    groups=self.groups,
+                    data_format=self.data_format)
+        feed_dict = {"input": self.input, "weight": self.weight}
+        if self.bias is not None:
+            feed_dict["bias"] = self.bias
+        exe = fluid.Executor(place)
+        exe.run(start)
+        y_np, = exe.run(main, feed=feed_dict, fetch_list=[y_var])
+        return y_np
+
+    def paddle_nn_layer(self):
+        """Run ``nn.Conv1d`` with the stored weight/bias; return ndarray.
+
+        Must be called inside a dygraph guard.
+        """
+        x_var = paddle.to_tensor(self.input)
+        conv = nn.Conv1d(
+            self.num_channels,
+            self.num_filters,
+            self.filter_size,
+            padding=self.padding,
+            padding_mode=self.padding_mode,
+            stride=self.stride,
+            dilation=self.dilation,
+            groups=self.groups,
+            data_format=self.data_format)
+        conv.weight.set_value(self.weight)
+        if not self.no_bias:
+            conv.bias.set_value(self.bias)
+        y_var = conv(x_var)
+        y_np = y_var.numpy()
+        return y_np
+
+    def _test_equivalence(self, place):
+        """Assert the functional and layer APIs agree on ``place``."""
+        result1 = self.functional(place)
+        with dg.guard(place):
+            result2 = self.paddle_nn_layer()
+        np.testing.assert_array_almost_equal(result1, result2)
+
+    def runTest(self):
+        """Check equivalence on CPU, and on GPU 0 when built with CUDA."""
+        place = fluid.CPUPlace()
+        self._test_equivalence(place)
+
+        if fluid.core.is_compiled_with_cuda():
+            place = fluid.CUDAPlace(0)
+            self._test_equivalence(place)
+
+
+class Conv1dErrorTestCase(Conv1dTestCase):
+    """Configuration for which running ``nn.Conv1d`` must raise ValueError."""
+
+    def runTest(self):
+        """Build and run the layer in dygraph mode on CPU; expect ValueError."""
+        with dg.guard(fluid.CPUPlace()):
+            self.assertRaises(ValueError, self.paddle_nn_layer)
+
+
+def add_cases(suite):
+    """Register every conv1d configuration that is expected to succeed.
+
+    Args:
+        suite (unittest.TestSuite): suite that receives the test cases.
+    """
+    # One entry per test case, in registration order; an empty dict means
+    # "all defaults".
+    configurations = [
+        {},
+        {'stride': [1], 'dilation': 2},
+        {'stride': 2, 'dilation': 1},
+        {'padding': "same", 'no_bias': True},
+        {'filter_size': 3, 'padding': 'valid'},
+        {'padding': 2, 'data_format': 'NLC'},
+        {'padding': [1]},
+        {'padding': 2},
+        {},
+        {'groups': 2, 'padding': "valid"},
+        {
+            'num_filters': 6,
+            'num_channels': 3,
+            'groups': 3,
+            'padding': "valid",
+            'data_format': 'NLC',
+        },
+    ]
+    for options in configurations:
+        suite.addTest(Conv1dTestCase(methodName='runTest', **options))
+
+
+def add_error_cases(suite):
+    """Register every conv1d configuration that is expected to raise.
+
+    Args:
+        suite (unittest.TestSuite): suite that receives the test cases.
+    """
+    # One entry per invalid configuration, in registration order.
+    invalid_configurations = [
+        {'padding_mode': "reflect", 'padding': "valid"},
+        {'data_format': "VALID"},
+        {'padding_mode': "VALID"},
+        {'num_channels': 5, 'groups': 2},
+        {'num_filters': 8, 'num_channels': 15, 'groups': 3},
+        {'padding': [1, 2, 3, 4, 5]},
+    ]
+    for options in invalid_configurations:
+        suite.addTest(Conv1dErrorTestCase(methodName='runTest', **options))
+
+
+def load_tests(loader, standard_tests, pattern):
+    """``unittest`` load_tests hook returning the hand-built conv1d suite.
+
+    The ``loader``, ``standard_tests`` and ``pattern`` arguments are part of
+    the hook's signature but are not used here.
+    """
+    conv1d_suite = unittest.TestSuite()
+    for register in (add_cases, add_error_cases):
+        register(conv1d_suite)
+    return conv1d_suite
+
+
+# Allow running this test file directly as a script.
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -93,6 +93,7 @@ from .layer.common import Dropout2D  #DEFINE_ALIAS
 from .layer.common import Dropout3D  #DEFINE_ALIAS
 from .layer.pooling import AdaptiveAvgPool2d  #DEFINE_ALIAS
 from .layer.pooling import AdaptiveAvgPool3d  #DEFINE_ALIAS
+from .layer.conv import Conv1d  #DEFINE_ALIAS
 from .layer.conv import Conv2d  #DEFINE_ALIAS
 from .layer.conv import Conv3d  #DEFINE_ALIAS
 from .layer.conv import ConvTranspose2d  #DEFINE_ALIAS

--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -69,6 +69,7 @@ from .common import unfold  #DEFINE_ALIAS
 # from .common import bilinear_tensor_product        #DEFINE_ALIAS
 from .common import assign  #DEFINE_ALIAS
 from .common import interpolate  #DEFINE_ALIAS
+from .conv import conv1d  #DEFINE_ALIAS
 from .conv import conv2d  #DEFINE_ALIAS
 from .conv import conv_transpose2d  #DEFINE_ALIAS
 from .conv import conv3d  #DEFINE_ALIAS

--- a/python/paddle/nn/functional/conv.py
+++ b/python/paddle/nn/functional/conv.py
@@ -13,7 +13,13 @@
 # limitations under the License.
 from __future__ import print_function

-__all__ = ['conv2d', 'conv_transpose2d', 'conv3d', 'conv_transpose3d']
+__all__ = [
+    'conv1d',
+    'conv2d',
+    'conv_transpose2d',
+    'conv3d',
+    'conv_transpose3d',
+]

 import numpy as np
 from ...device import get_cudnn_version
@@ -88,6 +94,232 @@ def _update_padding_nd(padding, channel_last, num_dims):
    return padding, padding_algorithm


+def conv1d(x,
+           weight,
+           bias=None,
+           stride=1,
+           padding=0,
+           dilation=1,
+           groups=1,
+           data_format='NCL',
+           name=None):
+    """
+    The convolution1D layer calculates the output based on the input, filter
+    and strides, paddings, dilations, groups parameters. Input and
+    Output are in NCL format by default (or NLC format when ``data_format``
+    is ``"NLC"``), where N is batch size, C is the number of
+    channels, L is the length of the feature.
+    Filter is in MCK format, where M is the number of output image channels,
+    C is the number of input image channels, K is the size of the kernel.
+    If the groups is greater than 1, C will equal the number of input image
+    channels divided by the groups. If bias is provided, it is added to the
+    output of the convolution. No activation is applied by this function.
+
+    Internally the input and the filter are expanded with one extra axis of
+    size 1 and the computation is dispatched to the ``conv2d`` (or
+    ``depthwise_conv2d``) operator; the extra axis is squeezed from the result.
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = W \\ast X + b
+
+    Where:
+
+    * :math:`X`: Input value, a tensor with NCL format.
+    * :math:`W`: Kernel value, a tensor with MCK format.
+    * :math:`\\ast`: Convolution operation.
+    * :math:`b`: Bias value, a 1-D tensor with shape [M].
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+    Example:
+
+        - Input:
+
+          Input shape: :math:`(N, C_{in}, L_{in})`
+
+          Filter shape: :math:`(C_{out}, C_{in}, L_f)`
+
+        - Output:
+
+          Output shape: :math:`(N, C_{out}, L_{out})`
+
+        Where
+
+        .. math::
+
+            L_{out}&= \\frac{(L_{in} + 2 * padding - (dilation * (L_f - 1) + 1))}{stride} + 1
+
+    Args:
+        x (Tensor): The input is 3-D Tensor with shape [N, C, L] (or [N, L, C]
+            when `data_format` is `"NLC"`), the data type
+            of input is float16 or float32 or float64.
+        weight (Tensor): The convolution kernel with shape [M, C/g, K], where M is
+            the number of output channels, g is the number of groups, K is the kernel's size.
+        bias (Tensor, optional): The bias with shape [M,]. Default: None.
+        stride (int or tuple, optional): The stride size. If stride is a tuple, it must
+            contain one integer, (stride_size). Default: 1.
+        padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
+            1. a string in ['valid', 'same'].
+            2. an int, which means the feature map is zero padded by size of `padding` on both sides.
+            3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero padded by size of `padding[0]` on both sides.
+            4. a list[int] or tuple[int] whose length is 2. It has the form  [pad_before, pad_after].
+            5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
+            The default value is 0.
+        dilation (int or tuple, optional): The dilation size. If dilation is a tuple, it must
+            contain one integer, (dilation_size). Default: 1.
+        groups (int, optional): The groups number of the conv1d function. According to grouped
+            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+            the first half of the filters is only connected to the first half
+            of the input channels, while the second half of the filters is only
+            connected to the second half of the input channels. Default: 1.
+        data_format (str, optional): Specify the data format of the input, and the data format of the output
+            will be consistent with that of the input. An optional string from: `"NCL"`, `"NLC"`.
+            The default is `"NCL"`. When it is `"NCL"`, the data is stored in the order of:
+            `[batch_size, input_channels, feature_length]`.
+        name(str, optional): For detailed information, please refer
+           to :ref:`api_guide_Name`. Usually name is no need to set and
+           None by default.
+
+    Returns:
+        A tensor representing the conv1d, whose data type is the
+        same with input.
+
+    Raises:
+        ValueError: If the channel dimension of the input is negative (undefined).
+        ValueError: If the number of input channels or the number of filters is not divisible by `groups`.
+        ValueError: If `data_format` is not "NCL" or "NLC".
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+            or the element corresponding to the input's channel is not 0.
+        ValueError: If the resolved `padding` does not contain 1 or 2 values.
+        ShapeError: If the input is not 3-D Tensor.
+        ShapeError: If the input's dimension size and filter's dimension size not equal.
+        ShapeError: If the dimension size of input minus the size of `stride` is not 1.
+        ShapeError: If the number of input channels is not equal to filter's channels * groups.
+        ShapeError: If the number of output channels is not be divided by groups.
+
+    Examples:
+        .. code-block:: python
+
+          import paddle
+          import paddle.nn.functional as F
+          import numpy as np
+          x = np.array([[[4, 8, 1, 9],
+            [7, 2, 0, 9],
+            [6, 9, 2, 6]]]).astype(np.float32)
+          w=np.array(
+          [[[9, 3, 4],
+            [0, 0, 7],
+            [2, 5, 6]],
+           [[0, 3, 4],
+            [2, 9, 7],
+            [5, 6, 8]]]).astype(np.float32)
+          paddle.disable_static()
+          x_var = paddle.to_tensor(x)
+          w_var = paddle.to_tensor(w)
+          y_var = F.conv1d(x_var, w_var)
+          y_np = y_var.numpy()
+          print(y_np)
+
+          # [[[133. 238.]
+          #   [160. 211.]]]
+    """
+    # cuDNN is enabled whenever a cuDNN version is reported.
+    cudnn_version = get_cudnn_version()
+    use_cudnn = cudnn_version is not None
+
+    if data_format not in ["NCL", "NLC"]:
+        raise ValueError("Attr(data_format) should be 'NCL' or 'NLC'. "
+                         "Received Attr(data_format): {}.".format(data_format))
+
+    # "NLC" is the channel-last layout for 1-D data. The validation above
+    # rejects anything but NCL/NLC, so comparing against "NHWC" could never be
+    # true and NLC inputs were treated as channel-first.
+    channel_last = (data_format == "NLC")
+    channel_dim = -1 if channel_last else 1
+    # Layout handed to the underlying 2-D convolution operator.
+    conv2d_data_format = "NHWC" if channel_last else "NCHW"
+    num_channels = x.shape[channel_dim]
+    num_filters = weight.shape[0]
+    if num_channels < 0:
+        raise ValueError("The channel dimmention of the input({}) "
+                         "should be defined. Received: {}.".format(
+                             x.shape, num_channels))
+    if num_channels % groups != 0:
+        raise ValueError(
+            "the channel of input must be divisible by groups,"
+            "received: the channel of input is {}, the shape of input is {}"
+            ", the groups is {}".format(num_channels, x.shape, groups))
+    if num_filters % groups != 0:
+        raise ValueError(
+            "the number of filters must be divisible by groups,"
+            "received: the number of filters is {}, the shape of weight is {}"
+            ", the groups is {}".format(num_filters, weight.shape, groups))
+
+    # update attrs
+    # Extend the 1-D padding/stride/dilation with neutral values for the
+    # dummy second spatial axis added below.
+    padding, padding_algorithm = _update_padding_nd(padding, channel_last, 1)
+    if len(padding) == 2:
+        padding = padding + [0] * 2
+    elif len(padding) == 1:
+        padding = padding + [0]
+    else:
+        raise ValueError(
+            "The size of padding's dimmention should 1 or 2. But got padding={}".
+            format(padding))
+
+    stride = utils.convert_to_list(stride, 1, 'stride') + [1]
+    dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1]
+
+    # Use the depthwise kernel only when cuDNN is unavailable and every input
+    # channel forms its own group.
+    l_type = "conv2d"
+    if (num_channels == groups and num_filters % num_channels == 0 and
+            not use_cudnn):
+        l_type = 'depthwise_conv2d'
+
+    # Insert a dummy width axis of size 1 right after the length axis so the
+    # 2-D operator can be reused: NCL -> NCL1, NLC -> NL1C, MCK -> MCK1.
+    squeeze_axis = -2 if channel_last else -1
+    x = nn.unsqueeze(input=x, axes=[squeeze_axis])
+    weight = nn.unsqueeze(input=weight, axes=[-1])
+    if in_dygraph_mode():
+        attrs = ('strides', stride, 'paddings', padding, 'dilations', dilation,
+                 'groups', groups, 'use_cudnn', use_cudnn, 'use_mkldnn', False,
+                 'fuse_relu_before_depthwise_conv', False, "padding_algorithm",
+                 padding_algorithm, "data_format", conv2d_data_format)
+        out = getattr(core.ops, l_type)(x, weight, *attrs)
+    else:
+        inputs = {'Input': [x], 'Filter': [weight]}
+        attrs = {
+            'strides': stride,
+            'paddings': padding,
+            'dilations': dilation,
+            'groups': groups,
+            'use_cudnn': use_cudnn,
+            'use_mkldnn': False,
+            'fuse_relu_before_depthwise_conv': False,
+            "padding_algorithm": padding_algorithm,
+            "data_format": conv2d_data_format
+        }
+        check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'],
+                                 'conv2d')
+        # NOTE: LayerHelper receives every local (including ``name``) as a
+        # keyword argument, so local names here are part of its input.
+        helper = LayerHelper(l_type, **locals())
+        dtype = helper.input_dtype()
+        out = helper.create_variable_for_type_inference(dtype)
+        outputs = {"Output": [out]}
+        helper.append_op(
+            type=l_type, inputs=inputs, outputs=outputs, attrs=attrs)
+    if bias is not None:
+        out = nn.elementwise_add(out, bias, axis=channel_dim)
+    # Drop the dummy axis again to return a 3-D tensor.
+    out = nn.squeeze(input=out, axes=[squeeze_axis])
+    return out
+
+
 def conv2d(x,
           weight,
           bias=None,

--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -57,6 +57,7 @@ from .common import Dropout2D  #DEFINE_ALIAS
 from .common import Dropout3D  #DEFINE_ALIAS
 from .pooling import AdaptiveAvgPool2d  #DEFINE_ALIAS
 from .pooling import AdaptiveAvgPool3d  #DEFINE_ALIAS
+from .conv import Conv1d  #DEFINE_ALIAS
 from .conv import Conv2d  #DEFINE_ALIAS
 from .conv import Conv3d  #DEFINE_ALIAS
 from .conv import ConvTranspose2d  #DEFINE_ALIAS

--- a/python/paddle/nn/layer/conv.py
+++ b/python/paddle/nn/layer/conv.py