Unverified commit 96331f74 authored by LielinJiang, committed by GitHub

fix conv1d padding (#26921)

Parent 95e1434b
......@@ -44,7 +44,7 @@ class Conv1dTestCase(unittest.TestCase):
self.spartial_shape = spartial_shape
self.filter_size = filter_size
self.data_format = data_format
self.channel_last = (self.data_format == "NHWC")
self.channel_last = (self.data_format == "NLC")
self.padding = padding
self.padding_mode = padding_mode
......@@ -147,6 +147,14 @@ class Conv1dErrorTestCase(Conv1dTestCase):
self.paddle_nn_layer()
class Conv1dTypeErrorTestCase(Conv1dTestCase):
def runTest(self):
place = fluid.CPUPlace()
with dg.guard(place):
with self.assertRaises(TypeError):
self.paddle_nn_layer()
def add_cases(suite):
suite.addTest(Conv1dTestCase(methodName='runTest'))
suite.addTest(Conv1dTestCase(methodName='runTest', stride=[1], dilation=2))
......@@ -161,6 +169,7 @@ def add_cases(suite):
Conv1dTestCase(
methodName='runTest', padding=2, data_format='NLC'))
suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1]))
suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1, 2]))
suite.addTest(Conv1dTestCase(methodName='runTest', padding=2))
suite.addTest(Conv1dTestCase(methodName='runTest'))
suite.addTest(
......@@ -178,7 +187,7 @@ def add_cases(suite):
def add_error_cases(suite):
suite.addTest(
Conv1dErrorTestCase(
Conv1dTypeErrorTestCase(
methodName='runTest', padding_mode="reflect", padding="valid"))
suite.addTest(
Conv1dErrorTestCase(
......
......@@ -201,6 +201,7 @@ def add_cases(suite):
ConvTranspose1dTestCase(
methodName='runTest', data_format="NLC", stride=3,
output_padding=2))
suite.addTest(ConvTranspose1dTestCase(methodName='runTest', padding=[1, 2]))
def add_error_cases(suite):
......
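The new padding=[1, 2] cases exercise the asymmetric conv1d padding this PR fixes. A minimal sketch of what such a case checks, assuming the 2.0-beta functional API changed below (tensor values are illustrative):

import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.rand([2, 4, 8])            # NCL: batch 2, 4 channels, length 8
w = paddle.rand([6, 4, 3])            # 6 filters, 4 in-channels, kernel 3
y = F.conv1d(x, w, padding=[1, 2])    # pad 1 on the left, 2 on the right
print(y.shape)                        # [2, 6, 9]: (8 + 1 + 2) - 3 + 1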
......@@ -232,7 +232,7 @@ def conv1d(x,
raise ValueError("Attr(data_format) should be 'NCL' or 'NLC'. "
"Received Attr(data_format): {}.".format(data_format))
channel_last = (data_format == "NHWC")
channel_last = (data_format == "NLC")
channel_dim = -1 if channel_last else 1
conv2d_data_format = "NHWC" if channel_last else "NCHW"
num_channels = x.shape[channel_dim]
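Both hunks above correct the channels-last check from the 2-D "NHWC" string to the 1-D "NLC" one. A hedged sketch of the two layouts, assuming the 2.0-beta functional API:

import paddle
import paddle.nn.functional as F

paddle.disable_static()
x_ncl = paddle.rand([2, 4, 8])                   # N, C, L
w = paddle.rand([6, 4, 3])
y_ncl = F.conv1d(x_ncl, w, data_format="NCL")    # -> [2, 6, 6]
x_nlc = paddle.transpose(x_ncl, [0, 2, 1])       # N, L, C
y_nlc = F.conv1d(x_nlc, w, data_format="NLC")    # -> [2, 6, 6], channels last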
......@@ -399,7 +399,7 @@ def conv2d(x,
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
`data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
[pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
when `data_format` is `"NHWC"`, `padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
dilation (int|tuple): The dilation size. It means the spacing between the kernel
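The corrected sentence above refers to `padding`, not `pool_padding`. For illustration, the two nested-list forms side by side (a sketch; the pad values are hypothetical):

import paddle
import paddle.nn.functional as F

x = paddle.rand([2, 3, 8, 8])                        # NCHW
w = paddle.rand([6, 3, 3, 3])
# NCHW: the [0, 0] pairs cover the batch and channel dims
y1 = F.conv2d(x, w, padding=[[0, 0], [0, 0], [1, 1], [2, 2]])
# NHWC: the [0, 0] pairs move to the first and last positions
x_nhwc = paddle.transpose(x, [0, 2, 3, 1])
y2 = F.conv2d(x_nhwc, w, padding=[[0, 0], [1, 1], [2, 2], [0, 0]],
              data_format="NHWC")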
......@@ -733,20 +733,31 @@ def conv_transpose1d(x,
stride = utils.convert_to_list(stride, 1, 'stride') + [1]
dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1]
output_padding = utils.convert_to_list(output_padding, 1,
'output_padding') + [0]
if output_padding[0] > stride[0]:
raise ValueError(
"The size of output_padding should not be greater than stride."
"But got output_padding={} and stride={}".format(output_padding[0],
stride[0]))
if output_size is None:
output_size = []
elif isinstance(output_size, (list, tuple, int)):
output_size = utils.convert_to_list(output_size, 1, 'output_size') + [1]
else:
raise ValueError("output_size should be int, or list, tuple of ints")
if output_padding != 0:
raise ValueError('output_padding option is mutually exclusive with '
'output_size')
if isinstance(output_size, (list, tuple, int)):
output_size = utils.convert_to_list(output_size, 1,
'output_size') + [1]
else:
raise ValueError(
"output_size should be int, or list, tuple of ints")
if output_padding == 0:
output_padding = []
else:
output_padding = utils.convert_to_list(output_padding, 1,
'output_padding') + [0]
if len(output_padding) > 0 and output_padding[0] > stride[0]:
raise ValueError(
"The size of output_padding should not be greater than stride."
"But got output_padding={} and stride={}".format(output_padding[0],
stride[0]))
op_type = 'conv2d_transpose'
num_filters = weight.shape[1]
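A usage sketch of the rule the rewritten block enforces, assuming the conv_transpose1d signature in this file: output_size and a non-zero output_padding can no longer be combined.

import paddle
import paddle.nn.functional as F

x = paddle.rand([2, 4, 8])                                  # NCL
w = paddle.rand([4, 6, 3])                                  # in_ch, out_ch // groups, k
y1 = F.conv_transpose1d(x, w, stride=2, output_padding=1)   # ok: 1 < stride
y2 = F.conv_transpose1d(x, w, stride=2, output_size=[17])   # ok
try:
    F.conv_transpose1d(x, w, stride=2, output_size=[17], output_padding=1)
except ValueError:
    pass  # "output_padding option is mutually exclusive with output_size"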
......@@ -761,16 +772,17 @@ def conv_transpose1d(x,
weight = nn.unsqueeze(input=weight, axes=[-1])
if in_dygraph_mode():
attrs = ('output_size', output_size, 'strides', stride, 'paddings',
padding, 'padding_algorithm', padding_algorithm, 'dilations',
dilation, 'groups', groups, 'use_cudnn', use_cudnn,
'data_format', conv2d_data_format)
attrs = ('output_padding', output_padding, 'output_size', output_size,
'strides', stride, 'paddings', padding, 'padding_algorithm',
padding_algorithm, 'dilations', dilation, 'groups', groups,
'use_cudnn', use_cudnn, 'data_format', conv2d_data_format)
out = getattr(core.ops, op_type)(x, weight, *attrs)
if bias is not None:
out = nn.elementwise_add(out, bias, axis=channel_dim)
else:
inputs = {'Input': [x], 'Filter': [weight]}
attrs = {
'output_padding': output_padding,
'output_size': output_size,
'strides': stride,
'paddings': padding,
......@@ -791,12 +803,6 @@ def conv_transpose1d(x,
if bias is not None:
out = nn.elementwise_add(out, bias, axis=channel_dim)
if output_size is None:
out = pad2d(
out,
padding=[0, output_padding, 0, 0],
data_format=conv2d_data_format,
name=name)
out = nn.squeeze(input=out, axes=[squeeze_axis])
return out
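For reference, the length arithmetic behind output_padding and the removed pad2d call, as a sketch (the helper name is hypothetical):

def conv_transpose1d_out_len(l_in, kernel, stride=1, padding=0,
                             dilation=1, output_padding=0):
    # L_out = (L_in - 1) * stride - 2 * padding
    #         + dilation * (kernel - 1) + output_padding + 1
    return ((l_in - 1) * stride - 2 * padding
            + dilation * (kernel - 1) + output_padding + 1)

assert conv_transpose1d_out_len(8, 3, stride=2) == 17
assert conv_transpose1d_out_len(8, 3, stride=2, output_padding=1) == 18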
......@@ -888,9 +894,9 @@ def conv_transpose2d(x,
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCHW"`, `pool_padding` can be in the form
and when `data_format` is `"NCHW"`, `padding` can be in the form
`[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
when `data_format` is `"NHWC"`, `padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side
......@@ -1116,9 +1122,9 @@ def conv3d(x,
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
and when `data_format` is `"NCDHW"`, `padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
when `data_format` is `"NDHWC"`, `padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
dilation (int|tuple): The dilation size. It means the spacing between the kernel points.
......@@ -1340,9 +1346,9 @@ def conv_transpose3d(x,
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
and when `data_format` is `"NCDHW"`, `padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
when `data_format` is `"NDHWC"`, `padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side
......
......@@ -113,7 +113,7 @@ class _ConvNd(layers.Layer):
attr=self._bias_attr, shape=[self._out_channels], is_bias=True)
class Conv1d(layers.Layer):
class Conv1d(_ConvNd):
"""
This interface is used to construct a callable object of the ``Conv1d`` class.
For more details, refer to code examples.
......@@ -172,8 +172,7 @@ class Conv1d(layers.Layer):
When in 'replicate' mode, uses input boundaries to pad the input tensor.
When in 'circular' mode, uses circular input to pad the input tensor.
Default is 'zeros'.
bias(bool, optional): Whether to use bias. Default: True.
param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
of conv1d. If it is set to None or one attribute of ParamAttr, conv1d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
......@@ -218,196 +217,6 @@ class Conv1d(layers.Layer):
# [160. 211.]]]
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
padding_mode='zeros',
bias=True,
weight_attr=None,
bias_attr=None,
data_format="NCL",
name=None):
super(Conv1d, self).__init__()
assert weight_attr is not False, "weight_attr should not be False here."
self._in_channels = in_channels
self._out_channels = out_channels
self._groups = groups
if in_channels % groups != 0:
raise ValueError("in_channels must be divisible by groups.")
self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size')
self._stride = utils.convert_to_list(stride, 1, 'stride')
self._dilation = utils.convert_to_list(dilation, 1, 'dilation')
self._padding = padding # leave it to F.conv1d
self._weight_attr = weight_attr
self._bias_attr = bias_attr
self._data_format = data_format
self._name = name
self._padding_mode = padding_mode
valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'}
if padding_mode not in valid_padding_modes:
raise ValueError(
"padding_mode must be one of {}, but got padding_mode='{}'".
format(valid_padding_modes, padding_mode))
if padding_mode in {'reflect', 'replicate', 'circular'
} and not isinstance(padding, np.int):
raise ValueError(
"when padding_mode in ['reflect', 'replicate', 'circular'], type of padding must be int"
)
if not isinstance(padding, str):
self._padding = utils.convert_to_list(padding, 1, 'padding') * 2
num_filter_channels = in_channels // groups
filter_shape = [self._out_channels, num_filter_channels
] + self._kernel_size
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=filter_shape,
default_initializer=_get_default_param_initializer(
self._in_channels, filter_shape))
self.bias = self.create_parameter(
attr=self._bias_attr, shape=[self._out_channels],
is_bias=True) if bias else None
def forward(self, x):
padding = 0
if self._padding_mode != "zeros":
x = F.pad(x,
self._padding,
mode=self._padding_mode,
data_format=self._data_format)
else:
padding = self._padding
out = F.conv1d(
x,
self.weight,
bias=self.bias,
padding=padding,
stride=self._stride,
dilation=self._dilation,
groups=self._groups,
data_format=self._data_format,
name=self._name)
return out
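A hedged usage sketch of the branch above: for a non-'zeros' padding_mode the layer pads via F.pad first and then convolves with padding=0, so an int padding of 1 still preserves length for kernel_size 3.

import paddle
import paddle.nn as nn

paddle.disable_static()
x = paddle.rand([2, 4, 8])                               # NCL
conv = nn.Conv1d(4, 6, 3, padding=1, padding_mode='replicate')
y = conv(x)
print(y.shape)                                           # [2, 6, 8]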
class Conv2d(_ConvNd):
"""
This interface is used to construct a callable object of the ``Conv2d`` class.
For more details, refer to code examples.
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW format, where N is batch size, C is the number of
the feature map, H is the height of the feature map, and W is the width of the feature map.
Filter's shape is [MCHW] , where M is the number of output feature map,
C is the number of input feature map, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input feature map divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters:
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. The default value is 1.
groups(int, optional): The group number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. The default value is 1.
padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. The default value is None.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute:
**weight** (Parameter): the learnable weights of filter of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32')
paddle.disable_static()
x_var = paddle.to_tensor(x)
conv = nn.Conv2d(4, 6, (3, 3))
y_var = conv(x_var)
y_np = y_var.numpy()
print(y_np.shape)
# (2, 6, 6, 6)
"""
def __init__(self,
in_channels,
out_channels,
......@@ -419,13 +228,13 @@ class Conv2d(_ConvNd):
padding_mode='zeros',
weight_attr=None,
bias_attr=None,
data_format="NCHW"):
super(Conv2d, self).__init__(
data_format="NCL"):
super(Conv1d, self).__init__(
in_channels,
out_channels,
kernel_size,
False,
2,
1,
stride=stride,
padding=padding,
padding_mode=padding_mode,
......@@ -436,25 +245,20 @@ class Conv2d(_ConvNd):
data_format=data_format)
def forward(self, x):
if self._padding_mode != 'zeros':
padding = 0
if self._padding_mode != "zeros":
x = F.pad(x,
self._reversed_padding_repeated_twice,
self._padding,
mode=self._padding_mode,
data_format=self._data_format)
return F.conv2d(
x,
self.weight,
bias=self.bias,
stride=self._stride,
dilation=self._dilation,
groups=self._groups,
data_format=self._data_format)
else:
padding = self._padding
out = F.conv2d(
out = F.conv1d(
x,
self.weight,
bias=self.bias,
padding=self._padding,
padding=padding,
stride=self._stride,
dilation=self._dilation,
groups=self._groups,
......@@ -462,7 +266,7 @@ class Conv2d(_ConvNd):
return out
class ConvTranspose1d(layers.Layer):
class ConvTranspose1d(_ConvNd):
"""
This interface is used to construct a callable object of the ``ConvTranspose1d`` class.
For more details, refer to code examples.
......@@ -603,34 +407,24 @@ class ConvTranspose1d(layers.Layer):
padding=0,
output_padding=0,
groups=1,
bias=True,
dilation=1,
weight_attr=None,
bias_attr=None,
data_format="NCL"):
super(ConvTranspose1d, self).__init__()
assert weight_attr is not False, "weight_attr should not be False in ConvTranspose1d."
self._param_attr = weight_attr
self._bias_attr = bias_attr
self._groups = groups
self._in_channels = in_channels
self._out_channels = out_channels
self._output_padding = output_padding
self._data_format = data_format
self._bias = bias
self._stride = utils.convert_to_list(stride, 1, 'stride')
self._dilation = utils.convert_to_list(dilation, 1, 'dilation')
self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size')
self._padding = padding
filter_shape = [self._in_channels, out_channels // groups
] + self._kernel_size
self.weight = self.create_parameter(
shape=filter_shape, attr=self._param_attr)
self.bias = self.create_parameter(
attr=self._bias_attr, shape=[self._out_channels],
is_bias=True) if self._bias else None
super(ConvTranspose1d, self).__init__(
in_channels,
out_channels,
kernel_size,
True,
1,
stride=stride,
padding=padding,
dilation=dilation,
output_padding=output_padding,
groups=groups,
weight_attr=weight_attr,
bias_attr=bias_attr,
data_format=data_format)
def forward(self, x, output_size=None):
out = F.conv_transpose1d(
......@@ -638,7 +432,169 @@ class ConvTranspose1d(layers.Layer):
self.weight,
bias=self.bias,
output_size=output_size,
output_padding=self._output_padding,
output_padding=self.output_padding,
padding=self._padding,
stride=self._stride,
dilation=self._dilation,
groups=self._groups,
data_format=self._data_format)
return out
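A minimal sketch of the forward path above, assuming the constructor shown in this diff: output_padding is fixed at construction, while output_size can vary per call.

import paddle
import paddle.nn as nn

paddle.disable_static()
x = paddle.rand([2, 4, 8])                               # NCL
deconv = nn.ConvTranspose1d(4, 6, 3, stride=2)
print(deconv(x).shape)                                   # [2, 6, 17]
print(deconv(x, output_size=[18]).shape)                 # [2, 6, 18]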
class Conv2d(_ConvNd):
"""
This interface is used to construct a callable object of the ``Conv2d`` class.
For more details, refer to code examples.
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW format, where N is batch size, C is the number of
the feature map, H is the height of the feature map, and W is the width of the feature map.
Filter's shape is [MCHW] , where M is the number of output feature map,
C is the number of input feature map, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input feature map divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters:
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. The default value is 1.
groups(int, optional): The group number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. The default value is 1.
padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. The default value is None.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute:
**weight** (Parameter): the learnable weights of filter of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32')
paddle.disable_static()
x_var = paddle.to_tensor(x)
conv = nn.Conv2d(4, 6, (3, 3))
y_var = conv(x_var)
y_np = y_var.numpy()
print(y_np.shape)
# (2, 6, 6, 6)
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
padding=0,
dilation=1,
groups=1,
padding_mode='zeros',
weight_attr=None,
bias_attr=None,
data_format="NCHW"):
super(Conv2d, self).__init__(
in_channels,
out_channels,
kernel_size,
False,
2,
stride=stride,
padding=padding,
padding_mode=padding_mode,
dilation=dilation,
groups=groups,
weight_attr=weight_attr,
bias_attr=bias_attr,
data_format=data_format)
def forward(self, x):
if self._padding_mode != 'zeros':
x = F.pad(x,
self._reversed_padding_repeated_twice,
mode=self._padding_mode,
data_format=self._data_format)
return F.conv2d(
x,
self.weight,
bias=self.bias,
stride=self._stride,
dilation=self._dilation,
groups=self._groups,
data_format=self._data_format)
out = F.conv2d(
x,
self.weight,
bias=self.bias,
padding=self._padding,
stride=self._stride,
dilation=self._dilation,
......@@ -920,8 +876,8 @@ class Conv3d(_ConvNd):
in_channels,
out_channels,
kernel_size,
padding=0,
stride=1,
padding=0,
dilation=1,
groups=1,
padding_mode='zeros',
......
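Because this hunk swaps the positional order of padding and stride in Conv3d.__init__, positional callers relying on the old order would silently change meaning; a hedged reminder to pass them by keyword:

import paddle.nn as nn

# old order: Conv3d(in_ch, out_ch, kernel_size, padding, stride, ...)
# new order: Conv3d(in_ch, out_ch, kernel_size, stride, padding, ...)
conv = nn.Conv3d(4, 6, 3, stride=1, padding=2)   # unambiguous either way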