From 96331f74d76d77bf92104c12f498c89cb3ec2819 Mon Sep 17 00:00:00 2001 From: LielinJiang <50691816+LielinJiang@users.noreply.github.com> Date: Thu, 3 Sep 2020 13:27:41 +0800 Subject: [PATCH] fix conv1d padding (#26921) --- .../tests/unittests/test_conv1d_layer.py | 13 +- .../unittests/test_conv1d_transpose_layer.py | 1 + python/paddle/nn/functional/conv.py | 62 +-- python/paddle/nn/layer/conv.py | 426 ++++++++---------- 4 files changed, 237 insertions(+), 265 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py index da527b26bf0..35fce9e9d6b 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py @@ -44,7 +44,7 @@ class Conv1dTestCase(unittest.TestCase): self.spartial_shape = spartial_shape self.filter_size = filter_size self.data_format = data_format - self.channel_last = (self.data_format == "NHWC") + self.channel_last = (self.data_format == "NLC") self.padding = padding self.padding_mode = padding_mode @@ -147,6 +147,14 @@ class Conv1dErrorTestCase(Conv1dTestCase): self.paddle_nn_layer() +class Conv1dTypeErrorTestCase(Conv1dTestCase): + def runTest(self): + place = fluid.CPUPlace() + with dg.guard(place): + with self.assertRaises(TypeError): + self.paddle_nn_layer() + + def add_cases(suite): suite.addTest(Conv1dTestCase(methodName='runTest')) suite.addTest(Conv1dTestCase(methodName='runTest', stride=[1], dilation=2)) @@ -161,6 +169,7 @@ def add_cases(suite): Conv1dTestCase( methodName='runTest', padding=2, data_format='NLC')) suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1])) + suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1, 2])) suite.addTest(Conv1dTestCase(methodName='runTest', padding=2)) suite.addTest(Conv1dTestCase(methodName='runTest')) suite.addTest( @@ -178,7 +187,7 @@ def add_cases(suite): def add_error_cases(suite): suite.addTest( - Conv1dErrorTestCase( + Conv1dTypeErrorTestCase( methodName='runTest', padding_mode="reflect", padding="valid")) suite.addTest( Conv1dErrorTestCase( diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py index 73227dd3610..4c98aacd209 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py @@ -201,6 +201,7 @@ def add_cases(suite): ConvTranspose1dTestCase( methodName='runTest', data_format="NLC", stride=3, output_padding=2)) + suite.addTest(ConvTranspose1dTestCase(methodName='runTest', padding=[1, 2])) def add_error_cases(suite): diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 42d7d98aefc..3c1482e69c3 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -232,7 +232,7 @@ def conv1d(x, raise ValueError("Attr(data_format) should be 'NCL' or 'NLC'. " "Received Attr(data_format): {}.".format(data_format)) - channel_last = (data_format == "NHWC") + channel_last = (data_format == "NLC") channel_dim = -1 if channel_last else 1 conv2d_data_format = "NHWC" if channel_last else "NCHW" num_channels = x.shape[channel_dim] @@ -399,7 +399,7 @@ def conv2d(x, `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 
- when `data_format` is `"NHWC"`, `pool_padding` can be in the form + when `data_format` is `"NHWC"`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. dilation (int|tuple): The dilation size. It means the spacing between the kernel @@ -733,20 +733,31 @@ def conv_transpose1d(x, stride = utils.convert_to_list(stride, 1, 'stride') + [1] dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] - output_padding = utils.convert_to_list(output_padding, 1, - 'output_padding') + [0] - if output_padding[0] > stride[0]: - raise ValueError( - "The size of output_padding should not be greater than stride." - "But got output_padding={} and stride={}".format(output_padding[0], - stride[0])) if output_size is None: output_size = [] - elif isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 1, 'output_size') + [1] else: - raise ValueError("output_size should be int, or list, tuple of ints") + if output_padding != 0: + raise ValueError('output_padding option is mutually exclusive with ' + 'output_size') + if isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 1, + 'output_size') + [1] + else: + raise ValueError( + "output_size should be int, or list, tuple of ints") + + if output_padding == 0: + output_padding = [] + else: + output_padding = utils.convert_to_list(output_padding, 1, + 'output_padding') + [0] + + if len(output_padding) > 0 and output_padding[0] > stride[0]: + raise ValueError( + "The size of output_padding should not be greater than stride." + "But got output_padding={} and stride={}".format(output_padding[0], + stride[0])) op_type = 'conv2d_transpose' num_filters = weight.shape[1] @@ -761,16 +772,17 @@ def conv_transpose1d(x, weight = nn.unsqueeze(input=weight, axes=[-1]) if in_dygraph_mode(): - attrs = ('output_size', output_size, 'strides', stride, 'paddings', - padding, 'padding_algorithm', padding_algorithm, 'dilations', - dilation, 'groups', groups, 'use_cudnn', use_cudnn, - 'data_format', conv2d_data_format) + attrs = ('output_padding', output_padding, 'output_size', output_size, + 'strides', stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'dilations', dilation, 'groups', groups, + 'use_cudnn', use_cudnn, 'data_format', conv2d_data_format) out = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) else: inputs = {'Input': [x], 'Filter': [weight]} attrs = { + 'output_padding': output_padding, 'output_size': output_size, 'strides': stride, 'paddings': padding, @@ -791,12 +803,6 @@ def conv_transpose1d(x, if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) - if output_size is None: - out = pad2d( - out, - padding=[0, output_padding, 0, 0], - data_format=conv2d_data_format, - name=name) out = nn.squeeze(input=out, axes=[squeeze_axis]) return out @@ -888,9 +894,9 @@ def conv_transpose2d(x, 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_height, pad_width]` or `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCHW"`, `pool_padding` can be in the form + and when `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 
- when `data_format` is `"NHWC"`, `pool_padding` can be in the form + when `data_format` is `"NHWC"`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. output_padding(int|list|tuple, optional): Additional size added to one side @@ -1116,9 +1122,9 @@ def conv3d(x, 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + and when `data_format` is `"NCDHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + when `data_format` is `"NDHWC"`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. dilation (int|tuple): The dilation size. It means the spacing between the kernel points. @@ -1340,9 +1346,9 @@ def conv_transpose3d(x, 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + and when `data_format` is `"NCDHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + when `data_format` is `"NDHWC"`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. output_padding(int|list|tuple, optional): Additional size added to one side diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 4e342c00528..f3985781adb 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -113,7 +113,7 @@ class _ConvNd(layers.Layer): attr=self._bias_attr, shape=[self._out_channels], is_bias=True) -class Conv1d(layers.Layer): +class Conv1d(_ConvNd): """ This interface is used to construct a callable object of the ``Conv1d`` class. For more details, refer to code examples. @@ -172,8 +172,7 @@ class Conv1d(layers.Layer): When in 'replicate' mode, uses input boundaries to pad the input tensor. When in 'circular' mode, uses circular input to pad the input tensor. Default is 'zeros'. - bias(bool, optional): Whether to use bias. Default: True. - param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) + weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) of conv1d. If it is set to None or one attribute of ParamAttr, conv1d will create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with :math:`Normal(0.0, std)`, @@ -218,196 +217,6 @@ class Conv1d(layers.Layer): # [160. 
211.]]] """ - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - padding_mode='zeros', - bias=True, - weight_attr=None, - bias_attr=None, - data_format="NCL", - name=None): - super(Conv1d, self).__init__() - assert weight_attr is not False, "param_attr should not be False here." - self._in_channels = in_channels - self._out_channels = out_channels - self._groups = groups - if in_channels % groups != 0: - raise ValueError("in_channels must be divisible by groups.") - self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') - self._stride = utils.convert_to_list(stride, 1, 'stride') - self._dilation = utils.convert_to_list(dilation, 1, 'dilation') - self._padding = padding # leave it to F.conv1d - self._weight_attr = weight_attr - self._bias_attr = bias_attr - self._data_format = data_format - self._name = name - - self._padding_mode = padding_mode - - valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'} - if padding_mode not in valid_padding_modes: - raise ValueError( - "padding_mode must be one of {}, but got padding_mode='{}'". - format(valid_padding_modes, padding_mode)) - - if padding_mode in {'reflect', 'replicate', 'circular' - } and not isinstance(padding, np.int): - raise ValueError( - "when padding_mode in ['reflect', 'replicate', 'circular'], type of padding must be int" - ) - if not isinstance(padding, str): - self._padding = utils.convert_to_list(padding, 1, 'padding') * 2 - - num_filter_channels = in_channels // groups - filter_shape = [self._out_channels, num_filter_channels - ] + self._kernel_size - - self.weight = self.create_parameter( - attr=self._weight_attr, - shape=filter_shape, - default_initializer=_get_default_param_initializer( - self._in_channels, filter_shape)) - self.bias = self.create_parameter( - attr=self._bias_attr, shape=[self._out_channels], - is_bias=True) if bias else None - - def forward(self, x): - padding = 0 - if self._padding_mode != "zeros": - x = F.pad(x, - self._padding, - mode=self._padding_mode, - data_format=self._data_format) - else: - padding = self._padding - - out = F.conv1d( - x, - self.weight, - bias=self.bias, - padding=padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format, - name=self._name) - return out - - -class Conv2d(_ConvNd): - """ - This interface is used to construct a callable object of the ``Conv2d`` class. - For more details, refer to code examples. - The convolution2D layer calculates the output based on the input, filter - and strides, paddings, dilations, groups parameters. Input and - Output are in NCHW format, where N is batch size, C is the number of - the feature map, H is the height of the feature map, and W is the width of the feature map. - Filter's shape is [MCHW] , where M is the number of output feature map, - C is the number of input feature map, H is the height of the filter, - and W is the width of the filter. If the groups is greater than 1, - C will equal the number of input feature map divided by the groups. - Please refer to UFLDL's `convolution - `_ - for more details. - If bias attribution and activation type are provided, bias is added to the - output of the convolution, and the corresponding activation function is - applied to the final result. - For each input :math:`X`, the equation is: - - .. math:: - - Out = \sigma (W \\ast X + b) - - Where: - - * :math:`X`: Input value, a ``Tensor`` with NCHW format. 
- * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . - * :math:`\\ast`: Convolution operation. - * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. - * :math:`\\sigma`: Activation function. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. - - Parameters: - in_channels(int): The number of input channels in the input image. - out_channels(int): The number of output channels produced by the convolution. - kernel_size(int|list|tuple, optional): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must - contain three integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. The default value is 1. - padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. - 1. a string in ['valid', 'same']. - 2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` - 3. a list[int] or tuple[int] whose length is the number of spartial dimensions, which contains the amount of padding on each side for each spartial dimension. It has the form [pad_d1, pad_d2, ...]. - 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. - 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). - The default value is 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must - contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the - dilation_D = dilation_H = dilation_W = dilation. The default value is 1. - groups(int, optional): The groups number of the Conv3d Layer. According to grouped - convolution in Alex Krizhevsky's Deep CNN paper: when group=2, - the first half of the filters is only connected to the first half - of the input channels, while the second half of the filters is only - connected to the second half of the input channels. The default value is 1. - padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``. - weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights - of conv2d. If it is set to None or one attribute of ParamAttr, conv2d - will create ParamAttr as param_attr. If it is set to None, the parameter - is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is - :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None. - bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d. - If it is set to False, no bias will be added to the output units. - If it is set to None or one attribute of ParamAttr, conv2d - will create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. The default value is None. - data_format(str, optional): Data format that specifies the layout of input. - It can be "NCHW" or "NHWC". Default: "NCHW". - - Attribute: - - **weight** (Parameter): the learnable weights of filter of this layer. - - **bias** (Parameter or None): the learnable bias of this layer. 
- - Shape: - - - x: :math:`(N, C_{in}, H_{in}, W_{in})` - - - output: :math:`(N, C_{out}, H_{out}, W_{out})` - - Where - - .. math:: - - H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1 - - W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1 - - Examples: - - .. code-block:: python - - import numpy as np - import paddle - import paddle.nn as nn - x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') - - paddle.disable_static() - x_var = paddle.to_tensor(x) - conv = nn.Conv2d(4, 6, (3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) - - # (2, 6, 6, 6) - """ - def __init__(self, in_channels, out_channels, @@ -419,13 +228,13 @@ class Conv2d(_ConvNd): padding_mode='zeros', weight_attr=None, bias_attr=None, - data_format="NCHW"): - super(Conv2d, self).__init__( + data_format="NCL"): + super(Conv1d, self).__init__( in_channels, out_channels, kernel_size, False, - 2, + 1, stride=stride, padding=padding, padding_mode=padding_mode, @@ -436,25 +245,20 @@ class Conv2d(_ConvNd): data_format=data_format) def forward(self, x): - if self._padding_mode != 'zeros': + padding = 0 + if self._padding_mode != "zeros": x = F.pad(x, - self._reversed_padding_repeated_twice, + self._padding, mode=self._padding_mode, data_format=self._data_format) - return F.conv2d( - x, - self.weight, - bias=self.bias, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format) + else: + padding = self._padding - out = F.conv2d( + out = F.conv1d( x, self.weight, bias=self.bias, - padding=self._padding, + padding=padding, stride=self._stride, dilation=self._dilation, groups=self._groups, @@ -462,7 +266,7 @@ class Conv2d(_ConvNd): return out -class ConvTranspose1d(layers.Layer): +class ConvTranspose1d(_ConvNd): """ This interface is used to construct a callable object of the ``ConvTranspose1d`` class. For more details, refer to code examples. @@ -603,34 +407,24 @@ class ConvTranspose1d(layers.Layer): padding=0, output_padding=0, groups=1, - bias=True, dilation=1, weight_attr=None, bias_attr=None, data_format="NCL"): - super(ConvTranspose1d, self).__init__() - assert weight_attr is not False, "param_attr should not be False in ConvTranspose1d." 
- self._param_attr = weight_attr - self._bias_attr = bias_attr - self._groups = groups - self._in_channels = in_channels - self._out_channels = out_channels - self._output_padding = output_padding - self._data_format = data_format - self._bias = bias - - self._stride = utils.convert_to_list(stride, 1, 'stride') - self._dilation = utils.convert_to_list(dilation, 1, 'dilation') - self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') - self._padding = padding - - filter_shape = [self._in_channels, out_channels // groups - ] + self._kernel_size - self.weight = self.create_parameter( - shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, shape=[self._out_channels], - is_bias=True) if self._bias else None + super(ConvTranspose1d, self).__init__( + in_channels, + out_channels, + kernel_size, + True, + 1, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x, output_size=None): out = F.conv_transpose1d( @@ -638,7 +432,169 @@ class ConvTranspose1d(layers.Layer): self.weight, bias=self.bias, output_size=output_size, - output_padding=self._output_padding, + output_padding=self.output_padding, + padding=self._padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) + return out + + +class Conv2d(_ConvNd): + """ + This interface is used to construct a callable object of the ``Conv2d`` class. + For more details, refer to code examples. + The convolution2D layer calculates the output based on the input, filter + and strides, paddings, dilations, groups parameters. Input and + Output are in NCHW format, where N is batch size, C is the number of + the feature map, H is the height of the feature map, and W is the width of the feature map. + Filter's shape is [MCHW] , where M is the number of output feature map, + C is the number of input feature map, H is the height of the filter, + and W is the width of the filter. If the groups is greater than 1, + C will equal the number of input feature map divided by the groups. + Please refer to UFLDL's `convolution + `_ + for more details. + If bias attribution and activation type are provided, bias is added to the + output of the convolution, and the corresponding activation function is + applied to the final result. + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a ``Tensor`` with NCHW format. + * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Parameters: + in_channels(int): The number of input channels in the input image. + out_channels(int): The number of output channels produced by the convolution. + kernel_size(int|list|tuple, optional): The size of the convolving kernel. + stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + contain three integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. The default value is 1. + padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. + 1. a string in ['valid', 'same']. + 2. 
an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` + 3. a list[int] or tuple[int] whose length is the number of spartial dimensions, which contains the amount of padding on each side for each spartial dimension. It has the form [pad_d1, pad_d2, ...]. + 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. + 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). + The default value is 0. + dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the + dilation_D = dilation_H = dilation_W = dilation. The default value is 1. + groups(int, optional): The groups number of the Conv3d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. The default value is 1. + padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``. + weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights + of conv2d. If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as param_attr. If it is set to None, the parameter + is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is + :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None. + bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. The default value is None. + data_format(str, optional): Data format that specifies the layout of input. + It can be "NCHW" or "NHWC". Default: "NCHW". + + Attribute: + + **weight** (Parameter): the learnable weights of filter of this layer. + + **bias** (Parameter or None): the learnable bias of this layer. + + Shape: + + - x: :math:`(N, C_{in}, H_{in}, W_{in})` + + - output: :math:`(N, C_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1 + + W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1 + + Examples: + + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.nn as nn + x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') + + paddle.disable_static() + x_var = paddle.to_tensor(x) + conv = nn.Conv2d(4, 6, (3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) + + # (2, 6, 6, 6) + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + weight_attr=None, + bias_attr=None, + data_format="NCHW"): + super(Conv2d, self).__init__( + in_channels, + out_channels, + kernel_size, + False, + 2, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x): + if self._padding_mode != 'zeros': + x = F.pad(x, + self._reversed_padding_repeated_twice, + mode=self._padding_mode, + data_format=self._data_format) + return F.conv2d( + x, + self.weight, + bias=self.bias, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) + + out = F.conv2d( + x, + self.weight, + bias=self.bias, padding=self._padding, stride=self._stride, dilation=self._dilation, @@ -920,8 +876,8 @@ class Conv3d(_ConvNd): in_channels, out_channels, kernel_size, - padding=0, stride=1, + padding=0, dilation=1, groups=1, padding_mode='zeros', -- GitLab
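
A minimal usage sketch of the behavior this patch establishes, assuming the
paddle 2.0-beta `paddle.nn` API that the docstrings above use
(`paddle.disable_static()`, `nn.Conv1d`, `F.conv_transpose1d`). Tensor
contents are random and the shapes in the comments are the expected results
from the standard convolution arithmetic, not output captured from a run:

    import numpy as np
    import paddle
    import paddle.nn as nn
    import paddle.nn.functional as F

    paddle.disable_static()

    # conv1d now keys channel-last layout off "NLC"; before this patch it
    # compared data_format against the 2-D string "NHWC", so NLC inputs
    # silently took the channel-first code path.
    x = paddle.to_tensor(np.random.uniform(-1, 1, (2, 8, 4)).astype('float32'))  # NLC
    conv = nn.Conv1d(4, 6, 3, padding=[1, 2], data_format='NLC')  # asymmetric padding, per the new test case
    y = conv(x)
    print(y.numpy().shape)  # expected (2, 9, 6): L_out = (8 + 1 + 2 - 3) / 1 + 1

    # conv_transpose1d now forwards output_padding to the underlying
    # conv2d_transpose op and rejects combining it with output_size.
    x2 = paddle.to_tensor(np.random.uniform(-1, 1, (2, 6, 8)).astype('float32'))  # NCL
    w = paddle.to_tensor(np.random.uniform(-1, 1, (6, 4, 3)).astype('float32'))   # [in, out // groups, k]
    out = F.conv_transpose1d(x2, w, stride=2, output_padding=1)
    print(out.numpy().shape)  # expected (2, 4, 18): (8 - 1) * 2 + (3 - 1) + 1 + 1

    try:
        F.conv_transpose1d(x2, w, stride=2, output_size=[18], output_padding=1)
    except ValueError as e:
        print(e)  # "output_padding option is mutually exclusive with output_size"

The deleted pad2d call in conv_transpose1d explains the last point: output
padding used to be emulated by a trailing pad2d applied after the op, only
when output_size was absent. The rewrite instead passes it to the op as the
'output_padding' attribute and makes the conflict with output_size an
explicit ValueError rather than a silently ignored argument.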