diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 0f542bfab1f5a3daa6f83495e470d57b42431ae4..d2ac562dfd0bb16ed58a3eb4dbf98bbf6f69409f 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -69,7 +69,6 @@ __all__ = [ 'crf_decoding', 'cos_sim', 'conv2d', - 'conv3d', 'softmax', 'pool2d', 'pool3d', @@ -1683,314 +1682,6 @@ def conv2d( return helper.append_activation(pre_act) -def conv3d( - input, - num_filters, - filter_size, - stride=1, - padding=0, - dilation=1, - groups=None, - param_attr=None, - bias_attr=None, - use_cudnn=True, - act=None, - name=None, - data_format="NCDHW", -): - r""" - :api_attr: Static Graph - - The convolution3D layer calculates the output based on the input, filter - and strides, paddings, dilations, groups parameters. Input(Input) and - Output(Output) are in NCDHW or NDHWC format. Where N is batch size C is the number of - channels, D is the depth of the feature, H is the height of the feature, - and W is the width of the feature. Convlution3D is similar with Convlution2D - but adds one dimension(depth). If bias attribution and activation type are - provided, bias is added to the output of the convolution, and the - corresponding activation function is applied to the final result. - - For each input :math:`X`, the equation is: - - .. math:: - - Out = \sigma (W \\ast X + b) - - In the above equation: - - * :math:`X`: Input value, a tensor with NCDHW or NDHWC format. - * :math:`W`: Filter value, a tensor with MCDHW format. - * :math:`\\ast`: Convolution operation. - * :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. - * :math:`\\sigma`: Activation function. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. - - Example: - - - Input: - - Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - - Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)` - - - Output: - Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` - - Where - - .. math:: - - D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\ - H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\ - W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1 - - Args: - input (Tensor): The input is 5-D Tensor with shape [N, C, D, H, W], the data - type of input is float16 or float32 or float64. - num_filters(int): The number of filter. It is as same as the output - image channel. - filter_size (int|tuple): The filter size. If filter_size is a tuple, - it must contain three integers, (filter_size_depth, filter_size_height, - filter_size_width). Otherwise, filter_size_depth = filter_size_height = \ - filter_size_width = filter_size. - stride (int|tuple): The stride size. It means the stride in convolution. If stride is a - tuple, it must contain three integers, (stride_depth, stride_height, stride_width). - Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1. - padding (string|int|list|tuple): The padding size. It means the number of zero-paddings - on both sides for each dimension. If `padding` is a string, either 'VALID' or - 'SAME' which is the padding algorithm. 
If padding size is a tuple or list, - it could be in three forms: `[pad_depth, pad_height, pad_width]` or - `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form - `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form - `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. - Default: padding = 0. - dilation (int|tuple): The dilation size. It means the spacing between the kernel points. - If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height, - dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation. - Default: dilation = 1. - groups (int): The groups number of the Conv3d Layer. According to grouped - convolution in Alex Krizhevsky's Deep CNN paper: when group=2, - the first half of the filters is only connected to the first half - of the input channels, while the second half of the filters is only - connected to the second half of the input channels. Default: groups=1 - param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights - of conv3d. If it is set to None or one attribute of ParamAttr, conv3d - will create ParamAttr as param_attr. If it is set to None, the parameter - is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is - :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None. - bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d. - If it is set to False, no bias will be added to the output units. - If it is set to None or one attribute of ParamAttr, conv3d - will create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn - library is installed. Default: True - act (str): Activation type, if it is set to None, activation is not appended. - Default: None. - name(str|None): For detailed information, please refer - to :ref:`api_guide_Name`. Usually name is no need to set and - None by default. - data_format (str, optional): Specify the data format of the input, and the data format of the output - will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. - The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: - `[batch_size, input_channels, input_height, input_width]`. - - Returns: - A Variable holding Tensor representing the conv3d, whose data type is - the same with input. If act is None, the tensor variable storing the - convolution result, and if act is not None, the tensor variable storing - convolution and non-linearity activation result. - - Raises: - ValueError: If the type of `use_cudnn` is not bool. - ValueError: If `data_format` is not "NCDHW" or "NDHWC". - ValueError: If the channel dimmention of the input is less than or equal to zero. - ValueError: If `padding` is a string, but not "SAME" or "VALID". - ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 - or the element corresponding to the input's channel is not 0. - ShapeError: If the input is not 5-D Tensor. - ShapeError: If the input's dimension size and filter's dimension size not equal. 
- ShapeError: If the dimension size of input minus the size of `stride` is not 2. - ShapeError: If the number of input channels is not equal to filter's channels * groups. - ShapeError: If the number of output channels is not be divided by groups. - - Examples: - .. code-block:: python - - import paddle - import numpy as np - - paddle.enable_static() - data = paddle.static.data(name='data', shape=[None, 3, 12, 32, 32], dtype='float32') - param_attr = paddle.framework.ParamAttr(name='conv3d.weight', initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001) - res = paddle.static.nn.conv3d(input=data, num_filters=2, filter_size=3, act="relu", param_attr=param_attr) - place = paddle.CPUPlace() - exe = paddle.static.Executor(place) - exe.run(paddle.static.default_startup_program()) - x = np.random.rand(1, 3, 12, 32, 32).astype("float32") - output = exe.run(feed={"data": x}, fetch_list=[res]) - print(output) - """ - - l_type = 'conv3d' - assert param_attr is not False, "param_attr should not be False here." - helper = LayerHelper(l_type, **locals()) - dtype = helper.input_dtype() - - if not isinstance(use_cudnn, bool): - raise ValueError( - "Attr(use_cudnn) should be True or False. Received " - "Attr(use_cudnn): %s. " % str(use_cudnn) - ) - - if data_format not in ["NCDHW", "NDHWC"]: - raise ValueError( - "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received " - "Attr(data_format): %s." % str(data_format) - ) - - channel_last = data_format == "NDHWC" - if len(input.shape) != 5: - raise ValueError( - "Input should be 5D tensor, but received input with the shape of {}".format( - input.shape - ) - ) - num_channels = input.shape[4] if channel_last else input.shape[1] - if num_channels < 0: - raise ValueError( - "The channel dimmention of the input(%s) should be defined. " - "Received: %s." % (str(input.shape), str(num_channels)) - ) - - if groups is None: - num_filter_channels = num_channels - elif groups <= 0: - raise ValueError( - "the groups of conv3d should be greater than 0. Received groups: {}".format( - groups - ) - ) - else: - if num_channels % groups != 0: - raise ValueError( - "The number of input channels must be divisible by Attr(groups). " - "Received: number of channels(%s), groups(%s)." - % (str(num_channels), str(groups)) - ) - num_filter_channels = num_channels // groups - - filter_size = utils.convert_to_list(filter_size, 3, 'filter_size') - stride = utils.convert_to_list(stride, 3, 'stride') - dilation = utils.convert_to_list(dilation, 3, 'dilation') - - def _update_padding(padding, data_format): - def is_list_or_tuple(ele): - if isinstance(ele, list) or isinstance(ele, tuple): - return True - return False - - if is_list_or_tuple(padding) and len(padding) == 5: - if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): - if not (padding[0] == [0, 0] and padding[1] == [0, 0]): - raise ValueError( - "Non-zero padding(%s) in the batch or channel dimensions " - "is not supported." % str(padding) - ) - padding = padding[2:5] - padding = [ele for a_list in padding for ele in a_list] - elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"): - if not (padding[0] == [0, 0] and padding[4] == [0, 0]): - raise ValueError( - "Non-zero padding(%s) in the batch or channel dimensions " - "is not supported." 
% str(padding) - ) - padding = padding[1:4] - padding = [ele for a_list in padding for ele in a_list] - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] - elif is_list_or_tuple(padding) and len(padding) == 6: - padding = utils.convert_to_list(padding, 6, 'padding') - if utils._is_symmetric_padding(padding, 3): - padding = [padding[0], padding[2], padding[4]] - else: - padding = utils.convert_to_list(padding, 3, 'padding') - - return padding - - padding_algorithm = "EXPLICIT" - if isinstance(padding, str): - padding = padding.upper() - if padding not in ["SAME", "VALID"]: - raise ValueError( - "Unknown padding: '%s'. It can only be 'SAME' or 'VALID'." - % str(padding) - ) - if padding == "VALID": - padding_algorithm = "VALID" - padding = [0, 0, 0] - elif padding == "SAME": - padding_algorithm = "SAME" - padding = [0, 0, 0] - - padding = _update_padding(padding, data_format) - - input_shape = input.shape - filter_shape = [num_filters, num_filter_channels] + filter_size - - def _get_default_param_initializer(): - filter_elem_num = ( - filter_size[0] * filter_size[1] * filter_size[2] * num_channels - ) - if filter_elem_num <= 0: - raise ValueError( - "Invalid filter number, excepted number is larger than 0, but" - " received {}, please check the input shape and " - "filter size.".format(filter_elem_num) - ) - - std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) - - filter_param = helper.create_parameter( - attr=helper.param_attr, - shape=filter_shape, - dtype=dtype, - default_initializer=_get_default_param_initializer(), - ) - - pre_bias = helper.create_variable_for_type_inference(dtype) - - helper.append_op( - type=l_type, - inputs={ - 'Input': input, - 'Filter': filter_param, - }, - outputs={"Output": pre_bias}, - attrs={ - 'strides': stride, - 'paddings': padding, - 'dilations': dilation, - 'groups': groups, - 'use_cudnn': use_cudnn, - 'use_mkldnn': False, - "padding_algorithm": padding_algorithm, - "data_format": data_format, - }, - ) - - if data_format == 'NCDHW': - pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) - else: - pre_act = helper.append_bias_op(pre_bias, dim_start=4, dim_end=5) - - return helper.append_activation(pre_act) - - @templatedoc() def pool2d( input, diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py index 70e62f9eaf6c7643c7cb1b32b32639fd59875502..1142e6370345a3ed60ce4167588ac43b23be24ed 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_conv3d_op.py @@ -15,6 +15,7 @@ import unittest import numpy as np from inference_pass_test import InferencePassTest +import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid.core import PassVersionChecker @@ -29,7 +30,7 @@ class TensorRTSubgraphPassConv3dTest(InferencePassTest): data = fluid.data( name="data", shape=[-1, 3, 6, 32, 32], dtype="float32" ) - conv_out = fluid.layers.conv3d( + conv_out = paddle.static.nn.conv3d( input=data, num_filters=self.conv_num_filters, filter_size=self.conv_filter_size, @@ -113,7 +114,7 @@ class DynamicShapeTensorRTSubgraphPassConv3dTest(InferencePassTest): data = fluid.data( name="data", shape=[-1, 6, -1, -1, -1], dtype="float32" ) - conv_out = fluid.layers.conv3d( + conv_out = paddle.static.nn.conv3d( input=data, 
num_filters=self.conv_num_filters, filter_size=self.conv_filter_size, diff --git a/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py index 81af05b156c86aba47d0a497707e2d36edcde32d..65ec9a489f4a532b4d79755aed10e688b9eee51f 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_conv3d_op_npu.py @@ -364,7 +364,7 @@ class TestConv3DAPI(unittest.TestCase): dtype="float32", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NDHWC, num_filters=3, filter_size=[3, 3, 3], @@ -375,7 +375,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -386,7 +386,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -397,7 +397,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NDHWC, num_filters=3, filter_size=[3, 3, 3], @@ -408,7 +408,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NDHWC", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -419,7 +419,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -442,7 +442,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: cudnn def run_1(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -458,7 +458,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: data_format def run_2(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=[3, 3, 3], @@ -474,7 +474,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: padding def run_3(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -489,7 +489,7 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_3) def run_4(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -504,7 +504,7 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_4) def run_5(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=0, @@ -527,7 +527,7 @@ class TestConv3DAPI_Error(unittest.TestCase): ) def run_6(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=x, num_filters=3, filter_size=3, @@ -543,7 +543,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: groups def run_7(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -559,7 +559,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: filter num def run_8(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=0, filter_size=0, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py index a8ab4d65961f106107dd97820b36246c27a82c1d..fd7dc6bb630291304dd6e5fd8ee3862d75d91fc5 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -101,7 +101,7 @@ class 
Conv3DTestCase(unittest.TestCase): bias_attr = False else: bias_attr = I.NumpyArrayInitializer(self.bias) - y_var = fluid.layers.conv3d( + y_var = paddle.static.nn.conv3d( x_var, self.num_filters, self.filter_size, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_op.py b/python/paddle/fluid/tests/unittests/test_conv3d_op.py index 54a3621e0ba72e1611558b30339b9df279b1b0e3..d9cd69e4d550ce57f195df9da2a0410937fc56b8 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_op.py @@ -878,7 +878,7 @@ class TestConv3DAPI(unittest.TestCase): dtype="float32", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NDHWC, num_filters=3, filter_size=[3, 3, 3], @@ -889,7 +889,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -900,7 +900,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -911,7 +911,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NDHWC, num_filters=3, filter_size=[3, 3, 3], @@ -922,7 +922,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NDHWC", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -933,7 +933,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -956,7 +956,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: cudnn def run_1(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -972,7 +972,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: data_format def run_2(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=[3, 3, 3], @@ -988,7 +988,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: padding def run_3(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -1003,7 +1003,7 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_3) def run_4(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -1018,7 +1018,7 @@ class TestConv3DAPI_Error(unittest.TestCase): self.assertRaises(ValueError, run_4) def run_5(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=0, @@ -1041,7 +1041,7 @@ class TestConv3DAPI_Error(unittest.TestCase): ) def run_6(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=x, num_filters=3, filter_size=3, @@ -1057,7 +1057,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: groups def run_7(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -1073,7 +1073,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: filter num def run_8(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=0, filter_size=0, diff --git a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py index b09a86f5bfa0b7abc0ab1c045d21d5285dbc10c1..19b9d6fdee97c47d794314ad8d903f3159f6c262 100644 --- 
a/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py +++ b/python/paddle/fluid/tests/unittests/test_conv_nn_grad.py @@ -110,7 +110,7 @@ class TestConv3DDoubleGradCheck(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d(x, 2, 1, bias_attr=False) + y = paddle.static.nn.conv3d(x, 2, 1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -137,7 +137,7 @@ class TestConv3DDoubleGradCheckTest1(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d(x, 2, 1, padding=1, bias_attr=False) + y = paddle.static.nn.conv3d(x, 2, 1, padding=1, bias_attr=False) x_arr = np.random.uniform(-1, 1, shape).astype(dtype) w = fluid.default_main_program().global_block().all_parameters() @@ -332,7 +332,7 @@ class TestConv3DDoubleGradCheck_AsyPadding(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( + y = paddle.static.nn.conv3d( input=x, num_filters=2, filter_size=1, @@ -365,7 +365,7 @@ class TestConv3DoubleGradCheck_PaddingSAME(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( + y = paddle.static.nn.conv3d( input=x, num_filters=2, filter_size=1, @@ -399,7 +399,7 @@ class TestConv3DoubleGradCheck_PaddingVALID(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( + y = paddle.static.nn.conv3d( input=x, num_filters=2, filter_size=1, @@ -432,7 +432,7 @@ class TestConv3DDoubleGradCheck_ChannelLast(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( + y = paddle.static.nn.conv3d( input=x, num_filters=2, filter_size=1, @@ -467,7 +467,7 @@ class TestConv3DDoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase): eps = 0.005 dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64 x = layers.data('x', shape, False, dtype) - y = layers.conv3d( + y = paddle.static.nn.conv3d( input=x, num_filters=2, filter_size=1, diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py index d2d7074a4b1a3343e5c44aaf0566c24f71757ef2..71123cb51e74b8e1d7fd9fe1a9e1d39d395940ad 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py @@ -87,7 +87,7 @@ class TestFunctionalConv3D(TestCase): (-1, self.in_channels, -1, -1, -1), dtype=self.dtype, ) - y = fluid.layers.conv3d( + y = paddle.static.nn.conv3d( x, self.out_channels, self.filter_shape, @@ -480,7 +480,7 @@ class TestFunctionalConv3DErrorCase11(TestCase): with fluid.unique_name.guard(): with fluid.program_guard(main, start): x = fluid.data("input", self.input.shape, dtype=paddle.float32) - y = fluid.layers.conv3d( + y = paddle.static.nn.conv3d( x, self.num_filters, self.filter_size, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py 
index c96fe97fac87fe954ba7f6094b210cad38eab940..7147e924a191a3d5b090ee6bf82381568e15afa3 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py @@ -51,10 +51,10 @@ class TestDygraphLoadStatic(unittest.TestCase): conv3d_in = fluid.data( name='conv3d_in', shape=[None, 3, 12, 32, 32], dtype='float32' ) - conv3d_out_1 = fluid.layers.conv3d( + conv3d_out_1 = paddle.static.nn.conv3d( input=conv3d_in, num_filters=2, filter_size=3, act="relu" ) - conv3d_out_2 = fluid.layers.conv3d( + conv3d_out_2 = paddle.static.nn.conv3d( input=conv3d_in, num_filters=2, filter_size=3, act="relu" ) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 937c0272220016d3bcbf09df702d9a7fbc502233..8301a02a2e21dede401ba4b515880572f146d565 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -1688,7 +1688,9 @@ class TestLayer(LayerTest): images = layers.data( name='pixel', shape=[3, 6, 6, 6], dtype='float32' ) - ret = layers.conv3d(input=images, num_filters=3, filter_size=2) + ret = paddle.static.nn.conv3d( + input=images, num_filters=3, filter_size=2 + ) static_ret = self.get_static_graph_result( feed={'pixel': np.ones([2, 3, 6, 6, 6], dtype='float32')}, fetch_list=[ret], diff --git a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py index f949d7eeef87bc4aa0a005e381d204014b5d8e3b..3b6b1a4363f16d0f7821c403549abd7fc64371fa 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_conv3d_op_xpu.py @@ -525,7 +525,7 @@ class TestConv3DAPI(unittest.TestCase): dtype="float32", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NDHWC, num_filters=3, filter_size=[3, 3, 3], @@ -536,7 +536,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -547,7 +547,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -558,7 +558,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NDHWC, num_filters=3, filter_size=[3, 3, 3], @@ -569,7 +569,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NDHWC", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -580,7 +580,7 @@ class TestConv3DAPI(unittest.TestCase): data_format="NCDHW", ) - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input_NCDHW, num_filters=3, filter_size=[3, 3, 3], @@ -603,7 +603,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: cudnn def run_1(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -619,7 +619,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: data_format def run_2(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=[3, 3, 3], @@ -635,7 +635,7 @@ class TestConv3DAPI_Error(unittest.TestCase): # ValueError: padding def run_3(): - fluid.layers.conv3d( + paddle.static.nn.conv3d( input=input, num_filters=3, filter_size=3, @@ -650,7 +650,7 @@ class 
TestConv3DAPI_Error(unittest.TestCase):
         self.assertRaises(ValueError, run_3)
 
         def run_4():
-            fluid.layers.conv3d(
+            paddle.static.nn.conv3d(
                 input=input,
                 num_filters=3,
                 filter_size=3,
@@ -665,7 +665,7 @@ class TestConv3DAPI_Error(unittest.TestCase):
         self.assertRaises(ValueError, run_4)
 
         def run_5():
-            fluid.layers.conv3d(
+            paddle.static.nn.conv3d(
                 input=input,
                 num_filters=3,
                 filter_size=0,
@@ -688,7 +688,7 @@ class TestConv3DAPI_Error(unittest.TestCase):
         )
 
         def run_6():
-            fluid.layers.conv3d(
+            paddle.static.nn.conv3d(
                 input=x,
                 num_filters=3,
                 filter_size=3,
@@ -704,7 +704,7 @@ class TestConv3DAPI_Error(unittest.TestCase):
 
         # ValueError: groups
         def run_7():
-            fluid.layers.conv3d(
+            paddle.static.nn.conv3d(
                 input=input,
                 num_filters=3,
                 filter_size=3,
@@ -720,7 +720,7 @@ class TestConv3DAPI_Error(unittest.TestCase):
 
         # ValueError: filter num
         def run_8():
-            fluid.layers.conv3d(
+            paddle.static.nn.conv3d(
                 input=input,
                 num_filters=0,
                 filter_size=0,
diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py
index 7f695a48219d45821b6648e4906e909da1f955e8..ef966ecd98ce4a3c8fccf0f7bf8b237faac73916 100755
--- a/python/paddle/static/nn/__init__.py
+++ b/python/paddle/static/nn/__init__.py
@@ -14,6 +14,7 @@
 
 from .common import fc  # noqa: F401
 from .common import deform_conv2d  # noqa: F401
+from .common import conv3d  # noqa: F401
 from .common import conv2d_transpose  # noqa: F401
 from .common import conv3d_transpose  # noqa: F401
 
@@ -22,7 +23,6 @@ from ...fluid.layers import bilinear_tensor_product  # noqa: F401
 from ...fluid.layers import case  # noqa: F401
 from ...fluid.layers import cond  # noqa: F401
 from ...fluid.layers import conv2d  # noqa: F401
-from ...fluid.layers import conv3d  # noqa: F401
 from ...fluid.layers import create_parameter  # noqa: F401
 from ...fluid.layers import crf_decoding  # noqa: F401
 from ...fluid.layers import data_norm  # noqa: F401
diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py
index aee2009edd28f10736e42623b379aba2fb19086b..a7470f2fb2e03f9ee980b9d219fc36f8e396a0a2 100755
--- a/python/paddle/static/nn/common.py
+++ b/python/paddle/static/nn/common.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import paddle
+from paddle.fluid.initializer import Normal
 from paddle.fluid.framework import static_only, Variable, _non_static_mode
 from paddle.fluid.data_feeder import check_dtype
 
@@ -176,6 +177,314 @@ def fc(
     )
 
 
+def conv3d(
+    input,
+    num_filters,
+    filter_size,
+    stride=1,
+    padding=0,
+    dilation=1,
+    groups=None,
+    param_attr=None,
+    bias_attr=None,
+    use_cudnn=True,
+    act=None,
+    name=None,
+    data_format="NCDHW",
+):
+    r"""
+    :api_attr: Static Graph
+
+    The convolution3D layer calculates the output based on the input, filter
+    and the strides, paddings, dilations and groups parameters. Input(Input) and
+    Output(Output) are in NCDHW or NDHWC format, where N is batch size, C is the number of
+    channels, D is the depth of the feature, H is the height of the feature,
+    and W is the width of the feature. Convolution3D is similar to Convolution2D
+    but adds one dimension (depth). If a bias attribute and activation type are
+    provided, bias is added to the output of the convolution, and the
+    corresponding activation function is applied to the final result.
+
+    For each input :math:`X`, the equation is:
+
+    .. math::
+
+        Out = \sigma (W \\ast X + b)
+
+    In the above equation:
+
+    * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
+    * :math:`W`: Filter value, a tensor with MCDHW format.
+    * :math:`\\ast`: Convolution operation.
+    * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
+    * :math:`\\sigma`: Activation function.
+    * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
+
+    Example:
+
+        - Input:
+
+          Input shape: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
+
+          Filter shape: :math:`(C_{out}, C_{in}, D_f, H_f, W_f)`
+
+        - Output:
+          Output shape: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
+
+        Where
+
+        .. math::
+
+            D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
+            H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
+            W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
+
+    Args:
+        input (Tensor): The input is a 5-D Tensor with shape [N, C, D, H, W]; the data
+            type of input is float16, float32 or float64.
+        num_filters (int): The number of filters, which is the same as the number of
+            output channels.
+        filter_size (int|tuple): The filter size. If filter_size is a tuple,
+            it must contain three integers, (filter_size_depth, filter_size_height,
+            filter_size_width). Otherwise, filter_size_depth = filter_size_height = \
+            filter_size_width = filter_size.
+        stride (int|tuple): The stride size. It means the stride in convolution. If stride is a
+            tuple, it must contain three integers, (stride_depth, stride_height, stride_width).
+            Otherwise, stride_depth = stride_height = stride_width = stride. Default: stride = 1.
+        padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
+            on both sides for each dimension. If `padding` is a string, it must be either 'VALID' or
+            'SAME', which is the padding algorithm. If padding size is a tuple or list,
+            it can take one of the following forms: `[pad_depth, pad_height, pad_width]`,
+            `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
+            or, when `data_format` is `"NCDHW"`, `padding` can be in the form
+            `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`,
+            and when `data_format` is `"NDHWC"`, `padding` can be in the form
+            `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
+            Default: padding = 0.
+        dilation (int|tuple): The dilation size. It means the spacing between the kernel points.
+            If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
+            dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
+            Default: dilation = 1.
+        groups (int): The number of groups of the conv3d layer. According to grouped
+            convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
+            the first half of the filters is only connected to the first half
+            of the input channels, while the second half of the filters is only
+            connected to the second half of the input channels. Default: groups=1.
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+            of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
+            will create ParamAttr as param_attr. If it is set to None, the parameter
+            is initialized with :math:`Normal(0.0, std)`, where :math:`std` is
+            :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
+        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv3d.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, conv3d
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized to zero. Default: None.
+        use_cudnn (bool): Whether to use the cudnn kernel. It is valid only when the cudnn
+            library is installed. Default: True.
+        act (str): Activation type. If it is set to None, activation is not appended.
+            Default: None.
+        name (str|None): For detailed information, please refer
+            to :ref:`api_guide_Name`. Usually name does not need to be set and is
+            None by default.
+        data_format (str, optional): Specify the data format of the input, and the data format of the output
+            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
+            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
+            `[batch_size, input_channels, input_depth, input_height, input_width]`.
+
+    Returns:
+        A Variable holding the Tensor produced by conv3d, whose data type is
+        the same as the input's. If act is None, the tensor variable stores the
+        convolution result; if act is not None, the tensor variable stores the
+        convolution result with the activation applied.
+
+    Raises:
+        ValueError: If the type of `use_cudnn` is not bool.
+        ValueError: If `data_format` is not "NCDHW" or "NDHWC".
+        ValueError: If the channel dimension of the input is less than or equal to zero.
+        ValueError: If `padding` is a string, but not "SAME" or "VALID".
+        ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
+            or the element corresponding to the input's channel is not 0.
+        ShapeError: If the input is not a 5-D Tensor.
+        ShapeError: If the input's dimension size and the filter's dimension size are not equal.
+        ShapeError: If the dimension size of input minus the size of `stride` is not 2.
+        ShapeError: If the number of input channels is not equal to filter's channels * groups.
+        ShapeError: If the number of output channels is not divisible by groups.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import numpy as np
+
+            paddle.enable_static()
+            data = paddle.static.data(name='data', shape=[None, 3, 12, 32, 32], dtype='float32')
+            param_attr = paddle.framework.ParamAttr(name='conv3d.weight', initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001)
+            res = paddle.static.nn.conv3d(input=data, num_filters=2, filter_size=3, act="relu", param_attr=param_attr)
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor(place)
+            exe.run(paddle.static.default_startup_program())
+            x = np.random.rand(1, 3, 12, 32, 32).astype("float32")
+            output = exe.run(feed={"data": x}, fetch_list=[res])
+            print(output)
+    """
+
+    l_type = 'conv3d'
+    assert param_attr is not False, "param_attr should not be False here."
+    helper = LayerHelper(l_type, **locals())
+    dtype = helper.input_dtype()
+
+    if not isinstance(use_cudnn, bool):
+        raise ValueError(
+            "Attr(use_cudnn) should be True or False. Received "
+            "Attr(use_cudnn): %s. " % str(use_cudnn)
+        )
+
+    if data_format not in ["NCDHW", "NDHWC"]:
+        raise ValueError(
+            "Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
+            "Attr(data_format): %s." % str(data_format)
+        )
+
+    channel_last = data_format == "NDHWC"
+    if len(input.shape) != 5:
+        raise ValueError(
+            "Input should be 5D tensor, but received input with the shape of {}".format(
+                input.shape
+            )
+        )
+    num_channels = input.shape[4] if channel_last else input.shape[1]
+    if num_channels < 0:
+        raise ValueError(
+            "The channel dimension of the input(%s) should be defined. "
+            "Received: %s." % (str(input.shape), str(num_channels))
+        )
+
+    if groups is None:
+        num_filter_channels = num_channels
+    elif groups <= 0:
+        raise ValueError(
+            "The groups of conv3d should be greater than 0. Received groups: {}".format(
+                groups
+            )
+        )
+    else:
+        if num_channels % groups != 0:
+            raise ValueError(
+                "The number of input channels must be divisible by Attr(groups). "
+                "Received: number of channels(%s), groups(%s)."
+                % (str(num_channels), str(groups))
+            )
+        num_filter_channels = num_channels // groups
+
+    filter_size = utils.convert_to_list(filter_size, 3, 'filter_size')
+    stride = utils.convert_to_list(stride, 3, 'stride')
+    dilation = utils.convert_to_list(dilation, 3, 'dilation')
+
+    def _update_padding(padding, data_format):
+        def is_list_or_tuple(ele):
+            if isinstance(ele, list) or isinstance(ele, tuple):
+                return True
+            return False
+
+        if is_list_or_tuple(padding) and len(padding) == 5:
+            if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"):
+                if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
+                    raise ValueError(
+                        "Non-zero padding(%s) in the batch or channel dimensions "
+                        "is not supported." % str(padding)
+                    )
+                padding = padding[2:5]
+                padding = [ele for a_list in padding for ele in a_list]
+            elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"):
+                if not (padding[0] == [0, 0] and padding[4] == [0, 0]):
+                    raise ValueError(
+                        "Non-zero padding(%s) in the batch or channel dimensions "
+                        "is not supported." % str(padding)
+                    )
+                padding = padding[1:4]
+                padding = [ele for a_list in padding for ele in a_list]
+            padding = utils.convert_to_list(padding, 6, 'padding')
+            if utils._is_symmetric_padding(padding, 3):
+                padding = [padding[0], padding[2], padding[4]]
+        elif is_list_or_tuple(padding) and len(padding) == 6:
+            padding = utils.convert_to_list(padding, 6, 'padding')
+            if utils._is_symmetric_padding(padding, 3):
+                padding = [padding[0], padding[2], padding[4]]
+        else:
+            padding = utils.convert_to_list(padding, 3, 'padding')
+
+        return padding
+
+    padding_algorithm = "EXPLICIT"
+    if isinstance(padding, str):
+        padding = padding.upper()
+        if padding not in ["SAME", "VALID"]:
+            raise ValueError(
+                "Unknown padding: '%s'. It can only be 'SAME' or 'VALID'."
+                % str(padding)
+            )
+        if padding == "VALID":
+            padding_algorithm = "VALID"
+            padding = [0, 0, 0]
+        elif padding == "SAME":
+            padding_algorithm = "SAME"
+            padding = [0, 0, 0]
+
+    padding = _update_padding(padding, data_format)
+
+    input_shape = input.shape
+    filter_shape = [num_filters, num_filter_channels] + filter_size
+
+    def _get_default_param_initializer():
+        filter_elem_num = (
+            filter_size[0] * filter_size[1] * filter_size[2] * num_channels
+        )
+        if filter_elem_num <= 0:
+            raise ValueError(
+                "Invalid filter number, expected number is larger than 0, but"
+                " received {}, please check the input shape and "
+                "filter size.".format(filter_elem_num)
+            )
+
+        std = (2.0 / filter_elem_num) ** 0.5
+        return Normal(0.0, std, 0)
+
+    filter_param = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=filter_shape,
+        dtype=dtype,
+        default_initializer=_get_default_param_initializer(),
+    )
+
+    pre_bias = helper.create_variable_for_type_inference(dtype)
+
+    helper.append_op(
+        type=l_type,
+        inputs={
+            'Input': input,
+            'Filter': filter_param,
+        },
+        outputs={"Output": pre_bias},
+        attrs={
+            'strides': stride,
+            'paddings': padding,
+            'dilations': dilation,
+            'groups': groups,
+            'use_cudnn': use_cudnn,
+            'use_mkldnn': False,
+            "padding_algorithm": padding_algorithm,
+            "data_format": data_format,
+        },
+    )
+
+    if data_format == 'NCDHW':
+        pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
+    else:
+        pre_act = helper.append_bias_op(pre_bias, dim_start=4, dim_end=5)
+
+    return helper.append_activation(pre_act)
+
+
 def conv2d_transpose(
     input,
     num_filters,
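
Usage after this change: call sites keep the exact argument list and only swap the module path, as the test updates above show (fluid.layers.conv3d -> paddle.static.nn.conv3d). Below is a minimal end-to-end sketch of the migrated API, mirroring the docstring example; the printed shape assumes the defaults stride=1 and padding=0.

    import numpy as np
    import paddle

    paddle.enable_static()

    # Identical arguments to the old fluid.layers.conv3d; only the namespace moved.
    data = paddle.static.data(
        name='data', shape=[None, 3, 12, 32, 32], dtype='float32'
    )
    conv = paddle.static.nn.conv3d(
        input=data, num_filters=2, filter_size=3, act='relu'
    )

    exe = paddle.static.Executor(paddle.CPUPlace())
    exe.run(paddle.static.default_startup_program())
    x = np.random.rand(1, 3, 12, 32, 32).astype('float32')
    (out,) = exe.run(feed={'data': x}, fetch_list=[conv])
    # With stride=1 and padding=0, each spatial dim shrinks by filter_size - 1.
    print(out.shape)  # (1, 2, 10, 30, 30)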