Unverified commit af599121, authored by xiaoguoguo626807 and committed by GitHub

【fluid API clear】move conv2d (#49025)

* move conv2d

* code-style and build bug

* restore c++

* code-style

* rm fluid init conv2d

* remove nn.conv2d

* modify doc
Parent 0978bca4
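For reference, a minimal before/after sketch of the call-site migration this commit performs across the touched files (the variable names are illustrative, not taken from any one file; the expected shape follows the docstring example further below):

import paddle

paddle.enable_static()
data = paddle.static.data(name='data', shape=[None, 3, 32, 32], dtype='float32')

# Before: the layer lived under the fluid namespace.
# conv = fluid.layers.conv2d(input=data, num_filters=2, filter_size=3, act="relu")

# After: the same layer is called through paddle.static.nn.
conv = paddle.static.nn.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
print(conv.shape)  # [-1, 2, 30, 30]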
......@@ -1974,7 +1974,7 @@ def fused_bn_add_act(
with fluid.program_guard(main_program, startup_program):
x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
y = fluid.layers.data(name="y", shape=[1], dtype='int64')
conv1_1 = fluid.layers.conv2d(
conv1_1 = paddle.static.nn.conv2d(
input=x,
filter_size=3,
num_filters=32,
......@@ -1983,7 +1983,7 @@ def fused_bn_add_act(
act=None,
bias_attr=False,
data_format='NHWC')
conv1_2 = fluid.layers.conv2d(
conv1_2 = paddle.static.nn.conv2d(
input=x,
filter_size=3,
num_filters=32,
......
......@@ -50,7 +50,7 @@ def residual_block(num, quant_skip_pattern=None):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = fluid.layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
......@@ -709,7 +709,7 @@ def quant_dequant_residual_block(num, quant_skip_pattern=None):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = fluid.layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
......
......@@ -32,7 +32,7 @@ def resnet_cifar10(input, depth=32):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = fluid.layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
......
......@@ -50,7 +50,7 @@ def resnet_cifar10(input, depth=32):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = fluid.layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
......
......@@ -41,7 +41,7 @@ def residual_block(num):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = fluid.layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
......
......@@ -65,7 +65,6 @@ from collections.abc import Iterable
__all__ = [
'fc',
'embedding',
'conv2d',
'row_conv',
'layer_norm',
'spectral_norm',
......@@ -745,339 +744,6 @@ def _pull_box_sparse(
return outs
def conv2d(
input,
num_filters,
filter_size,
stride=1,
padding=0,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None,
name=None,
data_format="NCHW",
):
r"""
:api_attr: Static Graph
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW or NHWC format, where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
Filter is in MCHW format, where M is the number of output image channels,
C is the number of input image channels, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input image channels divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW or NHWC format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
input (Tensor): The input is 4-D Tensor with shape [N, C, H, W], the data type
of input is float16 or float32 or float64.
num_filters(int): The number of filters, which is equal to the number of
output image channels.
filter_size (int|tuple): The filter size. If filter_size
is a tuple, it must contain two integers, (filter_size_height,
filter_size_width). Otherwise, filter_size_height = filter_size_width =\
filter_size.
stride (int|tuple): The stride size. It means the stride in convolution.
If stride is a tuple, it must contain two integers, (stride_height, stride_width).
Otherwise, stride_height = stride_width = stride. Default: stride = 1.
padding (string|int|list|tuple): The padding size. It means the number of zero-paddings
on both sides for each dimension. If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
`data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
[pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
dilation (int|tuple): The dilation size. It means the spacing between the kernel
points. If dilation is a tuple, it must contain two integers, (dilation_height,
dilation_width). Otherwise, dilation_height = dilation_width = dilation.
Default: dilation = 1.
groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act (str): Activation type, if it is set to None, activation is not appended.
Default: None
name(str|None): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
Returns:
A Tensor representing the conv2d, whose data type is the
same with input. If act is None, the tensor storing the convolution
result, and if act is not None, the tensor storing convolution
and non-linearity activation result.
Raises:
ValueError: If the type of `use_cudnn` is not bool.
ValueError: If `data_format` is not "NCHW" or "NHWC".
ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
or the element corresponding to the input's channel is not 0.
ShapeError: If the input is not 4-D Tensor.
ShapeError: If the input's dimension size and filter's dimension size are not equal.
ShapeError: If the dimension size of input minus the size of `stride` is not 2.
ShapeError: If the number of input channels is not equal to filter's channels * groups.
ShapeError: If the number of output channels is not divisible by groups.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name='data', shape=[None, 3, 32, 32], dtype='float32')
conv2d = paddle.static.nn.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
print(conv2d.shape) # [-1, 2, 30, 30]
"""
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], 'conv2d'
)
if len(input.shape) != 4:
raise ValueError(
"Input size should be 4, "
"but received {}".format(len(input.shape))
)
num_channels = input.shape[1]
if not isinstance(use_cudnn, bool):
raise ValueError(
"Attr(use_cudnn) should be True or False. Received "
"Attr(use_cudnn): %s. " % str(use_cudnn)
)
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format)
)
channel_last = data_format == "NHWC"
num_channels = input.shape[3] if channel_last else input.shape[1]
if num_channels < 0:
raise ValueError(
"The channel dimmention of the input(%s) should be defined. "
"Received: %s." % (str(input.shape), str(num_channels))
)
assert param_attr is not False, "param_attr should not be False here."
if groups is None:
num_filter_channels = num_channels
elif groups <= 0:
raise ValueError(
"the groups of input must be greater than 0, "
"but received the groups of input is {}".format(groups)
)
else:
if num_channels % groups != 0:
raise ValueError(
"the channel of input must be divisible by groups,"
"received: the channel of input is {}, the shape of input is {}"
", the groups is {}".format(num_channels, input.shape, groups)
)
num_filter_channels = num_channels // groups
l_type = 'conv2d'
if (
num_channels == groups
and num_filters % num_channels == 0
and not use_cudnn
):
l_type = 'depthwise_conv2d'
if (
num_channels == groups
and num_filters % num_channels == 0
and core.is_compiled_with_rocm()
):
l_type = 'depthwise_conv2d'
# NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
if core.is_compiled_with_npu():
if num_channels == groups and num_channels == num_filters:
l_type = 'depthwise_conv2d'
else:
l_type = 'conv2d'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
stride = utils.convert_to_list(stride, 2, 'stride')
dilation = utils.convert_to_list(dilation, 2, 'dilation')
# padding
def _update_padding(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 4:
if is_list_or_tuple(padding[0]) and (data_format == "NCHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
raise ValueError(
"Non-zero padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding)
)
padding = padding[2:4]
padding = [ele for a_list in padding for ele in a_list]
elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"):
if not (padding[0] == [0, 0] and padding[3] == [0, 0]):
raise ValueError(
"Non-zero padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding)
)
padding = padding[1:3]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 4, 'padding')
if utils._is_symmetric_padding(padding, 2):
padding = [padding[0], padding[2]]
else:
padding = utils.convert_to_list(padding, 2, 'padding')
return padding
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown padding: '%s'. It can only be 'SAME' or 'VALID'."
% str(padding)
)
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
padding = _update_padding(padding, data_format)
filter_shape = [num_filters, int(num_filter_channels)] + filter_size
def _get_default_param_initializer():
filter_elem_num = filter_size[0] * filter_size[1] * num_channels
if filter_elem_num <= 0:
raise ValueError(
"Invalid filter number, excepted number is larger than 0, but"
" received {}, please check the input shape and "
"filter size.".format(filter_elem_num)
)
std = (2.0 / filter_elem_num) ** 0.5
return Normal(0.0, std, 0)
filter_param = helper.create_parameter(
attr=helper.param_attr,
shape=filter_shape,
dtype=dtype,
default_initializer=_get_default_param_initializer(),
)
pre_bias = helper.create_variable_for_type_inference(dtype)
if (
core.is_compiled_with_cuda()
and paddle.fluid.get_flags("FLAGS_conv2d_disable_cudnn")[
"FLAGS_conv2d_disable_cudnn"
]
):
use_cudnn = False
helper.append_op(
type=l_type,
inputs={
'Input': input,
'Filter': filter_param,
},
outputs={"Output": pre_bias},
attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'use_cudnn': use_cudnn,
'use_mkldnn': False,
'fuse_relu_before_depthwise_conv': False,
"padding_algorithm": padding_algorithm,
"data_format": data_format,
},
)
if data_format == 'NCHW':
pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
else:
pre_act = helper.append_bias_op(pre_bias, dim_start=3, dim_end=4)
return helper.append_activation(pre_act)
@templatedoc()
def layer_norm(
input,
......
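As a quick sanity check on the H_out/W_out formula carried in the conv2d docstring removed above, a small stand-alone helper (hypothetical, for illustration only) reproduces the computation:

def conv2d_out_size(in_size, filter_size, stride=1, padding=0, dilation=1):
    # out = (in + 2*padding - (dilation*(filter - 1) + 1)) // stride + 1
    return (in_size + 2 * padding - (dilation * (filter_size - 1) + 1)) // stride + 1

# Matches the docstring example: a 32x32 input with a 3x3 filter yields 30x30.
assert conv2d_out_size(32, 3) == 30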
......@@ -119,7 +119,7 @@ def simple_img_conv_pool(
pool_stride=2,
act="relu")
"""
conv_out = layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
......@@ -246,7 +246,7 @@ def img_conv_group(
if conv_with_batchnorm[i]:
local_conv_act = None
tmp = layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=tmp,
num_filters=conv_num_filter[i],
filter_size=conv_filter_size[i],
......
......@@ -88,7 +88,7 @@ def npu_profiler(output_file, config=None):
epoc = 8
dshape = [4, 3, 28, 28]
data = fluid.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
conv = paddle.static.nn.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.NPUPlace(0)
exe = fluid.Executor(place)
......@@ -338,7 +338,7 @@ def profiler(
epoc = 8
dshape = [4, 3, 28, 28]
data = fluid.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
conv = paddle.static.nn.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.CPUPlace()
exe = fluid.Executor(place)
......
......@@ -31,7 +31,7 @@ def resnet_cifar10(input, depth=32):
def conv_bn_layer(
input, ch_out, filter_size, stride, padding, act='relu', bias_attr=False
):
tmp = fluid.layers.conv2d(
tmp = paddle.static.nn.conv2d(
input=input,
filter_size=filter_size,
num_filters=ch_out,
......
......@@ -35,7 +35,7 @@ class TestASPHelperPruningBase(unittest.TestCase):
name='img', shape=[None, 3, 32, 32], dtype='float32'
)
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
hidden = fluid.layers.conv2d(
hidden = paddle.static.nn.conv2d(
input=img, num_filters=4, filter_size=3, padding=2, act="relu"
)
hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
......
......@@ -202,7 +202,7 @@ class TestASPStaticCustomerizedPruneFunc(unittest.TestCase):
name='img', shape=[None, 3, 32, 32], dtype='float32'
)
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
hidden = fluid.layers.conv2d(
hidden = paddle.static.nn.conv2d(
input=img, num_filters=4, filter_size=3, padding=2, act="relu"
)
hidden = fluid.layers.fc(
......
......@@ -35,7 +35,7 @@ class TestASPStaticOptimize(unittest.TestCase):
name='img', shape=[None, 3, 24, 24], dtype='float32'
)
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
hidden = fluid.layers.conv2d(
hidden = paddle.static.nn.conv2d(
input=img, num_filters=4, filter_size=3, padding=2, act="relu"
)
hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
......
......@@ -35,7 +35,7 @@ class TestASPStaticPruningBase(unittest.TestCase):
name='img', shape=[None, 3, 24, 24], dtype='float32'
)
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
hidden = fluid.layers.conv2d(
hidden = paddle.static.nn.conv2d(
input=img, num_filters=2, filter_size=3, padding=2, act="relu"
)
hidden = fluid.layers.fc(input=hidden, size=32, act='softmax')
......
......@@ -136,7 +136,7 @@ class TestASPStaticOptimize(unittest.TestCase):
name='img', shape=[None, 3, 32, 32], dtype='float32'
)
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
hidden = fluid.layers.conv2d(
hidden = paddle.static.nn.conv2d(
input=img, num_filters=4, filter_size=3, padding=2, act="relu"
)
hidden = fluid.layers.fc(input=hidden, size=32, act='relu')
......
......@@ -164,7 +164,7 @@ class SE_ResNeXt:
def conv_bn_layer(
self, input, num_filters, filter_size, stride=1, groups=1, act=None
):
conv = fluid.layers.conv2d(
conv = paddle.static.nn.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
......
......@@ -215,7 +215,7 @@ class PtbModel(paddle.nn.Layer):
)
loss = paddle.reshape(loss, shape=[-1, self.num_steps])
loss = paddle.mean(loss, axis=[0])
loss = paddle.paddle.sum(loss)
loss = paddle.sum(loss)
return loss, last_hidden, last_cell
......
......@@ -56,7 +56,7 @@ class TestBase(IPUOpTest):
x = paddle.static.data(
name=self.feed_list[0], shape=self.feed_shape[0], dtype='float32'
)
x = paddle.fluid.layers.conv2d(x, **self.attrs)
x = paddle.static.nn.conv2d(x, **self.attrs)
self.fetch_list = [x.name]
def run_model(self, exec_mode):
......
......@@ -54,7 +54,7 @@ class TestBase(IPUOpTest):
shape=self.feed_shape[0],
dtype=self.feed_dtype[0],
)
out = paddle.fluid.layers.conv2d(x, num_filters=3, filter_size=3)
out = paddle.static.nn.conv2d(x, num_filters=3, filter_size=3)
out = paddle.fluid.layers.Print(out, **self.attrs)
if self.is_training:
......
......@@ -134,7 +134,7 @@ class TestMin(TestMean):
class TestSum(TestMean):
def set_test_op(self):
self.op = paddle.paddle.sum
self.op = paddle.sum
class TestLogsumexp(TestMean):
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
from paddle.fluid.core import PassVersionChecker
......@@ -28,7 +29,7 @@ class ConvActivationMkldnnFusePassTest(InferencePassTest):
data = fluid.data(
name="data", shape=[-1, 3, 100, 100], dtype="float32"
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
......
......@@ -33,7 +33,7 @@ class ConvBiasMkldnnFusePassSamePadTest(InferencePassTest):
initializer=fluid.initializer.Xavier(uniform=False),
learning_rate=0.001,
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......@@ -66,7 +66,7 @@ class ConvBiasMkldnnFusePassValidPadTest(ConvBiasMkldnnFusePassSamePadTest):
initializer=fluid.initializer.Xavier(uniform=False),
learning_rate=0.001,
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......@@ -92,7 +92,7 @@ class ConvBiasMkldnnFusePassExplictPadTest(ConvBiasMkldnnFusePassSamePadTest):
initializer=fluid.initializer.Xavier(uniform=False),
learning_rate=0.001,
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......@@ -117,7 +117,7 @@ class ConvBiasMkldnnFusePassGroupTest(ConvBiasMkldnnFusePassSamePadTest):
initializer=fluid.initializer.Xavier(uniform=False),
learning_rate=0.001,
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......@@ -148,7 +148,7 @@ class ConvBiasMkldnnFusePassDialtionsGroupsTest(
initializer=fluid.initializer.Xavier(uniform=False),
learning_rate=0.001,
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......
......@@ -33,7 +33,7 @@ class TensorRTSubgraphPassConvTest(InferencePassTest):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32"
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
......@@ -210,7 +210,7 @@ class DynamicShapeTensorRTSubgraphPassConvTest(InferencePassTest):
data = fluid.data(
name="data", shape=[-1, 6, -1, -1], dtype="float32"
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
......
......@@ -35,7 +35,7 @@ class QuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest):
data_reshape = paddle.reshape(self.data, shape=[1, 4, 14, 14])
self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
label_shape = paddle.reshape(self.label, shape=[1, 1, 1])
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data_reshape,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
......@@ -150,7 +150,7 @@ class DynamicShapeQuantDequantTensorRTSubgraphPassConvTest(QuantDequantTest):
data_reshape = paddle.reshape(self.data, shape=[1, 4, 14, 14])
self.label = fluid.data(name='label', shape=[1, 1], dtype='int64')
label_shape = paddle.reshape(self.label, shape=[1, 1, 1])
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data_reshape,
num_filters=self.conv_num_filters,
filter_size=self.conv_filter_size,
......
......@@ -17,6 +17,7 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import AnalysisConfig
......@@ -28,7 +29,7 @@ class TRTDynamicShapeTest(InferencePassTest):
data = fluid.data(
name="data", shape=[-1, 3, 16, 16], dtype="float32"
)
out = fluid.layers.conv2d(
out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......
......@@ -34,7 +34,7 @@ class TRTTunedDynamicShapeTest(unittest.TestCase):
data = fluid.data(
name="data", shape=[-1, 6, 64, 64], dtype="float32"
)
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......
......@@ -76,7 +76,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
dtype=self.dtype,
append_batch_size=False,
)
conv = fluid.layers.conv2d(
conv = paddle.static.nn.conv2d(
input=data,
num_filters=32,
filter_size=1,
......
......@@ -80,7 +80,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
dtype=self.dtype,
append_batch_size=False,
)
conv = fluid.layers.conv2d(
conv = paddle.static.nn.conv2d(
input=data,
num_filters=32,
filter_size=1,
......
......@@ -68,7 +68,7 @@ def squeeze_excitation(input, num_channels, reduction_ratio):
def conv_bn_layer(
input, num_filters, filter_size, stride=1, groups=1, act=None
):
conv = fluid.layers.conv2d(
conv = paddle.static.nn.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
......
......@@ -613,7 +613,7 @@ class TestAdamOpV2(unittest.TestCase):
with fluid.program_guard(train_prog, startup):
with fluid.unique_name.guard():
data = fluid.data(name="data", shape=shape)
conv = fluid.layers.conv2d(data, 8, 3)
conv = paddle.static.nn.conv2d(data, 8, 3)
loss = paddle.mean(conv)
beta1 = paddle.static.create_global_var(
......
......@@ -52,7 +52,7 @@ class TestAdamaxAPI(unittest.TestCase):
with fluid.program_guard(train_prog, startup):
with fluid.unique_name.guard():
data = fluid.data(name="data", shape=shape)
conv = fluid.layers.conv2d(data, 8, 3)
conv = paddle.static.nn.conv2d(data, 8, 3)
loss = paddle.mean(conv)
beta1 = 0.85
beta2 = 0.95
......
......@@ -209,7 +209,7 @@ class TestAdamWOp(unittest.TestCase):
with fluid.program_guard(train_prog, startup):
with fluid.unique_name.guard():
data = fluid.data(name="data", shape=shape)
conv = fluid.layers.conv2d(data, 8, 3)
conv = paddle.static.nn.conv2d(data, 8, 3)
loss = paddle.mean(conv)
beta1 = paddle.static.create_global_var(
......
......@@ -285,7 +285,7 @@ class TestGradientsError(unittest.TestCase):
def test_error(self):
x = fluid.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
x.stop_gradient = False
conv = fluid.layers.conv2d(x, 4, 1, bias_attr=False)
conv = paddle.static.nn.conv2d(x, 4, 1, bias_attr=False)
y = F.relu(conv)
with self.assertRaises(TypeError):
......
......@@ -40,7 +40,7 @@ class TestConv2DAPI(unittest.TestCase):
dtype="float32",
)
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input_NHWC,
num_filters=3,
filter_size=[3, 3],
......@@ -51,7 +51,7 @@ class TestConv2DAPI(unittest.TestCase):
data_format="NCHW",
)
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input_NCHW,
num_filters=3,
filter_size=[3, 3],
......@@ -62,7 +62,7 @@ class TestConv2DAPI(unittest.TestCase):
data_format="NCHW",
)
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input_NCHW,
num_filters=3,
filter_size=[3, 3],
......@@ -73,7 +73,7 @@ class TestConv2DAPI(unittest.TestCase):
data_format="NCHW",
)
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input_NHWC,
num_filters=3,
filter_size=[3, 3],
......@@ -84,7 +84,7 @@ class TestConv2DAPI(unittest.TestCase):
data_format="NHWC",
)
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input_NCHW,
num_filters=3,
filter_size=[3, 3],
......@@ -95,7 +95,7 @@ class TestConv2DAPI(unittest.TestCase):
data_format="NCHW",
)
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input_NCHW,
num_filters=3,
filter_size=[3, 3],
......@@ -129,7 +129,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: cudnn
def run_1():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=3,
filter_size=[3, 3],
......@@ -145,7 +145,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: data_format
def run_2():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=3,
filter_size=[3, 3],
......@@ -161,7 +161,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: padding
def run_3():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=3,
filter_size=[3, 3],
......@@ -176,7 +176,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
self.assertRaises(ValueError, run_3)
def run_4():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=3,
filter_size=[3, 3],
......@@ -191,7 +191,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
self.assertRaises(ValueError, run_4)
def run_5():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=3,
filter_size=[3, 3],
......@@ -214,7 +214,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
)
def run_6():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=x,
num_filters=3,
filter_size=[3, 3],
......@@ -230,7 +230,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: groups
def run_7():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=3,
filter_size=[3, 3],
......@@ -246,7 +246,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: filter num
def run_8():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=0,
filter_size=0,
......@@ -262,7 +262,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: groups
def run_9():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=0,
filter_size=0,
......@@ -278,7 +278,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: stride
def run_10():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=1,
filter_size=1,
......@@ -302,7 +302,7 @@ class TestConv2DAPI_Error(unittest.TestCase):
# ValueError: cudnn
def run_1():
fluid.layers.conv2d(
paddle.static.nn.conv2d(
input=input,
num_filters=0,
filter_size=0,
......@@ -331,7 +331,7 @@ class TestConv2DEnviron(unittest.TestCase):
name="inputs",
dtype="float32",
)
result = fluid.layers.conv2d(
result = paddle.static.nn.conv2d(
input=inputs,
num_filters=4,
filter_size=[3, 3],
......
......@@ -127,7 +127,7 @@ class Conv2DTestCase(unittest.TestCase):
else:
padding = self.padding
y_var = fluid.layers.conv2d(
y_var = paddle.static.nn.conv2d(
x_var,
self.num_filters,
self.filter_size,
......
......@@ -706,7 +706,7 @@ class TestConv2DOpError(unittest.TestCase):
x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()
)
fluid.layers.conv2d(x1, 1, 1)
paddle.static.nn.conv2d(x1, 1, 1)
self.assertRaises(TypeError, test_Variable)
......@@ -716,7 +716,7 @@ class TestConv2DOpError(unittest.TestCase):
x2 = fluid.layers.data(
name='x2', shape=[3, 4, 5, 6], dtype="int32"
)
fluid.layers.conv2d(x2, 1, 1)
paddle.static.nn.conv2d(x2, 1, 1)
self.assertRaises(TypeError, test_dtype)
......
......@@ -31,7 +31,7 @@ class TestConvDoubleGradCheck(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(x, 2, 1, groups=1, bias_attr=False)
y = paddle.static.nn.conv2d(x, 2, 1, groups=1, bias_attr=False)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
w = fluid.default_main_program().global_block().all_parameters()
......@@ -58,7 +58,7 @@ class TestConvDoubleGradCheckTest0(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(x, 2, 1, bias_attr=False)
y = paddle.static.nn.conv2d(x, 2, 1, bias_attr=False)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
w = fluid.default_main_program().global_block().all_parameters()
......@@ -84,7 +84,7 @@ class TestConvDoubleGradCheckTest1(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(x, 2, 1, padding=1, bias_attr=False)
y = paddle.static.nn.conv2d(x, 2, 1, padding=1, bias_attr=False)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
w = fluid.default_main_program().global_block().all_parameters()
......@@ -163,7 +163,7 @@ class TestConv2DoubleGradCheck_AsyPadding(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(
y = paddle.static.nn.conv2d(
input=x,
num_filters=2,
filter_size=1,
......@@ -196,7 +196,7 @@ class TestConv2DoubleGradCheck_PaddingSAME(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(
y = paddle.static.nn.conv2d(
input=x,
num_filters=2,
filter_size=1,
......@@ -229,7 +229,7 @@ class TestConv2DoubleGradCheck_PaddingVALID(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(
y = paddle.static.nn.conv2d(
input=x,
num_filters=2,
filter_size=1,
......@@ -262,7 +262,7 @@ class TestConv2DoubleGradCheck_ChannelLast(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(
y = paddle.static.nn.conv2d(
input=x,
num_filters=2,
filter_size=1,
......@@ -297,7 +297,7 @@ class TestConv2DoubleGradCheck_ChannelLast_AsyPadding(unittest.TestCase):
eps = 0.005
dtype = np.float32 if fluid.core.is_compiled_with_rocm() else np.float64
x = layers.data('x', shape, False, dtype)
y = layers.conv2d(
y = paddle.static.nn.conv2d(
input=x,
num_filters=2,
filter_size=1,
......@@ -507,7 +507,7 @@ class TestDepthWiseConvDoubleGradCheck(unittest.TestCase):
# use_cudnn == False
# groups == filters
# num_filters % num_channels == 0
y = layers.conv2d(
y = paddle.static.nn.conv2d(
x, shape[1], 1, groups=shape[1], bias_attr=False, use_cudnn=False
)
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
......
......@@ -89,7 +89,7 @@ class TestFunctionalConv2D(TestCase):
(-1, self.in_channels, -1, -1),
dtype=self.dtype,
)
y = fluid.layers.conv2d(
y = paddle.static.nn.conv2d(
x,
self.out_channels,
self.filter_shape,
......@@ -507,7 +507,7 @@ class TestFunctionalConv2DErrorCase12(TestCase):
with fluid.unique_name.guard():
with fluid.program_guard(main, start):
x = fluid.data("input", self.input.shape, dtype=paddle.float32)
y = fluid.layers.conv2d(
y = paddle.static.nn.conv2d(
x,
self.num_filters,
self.filter_size,
......
......@@ -527,7 +527,7 @@ class TestFunctionalConv2DErrorCase10(TestCase):
with fluid.unique_name.guard():
with fluid.program_guard(main, start):
x = fluid.data("input", self.input.shape, dtype=paddle.float32)
y = fluid.layers.conv2d(
y = paddle.static.nn.conv2d(
x,
self.num_filters,
self.filter_size,
......
......@@ -23,7 +23,7 @@ class TestFuseBatchNormActPass(unittest.TestCase):
with fluid.program_guard(main_program, startup_program):
x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
y = fluid.layers.data(name="y", shape=[1], dtype='int64')
hidden1 = fluid.layers.conv2d(
hidden1 = paddle.static.nn.conv2d(
input=x,
filter_size=3,
num_filters=16,
......
......@@ -66,7 +66,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
with fluid.program_guard(main_program, startup_program):
x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
y = fluid.layers.data(name="y", shape=[1], dtype='int64')
conv1_1 = fluid.layers.conv2d(
conv1_1 = paddle.static.nn.conv2d(
input=x,
filter_size=3,
num_filters=32,
......@@ -77,7 +77,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
bias_attr=False,
data_format='NHWC',
)
conv1_2 = fluid.layers.conv2d(
conv1_2 = paddle.static.nn.conv2d(
input=x,
filter_size=3,
num_filters=32,
......@@ -125,7 +125,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
with fluid.program_guard(main_program, startup_program):
x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
y = fluid.layers.data(name="y", shape=[1], dtype='int64')
conv1_1 = fluid.layers.conv2d(
conv1_1 = paddle.static.nn.conv2d(
input=x,
filter_size=3,
num_filters=32,
......@@ -143,7 +143,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
act=None,
data_layout='NHWC',
)
conv1_2 = fluid.layers.conv2d(
conv1_2 = paddle.static.nn.conv2d(
input=conv1_1,
filter_size=1,
num_filters=32,
......
......@@ -29,7 +29,7 @@ def norm(*args, **kargs):
def sep_conv(input, channel, stride, filter, dilation=1, act=None):
# with scope('depthwise'):
input = fluid.layers.conv2d(
input = paddle.static.nn.conv2d(
input,
input.shape[1],
filter,
......@@ -44,7 +44,7 @@ def sep_conv(input, channel, stride, filter, dilation=1, act=None):
if act:
input = act(input)
# with scope('pointwise'):
input = fluid.layers.conv2d(
input = paddle.static.nn.conv2d(
input, channel, 1, 1, groups=1, padding=0, bias_attr=False
)
input = norm(input)
......
......@@ -34,10 +34,10 @@ class TestDygraphLoadStatic(unittest.TestCase):
fc_out1 = fluid.layers.fc(a, 10)
fc_out2 = fluid.layers.fc(a, 20)
conv_out_1 = fluid.layers.conv2d(
conv_out_1 = paddle.static.nn.conv2d(
conv_in, num_filters=10, filter_size=5, act="relu"
)
conv_out_2 = fluid.layers.conv2d(
conv_out_2 = paddle.static.nn.conv2d(
conv_in, num_filters=10, filter_size=5, act="relu"
)
......
......@@ -80,7 +80,7 @@ def get_sample_model():
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
data = fluid.data(name="data", shape=[-1, 6, 64, 64], dtype="float32")
conv_out = fluid.layers.conv2d(
conv_out = paddle.static.nn.conv2d(
input=data,
num_filters=3,
filter_size=3,
......
......@@ -661,7 +661,7 @@ class TestSetGlobalInitializer(unittest.TestCase):
with fluid.program_guard(main_prog, startup_prog):
x = fluid.data(name="x", shape=[1, 3, 32, 32])
# default initializer of param in layers.conv2d is NormalInitializer
conv = fluid.layers.conv2d(x, 5, 3)
conv = paddle.static.nn.conv2d(x, 5, 3)
block = startup_prog.global_block()
self.assertEqual(len(block.ops), 2)
......@@ -689,7 +689,7 @@ class TestSetGlobalInitializer(unittest.TestCase):
with fluid.program_guard(main_prog, startup_prog):
x = fluid.data(name="x", shape=[1, 3, 32, 32])
# default initializer of bias in layers.conv2d is ConstantInitializer
conv = fluid.layers.conv2d(x, 5, 3)
conv = paddle.static.nn.conv2d(x, 5, 3)
block = startup_prog.global_block()
self.assertEqual(len(block.ops), 2)
......
......@@ -2805,7 +2805,7 @@ class TestBook(LayerTest):
images = layers.data(
name='pixel', shape=[3, 48, 48], dtype='float32'
)
return layers.conv2d(
return paddle.static.nn.conv2d(
input=images, num_filters=3, filter_size=[4, 4]
)
......
......@@ -25,10 +25,10 @@ from paddle.fluid import compiler
def Lenet(data, class_dim):
conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None)
conv1 = paddle.static.nn.conv2d(data, 4, 5, 1, act=None)
bn1 = paddle.static.nn.batch_norm(conv1, act='relu')
pool1 = paddle.nn.functional.max_pool2d(bn1, 2, 2)
conv2 = fluid.layers.conv2d(pool1, 16, 5, 1, act=None)
conv2 = paddle.static.nn.conv2d(pool1, 16, 5, 1, act=None)
bn2 = paddle.static.nn.batch_norm(conv2, act='relu')
pool2 = paddle.nn.functional.max_pool2d(bn2, 2, 2)
......
......@@ -1437,7 +1437,7 @@ class TestGradientTruncated(unittest.TestCase):
# set_value_grad_op will not be run during backward.
y, value = op(x)
y2 = y + 1
loss = paddle.paddle.sum(y2)
loss = paddle.sum(y2)
sgd = paddle.optimizer.Adam()
sgd.minimize(loss)
place = (
......
......@@ -73,7 +73,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
dtype=self.dtype,
append_batch_size=False,
)
conv = fluid.layers.conv2d(
conv = paddle.static.nn.conv2d(
input=data,
num_filters=32,
filter_size=1,
......
......@@ -196,7 +196,7 @@ class XPUTestAdamwOp2(XPUOpTestWrapper):
with fluid.program_guard(train_prog, startup):
with fluid.unique_name.guard():
data = fluid.data(name="data", shape=shape)
conv = fluid.layers.conv2d(data, 8, 3)
conv = paddle.static.nn.conv2d(data, 8, 3)
loss = paddle.mean(conv)
beta1 = paddle.static.create_global_var(
......
......@@ -74,11 +74,15 @@ from ..fluid.io import batch # noqa: F401
from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401
from ..fluid.layers import exponential_decay # noqa: F401
from .nn.common import batch_norm # noqa: F401
from .nn.common import conv2d # noqa: F401
from .nn.metric import auc # noqa: F401
from .nn.metric import accuracy # noqa: F401
__all__ = [ # noqa
'append_backward',
'batch_norm',
'conv2d',
'gradients',
'Executor',
'global_scope',
......
......@@ -19,6 +19,7 @@ from .common import data_norm # noqa: F401
from .common import continuous_value_model # noqa: F401
from .common import group_norm # noqa: F401
from .common import deform_conv2d # noqa: F401
from .common import conv2d # noqa: F401
from .common import conv3d # noqa: F401
from .common import conv2d_transpose # noqa: F401
from .common import conv3d_transpose # noqa: F401
......@@ -30,7 +31,6 @@ from .control_flow import (
from .common import bilinear_tensor_product # noqa: F401
from .common import py_func # noqa: F401
from ...tensor.creation import create_parameter # noqa: F401
from ...fluid.layers import conv2d # noqa: F401
from ...fluid.layers import layer_norm # noqa: F401
from .loss import nce # noqa: F401
from .common import prelu # noqa: F401
......
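With the __init__.py changes above, paddle.static.nn.conv2d is now imported from .common (the relocated implementation shown below) rather than from the removed fluid.layers version. A quick, hypothetical check of where the re-export resolves:

import paddle

# The exact module string is an assumption, e.g. 'paddle.static.nn.common';
# the point is that it no longer comes from paddle.fluid.layers.
print(paddle.static.nn.conv2d.__module__)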
......@@ -685,6 +685,324 @@ def group_norm(
return helper.append_activation(group_norm_out)
def conv2d(
input,
num_filters,
filter_size,
stride=1,
padding=0,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None,
name=None,
data_format="NCHW",
):
r"""
The convolution2D layer calculates the output based on the input, filter
and strides, paddings, dilations, groups parameters. Input and
Output are in NCHW or NHWC format, where N is batch size, C is the number of
channels, H is the height of the feature, and W is the width of the feature.
Filter is in MCHW format, where M is the number of output image channels,
C is the number of input image channels, H is the height of the filter,
and W is the width of the filter. If the groups is greater than 1,
C will equal the number of input image channels divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW or NHWC format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{out}, C_{in}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (H_f - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1
Args:
input (Tensor): The input is 4-D Tensor with shape [N, C, H, W], the data type
of input is float16 or float32 or float64.
num_filters(int): The number of filters, which is equal to the number of
output image channels.
filter_size (int|tuple): The filter size. If filter_size
is a tuple, it must contain two integers, (filter_size_height,
filter_size_width). Otherwise, filter_size_height = filter_size_width =\
filter_size.
stride (int|tuple, optional): The stride size. It means the stride in convolution.
If stride is a tuple, it must contain two integers, (stride_height, stride_width).
Otherwise, stride_height = stride_width = stride. Default: stride = 1.
padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings
on both sides for each dimension. If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when
`data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0],
[pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
dilation (int|tuple, optional): The dilation size. It means the spacing between the kernel
points. If dilation is a tuple, it must contain two integers, (dilation_height,
dilation_width). Otherwise, dilation_height = dilation_width = dilation.
Default: dilation = 1.
groups (int, optional): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1.
param_attr (ParamAttr|None, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool|None, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn (bool, optional): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
act (str, optional): Activation type, if it is set to None, activation is not appended.
Default: None
name(str|None, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
Returns:
A Tensor representing the conv2d, whose data type is the
same with input. If act is None, the tensor storing the convolution
result, and if act is not None, the tensor storing convolution
and non-linearity activation result.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
data = paddle.static.data(name='data', shape=[None, 3, 32, 32], dtype='float32')
conv2d = paddle.static.nn.conv2d(input=data, num_filters=2, filter_size=3, act="relu")
print(conv2d.shape) # [-1, 2, 30, 30]
"""
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], 'conv2d'
)
if len(input.shape) != 4:
raise ValueError(
"Input size should be 4, "
"but received {}".format(len(input.shape))
)
num_channels = input.shape[1]
if not isinstance(use_cudnn, bool):
raise ValueError(
"Attr(use_cudnn) should be True or False. Received "
"Attr(use_cudnn): %s. " % str(use_cudnn)
)
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format)
)
channel_last = data_format == "NHWC"
num_channels = input.shape[3] if channel_last else input.shape[1]
if num_channels < 0:
raise ValueError(
"The channel dimmention of the input(%s) should be defined. "
"Received: %s." % (str(input.shape), str(num_channels))
)
assert param_attr is not False, "param_attr should not be False here."
if groups is None:
num_filter_channels = num_channels
elif groups <= 0:
raise ValueError(
"the groups of input must be greater than 0, "
"but received the groups of input is {}".format(groups)
)
else:
if num_channels % groups != 0:
raise ValueError(
"the channel of input must be divisible by groups,"
"received: the channel of input is {}, the shape of input is {}"
", the groups is {}".format(num_channels, input.shape, groups)
)
num_filter_channels = num_channels // groups
l_type = 'conv2d'
if (
num_channels == groups
and num_filters % num_channels == 0
and not use_cudnn
):
l_type = 'depthwise_conv2d'
if (
num_channels == groups
and num_filters % num_channels == 0
and core.is_compiled_with_rocm()
):
l_type = 'depthwise_conv2d'
# NPU only supports depthwise_conv2d when "input_channel = output_channel = groups"
if core.is_compiled_with_npu():
if num_channels == groups and num_channels == num_filters:
l_type = 'depthwise_conv2d'
else:
l_type = 'conv2d'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
filter_size = utils.convert_to_list(filter_size, 2, 'filter_size')
stride = utils.convert_to_list(stride, 2, 'stride')
dilation = utils.convert_to_list(dilation, 2, 'dilation')
# padding
def _update_padding(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 4:
if is_list_or_tuple(padding[0]) and (data_format == "NCHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
raise ValueError(
"Non-zero padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding)
)
padding = padding[2:4]
padding = [ele for a_list in padding for ele in a_list]
elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"):
if not (padding[0] == [0, 0] and padding[3] == [0, 0]):
raise ValueError(
"Non-zero padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding)
)
padding = padding[1:3]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 4, 'padding')
if utils._is_symmetric_padding(padding, 2):
padding = [padding[0], padding[2]]
else:
padding = utils.convert_to_list(padding, 2, 'padding')
return padding
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown padding: '%s'. It can only be 'SAME' or 'VALID'."
% str(padding)
)
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
padding = _update_padding(padding, data_format)
filter_shape = [num_filters, int(num_filter_channels)] + filter_size
def _get_default_param_initializer():
filter_elem_num = filter_size[0] * filter_size[1] * num_channels
if filter_elem_num <= 0:
raise ValueError(
"Invalid filter number, excepted number is larger than 0, but"
" received {}, please check the input shape and "
"filter size.".format(filter_elem_num)
)
std = (2.0 / filter_elem_num) ** 0.5
return Normal(0.0, std, 0)
filter_param = helper.create_parameter(
attr=helper.param_attr,
shape=filter_shape,
dtype=dtype,
default_initializer=_get_default_param_initializer(),
)
pre_bias = helper.create_variable_for_type_inference(dtype)
if (
core.is_compiled_with_cuda()
and paddle.fluid.get_flags("FLAGS_conv2d_disable_cudnn")[
"FLAGS_conv2d_disable_cudnn"
]
):
use_cudnn = False
helper.append_op(
type=l_type,
inputs={
'Input': input,
'Filter': filter_param,
},
outputs={"Output": pre_bias},
attrs={
'strides': stride,
'paddings': padding,
'dilations': dilation,
'groups': groups,
'use_cudnn': use_cudnn,
'use_mkldnn': False,
'fuse_relu_before_depthwise_conv': False,
"padding_algorithm": padding_algorithm,
"data_format": data_format,
},
)
if data_format == 'NCHW':
pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
else:
pre_act = helper.append_bias_op(pre_bias, dim_start=3, dim_end=4)
return helper.append_activation(pre_act)
def conv3d(
input,
num_filters,
......
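A minimal usage sketch of the relocated API under static graph mode, exercising the padding variants documented above (the shapes in the comments are the expected results, assuming the default NCHW layout):

import paddle

paddle.enable_static()
x = paddle.static.data(name='x', shape=[None, 3, 32, 32], dtype='float32')

# Explicit integer padding.
y1 = paddle.static.nn.conv2d(input=x, num_filters=8, filter_size=3, padding=1)

# String padding algorithm, either 'SAME' or 'VALID'.
y2 = paddle.static.nn.conv2d(input=x, num_filters=8, filter_size=3, padding='SAME')

print(y1.shape)  # expected [-1, 8, 32, 32]
print(y2.shape)  # expected [-1, 8, 32, 32]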