Unverified commit 9a9e0aa0, authored by xiaoguoguo626807, committed by GitHub

【fluid api clear】Move batch norm1 (#47965)

* modify slice infershape

* code style

* modify slice_unittest

* temp fix

* batch_norm api move

* code_style

* codestyle

* ci_static

* add __init__

* reset other change

* revert .cc

* add import batchnorm

* conflict and revert

* fix bug

* fix third conflict one day

* fix conflict

* fix conflict bug

* fix conflict bug

* modify api

* code_style

* modify doc

* add lost doc stable

* fix conflict bug

* ci lack of gpu
Parent 1a3d2592
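The diff below is mechanical: every call site that previously went through the old fluid `batch_norm` entry point is switched to `paddle.static.nn.batch_norm`, and the old implementation is deleted from the fluid layers module. A minimal before/after sketch of the call-site change (the tensor name and shape here are illustrative, not taken from this diff):

```python
import paddle

paddle.enable_static()
x = paddle.static.data(name='x', shape=[None, 16, 32, 32], dtype='float32')

# before this commit (old fluid entry point, now removed):
#   bn = fluid.layers.batch_norm(input=x, act='relu', data_layout='NCHW')

# after this commit:
bn = paddle.static.nn.batch_norm(input=x, act='relu', data_layout='NCHW')
```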
@@ -1963,8 +1963,11 @@ def fused_bn_add_act(
Examples:
.. code-block:: python
+import paddle
import paddle.fluid as fluid
+paddle.enable_static()
+# required: gpu
def build_program(main_program, startup_program):
with fluid.program_guard(main_program, startup_program):
x = fluid.layers.data(name='x', shape=[1, 28, 28], dtype='float32')
@@ -1987,7 +1990,7 @@ def fused_bn_add_act(
act=None,
bias_attr=False,
data_format='NHWC')
-bn = fluid.layers.batch_norm(
+bn = paddle.static.nn.batch_norm(
input=conv1_1,
act=None,
data_layout='NHWC')
......
@@ -37,7 +37,7 @@ def conv_block():
pool_stride=2,
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
......
@@ -37,7 +37,7 @@ def conv_net(img, label):
pool_stride=2,
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
......
@@ -57,7 +57,7 @@ def residual_block(num, quant_skip_pattern=None):
act=None,
bias_attr=bias_attr,
)
-return fluid.layers.batch_norm(input=tmp, act=act)
+return paddle.static.nn.batch_norm(input=tmp, act=act)
data = fluid.layers.data(
name='image',
@@ -102,7 +102,7 @@ def conv_net(img, label, quant_skip_pattern):
pool_type='max',
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
@@ -712,7 +712,7 @@ def quant_dequant_residual_block(num, quant_skip_pattern=None):
act=None,
bias_attr=bias_attr,
)
-return fluid.layers.batch_norm(input=tmp, act=act)
+return paddle.static.nn.batch_norm(input=tmp, act=act)
data1 = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
data2 = fluid.layers.data(
......
@@ -43,7 +43,7 @@ def conv_net(img, label):
pool_type='max',
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
......
@@ -45,7 +45,7 @@ def conv_net(img, label):
pool_type='max',
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
......
@@ -41,7 +41,7 @@ def resnet_cifar10(input, depth=32):
act=None,
bias_attr=bias_attr,
)
-return fluid.layers.batch_norm(input=tmp, act=act)
+return paddle.static.nn.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride):
if ch_in != ch_out:
@@ -97,7 +97,7 @@ def vgg16_bn_drop(input):
drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
-bn = fluid.layers.batch_norm(input=fc1, act='relu')
+bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
return fc2
......
@@ -59,7 +59,7 @@ def resnet_cifar10(input, depth=32):
act=None,
bias_attr=bias_attr,
)
-return fluid.layers.batch_norm(input=tmp, act=act)
+return paddle.static.nn.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride):
if ch_in != ch_out:
......
@@ -48,7 +48,7 @@ def residual_block(num):
act=None,
bias_attr=bias_attr,
)
-return fluid.layers.batch_norm(input=tmp, act=act)
+return paddle.static.nn.batch_norm(input=tmp, act=act)
data = fluid.layers.data(name='image', shape=[1, 32, 32], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
@@ -72,7 +72,7 @@ def conv_net(img, label):
pool_stride=2,
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
......
@@ -69,7 +69,6 @@ __all__ = [
'crf_decoding',
'conv2d',
'pool2d',
-'batch_norm',
'dropout',
'split',
'l2_normalize',
@@ -1681,328 +1680,6 @@ def pool2d(
return pool_out
def batch_norm(
input,
act=None,
is_test=False,
momentum=0.9,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
data_layout='NCHW',
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
use_global_stats=False,
):
r"""
:api_attr: Static Graph
**Batch Normalization Layer**
Can be used as a normalizer function for convolution or fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
for more details.
:math:`input` is the input features over a mini-batch.
.. math::
\\mu_{\\beta} &\\gets \\frac{1}{m} \\sum_{i=1}^{m} x_i \\qquad &//\\
\ mini-batch\ mean \\\\
\\sigma_{\\beta}^{2} &\\gets \\frac{1}{m} \\sum_{i=1}^{m}(x_i - \\
\\mu_{\\beta})^2 \\qquad &//\ mini-batch\ variance \\\\
\\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
\\sigma_{\\beta}^{2} + \\epsilon}} \\qquad &//\ normalize \\\\
y_i &\\gets \\gamma \\hat{x_i} + \\beta \\qquad &//\ scale\ and\ shift
moving\_mean = moving\_mean * momentum + mini-batch\_mean * (1. - momentum) \\\\
moving\_var = moving\_var * momentum + mini-batch\_var * (1. - momentum)
moving_mean is global mean and moving_var is global variance.
When use_global_stats = True, the :math:`\\mu_{\\beta}`
and :math:`\\sigma_{\\beta}^{2}` are not the statistics of one mini-batch.
They are global (or running) statistics. (It usually got from the
pre-trained model.)
The training and testing (or inference) have the same behavior:
.. math::
\\hat{x_i} &\\gets \\frac{x_i - \\mu_\\beta} {\\sqrt{\\
\\sigma_{\\beta}^{2} + \\epsilon}} \\\\
y_i &\\gets \\gamma \\hat{x_i} + \\beta
Note:
if build_strategy.sync_batch_norm=True, the batch_norm in network will use
sync_batch_norm automatically.
`is_test = True` can only be used in test program and inference program, `is_test` CANNOT be set to True in train program, if you want to use global status from pre_train model in train program, please set `use_global_stats = True`.
Args:
input(Tensor): The rank of input Tensor can be 2, 3, 4, 5. The data type
is float16 or float32 or float64.
act(string, Default None): Activation type, linear|relu|prelu|...
is_test (bool, Default False): A flag indicating whether it is in
test phrase or not.
momentum(float|Tensor, Default 0.9): The value used for the moving_mean and
moving_var computation. This should be a float number or a Tensor with
shape [1] and data type as float32. The updated formula is:
:math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
:math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
Default is 0.9.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
Default: None.
data_layout (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
name(str|None): For detailed information, please refer to :ref:`api_guide_Name`.
Usually name is no need to set and None by default.
moving_mean_name(str, Default None): The name of moving_mean which store the global Mean. If it
is set to None, batch_norm will save global mean with a random name, otherwise, batch_norm
will save global mean with the string.
moving_variance_name(str, Default None): The name of the moving_variance which store the global Variance.
If it is set to None, batch_norm will save global variance with a random name, otherwise, batch_norm
will save global variance with the string.
do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance should do model
average when model average is enabled.
use_global_stats(bool, Default False): Whether to use global mean and
variance. In inference or test mode, set use_global_stats to true
or is_test to true, and the behavior is equivalent.
In train mode, when setting use_global_stats True, the global mean
and variance are also used during train period.
Returns:
A Tensor which is the result after applying batch normalization on the input,
has same shape and data type with input.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
hidden1 = paddle.static.nn.fc(x=x, size=200)
print(hidden1.shape)
# [3, 200]
hidden2 = paddle.static.nn.batch_norm(input=hidden1)
print(hidden2.shape)
# [3, 200]
"""
assert (
bias_attr is not False
), "bias_attr should not be False in batch_norm."
helper = LayerHelper('batch_norm', **locals())
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], 'batch_norm'
)
dtype = helper.input_dtype()
# use fp32 for bn parameter
if dtype == core.VarDesc.VarType.FP16:
dtype = core.VarDesc.VarType.FP32
input_shape = input.shape
if data_layout == 'NCHW':
channel_num = input_shape[1]
else:
if data_layout == 'NHWC':
channel_num = input_shape[-1]
else:
raise ValueError("unsupported data layout:" + data_layout)
param_shape = [channel_num]
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0),
)
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
)
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var,
),
shape=param_shape,
dtype=dtype,
)
mean.stop_gradient = True
variance = helper.create_parameter(
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var,
),
shape=param_shape,
dtype=dtype,
)
variance.stop_gradient = True
# create output
# mean and mean_out share the same memory
mean_out = mean
# variance and variance_out share the same memory
variance_out = variance
if in_dygraph_mode():
inputs_has_MomemtumTensor = False
attrs_has_momentum = False
tmp_tensor_type = core.eager.Tensor
if isinstance(momentum, tmp_tensor_type):
inputs_has_MomemtumTensor = True
else:
attrs_has_momentum = True
attrs_ = ()
if attrs_has_momentum:
attrs_ = (
'momentum',
momentum,
'epsilon',
epsilon,
'is_test',
is_test,
'data_layout',
data_layout,
'use_mkldnn',
False,
'fuse_with_relu',
False,
'use_global_stats',
use_global_stats,
)
else:
attrs_ = (
'epsilon',
epsilon,
'is_test',
is_test,
'data_layout',
data_layout,
'use_mkldnn',
False,
'fuse_with_relu',
False,
'use_global_stats',
use_global_stats,
)
if inputs_has_MomemtumTensor:
batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm(
input,
scale,
bias,
mean,
variance,
momentum,
mean_out,
variance_out,
*attrs_,
)
else:
batch_norm_out, _, _, _, _, _ = _legacy_C_ops.batch_norm(
input,
scale,
bias,
mean,
variance,
None,
mean_out,
variance_out,
*attrs_,
)
return dygraph_utils._append_activation_in_dygraph(
batch_norm_out, act=act, use_mkldnn=False
)
saved_mean = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
reserve_space = None
if not is_test:
reserve_space = helper.create_variable_for_type_inference(
dtype=helper.input_dtype(), stop_gradient=True
)
batch_norm_out = (
input if in_place else helper.create_variable_for_type_inference(dtype)
)
inputs = {
"X": input,
"Scale": scale,
"Bias": bias,
"Mean": mean,
"Variance": variance,
"MeanOut": mean_out,
"VarianceOut": variance_out,
}
attrs = {
"epsilon": epsilon,
"is_test": is_test,
"data_layout": data_layout,
"use_mkldnn": False,
"fuse_with_relu": False,
"use_global_stats": use_global_stats,
}
if isinstance(momentum, Variable):
inputs['MomemtumTensor'] = momentum
else:
attrs['momentum'] = momentum
outputs = {
"Y": batch_norm_out,
"MeanOut": mean_out,
"VarianceOut": variance_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance,
}
if reserve_space is not None:
outputs["ReserveSpace"] = reserve_space
helper.append_op(
type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs
)
return helper.append_activation(batch_norm_out)
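The block above is the old fluid `batch_norm` implementation being removed; the layer itself remains available as `paddle.static.nn.batch_norm`, as the docstring example already shows. For reference, a self-contained static-graph sketch of the moved API (the program names and random input below are illustrative, not part of this diff):

```python
import numpy as np
import paddle

paddle.enable_static()

main_prog = paddle.static.Program()
startup_prog = paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
    hidden1 = paddle.static.nn.fc(x=x, size=200)
    # same layer as the removed fluid.layers.batch_norm, now under paddle.static.nn
    hidden2 = paddle.static.nn.batch_norm(input=hidden1)

exe = paddle.static.Executor(paddle.CPUPlace())
exe.run(startup_prog)
(out,) = exe.run(
    main_prog,
    feed={'x': np.random.rand(3, 7, 3, 7).astype('float32')},
    fetch_list=[hidden2],
)
print(out.shape)  # expected: (3, 200)
```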
@templatedoc()
def layer_norm(
input,
......
@@ -253,7 +253,7 @@ def img_conv_group(
)
if conv_with_batchnorm[i]:
-tmp = layers.batch_norm(input=tmp, act=conv_act)
+tmp = paddle.static.nn.batch_norm(input=tmp, act=conv_act)
drop_rate = conv_batchnorm_drop_rate[i]
if abs(drop_rate) > 1e-5:
tmp = layers.dropout(x=tmp, dropout_prob=drop_rate)
......
@@ -40,7 +40,7 @@ def resnet_cifar10(input, depth=32):
act=None,
bias_attr=bias_attr,
)
-return fluid.layers.batch_norm(input=tmp, act=act)
+return paddle.static.nn.batch_norm(input=tmp, act=act)
def shortcut(input, ch_in, ch_out, stride):
if ch_in != ch_out:
@@ -96,7 +96,7 @@ def vgg16_bn_drop(input):
drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
fc1 = fluid.layers.fc(input=drop, size=4096, act=None)
-bn = fluid.layers.batch_norm(input=fc1, act='relu')
+bn = paddle.static.nn.batch_norm(input=fc1, act='relu')
drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
fc2 = fluid.layers.fc(input=drop2, size=4096, act=None)
return fc2
......
@@ -51,7 +51,7 @@ def conv_net(img, label):
pool_stride=2,
act="relu",
)
-conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
+conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
......
@@ -182,7 +182,7 @@ class SE_ResNeXt:
),
bias_attr=False,
)
-return fluid.layers.batch_norm(input=conv, act=act)
+return paddle.static.nn.batch_norm(input=conv, act=act)
def squeeze_excitation(self, input, num_channels, reduction_ratio):
pool = fluid.layers.pool2d(
......
@@ -58,7 +58,7 @@ class TestBase(IPUOpTest):
x = paddle.static.nn.conv2d(
x, num_filters=3, filter_size=3, bias_attr=False
)
-x = paddle.fluid.layers.batch_norm(x, **self.attrs)
+x = paddle.static.nn.batch_norm(x, **self.attrs)
self.fetch_list = [x.name]
def run_model(self, exec_mode):
......
@@ -22,6 +22,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -39,7 +40,7 @@ class TensorRTSubgraphPassActivationTest(InferencePassTest):
name="data", shape=[-1, 6, 32, 32], dtype="float32"
)
act_out = self.append_act(data)
-out = fluid.layers.batch_norm(act_out, is_test=True)
+out = nn.batch_norm(act_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 6, 32, 32]).astype("float32"),
}
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -67,7 +68,7 @@ class TRTAnchorGeneratorBaseTest(InferencePassTest):
)
if self.dynamic_shape_params is not None:
anchor = paddle.transpose(anchor, [2, 3, 0, 1])
-out = fluid.layers.batch_norm(anchor, is_test=True)
+out = nn.batch_norm(anchor, is_test=True)
self.fetch_list = [out, var]
......
@@ -21,6 +21,7 @@ from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -34,7 +35,7 @@ class TensorRTSubgraphPassElementwiseBroadcastTest(InferencePassTest):
name="data2", shape=[-1, 3, 64, 1], dtype="float32"
)
eltwise_out = self.append_eltwise(data1, data2)
-out = fluid.layers.batch_norm(eltwise_out, is_test=True)
+out = nn.batch_norm(eltwise_out, is_test=True)
self.feeds = {
"data1": np.random.random([1, 3, 64, 64]).astype("float32"),
"data2": np.random.random([1, 3, 64, 1]).astype("float32"),
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -30,7 +31,7 @@ class TRTFlattenTest(InferencePassTest):
name="data", shape=[-1, 6, 64, 64], dtype="float32"
)
flatten_out = self.append_flatten(data)
-out = fluid.layers.batch_norm(flatten_out, is_test=True)
+out = nn.batch_norm(flatten_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
@@ -59,7 +60,7 @@ class TRTFlattenDynamicTest(InferencePassTest):
name="data", shape=[-1, 6, 64, 64], dtype="float32"
)
flatten_out = self.append_flatten(data)
-out = fluid.layers.batch_norm(flatten_out, is_test=True)
+out = nn.batch_norm(flatten_out, is_test=True)
self.feeds = {
"data": np.random.random([2, 6, 64, 64]).astype("float32"),
}
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -29,7 +30,7 @@ class TRTGatherNdTest(InferencePassTest):
data = fluid.data(name="data", shape=[-1, 3, 4], dtype="float32")
index = fluid.data(name="index", shape=[-1, 2, 2], dtype="int32")
gather_nd = paddle.gather_nd(data, index)
-out = fluid.layers.batch_norm(gather_nd, is_test=True)
+out = nn.batch_norm(gather_nd, is_test=True)
self.feeds = {
"data": np.random.random([2, 3, 4]).astype("float32"),
@@ -66,7 +67,7 @@ class TRTGatherNdFp16Test(InferencePassTest):
)
index = fluid.data(name="index", shape=[-1, 1028, 2], dtype="int32")
gather_nd = paddle.gather_nd(data, index)
-out = fluid.layers.batch_norm(gather_nd, is_test=True)
+out = nn.batch_norm(gather_nd, is_test=True)
index_data = np.zeros((1, 1028, 2), dtype='int32')
self.feeds = {
......
@@ -37,7 +37,7 @@ class TensorRTInspectorTest(InferencePassTest):
transpose_y=self.transpose_y,
)
matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-out = fluid.layers.batch_norm(matmul_out, is_test=True)
+out = paddle.static.nn.batch_norm(matmul_out, is_test=True)
self.feeds = {
"data": np.ones([1, 16, 16]).astype("float32"),
......
@@ -20,9 +20,9 @@ import unittest
import numpy as np
from inference_pass_test import InferencePassTest
-import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -44,8 +44,8 @@ class TRTInstanceNormTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
shape = [-1, self.channel, self.height, self.width]
data = fluid.data(name='in', shape=shape, dtype='float32')
-instance_norm_out = paddle.static.nn.instance_norm(data)
-out = fluid.layers.batch_norm(instance_norm_out, is_test=True)
+instance_norm_out = nn.instance_norm(data)
+out = nn.batch_norm(instance_norm_out, is_test=True)
shape[0] = self.bs
self.feeds = {
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -35,7 +36,7 @@ class TensorRTMatMulDims2Test(InferencePassTest):
transpose_y=self.transpose_y,
)
matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-out = fluid.layers.batch_norm(matmul_out, is_test=True)
+out = nn.batch_norm(matmul_out, is_test=True)
self.feeds = {
"data": np.ones([24, 24]).astype("float32"),
@@ -74,7 +75,7 @@ class TensorRTMatMulTest(InferencePassTest):
transpose_y=self.transpose_y,
)
matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-out = fluid.layers.batch_norm(matmul_out, is_test=True)
+out = nn.batch_norm(matmul_out, is_test=True)
self.feeds = {
"data": np.ones([1, 6, 24, 24]).astype("float32"),
@@ -136,7 +137,7 @@ class TensorRTMatMulBroadcastTest(InferencePassTest):
transpose_y=self.transpose_y,
)
matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-out = fluid.layers.batch_norm(matmul_out, is_test=True)
+out = nn.batch_norm(matmul_out, is_test=True)
self.feeds = {
"data_x": np.ones([2, 6, 24]).astype("float32"),
......
@@ -135,7 +135,7 @@ class TensorRTMatMulQuantDequantDims4Test(QuantDequantTest):
transpose_y=self.transpose_y,
)
matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-out = fluid.layers.batch_norm(matmul_out, is_test=True)
+out = paddle.static.nn.batch_norm(matmul_out, is_test=True)
fc_out = fluid.layers.fc(
input=matmul_out,
size=10,
@@ -231,7 +231,7 @@ class TensorRTMatMulQuantDequantDims3DynamicTest(QuantDequantTest):
transpose_y=self.transpose_y,
)
matmul_out = paddle.scale(matmul_out, scale=self.alpha)
-out = fluid.layers.batch_norm(matmul_out, is_test=True)
+out = paddle.static.nn.batch_norm(matmul_out, is_test=True)
fc_out = fluid.layers.fc(
input=matmul_out,
size=10,
......
@@ -21,6 +21,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
from paddle.fluid.framework import in_dygraph_mode
from paddle.fluid.layer_helper import LayerHelper
@@ -242,7 +243,7 @@ class TensorRTMultiClassNMS3Test(InferencePassTest):
[self.bs, 1, self.keep_top_k, 6],
name='reshape',
)
-out = fluid.layers.batch_norm(multiclass_nms_out, is_test=True)
+out = nn.batch_norm(multiclass_nms_out, is_test=True)
boxes_data = (
np.arange(self.num_boxes * 4)
......
@@ -21,6 +21,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -69,7 +70,7 @@ class TensorRTMultiClassNMSTest(InferencePassTest):
[self.bs, 1, self.keep_top_k, 6],
name='reshape',
)
-out = fluid.layers.batch_norm(multiclass_nms_out, is_test=True)
+out = nn.batch_norm(multiclass_nms_out, is_test=True)
boxes_data = (
np.arange(self.num_boxes * 4)
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -44,7 +45,7 @@ class TRTNearestInterpTest(InferencePassTest):
]
data = fluid.data(name='data', shape=shape, dtype='float32')
resize_out = self.append_nearest_interp(data)
-out = fluid.layers.batch_norm(resize_out, is_test=True)
+out = nn.batch_norm(resize_out, is_test=True)
if self.data_layout == 'NCHW':
shape = [
......
@@ -19,6 +19,7 @@ from inference_pass_test import InferencePassTest
import paddle.fluid.core as core
import paddle.nn.functional as F
+import paddle.static.nn as nn
from paddle import fluid
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -44,7 +45,7 @@ class TRTNearestInterpTest(InferencePassTest):
]
data = fluid.data(name='data', shape=shape, dtype='float32')
resize_out = self.append_nearest_interp(data)
-out = fluid.layers.batch_norm(resize_out, is_test=True)
+out = nn.batch_norm(resize_out, is_test=True)
if self.data_layout == 'NCHW':
shape = [
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig
@@ -32,7 +33,7 @@ class PadOpTRTTest(InferencePassTest):
pad_out = paddle.nn.functional.pad(
x=data, pad=[0, 0, 0, 0, 0, 1, 1, 2], value=0.0
)
-out = fluid.layers.batch_norm(pad_out, is_test=True)
+out = nn.batch_norm(pad_out, is_test=True)
self.feeds = {
"data": np.random.random((1, 3, 128, 128)).astype("float32")
......
@@ -80,7 +80,7 @@ class TensorRTPool3dTest(InferencePassTest):
ceil_mode=self.ceil_mode,
exclusive=self.exclusive,
)
-# out = fluid.layers.batch_norm(pool_out, is_test=True)
+# out = paddle.static.nn.batch_norm(pool_out, is_test=True)
self.fetch_list = [pool_out]
def check_output(self):
@@ -198,7 +198,7 @@ class TensorRTAdaptiveAvgPool3DTest(InferencePassTest):
pool_out = paddle.nn.functional.adaptive_avg_pool3d(
x=data, output_size=[3, 3, 3]
)
-# out = fluid.layers.batch_norm(pool_out, is_test=True)
+# out = paddle.static.nn.batch_norm(pool_out, is_test=True)
self.fetch_list = [pool_out]
def check_output(self):
@@ -298,7 +298,7 @@ class TensorRTAdaptiveMaxPool3DTest(InferencePassTest):
pool_out = paddle.nn.functional.adaptive_max_pool3d(
x=data, output_size=[3, 3, 3]
)
-# out = fluid.layers.batch_norm(pool_out, is_test=True)
+# out = paddle.static.nn.batch_norm(pool_out, is_test=True)
self.fetch_list = [pool_out]
def check_output(self):
......
@@ -22,6 +22,7 @@ from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -72,7 +73,7 @@ class TensorRTPoolTest(InferencePassTest):
ceil_mode=self.ceil_mode,
exclusive=self.exclusive,
)
-out = fluid.layers.batch_norm(pool_out, is_test=True)
+out = nn.batch_norm(pool_out, is_test=True)
self.fetch_list = [out]
def check_output(self):
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -30,7 +31,7 @@ class TRTReduceSumTest(InferencePassTest):
name="data", shape=[-1, 3, 10, 192], dtype="float32"
)
reduce_sum = paddle.sum(data, axis=[2, -1], keepdim=True)
-out = fluid.layers.batch_norm(reduce_sum, is_test=True)
+out = nn.batch_norm(reduce_sum, is_test=True)
self.feeds = {
"data": np.random.random([3, 3, 10, 192]).astype("float32"),
@@ -63,7 +64,7 @@ class TRTReduceSumAllTest(InferencePassTest):
name="data", shape=[-1, 3, 10, 192], dtype="float32"
)
reduce_sum = paddle.sum(data, keepdim=True)
-out = fluid.layers.batch_norm(reduce_sum, is_test=True)
+out = nn.batch_norm(reduce_sum, is_test=True)
self.feeds = {
"data": np.random.random([3, 3, 10, 192]).astype("float32"),
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -39,7 +40,7 @@ class TRTReshapeTest(InferencePassTest):
name='data', shape=self.data_shape, dtype='float32'
)
reshape_out = self.append_reshape(data, self.reshape)
-out = fluid.layers.batch_norm(reshape_out, is_test=True)
+out = nn.batch_norm(reshape_out, is_test=True)
self.feeds = {
'data': np.random.random(self.data_shape).astype('float32'),
}
@@ -77,7 +78,7 @@ class TRTReshapeTest1(TRTReshapeTest):
name='data', shape=self.data_shape, dtype='float32'
)
reshape_out = self.append_reshape(data, self.reshape)
-out = fluid.layers.batch_norm(reshape_out, is_test=True)
+out = nn.batch_norm(reshape_out, is_test=True)
self.feeds = {
'data': np.random.random(self.data_shape).astype('float32'),
}
@@ -104,7 +105,7 @@ class TRTReshapeTest2(TRTReshapeTest):
name='data', shape=self.data_shape, dtype='float32'
)
reshape_out = paddle.reshape(x=data, shape=self.reshape)
-out = fluid.layers.batch_norm(reshape_out, is_test=True)
+out = nn.batch_norm(reshape_out, is_test=True)
self.feeds = {
'data': np.random.random(self.data_shape).astype('float32')
}
@@ -130,7 +131,7 @@ class TRTReshapeTest3(TRTReshapeTest):
data = fluid.data(
name='data', shape=self.data_shape, dtype='float32'
)
-bn_out = fluid.layers.batch_norm(data, is_test=True)
+bn_out = nn.batch_norm(data, is_test=True)
out = self.append_reshape(bn_out, self.reshape)
self.feeds = {
'data': np.random.random(self.data_shape).astype('float32'),
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -28,7 +29,7 @@ class TRTScaleTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[-1, 512], dtype="float32")
scale_out = self.append_scale(data)
-out = fluid.layers.batch_norm(scale_out, is_test=True)
+out = nn.batch_norm(scale_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 512]).astype("float32"),
@@ -60,7 +61,7 @@ class TRTScaleShape2Test(InferencePassTest):
name="data", shape=[-1, 512, 512], dtype="float32"
)
scale_out = self.append_scale(data)
-out = fluid.layers.batch_norm(scale_out, is_test=True)
+out = nn.batch_norm(scale_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 512, 512]).astype("float32"),
......
@@ -19,6 +19,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -31,8 +32,7 @@ class ShuffleChannelFuseTRTPassTest(InferencePassTest):
reshape1 = paddle.reshape(x=data, shape=[-1, 2, 3, 64, 64])
trans = paddle.transpose(x=reshape1, perm=[0, 2, 1, 3, 4])
reshape2 = paddle.reshape(x=trans, shape=[-1, 6, 64, 64])
-out = fluid.layers.batch_norm(reshape2, is_test=True)
+out = nn.batch_norm(reshape2, is_test=True)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
......
@@ -20,6 +20,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig
@@ -45,7 +46,7 @@ class SlicePluginTRTTest(InferencePassTest):
starts = self.params_starts
ends = self.params_ends
slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends)
-out = fluid.layers.batch_norm(slice_out, is_test=True)
+out = nn.batch_norm(slice_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("float32"),
@@ -115,7 +116,7 @@ class SlicePluginTRTTestInt32(SlicePluginTRTTest):
ends = self.params_ends
slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends)
cast_out = fluid.layers.cast(slice_out, 'float32')
-out = fluid.layers.batch_norm(cast_out, is_test=True)
+out = nn.batch_norm(cast_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("int32"),
@@ -140,7 +141,7 @@ class StaticSlicePluginTRTTestInt32(SlicePluginTRTTest):
ends = self.params_ends
slice_out = paddle.slice(data, axes=axes, starts=starts, ends=ends)
cast_out = fluid.layers.cast(slice_out, 'float32')
-out = fluid.layers.batch_norm(cast_out, is_test=True)
+out = nn.batch_norm(cast_out, is_test=True)
self.feeds = {
"data": np.random.random((3, 3, 3, 3)).astype("int32"),
......
@@ -22,6 +22,7 @@ from inference_pass_test import InferencePassTest
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
+import paddle.static.nn as nn
from paddle.fluid.core import AnalysisConfig, PassVersionChecker
@@ -62,7 +63,7 @@ class TensorRTSubgraphPassConcatTest(InferencePassTest):
name="data2", shape=[-1, 3, 64, 64], dtype="float32"
)
concat_out = fluid.layers.concat([data1, data2], axis=2)
-out = fluid.layers.batch_norm(concat_out, is_test=True)
+out = nn.batch_norm(concat_out, is_test=True)
self.feeds = {
"data1": np.random.random([1, 3, 64, 64]).astype("float32"),
"data2": np.random.random([1, 3, 64, 64]).astype("float32"),
@@ -89,7 +90,7 @@ class TensorRTSubgraphPassSplitTest(InferencePassTest):
name="data", shape=[-1, 3, 64, 64], dtype="float32"
)
split_out = fluid.layers.split(data, dim=-1, num_or_sections=2)
-out = fluid.layers.batch_norm(split_out[0], is_test=True)
+out = nn.batch_norm(split_out[0], is_test=True)
self.feeds = {
"data": np.random.random([1, 3, 64, 64]).astype("float32"),
}
@@ -115,7 +116,7 @@ class TensorRTSubgraphPassSplitSerializeTest(InferencePassTest):
name="data", shape=[-1, 3, 64, 64], dtype="float32"
)
split_out = fluid.layers.split(data, dim=-1, num_or_sections=2)
-out = fluid.layers.batch_norm(split_out[0], is_test=True)
+out = nn.batch_norm(split_out[0], is_test=True)
self.feeds = {
"data": np.random.random([1, 3, 64, 64]).astype("float32"),
}
@@ -143,7 +144,7 @@ class TensorRTSubgraphPassDynamicSplitFp16SerializeTest(InferencePassTest):
name="data", shape=[-1, 3, 64, 64], dtype="float32"
)
split_out = fluid.layers.split(data, dim=-1, num_or_sections=2)
-out = fluid.layers.batch_norm(split_out[0], is_test=True)
+out = nn.batch_norm(split_out[0], is_test=True)
self.feeds = {
"data": np.random.random([1, 3, 64, 64]).astype("float32"),
}
@@ -216,7 +217,7 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest):
name="data", shape=[-1, 6, 64, 64], dtype="float32"
)
transpose_out = self.append_transpose(data)
-out = fluid.layers.batch_norm(transpose_out, is_test=True)
+out = nn.batch_norm(transpose_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 6, 64, 64]).astype("float32"),
}
@@ -366,7 +367,7 @@ class TensorRTSubgraphPassElementwiseTest(InferencePassTest):
name="data2", shape=[-1, 3, 64, 64], dtype="float32"
)
eltwise_out = self.append_eltwise(data1, data2)
-out = fluid.layers.batch_norm(eltwise_out, is_test=True)
+out = nn.batch_norm(eltwise_out, is_test=True)
self.feeds = {
"data1": np.random.random([1, 3, 64, 64]).astype("float32"),
"data2": np.random.random([1, 3, 64, 64]).astype("float32"),
@@ -419,7 +420,7 @@ class TensorRTSubgraphPassElementwiseBroadcastDynamicTest(InferencePassTest):
)
data2 = fluid.data(name="data2", shape=[64, 64], dtype="float32")
eltwise_out = self.append_eltwise(data1, data2)
-out = fluid.layers.batch_norm(eltwise_out, is_test=True)
+out = nn.batch_norm(eltwise_out, is_test=True)
self.feeds = {
"data1": np.random.random([1, 3, 64, 64]).astype("float32"),
"data2": np.random.random([64, 64]).astype("float32"),
......
@@ -30,7 +30,7 @@ class TRTTileTest(InferencePassTest):
name="data", shape=[4, 3, 224, 256], dtype="float32"
)
tile_out = paddle.tile(x=data, repeat_times=[1, 1, 1, 1])
-out = fluid.layers.batch_norm(tile_out, is_test=True)
+out = paddle.static.nn.batch_norm(tile_out, is_test=True)
self.feeds = {
"data": np.random.random([4, 3, 224, 256]).astype("float32"),
@@ -55,7 +55,7 @@ class TRTTileExpandTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
-out = fluid.layers.batch_norm(tile_out, is_test=True)
+out = paddle.static.nn.batch_norm(tile_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 1, 1, 1]).astype("float32"),
@@ -80,7 +80,7 @@ class TRTTileExpandStaticTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
-out = fluid.layers.batch_norm(tile_out, is_test=True)
+out = paddle.static.nn.batch_norm(tile_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 1, 1, 1]).astype("float32"),
@@ -105,7 +105,7 @@ class TRTTileExpandHalfTest(InferencePassTest):
with fluid.program_guard(self.main_program, self.startup_program):
data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
-out = fluid.layers.batch_norm(tile_out, is_test=True)
+out = paddle.static.nn.batch_norm(tile_out, is_test=True)
self.feeds = {
"data": np.random.random([1, 1, 1, 1]).astype("float32"),
......
...@@ -42,7 +42,7 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest): ...@@ -42,7 +42,7 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest):
# There is no parameters for above structure. # There is no parameters for above structure.
# Hence, append a batch_norm to avoid failure caused by load_combined. # Hence, append a batch_norm to avoid failure caused by load_combined.
reshape_out = paddle.reshape(concat_out, [-1, 0, 1, 1]) reshape_out = paddle.reshape(concat_out, [-1, 0, 1, 1])
out = fluid.layers.batch_norm(reshape_out, is_test=True) out = paddle.static.nn.batch_norm(reshape_out, is_test=True)
self.feeds = { self.feeds = {
"data1": np.random.random([8, 32, 128]).astype("float32"), "data1": np.random.random([8, 32, 128]).astype("float32"),
......
...@@ -86,7 +86,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): ...@@ -86,7 +86,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
) )
if self.bn_dtype == np.float16: if self.bn_dtype == np.float16:
conv = fluid.layers.cast(conv, 'float16') conv = fluid.layers.cast(conv, 'float16')
bn = fluid.layers.batch_norm( bn = paddle.static.nn.batch_norm(
conv, conv,
param_attr=fluid.ParamAttr(name='bn_scale'), param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'), bias_attr=fluid.ParamAttr(name='bn_bias'),
......
...@@ -742,12 +742,12 @@ class TestBatchNormOpError(unittest.TestCase): ...@@ -742,12 +742,12 @@ class TestBatchNormOpError(unittest.TestCase):
x1 = fluid.create_lod_tensor( x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()
) )
self.assertRaises(TypeError, fluid.layers.batch_norm, x1) self.assertRaises(TypeError, paddle.static.nn.batch_norm, x1)
# the input dtype of batch_norm must be float16 or float32 or float64 # the input dtype of batch_norm must be float16 or float32 or float64
# float16 only can be set on GPU place # float16 only can be set on GPU place
x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32")
self.assertRaises(TypeError, fluid.layers.batch_norm, x2) self.assertRaises(TypeError, paddle.static.nn.batch_norm, x2)
class TestDygraphBatchNormAPIError(unittest.TestCase): class TestDygraphBatchNormAPIError(unittest.TestCase):
......
...@@ -88,7 +88,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase): ...@@ -88,7 +88,7 @@ class TestSyncBatchNormOpTraining(TestSyncBatchNormRunnerBase):
bias_attr=False, bias_attr=False,
use_cudnn=use_cudnn, use_cudnn=use_cudnn,
) )
bn = fluid.layers.batch_norm( bn = paddle.static.nn.batch_norm(
conv, conv,
param_attr=fluid.ParamAttr(name='bn_scale'), param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'), bias_attr=fluid.ParamAttr(name='bn_bias'),
......
...@@ -80,7 +80,7 @@ def conv_bn_layer( ...@@ -80,7 +80,7 @@ def conv_bn_layer(
return ( return (
conv conv
if remove_bn if remove_bn
else fluid.layers.batch_norm(input=conv, act=act, momentum=0.1) else paddle.static.nn.batch_norm(input=conv, act=act, momentum=0.1)
) )
......
...@@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10): ...@@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10):
), ),
) )
hidden = fluid.layers.batch_norm(input=hidden) hidden = paddle.static.nn.batch_norm(input=hidden)
prediction = fluid.layers.fc(hidden, size=class_num, act='softmax') prediction = fluid.layers.fc(hidden, size=class_num, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.cross_entropy(input=prediction, label=label)
......
...@@ -46,7 +46,7 @@ def convolutional_neural_network(use_py_reader): ...@@ -46,7 +46,7 @@ def convolutional_neural_network(use_py_reader):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
) )
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
......
...@@ -759,12 +759,12 @@ class TestBatchNormOpError(unittest.TestCase): ...@@ -759,12 +759,12 @@ class TestBatchNormOpError(unittest.TestCase):
x1 = fluid.create_lod_tensor( x1 = fluid.create_lod_tensor(
np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace() np.array([-1, 3, 5, 5]), [[1, 1, 1, 1]], fluid.CPUPlace()
) )
self.assertRaises(TypeError, fluid.layers.batch_norm, x1) self.assertRaises(TypeError, paddle.static.nn.batch_norm, x1)
# the input dtype of batch_norm must be float16 or float32 or float64 # the input dtype of batch_norm must be float16 or float32 or float64
# float16 only can be set on GPU place # float16 only can be set on GPU place
x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32") x2 = fluid.layers.data(name='x2', shape=[3, 4, 5, 6], dtype="int32")
self.assertRaises(TypeError, fluid.layers.batch_norm, x2) self.assertRaises(TypeError, paddle.static.nn.batch_norm, x2)
class TestDygraphBatchNormAPIError(unittest.TestCase): class TestDygraphBatchNormAPIError(unittest.TestCase):
......
...@@ -34,7 +34,7 @@ class TestFetchUnmerged(unittest.TestCase): ...@@ -34,7 +34,7 @@ class TestFetchUnmerged(unittest.TestCase):
pool_type='max', pool_type='max',
act="relu", act="relu",
) )
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
......
...@@ -41,7 +41,7 @@ class TestFuseBatchNormActPass(unittest.TestCase): ...@@ -41,7 +41,7 @@ class TestFuseBatchNormActPass(unittest.TestCase):
name='batch_norm_b', name='batch_norm_b',
initializer=fluid.initializer.Constant(value=0.0), initializer=fluid.initializer.Constant(value=0.0),
) )
hidden2 = fluid.layers.batch_norm( hidden2 = paddle.static.nn.batch_norm(
input=hidden1, input=hidden1,
param_attr=param_attr, param_attr=param_attr,
bias_attr=bias_attr, bias_attr=bias_attr,
...@@ -49,7 +49,7 @@ class TestFuseBatchNormActPass(unittest.TestCase): ...@@ -49,7 +49,7 @@ class TestFuseBatchNormActPass(unittest.TestCase):
data_layout='NHWC', data_layout='NHWC',
) )
hidden3 = fluid.layers.fc(input=hidden2, size=32, act='relu') hidden3 = fluid.layers.fc(input=hidden2, size=32, act='relu')
hidden4 = fluid.layers.batch_norm( hidden4 = paddle.static.nn.batch_norm(
input=hidden3, act='relu', data_layout='NHWC' input=hidden3, act='relu', data_layout='NHWC'
) )
prediction = fluid.layers.fc(input=hidden4, size=10, act='softmax') prediction = fluid.layers.fc(input=hidden4, size=10, act='softmax')
......
...@@ -87,7 +87,7 @@ class TestFusedBnAddActAPI(unittest.TestCase): ...@@ -87,7 +87,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
bias_attr=False, bias_attr=False,
data_format='NHWC', data_format='NHWC',
) )
bn = fluid.layers.batch_norm( bn = paddle.static.nn.batch_norm(
input=conv1_1, input=conv1_1,
param_attr=self.bn_param_attr1, param_attr=self.bn_param_attr1,
bias_attr=self.bn_bias_attr1, bias_attr=self.bn_bias_attr1,
...@@ -133,7 +133,7 @@ class TestFusedBnAddActAPI(unittest.TestCase): ...@@ -133,7 +133,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
bias_attr=False, bias_attr=False,
data_format='NHWC', data_format='NHWC',
) )
bn1 = fluid.layers.batch_norm( bn1 = paddle.static.nn.batch_norm(
input=conv1_1, input=conv1_1,
param_attr=self.bn_param_attr1, param_attr=self.bn_param_attr1,
bias_attr=self.bn_bias_attr1, bias_attr=self.bn_bias_attr1,
...@@ -150,7 +150,7 @@ class TestFusedBnAddActAPI(unittest.TestCase): ...@@ -150,7 +150,7 @@ class TestFusedBnAddActAPI(unittest.TestCase):
bias_attr=False, bias_attr=False,
data_format='NHWC', data_format='NHWC',
) )
bn2 = fluid.layers.batch_norm( bn2 = paddle.static.nn.batch_norm(
input=conv1_1, input=conv1_1,
param_attr=self.bn_param_attr2, param_attr=self.bn_param_attr2,
bias_attr=self.bn_bias_attr2, bias_attr=self.bn_bias_attr2,
......
...@@ -23,7 +23,7 @@ import paddle.fluid.core as core ...@@ -23,7 +23,7 @@ import paddle.fluid.core as core
def norm(*args, **kargs): def norm(*args, **kargs):
return fluid.layers.batch_norm(*args, **kargs) return paddle.static.nn.batch_norm(*args, **kargs)
def sep_conv(input, channel, stride, filter, dilation=1, act=None): def sep_conv(input, channel, stride, filter, dilation=1, act=None):
......
...@@ -42,9 +42,9 @@ class TestLayer(unittest.TestCase): ...@@ -42,9 +42,9 @@ class TestLayer(unittest.TestCase):
images = fluid.layers.data( images = fluid.layers.data(
name='pixel', shape=[3, 48, 48], dtype='float32' name='pixel', shape=[3, 48, 48], dtype='float32'
) )
hidden1 = fluid.layers.batch_norm(input=images) hidden1 = paddle.static.nn.batch_norm(input=images)
hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu') hidden2 = fluid.layers.fc(input=hidden1, size=128, act='relu')
fluid.layers.batch_norm(input=hidden2) paddle.static.nn.batch_norm(input=hidden2)
print(str(main_program)) print(str(main_program))
......
...@@ -55,8 +55,8 @@ class TestDygraphLoadStatic(unittest.TestCase): ...@@ -55,8 +55,8 @@ class TestDygraphLoadStatic(unittest.TestCase):
batchnorm_in = fluid.data( batchnorm_in = fluid.data(
name="batchnorm_in", shape=[None, 10], dtype='float32' name="batchnorm_in", shape=[None, 10], dtype='float32'
) )
batchnorm_out_1 = fluid.layers.batch_norm(batchnorm_in) batchnorm_out_1 = paddle.static.nn.batch_norm(batchnorm_in)
batchnorm_out_2 = fluid.layers.batch_norm(batchnorm_in) batchnorm_out_2 = paddle.static.nn.batch_norm(batchnorm_in)
emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64') emb_in = fluid.data(name='emb_in', shape=[None, 10], dtype='int64')
emb_out_1 = fluid.embedding(emb_in, [1000, 100]) emb_out_1 = fluid.embedding(emb_in, [1000, 100])
......
...@@ -33,7 +33,7 @@ def convolutional_neural_network(img): ...@@ -33,7 +33,7 @@ def convolutional_neural_network(img):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
) )
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
......
...@@ -56,7 +56,7 @@ class TestInplaceANBOpTraining(unittest.TestCase): ...@@ -56,7 +56,7 @@ class TestInplaceANBOpTraining(unittest.TestCase):
stop_gradient=False, stop_gradient=False,
) )
bn = fluid.layers.batch_norm( bn = paddle.static.nn.batch_norm(
data, data,
param_attr=fluid.ParamAttr(name='bn_scale'), param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'), bias_attr=fluid.ParamAttr(name='bn_bias'),
......
...@@ -38,7 +38,7 @@ def fc_with_batchnorm(use_feed): ...@@ -38,7 +38,7 @@ def fc_with_batchnorm(use_feed):
), ),
) )
hidden = fluid.layers.batch_norm(input=hidden) hidden = paddle.static.nn.batch_norm(input=hidden)
prediction = fluid.layers.fc(hidden, size=10, act='softmax') prediction = fluid.layers.fc(hidden, size=10, act='softmax')
loss = fluid.layers.cross_entropy(input=prediction, label=label) loss = fluid.layers.cross_entropy(input=prediction, label=label)
loss = paddle.mean(loss) loss = paddle.mean(loss)
......
...@@ -2863,7 +2863,7 @@ class TestBook(LayerTest): ...@@ -2863,7 +2863,7 @@ class TestBook(LayerTest):
data = self._get_data( data = self._get_data(
name='data', shape=[32, 128, 128], dtype="float32" name='data', shape=[32, 128, 128], dtype="float32"
) )
out = layers.batch_norm(data) out = paddle.static.nn.batch_norm(data)
return out return out
def make_batch_norm_momentum_variable(self): def make_batch_norm_momentum_variable(self):
...@@ -2879,7 +2879,7 @@ class TestBook(LayerTest): ...@@ -2879,7 +2879,7 @@ class TestBook(LayerTest):
dtype='float32', dtype='float32',
append_batch_size=False, append_batch_size=False,
) )
out = layers.batch_norm(data, momentum=momentum) out = paddle.static.nn.batch_norm(data, momentum=momentum)
return out return out
def make_range(self): def make_range(self):
......
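The make_batch_norm_momentum_variable case above exercises the momentum-as-Tensor path. Roughly the same construction outside the test harness might look like the sketch below (the data shape and the use of paddle.full are illustrative assumptions, not taken from this PR):

import paddle

paddle.enable_static()

data = paddle.static.data(name='data', shape=[-1, 32, 128, 128], dtype='float32')
# momentum can be a 1-element float32 Tensor instead of a python float
momentum = paddle.full(shape=[1], fill_value=0.9, dtype='float32')
out = paddle.static.nn.batch_norm(data, momentum=momentum)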
...@@ -33,7 +33,7 @@ def convolutional_neural_network(img): ...@@ -33,7 +33,7 @@ def convolutional_neural_network(img):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
) )
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
input=conv_pool_1, input=conv_pool_1,
filter_size=5, filter_size=5,
......
...@@ -45,7 +45,7 @@ def conv_net(use_feed): ...@@ -45,7 +45,7 @@ def conv_net(use_feed):
pool_stride=2, pool_stride=2,
act="relu", act="relu",
) )
conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) conv_pool_1 = paddle.static.nn.batch_norm(conv_pool_1)
conv_pool_1 = fluid.layers.cast(conv_pool_1, np.float32) conv_pool_1 = fluid.layers.cast(conv_pool_1, np.float32)
conv_pool_2 = fluid.nets.simple_img_conv_pool( conv_pool_2 = fluid.nets.simple_img_conv_pool(
......
...@@ -171,7 +171,7 @@ class TestBatchNormDoubleGradCheck(unittest.TestCase): ...@@ -171,7 +171,7 @@ class TestBatchNormDoubleGradCheck(unittest.TestCase):
eps = 0.005 eps = 0.005
atol = 1e-4 atol = 1e-4
x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x') x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x')
z = fluid.layers.batch_norm( z = paddle.static.nn.batch_norm(
input=x, input=x,
data_layout=self.data_layout, data_layout=self.data_layout,
use_global_stats=self.use_global_stats, use_global_stats=self.use_global_stats,
...@@ -251,7 +251,7 @@ class TestBatchNormDoubleGradCheckCase5(TestBatchNormDoubleGradCheck): ...@@ -251,7 +251,7 @@ class TestBatchNormDoubleGradCheckCase5(TestBatchNormDoubleGradCheck):
self.shape[1] if self.data_layout == 'NCHW' else self.shape[-1] self.shape[1] if self.data_layout == 'NCHW' else self.shape[-1]
) )
x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x') x = paddle.create_parameter(dtype=dtype, shape=self.shape, name='x')
z = fluid.layers.batch_norm( z = paddle.static.nn.batch_norm(
input=x, input=x,
data_layout=self.data_layout, data_layout=self.data_layout,
use_global_stats=self.use_global_stats, use_global_stats=self.use_global_stats,
......
...@@ -26,10 +26,10 @@ from paddle.fluid import compiler ...@@ -26,10 +26,10 @@ from paddle.fluid import compiler
def Lenet(data, class_dim): def Lenet(data, class_dim):
conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None) conv1 = fluid.layers.conv2d(data, 4, 5, 1, act=None)
bn1 = fluid.layers.batch_norm(conv1, act='relu') bn1 = paddle.static.nn.batch_norm(conv1, act='relu')
pool1 = fluid.layers.pool2d(bn1, 2, 'max', 2) pool1 = fluid.layers.pool2d(bn1, 2, 'max', 2)
conv2 = fluid.layers.conv2d(pool1, 16, 5, 1, act=None) conv2 = fluid.layers.conv2d(pool1, 16, 5, 1, act=None)
bn2 = fluid.layers.batch_norm(conv2, act='relu') bn2 = paddle.static.nn.batch_norm(conv2, act='relu')
pool2 = fluid.layers.pool2d(bn2, 2, 'max', 2) pool2 = fluid.layers.pool2d(bn2, 2, 'max', 2)
fc1 = fluid.layers.fc(pool2, size=50, act='relu') fc1 = fluid.layers.fc(pool2, size=50, act='relu')
......
...@@ -58,7 +58,7 @@ def fc_with_batchnorm(use_feed): ...@@ -58,7 +58,7 @@ def fc_with_batchnorm(use_feed):
), ),
) )
hidden = fluid.layers.batch_norm(input=hidden) hidden = paddle.static.nn.batch_norm(input=hidden)
with fluid.name_scope("fc_layer"): with fluid.name_scope("fc_layer"):
prediction = fluid.layers.fc(hidden, size=10, act='softmax') prediction = fluid.layers.fc(hidden, size=10, act='softmax')
with fluid.name_scope("loss"): with fluid.name_scope("loss"):
......
...@@ -12,8 +12,10 @@ ...@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import unittest import unittest
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -28,7 +30,7 @@ class TestAttrSet(unittest.TestCase): ...@@ -28,7 +30,7 @@ class TestAttrSet(unittest.TestCase):
name='batch_norm_b', name='batch_norm_b',
initializer=fluid.initializer.Constant(value=0.0), initializer=fluid.initializer.Constant(value=0.0),
) )
bn = fluid.layers.batch_norm( bn = paddle.static.nn.batch_norm(
input=x, param_attr=param_attr, bias_attr=bias_attr input=x, param_attr=param_attr, bias_attr=bias_attr
) )
block = fluid.default_main_program().desc.block(0) block = fluid.default_main_program().desc.block(0)
......
...@@ -81,7 +81,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase): ...@@ -81,7 +81,7 @@ class TestSyncBatchNormOpTraining(unittest.TestCase):
bias_attr=False, bias_attr=False,
use_cudnn=use_cudnn, use_cudnn=use_cudnn,
) )
bn = fluid.layers.batch_norm( bn = paddle.static.nn.batch_norm(
conv, conv,
param_attr=fluid.ParamAttr(name='bn_scale'), param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'), bias_attr=fluid.ParamAttr(name='bn_bias'),
......
...@@ -74,11 +74,13 @@ from ..fluid.layers import create_global_var # noqa: F401 ...@@ -74,11 +74,13 @@ from ..fluid.layers import create_global_var # noqa: F401
from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401 from ..fluid.contrib.layers import ctr_metric_bundle # noqa: F401
from ..fluid.layers import exponential_decay # noqa: F401 from ..fluid.layers import exponential_decay # noqa: F401
from .nn.common import batch_norm # noqa: F401
from paddle.static.nn.metric import auc # noqa: F401 from paddle.static.nn.metric import auc # noqa: F401
from paddle.static.nn.metric import accuracy # noqa: F401 from paddle.static.nn.metric import accuracy # noqa: F401
__all__ = [ # noqa __all__ = [ # noqa
'append_backward', 'append_backward',
'batch_norm',
'gradients', 'gradients',
'Executor', 'Executor',
'global_scope', 'global_scope',
......
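With batch_norm re-exported in paddle/static/__init__.py and added to __all__, the layer should be reachable from both paddle.static and paddle.static.nn. A minimal sketch of the intended call paths (the input shape is an illustrative assumption):

import paddle

paddle.enable_static()

x = paddle.static.data(name='x', shape=[-1, 3, 32, 32], dtype='float32')
# both spellings should resolve to the same function after this change
y1 = paddle.static.nn.batch_norm(input=x)
y2 = paddle.static.batch_norm(input=x)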
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
from .common import fc # noqa: F401 from .common import fc # noqa: F401
from .common import batch_norm # noqa: F401
from .common import instance_norm # noqa: F401 from .common import instance_norm # noqa: F401
from .common import data_norm # noqa: F401 from .common import data_norm # noqa: F401
from .common import group_norm # noqa: F401 from .common import group_norm # noqa: F401
...@@ -22,9 +23,7 @@ from .common import conv2d_transpose # noqa: F401 ...@@ -22,9 +23,7 @@ from .common import conv2d_transpose # noqa: F401
from .common import conv3d_transpose # noqa: F401 from .common import conv3d_transpose # noqa: F401
from .common import bilinear_tensor_product # noqa: F401 from .common import bilinear_tensor_product # noqa: F401
from .common import py_func # noqa: F401 from .common import py_func # noqa: F401
from ...tensor.creation import create_parameter # noqa: F401 from ...tensor.creation import create_parameter # noqa: F401
from ...fluid.layers import batch_norm # noqa: F401
from ...fluid.layers import case # noqa: F401 from ...fluid.layers import case # noqa: F401
from ...fluid.layers import cond # noqa: F401 from ...fluid.layers import cond # noqa: F401
from ...fluid.layers import conv2d # noqa: F401 from ...fluid.layers import conv2d # noqa: F401
......
...@@ -2160,6 +2160,328 @@ def bilinear_tensor_product( ...@@ -2160,6 +2160,328 @@ def bilinear_tensor_product(
return helper.append_activation(out) return helper.append_activation(out)
def batch_norm(
input,
act=None,
is_test=False,
momentum=0.9,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
data_layout='NCHW',
in_place=False,
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
use_global_stats=False,
):
r"""
**Batch Normalization Layer**
Can be used as a normalizer function for convolution or fully_connected operations.
The required data format for this layer is one of the following:
1. NHWC `[batch, in_height, in_width, in_channels]`
2. NCHW `[batch, in_channels, in_height, in_width]`
Refer to `Batch Normalization: Accelerating Deep Network Training by Reducing
Internal Covariate Shift <https://arxiv.org/pdf/1502.03167.pdf>`_
for more details.
:math:`input` is the input features over a mini-batch.
.. math::

    \mu_{\beta} &\gets \frac{1}{m} \sum_{i=1}^{m} x_i \qquad &// \text{mini-batch mean} \\
    \sigma_{\beta}^{2} &\gets \frac{1}{m} \sum_{i=1}^{m}(x_i - \mu_{\beta})^2 \qquad &// \text{mini-batch variance} \\
    \hat{x_i} &\gets \frac{x_i - \mu_{\beta}}{\sqrt{\sigma_{\beta}^{2} + \epsilon}} \qquad &// \text{normalize} \\
    y_i &\gets \gamma \hat{x_i} + \beta \qquad &// \text{scale and shift}

.. math::

    moving\_mean &= moving\_mean * momentum + mini\_batch\_mean * (1. - momentum) \\
    moving\_var &= moving\_var * momentum + mini\_batch\_var * (1. - momentum)
moving_mean is the global (running) mean and moving_var is the global (running) variance.
When use_global_stats = True, :math:`\mu_{\beta}`
and :math:`\sigma_{\beta}^{2}` are not the statistics of one mini-batch.
They are global (running) statistics, usually obtained from a
pre-trained model. In that case, training and testing (or inference)
share the same behavior:
.. math::

    \hat{x_i} &\gets \frac{x_i - \mu_{\beta}}{\sqrt{\sigma_{\beta}^{2} + \epsilon}} \\
    y_i &\gets \gamma \hat{x_i} + \beta
Note:
    If build_strategy.sync_batch_norm=True, the batch_norm ops in the network will use
    sync_batch_norm automatically.
    `is_test = True` can only be used in test and inference programs; `is_test` CANNOT be set to True in a train program. If you want to use the global statistics of a pre-trained model in a train program, set `use_global_stats = True` instead.
Args:
input(Tensor): The rank of input Tensor can be 2, 3, 4, 5. The data type
is float16 or float32 or float64.
act(string, Default None): Activation type, linear|relu|prelu|...
is_test (bool, Default False): A flag indicating whether it is in the
    test phase or not.
momentum(float|Tensor, Default 0.9): The value used for the moving_mean and
    moving_var computation. This should be a float number or a Tensor with
    shape [1] and data type float32. The update formulas are:
    :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
    :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
    Default is 0.9.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
of batch_norm. If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of batch_norm.
If it is set to None or one attribute of ParamAttr, batch_norm
will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized zero.
Default: None.
data_layout (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
in_place(bool, Default False): Make the input and output of batch norm reuse memory.
name(str|None): For detailed information, please refer to :ref:`api_guide_Name`.
    Usually this does not need to be set; it is None by default.
moving_mean_name(str, Default None): The name of the moving_mean that stores the global mean. If it
    is set to None, batch_norm saves the global mean under a randomly generated name; otherwise,
    batch_norm saves it under the given name.
moving_variance_name(str, Default None): The name of the moving_variance that stores the global variance.
    If it is set to None, batch_norm saves the global variance under a randomly generated name; otherwise,
    batch_norm saves it under the given name.
do_model_average_for_mean_and_var(bool, Default True): Whether the mean and variance parameters
    should participate in model averaging when model averaging is enabled.
use_global_stats(bool, Default False): Whether to use the global mean and
    variance. In inference or test mode, setting use_global_stats to True
    is equivalent to setting is_test to True.
    In train mode, setting use_global_stats to True makes the global mean
    and variance be used during training as well.
Returns:
A Tensor which is the result of applying batch normalization to the input,
    with the same shape and data type as the input.
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
x = paddle.static.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
hidden1 = paddle.static.nn.fc(x=x, size=200)
print(hidden1.shape)
# [3, 200]
hidden2 = paddle.static.nn.batch_norm(input=hidden1)
print(hidden2.shape)
# [3, 200]
"""
assert (
bias_attr is not False
), "bias_attr should not be False in batch_norm."
helper = LayerHelper('batch_norm', **locals())
check_variable_and_dtype(
input, 'input', ['float16', 'float32', 'float64'], 'batch_norm'
)
dtype = helper.input_dtype()
# use fp32 for bn parameter
if dtype == core.VarDesc.VarType.FP16:
dtype = core.VarDesc.VarType.FP32
input_shape = input.shape
if data_layout == 'NCHW':
    channel_num = input_shape[1]
elif data_layout == 'NHWC':
    channel_num = input_shape[-1]
else:
    raise ValueError("unsupported data layout:" + data_layout)
param_shape = [channel_num]
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=paddle.fluid.initializer.Constant(1.0),
)
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
)
mean = helper.create_parameter(
attr=paddle.ParamAttr(
name=moving_mean_name,
initializer=paddle.fluid.initializer.Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var,
),
shape=param_shape,
dtype=dtype,
)
mean.stop_gradient = True
variance = helper.create_parameter(
attr=paddle.ParamAttr(
name=moving_variance_name,
initializer=paddle.fluid.initializer.Constant(1.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var,
),
shape=param_shape,
dtype=dtype,
)
variance.stop_gradient = True
# create output
# mean and mean_out share the same memory
mean_out = mean
# variance and variance_out share the same memory
variance_out = variance
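# Dynamic graph mode: call the legacy C++ batch_norm kernel directly instead of appending a static-graph op.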
if _non_static_mode():
inputs_has_MomemtumTensor = False
attrs_has_momentum = False
tmp_tensor_type = core.eager.Tensor
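# momentum may be a python float (passed as an op attribute) or a Tensor (passed as an op input)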
if isinstance(momentum, tmp_tensor_type):
inputs_has_MomemtumTensor = True
else:
attrs_has_momentum = True
attrs_ = ()
if attrs_has_momentum:
attrs_ = (
'momentum',
momentum,
'epsilon',
epsilon,
'is_test',
is_test,
'data_layout',
data_layout,
'use_mkldnn',
False,
'fuse_with_relu',
False,
'use_global_stats',
use_global_stats,
)
else:
attrs_ = (
'epsilon',
epsilon,
'is_test',
is_test,
'data_layout',
data_layout,
'use_mkldnn',
False,
'fuse_with_relu',
False,
'use_global_stats',
use_global_stats,
)
if inputs_has_MomemtumTensor:
batch_norm_out, _, _, _, _, _ = paddle._legacy_C_ops.batch_norm(
input,
scale,
bias,
mean,
variance,
momentum,
mean_out,
variance_out,
*attrs_,
)
else:
batch_norm_out, _, _, _, _, _ = paddle._legacy_C_ops.batch_norm(
input,
scale,
bias,
mean,
variance,
None,
mean_out,
variance_out,
*attrs_,
)
return paddle.fluid.dygraph_utils._append_activation_in_dygraph(
batch_norm_out, act=act, use_mkldnn=False
)
saved_mean = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
reserve_space = None
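# Training mode additionally produces a ReserveSpace output that some kernels (e.g. cuDNN) reuse in backward.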
if not is_test:
reserve_space = helper.create_variable_for_type_inference(
dtype=helper.input_dtype(), stop_gradient=True
)
batch_norm_out = (
input if in_place else helper.create_variable_for_type_inference(dtype)
)
inputs = {
"X": input,
"Scale": scale,
"Bias": bias,
"Mean": mean,
"Variance": variance,
"MeanOut": mean_out,
"VarianceOut": variance_out,
}
attrs = {
"epsilon": epsilon,
"is_test": is_test,
"data_layout": data_layout,
"use_mkldnn": False,
"fuse_with_relu": False,
"use_global_stats": use_global_stats,
}
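# A Tensor momentum is fed to the op as an extra input; a plain float stays an op attribute.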
if isinstance(momentum, paddle.static.Variable):
inputs['MomemtumTensor'] = momentum
else:
attrs['momentum'] = momentum
outputs = {
"Y": batch_norm_out,
"MeanOut": mean_out,
"VarianceOut": variance_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance,
}
if reserve_space is not None:
outputs["ReserveSpace"] = reserve_space
helper.append_op(
type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs
)
return helper.append_activation(batch_norm_out)
@static_only @static_only
def prelu(x, mode, param_attr=None, data_format="NCHW", name=None): def prelu(x, mode, param_attr=None, data_format="NCHW", name=None):
r""" r"""
......
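For code migrating along with this PR, the change is essentially a rename of the entry point from fluid.layers.batch_norm to paddle.static.nn.batch_norm. A minimal self-contained sketch under static mode (layer sizes here are illustrative assumptions):

import paddle

paddle.enable_static()

main_prog, startup_prog = paddle.static.Program(), paddle.static.Program()
with paddle.static.program_guard(main_prog, startup_prog):
    img = paddle.static.data(name='img', shape=[-1, 1, 28, 28], dtype='float32')
    conv = paddle.static.nn.conv2d(input=img, num_filters=8, filter_size=3)
    # previously: fluid.layers.batch_norm(conv, act='relu')
    bn = paddle.static.nn.batch_norm(conv, act='relu')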