Unverified commit 6138331d authored by ccrrong, committed by GitHub

remove inplace_abn (#48275)

Parent de4310e6
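
With `inplace_abn` removed, its behaviour can be approximated by composing `fluid.layers.batch_norm` with a standalone activation, which is the pattern the updated unit test in this commit switches to. Below is a minimal migration sketch, assuming the legacy `fluid` static-graph API on this branch is still available; the tensor name, shape, and slope value are illustrative only, not taken from the commit.

# Minimal migration sketch (assumption: legacy fluid static-graph API available).
import paddle
import paddle.fluid as fluid

paddle.enable_static()

# Illustrative input; the name and shape are arbitrary.
x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')

# Before: out = fluid.layers.inplace_abn(x, act='leaky_relu', act_alpha=0.2)
# After:  plain batch_norm followed by the activation, mirroring the updated test.
bn = fluid.layers.batch_norm(
    x,
    param_attr=fluid.ParamAttr(name='bn_scale'),
    bias_attr=fluid.ParamAttr(name='bn_bias'),
    data_layout='NCHW',
)
out = paddle.nn.functional.leaky_relu(bn, negative_slope=0.2)

Note that this composition only matches the numerical behaviour of batch normalization plus activation; it does not reproduce the in-place memory reuse that `inplace_abn` provided.
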
@@ -74,7 +74,6 @@ __all__ = [
     'pool2d',
     'pool3d',
     'batch_norm',
-    'inplace_abn',
     'instance_norm',
     'data_norm',
     'reduce_sum',
@@ -2797,293 +2796,6 @@ def batch_norm(
     return helper.append_activation(batch_norm_out)
-def inplace_abn(
-    input,
-    act=None,
-    is_test=False,
-    momentum=0.9,
-    epsilon=1e-05,
-    param_attr=None,
-    bias_attr=None,
-    data_layout='NCHW',
-    name=None,
-    moving_mean_name=None,
-    moving_variance_name=None,
-    do_model_average_for_mean_and_var=True,
-    use_global_stats=False,
-    act_alpha=1.0,
-):
-    r"""
-    **In-place Activation Batch Normalization Layer**
-    This layer calculates batch normalization and activation with in-place memory.
-    For batch normalization calculations, see `fluid.layers.batch_norm`.
-    For in-place activation batch normalization, see `In-Place Activated BatchNorm for
-    Memory-Optimized Training of DNNs <https://arxiv.org/abs/1712.02616>`_
-    `inplace_abn` currently only supports the activation types `None`, `identity`,
-    `leaky_relu`, and `elu`.
-    `inplace_abn` currently only supports the data types `float32` and `float64`.
-    Note:
-        If build_strategy.sync_batch_norm=True, the batch_norm in the network will use
-        sync_batch_norm automatically.
-        `is_test = True` can only be used in test and inference programs; `is_test` CANNOT be set to True in a train program. If you want to use global statistics from a pre-trained model in a train program, set `use_global_stats = True` instead.
-    Args:
-        input(Variable): The rank of the input variable can be 2, 3, 4 or 5. The data type
-            is float16, float32 or float64.
-        act(string, Default None): Activation type, linear|relu|prelu|...
-        is_test (bool, Default False): A flag indicating whether it is in
-            test phase or not.
-        momentum(float|Variable, Default 0.9): The value used for the moving_mean and
-            moving_var computation. This should be a float number or a Variable with
-            shape [1] and data type float32. The update formulas are:
-            :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
-            :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
-            Default is 0.9.
-        epsilon(float, Default 1e-05): A value added to the denominator for
-            numerical stability. Default is 1e-5.
-        param_attr(ParamAttr|None): The parameter attribute for the Parameter `scale`
-            of inplace_abn. If it is set to None or one attribute of ParamAttr, inplace_abn
-            will create a ParamAttr as param_attr; the name of the scale can be set in ParamAttr.
-            If the Initializer of the param_attr is not set, the parameter is initialized
-            with Xavier. Default: None.
-        bias_attr(ParamAttr|None): The parameter attribute for the bias of inplace_abn.
-            If it is set to None or one attribute of ParamAttr, inplace_abn
-            will create a ParamAttr as bias_attr; the name of the bias can be set in ParamAttr.
-            If the Initializer of the bias_attr is not set, the bias is initialized to zero.
-            Default: None.
-        data_layout (str, optional): Specify the data format of the input; the data format of the output
-            will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
-            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
-            `[batch_size, input_channels, input_height, input_width]`.
-        name(str|None): For detailed information, please refer to :ref:`api_guide_Name`.
-            Usually there is no need to set this name; it is None by default.
-        moving_mean_name(str, Default None): The name of the moving_mean which stores the global mean. If it
-            is set to None, inplace_abn will save the global mean with a random name; otherwise, inplace_abn
-            will save the global mean under the given name.
-        moving_variance_name(str, Default None): The name of the moving_variance which stores the global variance.
-            If it is set to None, inplace_abn will save the global variance with a random name; otherwise, inplace_abn
-            will save the global variance under the given name.
-        do_model_average_for_mean_and_var(bool, Default True): Whether the mean and variance parameters should take part
-            in model averaging when model averaging is enabled.
-        use_global_stats(bool, Default False): Whether to use the global mean and
-            variance. In inference or test mode, setting use_global_stats to True
-            or is_test to True is equivalent.
-            In training mode, when use_global_stats is set to True, the global mean
-            and variance are also used during training.
-        act_alpha(float, Default 1.0): When the activation is one of ['elu', 'identity', 'leaky_relu'],
-            in-place activated batch normalization is used, and the alpha parameter of the activation
-            can be given by this parameter.
-    Returns:
-        A Variable holding a Tensor which is the result of applying batch normalization and the activation to the input,
-        with the same shape and data type as the input.
-    Examples:
-        .. code-block:: python
-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
-            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
-            hidden2 = fluid.layers.inplace_abn(input=hidden1)
-            hidden3 = fluid.layers.inplace_abn(input=hidden2, act='leaky_relu', act_alpha=0.2)
-    """
-    assert act in [None, 'identity', 'leaky_relu', 'elu'], (
-        "inplace_abn only support act as None, 'identity', "
-        "'leaky_relu', 'elu' currently"
-    )
-    assert (
-        bias_attr is not False
-    ), "bias_attr should not be False in inplace_abn."
-    helper = LayerHelper('inplace_abn', **locals())
-    check_variable_and_dtype(
-        input, 'input', ['float32', 'float64'], 'inplace_abn'
-    )
-    dtype = helper.input_dtype()
-    input_shape = input.shape
-    if data_layout == 'NCHW':
-        channel_num = input_shape[1]
-    else:
-        if data_layout == 'NHWC':
-            channel_num = input_shape[-1]
-        else:
-            raise ValueError("unsupported data layout:" + data_layout)
-    param_shape = [channel_num]
-    # create parameter
-    scale = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=param_shape,
-        dtype=dtype,
-        default_initializer=Constant(1.0),
-    )
-    bias = helper.create_parameter(
-        attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
-    )
-    mean = helper.create_parameter(
-        attr=ParamAttr(
-            name=moving_mean_name,
-            initializer=Constant(0.0),
-            trainable=False,
-            do_model_average=do_model_average_for_mean_and_var,
-        ),
-        shape=param_shape,
-        dtype=dtype,
-    )
-    mean.stop_gradient = True
-    variance = helper.create_parameter(
-        attr=ParamAttr(
-            name=moving_variance_name,
-            initializer=Constant(1.0),
-            trainable=False,
-            do_model_average=do_model_average_for_mean_and_var,
-        ),
-        shape=param_shape,
-        dtype=dtype,
-    )
-    variance.stop_gradient = True
-    # create output
-    # mean and mean_out share the same memory
-    mean_out = mean
-    # variance and variance out share the same memory
-    variance_out = variance
-    # batch_norm_out and input share the same memory
-    batch_norm_out = input
-    if in_dygraph_mode():
-        inputs_has_MomemtumTensor = False
-        attrs_has_momentum = False
-        tmp_tensor_type = core.eager.Tensor
-        if isinstance(momentum, tmp_tensor_type):
-            inputs_has_MomemtumTensor = True
-        else:
-            attrs_has_momentum = True
-        attrs__ = ()
-        if attrs_has_momentum:
-            attrs__ = (
-                'momentum',
-                momentum,
-                'epsilon',
-                epsilon,
-                'is_test',
-                is_test,
-                'data_layout',
-                data_layout,
-                'use_mkldnn',
-                False,
-                'fuse_with_relu',
-                False,
-                'use_global_stats',
-                use_global_stats,
-                'activation',
-                act,
-                'alpha',
-                act_alpha,
-            )
-        else:
-            attrs__ = (
-                'epsilon',
-                epsilon,
-                'is_test',
-                is_test,
-                'data_layout',
-                data_layout,
-                'use_mkldnn',
-                False,
-                'fuse_with_relu',
-                False,
-                'use_global_stats',
-                use_global_stats,
-                'activation',
-                act,
-                'alpha',
-                act_alpha,
-            )
-        if inputs_has_MomemtumTensor:
-            batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
-                input,
-                scale,
-                bias,
-                mean,
-                variance,
-                momentum,
-                mean_out,
-                variance_out,
-                *attrs__,
-            )
-            return batch_norm_out
-        else:
-            batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
-                input,
-                scale,
-                bias,
-                mean,
-                variance,
-                None,
-                mean_out,
-                variance_out,
-                *attrs__,
-            )
-            return batch_norm_out
-    saved_mean = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    saved_variance = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    reserve_space = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    inputs = {
-        "X": input,
-        "Scale": scale,
-        "Bias": bias,
-        "Mean": mean,
-        "Variance": variance,
-    }
-    attrs = {
-        "epsilon": epsilon,
-        "is_test": is_test,
-        "data_layout": data_layout,
-        "use_mkldnn": False,
-        "fuse_with_relu": False,
-        "use_global_stats": use_global_stats,
-        "activation": act,
-        "alpha": act_alpha,
-    }
-    if isinstance(momentum, Variable):
-        inputs['MomemtumTensor'] = momentum
-    else:
-        attrs['momentum'] = momentum
-    outputs = {
-        "Y": batch_norm_out,
-        "MeanOut": mean_out,
-        "VarianceOut": variance_out,
-        "SavedMean": saved_mean,
-        "SavedVariance": saved_variance,
-    }
-    if reserve_space is not None:
-        outputs["ReserveSpace"] = reserve_space
-    helper.append_op(
-        type="inplace_abn", inputs=inputs, outputs=outputs, attrs=attrs
-    )
-    return batch_norm_out
 def instance_norm(
     input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
 ):
...
@@ -17,7 +17,6 @@ import numpy as np
 import os
 import paddle.fluid.core as core
 import paddle.fluid as fluid
-from paddle.fluid import compiler
 import paddle
@@ -54,33 +53,21 @@ class TestInplaceANBOpTraining(unittest.TestCase):
             append_batch_size=False,
             stop_gradient=False,
         )
-        if inplace:
-            bn = fluid.layers.inplace_abn(
-                data,
-                act=activation,
-                param_attr=fluid.ParamAttr(name='bn_scale'),
-                bias_attr=fluid.ParamAttr(name='bn_bias'),
-                moving_mean_name='bn_moving_mean',
-                moving_variance_name='bn_moving_variance',
-                data_layout=layout,
-                is_test=only_forward,
-                act_alpha=alpha,
-            )
-        else:
-            bn = fluid.layers.batch_norm(
-                data,
-                param_attr=fluid.ParamAttr(name='bn_scale'),
-                bias_attr=fluid.ParamAttr(name='bn_bias'),
-                moving_mean_name='bn_moving_mean',
-                moving_variance_name='bn_moving_variance',
-                data_layout=layout,
-                is_test=only_forward,
-                in_place=inplace,
-            )
-            if activation == 'leaky_relu':
-                bn = paddle.nn.functional.leaky_relu(bn, alpha)
-            if activation == 'elu':
-                bn = paddle.nn.functional.elu(bn, alpha)
+        bn = fluid.layers.batch_norm(
+            data,
+            param_attr=fluid.ParamAttr(name='bn_scale'),
+            bias_attr=fluid.ParamAttr(name='bn_bias'),
+            moving_mean_name='bn_moving_mean',
+            moving_variance_name='bn_moving_variance',
+            data_layout=layout,
+            is_test=only_forward,
+            in_place=inplace,
+        )
+        if activation == 'leaky_relu':
+            bn = paddle.nn.functional.leaky_relu(bn, alpha)
+        if activation == 'elu':
+            bn = paddle.nn.functional.elu(bn, alpha)
         # NOTE: in inplace mode input and output of bn
         # may have same name, multiply 1. to generate
@@ -94,105 +81,6 @@ class TestInplaceANBOpTraining(unittest.TestCase):
         sgd_opt.backward(out)
         return main, startup, [out, bn]
-    def compare(self, place, layout, only_forward, activation, alpha, use_cuda):
-        seed = 10
-        os.environ['FLAGS_cudnn_deterministic'] = "1"
-        data = np.random.random(size=self.dshape).astype(self.dtype) * 4.0 - 2
-        fetch_outs = []
-        fetch_names = []
-        for inplace in [False, True]:
-            main, startup, outs = self.build_program(
-                place,
-                layout,
-                seed,
-                only_forward,
-                activation,
-                alpha,
-                inplace=inplace,
-            )
-            exe = fluid.Executor(place)
-            exe.run(startup)
-            fetch_name = [v.name for v in outs] + [
-                'bn_moving_mean',
-                'bn_moving_variance',
-                'bn_scale',
-                'bn_bias',
-            ]
-            if not only_forward:
-                others = [
-                    'inplace_abn_0.tmp_0' if inplace else 'batch_norm_0.tmp_0',
-                    'inplace_abn_0.tmp_1' if inplace else 'batch_norm_0.tmp_1',
-                    'bn_scale@GRAD',
-                    'bn_bias@GRAD',
-                    'input@GRAD',
-                ]
-                fetch_name += others
-            for nm in fetch_name:
-                fv = fluid.framework._get_var(str(nm), program=main)
-                fv.persistable = True
-            build_strategy = fluid.BuildStrategy()
-            build_strategy.sync_batch_norm = (
-                use_cuda and fluid.core.get_cuda_device_count() > 1
-            )
-            build_strategy.enable_inplace = inplace
-            exec_strategy = fluid.ExecutionStrategy()
-            exec_strategy.num_threads = 1 if os.name == 'nt' else 0
-            comp_prog1 = compiler.CompiledProgram(main).with_data_parallel(
-                outs[0].name if not only_forward else None,
-                build_strategy=build_strategy,
-                exec_strategy=exec_strategy,
-            )
-            bn_fetches = exe.run(
-                program=main, feed={'input': data}, fetch_list=fetch_name
-            )
-            fetch_outs.append(bn_fetches)
-            fetch_names.append(fetch_name)
-        for bn_val, inplace_abn_val, name1, name2 in zip(
-            *(fetch_outs + fetch_names)
-        ):
-            np.testing.assert_allclose(
-                bn_val,
-                inplace_abn_val,
-                rtol=1e-05,
-                atol=0.01,
-                err_msg='Output ('
-                + name1
-                + ':'
-                + name2
-                + ') has diff on {} with {} layout and {} activation. \n'.format(
-                    place, layout, activation
-                )
-                + '\nBN '
-                + str(bn_val)
-                + '\n'
-                + 'Inplace ABN '
-                + str(inplace_abn_val),
-            )
-    def test_op(self):
-        use_cudas = [False, True] if core.is_compiled_with_cuda() else [False]
-        # use_cudas = [False]
-        for use_cuda in use_cudas:
-            place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
-            layouts = ["NCHW", "NHWC"]
-            for layout in layouts:
-                for activation, alpha in zip(
-                    [None, 'elu', 'leaky_relu'], [0.0, 1.0, 0.02]
-                ):
-                    for infer_only in [True, False]:
-                        self.compare(
-                            place,
-                            layout,
-                            infer_only,
-                            activation,
-                            alpha,
-                            use_cuda,
-                        )
     def test_all_branches(self):
         seed = 10
         os.environ['FLAGS_cudnn_deterministic'] = "1"
@@ -212,7 +100,7 @@ class TestInplaceANBOpTraining(unittest.TestCase):
                     activation,
                     alpha,
                     use_cuda,
-                    True,
+                    False,
                 )
                 exe = fluid.Executor(place)
                 exe.run(startup)
...
@@ -3686,34 +3686,6 @@ class TestBook(LayerTest):
             out = layers.batch_norm(data, momentum=momentum)
             return out
-    def make_inplace_abn(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            data = self._get_data(
-                name='data', shape=[32, 128, 128], dtype="float32"
-            )
-            out = layers.inplace_abn(data, act='leaky_relu', act_alpha=0.2)
-            return out
-    def make_inplace_abn_momentum_variable(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            data = self._get_data(
-                name='data', shape=[32, 128, 128], dtype="float32"
-            )
-            momentum = self._get_data(
-                name='momentum',
-                shape=[1],
-                dtype='float32',
-                append_batch_size=False,
-            )
-            out = layers.inplace_abn(
-                data, momentum=momentum, act='elu', act_alpha=2.0
-            )
-            return out
     def make_range(self):
         with program_guard(
             fluid.default_main_program(), fluid.default_startup_program()
...