From 6138331db6ba898bc0f28af427f6beb421c2d21a Mon Sep 17 00:00:00 2001
From: ccrrong <101700995+ccrrong@users.noreply.github.com>
Date: Thu, 24 Nov 2022 17:15:52 +0800
Subject: [PATCH] remove inplace_abn (#48275)

---
 python/paddle/fluid/layers/nn.py              | 288 ------------------
 .../tests/unittests/test_inplace_abn_op.py    | 144 +--------
 .../fluid/tests/unittests/test_layers.py      |  28 --
 3 files changed, 16 insertions(+), 444 deletions(-)

diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 49d6906d6a..0f542bfab1 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -74,7 +74,6 @@ __all__ = [
     'pool2d',
     'pool3d',
     'batch_norm',
-    'inplace_abn',
     'instance_norm',
     'data_norm',
     'reduce_sum',
@@ -2797,293 +2796,6 @@ def batch_norm(
     return helper.append_activation(batch_norm_out)
 
 
-def inplace_abn(
-    input,
-    act=None,
-    is_test=False,
-    momentum=0.9,
-    epsilon=1e-05,
-    param_attr=None,
-    bias_attr=None,
-    data_layout='NCHW',
-    name=None,
-    moving_mean_name=None,
-    moving_variance_name=None,
-    do_model_average_for_mean_and_var=True,
-    use_global_stats=False,
-    act_alpha=1.0,
-):
-    r"""
-    **In-place Activation Batch Normalization Layer**
-
-    This layer calculates batch normalization and activation with in-place memory.
-    For batch normalization calculations, see `fluid.layers.batch_norm`.
-    For in-place activation batch normalization, see `In-Place Activated BatchNorm for
-    Memory-Optimized Training of DNNs <https://arxiv.org/abs/1712.02616>`_
-
-    `inplace_abn` only support activation type as `None`, `identity`, `leaky_relu`,
-    `elu` currently.
-    `inplace_abn` only support data type as `float32`, `float64` currently.
-
-    Note:
-        if build_strategy.sync_batch_norm=True, the batch_norm in network will use
-        sync_batch_norm automatically.
-        `is_test = True` can only be used in test program and inference program, `is_test` CANNOT be set to True in train program, if you want to use global status from pre_train model in train program, please set `use_global_stats = True`.
-
-    Args:
-        input(Variable): The rank of input variable can be 2, 3, 4, 5. The data type
-            is float16 or float32 or float64.
-        act(string, Default None): Activation type, linear|relu|prelu|...
-        is_test (bool, Default False): A flag indicating whether it is in
-            test phrase or not.
-        momentum(float|Variable, Default 0.9): The value used for the moving_mean and
-            moving_var computation. This should be a float number or a Variable with
-            shape [1] and data type as float32. The updated formula is:
-            :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
-            :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
-            Default is 0.9.
-        epsilon(float, Default 1e-05): A value added to the denominator for
-            numerical stability. Default is 1e-5.
-        param_attr(ParamAttr|None): The parameter attribute for Parameter `scale`
-            of inplace_abn. If it is set to None or one attribute of ParamAttr, inplace_abn
-            will create ParamAttr as param_attr, the name of scale can be set in ParamAttr.
-            If the Initializer of the param_attr is not set, the parameter is initialized
-            with Xavier. Default: None.
-        bias_attr(ParamAttr|None): The parameter attribute for the bias of inplace_abn.
-            If it is set to None or one attribute of ParamAttr, inplace_abn
-            will create ParamAttr as bias_attr, the name of bias can be set in ParamAttr.
-            If the Initializer of the bias_attr is not set, the bias is initialized zero.
-            Default: None.
-        data_layout (str, optional): Specify the data format of the input, and the data format of the output
-            will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
-            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
-            `[batch_size, input_channels, input_height, input_width]`.
-        name(str|None): For detailed information, please refer to :ref:`api_guide_Name`.
-            Usually name is no need to set and None by default.
-        moving_mean_name(str, Default None): The name of moving_mean which store the global Mean. If it
-            is set to None, inplace_abn will save global mean with a random name, otherwise, inplace_abn
-            will save global mean with the string.
-        moving_variance_name(str, Default None): The name of the moving_variance which store the global Variance.
-            If it is set to None, inplace_abn, will save global variance with a random name, otherwise, inplace_abn
-            will save global variance with the string.
-        do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance should do model
-            average when model average is enabled.
-        use_global_stats(bool, Default False): Whether to use global mean and
-            variance. In inference or test mode, set use_global_stats to true
-            or is_test to true, and the behavior is equivalent.
-            In train mode, when setting use_global_stats True, the global mean
-            and variance are also used during train period.
-        act_alpha(float, Default 1.0): when activation is in ['elu', 'identity', 'leaky_relu'],
-            inplace activative batch normalization will be used, and alpha parameter for activation
-            can be given by this parameter.
-    Returns:
-        A Variable holding Tensor which is the result after applying batch normalization and activation on the input,
-        has same shape and data type with input.
-
-    Examples:
-
-        .. code-block:: python
-
-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
-            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
-            hidden2 = fluid.layers.inplace_abn(input=hidden1)
-            hidden3 = fluid.layers.inplace_abn(input=hidden2, act='leaky_relu', act_alpha=0.2)
-
-    """
-    assert act in [None, 'identity', 'leaky_relu', 'elu'], (
-        "inplace_abn only support act as None, 'identity', "
-        "'leaky_relu', 'elu' currently"
-    )
-    assert (
-        bias_attr is not False
-    ), "bias_attr should not be False in inplace_abn."
-    helper = LayerHelper('inplace_abn', **locals())
-
-    check_variable_and_dtype(
-        input, 'input', ['float32', 'float64'], 'inplace_abn'
-    )
-    dtype = helper.input_dtype()
-
-    input_shape = input.shape
-    if data_layout == 'NCHW':
-        channel_num = input_shape[1]
-    else:
-        if data_layout == 'NHWC':
-            channel_num = input_shape[-1]
-        else:
-            raise ValueError("unsupported data layout:" + data_layout)
-
-    param_shape = [channel_num]
-
-    # create parameter
-    scale = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=param_shape,
-        dtype=dtype,
-        default_initializer=Constant(1.0),
-    )
-    bias = helper.create_parameter(
-        attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
-    )
-
-    mean = helper.create_parameter(
-        attr=ParamAttr(
-            name=moving_mean_name,
-            initializer=Constant(0.0),
-            trainable=False,
-            do_model_average=do_model_average_for_mean_and_var,
-        ),
-        shape=param_shape,
-        dtype=dtype,
-    )
-    mean.stop_gradient = True
-
-    variance = helper.create_parameter(
-        attr=ParamAttr(
-            name=moving_variance_name,
-            initializer=Constant(1.0),
-            trainable=False,
-            do_model_average=do_model_average_for_mean_and_var,
-        ),
-        shape=param_shape,
-        dtype=dtype,
-    )
-    variance.stop_gradient = True
-
-    # create output
-    # mean and mean_out share the same memory
-    mean_out = mean
-    # variance and variance out share the same memory
-    variance_out = variance
-    # batch_norm_out and input share the same memory
-    batch_norm_out = input
-
-    if in_dygraph_mode():
-        inputs_has_MomemtumTensor = False
-        attrs_has_momentum = False
-        tmp_tensor_type = core.eager.Tensor
-        if isinstance(momentum, tmp_tensor_type):
-            inputs_has_MomemtumTensor = True
-        else:
-            attrs_has_momentum = True
-
-        attrs__ = ()
-        if attrs_has_momentum:
-            attrs__ = (
-                'momentum',
-                momentum,
-                'epsilon',
-                epsilon,
-                'is_test',
-                is_test,
-                'data_layout',
-                data_layout,
-                'use_mkldnn',
-                False,
-                'fuse_with_relu',
-                False,
-                'use_global_stats',
-                use_global_stats,
-                'activation',
-                act,
-                'alpha',
-                act_alpha,
-            )
-        else:
-            attrs__ = (
-                'epsilon',
-                epsilon,
-                'is_test',
-                is_test,
-                'data_layout',
-                data_layout,
-                'use_mkldnn',
-                False,
-                'fuse_with_relu',
-                False,
-                'use_global_stats',
-                use_global_stats,
-                'activation',
-                act,
-                'alpha',
-                act_alpha,
-            )
-        if inputs_has_MomemtumTensor:
-            batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
-                input,
-                scale,
-                bias,
-                mean,
-                variance,
-                momentum,
-                mean_out,
-                variance_out,
-                *attrs__,
-            )
-            return batch_norm_out
-        else:
-            batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
-                input,
-                scale,
-                bias,
-                mean,
-                variance,
-                None,
-                mean_out,
-                variance_out,
-                *attrs__,
-            )
-            return batch_norm_out
-
-    saved_mean = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    saved_variance = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    reserve_space = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-
-    inputs = {
-        "X": input,
-        "Scale": scale,
-        "Bias": bias,
-        "Mean": mean,
-        "Variance": variance,
-    }
-    attrs = {
-        "epsilon": epsilon,
-        "is_test": is_test,
-        "data_layout": data_layout,
-        "use_mkldnn": False,
-        "fuse_with_relu": False,
-        "use_global_stats": use_global_stats,
-        "activation": act,
-        "alpha": act_alpha,
-    }
-    if isinstance(momentum, Variable):
-        inputs['MomemtumTensor'] = momentum
-    else:
-        attrs['momentum'] = momentum
-    outputs = {
-        "Y": batch_norm_out,
-        "MeanOut": mean_out,
-        "VarianceOut": variance_out,
-        "SavedMean": saved_mean,
-        "SavedVariance": saved_variance,
-    }
-    if reserve_space is not None:
-        outputs["ReserveSpace"] = reserve_space
-
-    helper.append_op(
-        type="inplace_abn", inputs=inputs, outputs=outputs, attrs=attrs
-    )
-
-    return batch_norm_out
-
-
 def instance_norm(
     input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
 ):
diff --git a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py
index 56f3c13f4f..f29dbc7086 100644
--- a/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_abn_op.py
@@ -17,7 +17,6 @@ import numpy as np
 import os
 import paddle.fluid.core as core
 import paddle.fluid as fluid
-from paddle.fluid import compiler
 import paddle
 
 
@@ -54,33 +53,21 @@ class TestInplaceANBOpTraining(unittest.TestCase):
             append_batch_size=False,
             stop_gradient=False,
         )
-        if inplace:
-            bn = fluid.layers.inplace_abn(
-                data,
-                act=activation,
-                param_attr=fluid.ParamAttr(name='bn_scale'),
-                bias_attr=fluid.ParamAttr(name='bn_bias'),
-                moving_mean_name='bn_moving_mean',
-                moving_variance_name='bn_moving_variance',
-                data_layout=layout,
-                is_test=only_forward,
-                act_alpha=alpha,
-            )
-        else:
-            bn = fluid.layers.batch_norm(
-                data,
-                param_attr=fluid.ParamAttr(name='bn_scale'),
-                bias_attr=fluid.ParamAttr(name='bn_bias'),
-                moving_mean_name='bn_moving_mean',
-                moving_variance_name='bn_moving_variance',
-                data_layout=layout,
-                is_test=only_forward,
-                in_place=inplace,
-            )
-            if activation == 'leaky_relu':
-                bn = paddle.nn.functional.leaky_relu(bn, alpha)
-            if activation == 'elu':
-                bn = paddle.nn.functional.elu(bn, alpha)
+
+        bn = fluid.layers.batch_norm(
+            data,
+            param_attr=fluid.ParamAttr(name='bn_scale'),
+            bias_attr=fluid.ParamAttr(name='bn_bias'),
+            moving_mean_name='bn_moving_mean',
+            moving_variance_name='bn_moving_variance',
+            data_layout=layout,
+            is_test=only_forward,
+            in_place=inplace,
+        )
+        if activation == 'leaky_relu':
+            bn = paddle.nn.functional.leaky_relu(bn, alpha)
+        if activation == 'elu':
+            bn = paddle.nn.functional.elu(bn, alpha)
 
         # NOTE: in inplace mode input and output of bn
         # may have same name, multiply 1. to generate
@@ -94,105 +81,6 @@ class TestInplaceANBOpTraining(unittest.TestCase):
         sgd_opt.backward(out)
         return main, startup, [out, bn]
 
-    def compare(self, place, layout, only_forward, activation, alpha, use_cuda):
-        seed = 10
-        os.environ['FLAGS_cudnn_deterministic'] = "1"
-        data = np.random.random(size=self.dshape).astype(self.dtype) * 4.0 - 2
-
-        fetch_outs = []
-        fetch_names = []
-        for inplace in [False, True]:
-            main, startup, outs = self.build_program(
-                place,
-                layout,
-                seed,
-                only_forward,
-                activation,
-                alpha,
-                inplace=inplace,
-            )
-            exe = fluid.Executor(place)
-            exe.run(startup)
-
-            fetch_name = [v.name for v in outs] + [
-                'bn_moving_mean',
-                'bn_moving_variance',
-                'bn_scale',
-                'bn_bias',
-            ]
-            if not only_forward:
-                others = [
-                    'inplace_abn_0.tmp_0' if inplace else 'batch_norm_0.tmp_0',
-                    'inplace_abn_0.tmp_1' if inplace else 'batch_norm_0.tmp_1',
-                    'bn_scale@GRAD',
-                    'bn_bias@GRAD',
-                    'input@GRAD',
-                ]
-                fetch_name += others
-            for nm in fetch_name:
-                fv = fluid.framework._get_var(str(nm), program=main)
-                fv.persistable = True
-
-            build_strategy = fluid.BuildStrategy()
-            build_strategy.sync_batch_norm = (
-                use_cuda and fluid.core.get_cuda_device_count() > 1
-            )
-            build_strategy.enable_inplace = inplace
-            exec_strategy = fluid.ExecutionStrategy()
-            exec_strategy.num_threads = 1 if os.name == 'nt' else 0
-            comp_prog1 = compiler.CompiledProgram(main).with_data_parallel(
-                outs[0].name if not only_forward else None,
-                build_strategy=build_strategy,
-                exec_strategy=exec_strategy,
-            )
-            bn_fetches = exe.run(
-                program=main, feed={'input': data}, fetch_list=fetch_name
-            )
-            fetch_outs.append(bn_fetches)
-            fetch_names.append(fetch_name)
-
-        for bn_val, inplace_abn_val, name1, name2 in zip(
-            *(fetch_outs + fetch_names)
-        ):
-            np.testing.assert_allclose(
-                bn_val,
-                inplace_abn_val,
-                rtol=1e-05,
-                atol=0.01,
-                err_msg='Output ('
-                + name1
-                + ':'
-                + name2
-                + ') has diff on {} with {} layout and {} activation. \n'.format(
-                    place, layout, activation
-                )
-                + '\nBN '
-                + str(bn_val)
-                + '\n'
-                + 'Inplace ABN '
-                + str(inplace_abn_val),
-            )
-
-    def test_op(self):
-        use_cudas = [False, True] if core.is_compiled_with_cuda() else [False]
-        # use_cudas = [False]
-        for use_cuda in use_cudas:
-            place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
-            layouts = ["NCHW", "NHWC"]
-            for layout in layouts:
-                for activation, alpha in zip(
-                    [None, 'elu', 'leaky_relu'], [0.0, 1.0, 0.02]
-                ):
-                    for infer_only in [True, False]:
-                        self.compare(
-                            place,
-                            layout,
-                            infer_only,
-                            activation,
-                            alpha,
-                            use_cuda,
-                        )
-
     def test_all_branches(self):
         seed = 10
         os.environ['FLAGS_cudnn_deterministic'] = "1"
@@ -212,7 +100,7 @@ class TestInplaceANBOpTraining(unittest.TestCase):
                         activation,
                         alpha,
                         use_cuda,
-                        True,
+                        False,
                     )
                     exe = fluid.Executor(place)
                     exe.run(startup)
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index ed230ea98b..937c027222 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -3686,34 +3686,6 @@ class TestBook(LayerTest):
             out = layers.batch_norm(data, momentum=momentum)
             return out
 
-    def make_inplace_abn(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            data = self._get_data(
-                name='data', shape=[32, 128, 128], dtype="float32"
-            )
-            out = layers.inplace_abn(data, act='leaky_relu', act_alpha=0.2)
-            return out
-
-    def make_inplace_abn_momentum_variable(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            data = self._get_data(
-                name='data', shape=[32, 128, 128], dtype="float32"
-            )
-            momentum = self._get_data(
-                name='momentum',
-                shape=[1],
-                dtype='float32',
-                append_batch_size=False,
-            )
-            out = layers.inplace_abn(
-                data, momentum=momentum, act='elu', act_alpha=2.0
-            )
-            return out
-
     def make_range(self):
         with program_guard(
             fluid.default_main_program(), fluid.default_startup_program()
--
GitLab
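
A rough, hypothetical migration sketch follows; it is not part of the patch, and it assumes the 2.4-era Fluid static-graph API that this PR targets is still available. It shows the same substitution the updated unit test makes: callers of the removed fluid.layers.inplace_abn compose a plain batch_norm with a standalone activation instead.

# Hypothetical sketch only: replace
#   fluid.layers.inplace_abn(x, act='leaky_relu', act_alpha=0.2)
# with batch_norm followed by a separate activation.
import paddle
import paddle.fluid as fluid

paddle.enable_static()

x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
bn = fluid.layers.batch_norm(
    x,
    param_attr=fluid.ParamAttr(name='bn_scale'),
    bias_attr=fluid.ParamAttr(name='bn_bias'),
    data_layout='NCHW',
)
# act_alpha of the removed API maps to the activation's own slope/alpha argument.
out = paddle.nn.functional.leaky_relu(bn, 0.2)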