Unverified commit 6138331d, authored by ccrrong, committed by GitHub

remove inplace_abn (#48275)

Parent de4310e6
......@@ -74,7 +74,6 @@ __all__ = [
'pool2d',
'pool3d',
'batch_norm',
'inplace_abn',
'instance_norm',
'data_norm',
'reduce_sum',
......@@ -2797,293 +2796,6 @@ def batch_norm(
return helper.append_activation(batch_norm_out)
def inplace_abn(
input,
act=None,
is_test=False,
momentum=0.9,
epsilon=1e-05,
param_attr=None,
bias_attr=None,
data_layout='NCHW',
name=None,
moving_mean_name=None,
moving_variance_name=None,
do_model_average_for_mean_and_var=True,
use_global_stats=False,
act_alpha=1.0,
):
r"""
**In-place Activation Batch Normalization Layer**
This layer calculates batch normalization and activation with in-place memory.
For batch normalization calculations, see `fluid.layers.batch_norm`.
For in-place activation batch normalization, see `In-Place Activated BatchNorm for
Memory-Optimized Training of DNNs <https://arxiv.org/abs/1712.02616>`_
`inplace_abn` currently only supports the activation types `None`, `identity`,
`leaky_relu`, and `elu`.
`inplace_abn` currently only supports the data types `float32` and `float64`.
Note:
if build_strategy.sync_batch_norm=True, the batch_norm in network will use
sync_batch_norm automatically.
`is_test = True` can only be used in test and inference programs; `is_test` CANNOT be set to True in a train program. If you want to use the global statistics from a pre-trained model in a train program, please set `use_global_stats = True`.
Args:
input(Variable): The rank of the input variable can be 2, 3, 4 or 5. The data type
is float32 or float64.
act(string, Default None): Activation type; only None, 'identity', 'leaky_relu'
and 'elu' are supported.
is_test (bool, Default False): A flag indicating whether it is in
test phase or not.
momentum(float|Variable, Default 0.9): The value used for the moving_mean and
moving_var computation. This should be a float number or a Variable with
shape [1] and data type as float32. The updated formula is:
:math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
:math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
Default is 0.9.
epsilon(float, Default 1e-05): A value added to the denominator for
numerical stability. Default is 1e-5.
param_attr(ParamAttr|None): The parameter attribute for the Parameter `scale`
of inplace_abn. If it is set to None or one attribute of ParamAttr, inplace_abn
will create a ParamAttr as param_attr; the name of the scale can be set in ParamAttr.
If the Initializer of the param_attr is not set, the parameter is initialized
with Xavier. Default: None.
bias_attr(ParamAttr|None): The parameter attribute for the bias of inplace_abn.
If it is set to None or one attribute of ParamAttr, inplace_abn
will create a ParamAttr as bias_attr; the name of the bias can be set in ParamAttr.
If the Initializer of the bias_attr is not set, the bias is initialized to zero.
Default: None.
data_layout (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str|None): For detailed information, please refer to :ref:`api_guide_Name`.
Usually name is no need to set and None by default.
moving_mean_name(str, Default None): The name of the moving_mean which stores the global mean. If it
is set to None, inplace_abn will save the global mean with a random name; otherwise, inplace_abn
will save the global mean under the given name.
moving_variance_name(str, Default None): The name of the moving_variance which stores the global variance.
If it is set to None, inplace_abn will save the global variance with a random name; otherwise, inplace_abn
will save the global variance under the given name.
do_model_average_for_mean_and_var(bool, Default True): Whether parameter mean and variance should do model
average when model average is enabled.
use_global_stats(bool, Default False): Whether to use the global mean and
variance. In inference or test mode, setting use_global_stats to True
or is_test to True is equivalent.
In train mode, when use_global_stats is True, the global mean
and variance are also used during training.
act_alpha(float, Default 1.0): When the activation is one of ['elu', 'identity', 'leaky_relu'],
in-place activated batch normalization is used, and the alpha parameter of the
activation can be given by this parameter.
Returns:
A Variable holding a Tensor which is the result of applying batch normalization and the activation to the input;
it has the same shape and data type as the input.
Examples:
.. code-block:: python
import paddle.fluid as fluid
x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
hidden2 = fluid.layers.inplace_abn(input=hidden1)
hidden3 = fluid.layers.inplace_abn(input=hidden2, act='leaky_relu', act_alpha=0.2)
"""
assert act in [None, 'identity', 'leaky_relu', 'elu'], (
"inplace_abn only support act as None, 'identity', "
"'leaky_relu', 'elu' currently"
)
assert (
bias_attr is not False
), "bias_attr should not be False in inplace_abn."
helper = LayerHelper('inplace_abn', **locals())
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'inplace_abn'
)
dtype = helper.input_dtype()
input_shape = input.shape
if data_layout == 'NCHW':
channel_num = input_shape[1]
else:
if data_layout == 'NHWC':
channel_num = input_shape[-1]
else:
raise ValueError("unsupported data layout:" + data_layout)
param_shape = [channel_num]
# create parameter
scale = helper.create_parameter(
attr=helper.param_attr,
shape=param_shape,
dtype=dtype,
default_initializer=Constant(1.0),
)
bias = helper.create_parameter(
attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
)
mean = helper.create_parameter(
attr=ParamAttr(
name=moving_mean_name,
initializer=Constant(0.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var,
),
shape=param_shape,
dtype=dtype,
)
mean.stop_gradient = True
variance = helper.create_parameter(
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
trainable=False,
do_model_average=do_model_average_for_mean_and_var,
),
shape=param_shape,
dtype=dtype,
)
variance.stop_gradient = True
# create output
# mean and mean_out share the same memory
mean_out = mean
# variance and variance out share the same memory
variance_out = variance
# batch_norm_out and input share the same memory
batch_norm_out = input
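# In dynamic graph mode the fused C++ op is called directly below; momentum is
# passed as an extra input tensor when it is a Tensor, or as an op attribute
# when it is a plain float.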
if in_dygraph_mode():
inputs_has_MomemtumTensor = False
attrs_has_momentum = False
tmp_tensor_type = core.eager.Tensor
if isinstance(momentum, tmp_tensor_type):
inputs_has_MomemtumTensor = True
else:
attrs_has_momentum = True
attrs__ = ()
if attrs_has_momentum:
attrs__ = (
'momentum',
momentum,
'epsilon',
epsilon,
'is_test',
is_test,
'data_layout',
data_layout,
'use_mkldnn',
False,
'fuse_with_relu',
False,
'use_global_stats',
use_global_stats,
'activation',
act,
'alpha',
act_alpha,
)
else:
attrs__ = (
'epsilon',
epsilon,
'is_test',
is_test,
'data_layout',
data_layout,
'use_mkldnn',
False,
'fuse_with_relu',
False,
'use_global_stats',
use_global_stats,
'activation',
act,
'alpha',
act_alpha,
)
if inputs_has_MomemtumTensor:
batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
input,
scale,
bias,
mean,
variance,
momentum,
mean_out,
variance_out,
*attrs__,
)
return batch_norm_out
else:
batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
input,
scale,
bias,
mean,
variance,
None,
mean_out,
variance_out,
*attrs__,
)
return batch_norm_out
saved_mean = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
saved_variance = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
reserve_space = helper.create_variable_for_type_inference(
dtype=dtype, stop_gradient=True
)
inputs = {
"X": input,
"Scale": scale,
"Bias": bias,
"Mean": mean,
"Variance": variance,
}
attrs = {
"epsilon": epsilon,
"is_test": is_test,
"data_layout": data_layout,
"use_mkldnn": False,
"fuse_with_relu": False,
"use_global_stats": use_global_stats,
"activation": act,
"alpha": act_alpha,
}
if isinstance(momentum, Variable):
inputs['MomemtumTensor'] = momentum
else:
attrs['momentum'] = momentum
outputs = {
"Y": batch_norm_out,
"MeanOut": mean_out,
"VarianceOut": variance_out,
"SavedMean": saved_mean,
"SavedVariance": saved_variance,
}
if reserve_space is not None:
outputs["ReserveSpace"] = reserve_space
helper.append_op(
type="inplace_abn", inputs=inputs, outputs=outputs, attrs=attrs
)
return batch_norm_out
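The removed layer fuses batch normalization with an optional activation (None, identity, leaky_relu, or elu) while reusing the input buffer. The tests updated further down in this diff switch to the non-fused composition instead: fluid.layers.batch_norm followed by a separate activation call. A minimal sketch of that pattern, assuming a static-graph program and an NCHW float32 input (the tensor shape is illustrative):

import paddle
import paddle.fluid as fluid

paddle.enable_static()
x = fluid.data(name='x', shape=[-1, 7, 3, 7], dtype='float32')
bn = fluid.layers.batch_norm(
    x,
    param_attr=fluid.ParamAttr(name='bn_scale'),
    bias_attr=fluid.ParamAttr(name='bn_bias'),
    data_layout='NCHW',
)
# the activation is applied as a separate op instead of being fused in place
out = paddle.nn.functional.leaky_relu(bn, 0.2)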
def instance_norm(
input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
):
......
......@@ -17,7 +17,6 @@ import numpy as np
import os
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler
import paddle
......@@ -54,33 +53,21 @@ class TestInplaceANBOpTraining(unittest.TestCase):
append_batch_size=False,
stop_gradient=False,
)
if inplace:
bn = fluid.layers.inplace_abn(
data,
act=activation,
param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'),
moving_mean_name='bn_moving_mean',
moving_variance_name='bn_moving_variance',
data_layout=layout,
is_test=only_forward,
act_alpha=alpha,
)
else:
bn = fluid.layers.batch_norm(
data,
param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'),
moving_mean_name='bn_moving_mean',
moving_variance_name='bn_moving_variance',
data_layout=layout,
is_test=only_forward,
in_place=inplace,
)
if activation == 'leaky_relu':
bn = paddle.nn.functional.leaky_relu(bn, alpha)
if activation == 'elu':
bn = paddle.nn.functional.elu(bn, alpha)
bn = fluid.layers.batch_norm(
data,
param_attr=fluid.ParamAttr(name='bn_scale'),
bias_attr=fluid.ParamAttr(name='bn_bias'),
moving_mean_name='bn_moving_mean',
moving_variance_name='bn_moving_variance',
data_layout=layout,
is_test=only_forward,
in_place=inplace,
)
if activation == 'leaky_relu':
bn = paddle.nn.functional.leaky_relu(bn, alpha)
if activation == 'elu':
bn = paddle.nn.functional.elu(bn, alpha)
# NOTE: in inplace mode input and output of bn
# may have same name, multiply 1. to generate
......@@ -94,105 +81,6 @@ class TestInplaceANBOpTraining(unittest.TestCase):
sgd_opt.backward(out)
return main, startup, [out, bn]
def compare(self, place, layout, only_forward, activation, alpha, use_cuda):
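# Builds the program twice (inplace=False, then inplace=True), runs both with
# the same seed and the same random input, and checks that all fetched
# outputs, moving statistics and gradients match within tolerance.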
seed = 10
os.environ['FLAGS_cudnn_deterministic'] = "1"
data = np.random.random(size=self.dshape).astype(self.dtype) * 4.0 - 2
fetch_outs = []
fetch_names = []
for inplace in [False, True]:
main, startup, outs = self.build_program(
place,
layout,
seed,
only_forward,
activation,
alpha,
inplace=inplace,
)
exe = fluid.Executor(place)
exe.run(startup)
fetch_name = [v.name for v in outs] + [
'bn_moving_mean',
'bn_moving_variance',
'bn_scale',
'bn_bias',
]
if not only_forward:
others = [
'inplace_abn_0.tmp_0' if inplace else 'batch_norm_0.tmp_0',
'inplace_abn_0.tmp_1' if inplace else 'batch_norm_0.tmp_1',
'bn_scale@GRAD',
'bn_bias@GRAD',
'input@GRAD',
]
fetch_name += others
for nm in fetch_name:
fv = fluid.framework._get_var(str(nm), program=main)
fv.persistable = True
build_strategy = fluid.BuildStrategy()
build_strategy.sync_batch_norm = (
use_cuda and fluid.core.get_cuda_device_count() > 1
)
build_strategy.enable_inplace = inplace
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 1 if os.name == 'nt' else 0
comp_prog1 = compiler.CompiledProgram(main).with_data_parallel(
outs[0].name if not only_forward else None,
build_strategy=build_strategy,
exec_strategy=exec_strategy,
)
bn_fetches = exe.run(
program=main, feed={'input': data}, fetch_list=fetch_name
)
fetch_outs.append(bn_fetches)
fetch_names.append(fetch_name)
for bn_val, inplace_abn_val, name1, name2 in zip(
*(fetch_outs + fetch_names)
):
np.testing.assert_allclose(
bn_val,
inplace_abn_val,
rtol=1e-05,
atol=0.01,
err_msg='Output ('
+ name1
+ ':'
+ name2
+ ') has diff on {} with {} layout and {} activation. \n'.format(
place, layout, activation
)
+ '\nBN '
+ str(bn_val)
+ '\n'
+ 'Inplace ABN '
+ str(inplace_abn_val),
)
def test_op(self):
use_cudas = [False, True] if core.is_compiled_with_cuda() else [False]
# use_cudas = [False]
for use_cuda in use_cudas:
place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
layouts = ["NCHW", "NHWC"]
for layout in layouts:
for activation, alpha in zip(
[None, 'elu', 'leaky_relu'], [0.0, 1.0, 0.02]
):
for infer_only in [True, False]:
self.compare(
place,
layout,
infer_only,
activation,
alpha,
use_cuda,
)
def test_all_branches(self):
seed = 10
os.environ['FLAGS_cudnn_deterministic'] = "1"
......@@ -212,7 +100,7 @@ class TestInplaceANBOpTraining(unittest.TestCase):
activation,
alpha,
use_cuda,
True,
False,
)
exe = fluid.Executor(place)
exe.run(startup)
......
......@@ -3686,34 +3686,6 @@ class TestBook(LayerTest):
out = layers.batch_norm(data, momentum=momentum)
return out
def make_inplace_abn(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
):
data = self._get_data(
name='data', shape=[32, 128, 128], dtype="float32"
)
out = layers.inplace_abn(data, act='leaky_relu', act_alpha=0.2)
return out
def make_inplace_abn_momentum_variable(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
):
data = self._get_data(
name='data', shape=[32, 128, 128], dtype="float32"
)
momentum = self._get_data(
name='momentum',
shape=[1],
dtype='float32',
append_batch_size=False,
)
out = layers.inplace_abn(
data, momentum=momentum, act='elu', act_alpha=2.0
)
return out
def make_range(self):
with program_guard(
fluid.default_main_program(), fluid.default_startup_program()
......