Unverified commit 6138331d authored by ccrrong, committed by GitHub

remove inplace_abn (#48275)

Parent de4310e6
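
With `inplace_abn` removed, its behaviour can be approximated by composing `fluid.layers.batch_norm` with a standalone activation, which is the pattern the updated unit test in this commit switches to. Below is a minimal migration sketch, assuming the legacy `fluid` static-graph API on this branch is still available; the tensor name, shape, and slope value are illustrative only, not taken from the commit.

# Minimal migration sketch (assumption: legacy fluid static-graph API available).
import paddle
import paddle.fluid as fluid

paddle.enable_static()

# Illustrative input; the name and shape are arbitrary.
x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')

# Before: out = fluid.layers.inplace_abn(x, act='leaky_relu', act_alpha=0.2)
# After:  plain batch_norm followed by the activation, mirroring the updated test.
bn = fluid.layers.batch_norm(
    x,
    param_attr=fluid.ParamAttr(name='bn_scale'),
    bias_attr=fluid.ParamAttr(name='bn_bias'),
    data_layout='NCHW',
)
out = paddle.nn.functional.leaky_relu(bn, negative_slope=0.2)

Note that this composition only matches the numerical behaviour of batch normalization plus activation; it does not reproduce the in-place memory reuse that `inplace_abn` provided.
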
@@ -74,7 +74,6 @@ __all__ = [
     'pool2d',
     'pool3d',
     'batch_norm',
-    'inplace_abn',
     'instance_norm',
     'data_norm',
     'reduce_sum',
@@ -2797,293 +2796,6 @@ def batch_norm(
     return helper.append_activation(batch_norm_out)
-def inplace_abn(
-    input,
-    act=None,
-    is_test=False,
-    momentum=0.9,
-    epsilon=1e-05,
-    param_attr=None,
-    bias_attr=None,
-    data_layout='NCHW',
-    name=None,
-    moving_mean_name=None,
-    moving_variance_name=None,
-    do_model_average_for_mean_and_var=True,
-    use_global_stats=False,
-    act_alpha=1.0,
-):
-    r"""
-    **In-place Activation Batch Normalization Layer**
-    This layer calculates batch normalization and activation with in-place memory.
-    For batch normalization calculations, see `fluid.layers.batch_norm`.
-    For in-place activation batch normalization, see `In-Place Activated BatchNorm for
-    Memory-Optimized Training of DNNs <https://arxiv.org/abs/1712.02616>`_
-    `inplace_abn` currently only supports the activation types `None`, `identity`,
-    `leaky_relu`, and `elu`.
-    `inplace_abn` currently only supports the data types `float32` and `float64`.
-    Note:
-        If build_strategy.sync_batch_norm=True, the batch_norm in the network will use
-        sync_batch_norm automatically.
-        `is_test = True` can only be used in test and inference programs; `is_test` CANNOT be set to True in a train program. If you want to use global statistics from a pre-trained model in a train program, set `use_global_stats = True` instead.
-    Args:
-        input(Variable): The rank of the input variable can be 2, 3, 4 or 5. The data type
-            is float16, float32 or float64.
-        act(string, Default None): Activation type, linear|relu|prelu|...
-        is_test (bool, Default False): A flag indicating whether it is in
-            test phase or not.
-        momentum(float|Variable, Default 0.9): The value used for the moving_mean and
-            moving_var computation. This should be a float number or a Variable with
-            shape [1] and data type float32. The update formulas are:
-            :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)`
-            :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)`
-            Default is 0.9.
-        epsilon(float, Default 1e-05): A value added to the denominator for
-            numerical stability. Default is 1e-5.
-        param_attr(ParamAttr|None): The parameter attribute for the Parameter `scale`
-            of inplace_abn. If it is set to None or one attribute of ParamAttr, inplace_abn
-            will create a ParamAttr as param_attr; the name of the scale can be set in ParamAttr.
-            If the Initializer of the param_attr is not set, the parameter is initialized
-            with Xavier. Default: None.
-        bias_attr(ParamAttr|None): The parameter attribute for the bias of inplace_abn.
-            If it is set to None or one attribute of ParamAttr, inplace_abn
-            will create a ParamAttr as bias_attr; the name of the bias can be set in ParamAttr.
-            If the Initializer of the bias_attr is not set, the bias is initialized to zero.
-            Default: None.
-        data_layout (str, optional): Specify the data format of the input; the data format of the output
-            will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
-            The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
-            `[batch_size, input_channels, input_height, input_width]`.
-        name(str|None): For detailed information, please refer to :ref:`api_guide_Name`.
-            Usually there is no need to set this name; it is None by default.
-        moving_mean_name(str, Default None): The name of the moving_mean which stores the global mean. If it
-            is set to None, inplace_abn will save the global mean with a random name; otherwise, inplace_abn
-            will save the global mean under the given name.
-        moving_variance_name(str, Default None): The name of the moving_variance which stores the global variance.
-            If it is set to None, inplace_abn will save the global variance with a random name; otherwise, inplace_abn
-            will save the global variance under the given name.
-        do_model_average_for_mean_and_var(bool, Default True): Whether the mean and variance parameters should take part
-            in model averaging when model averaging is enabled.
-        use_global_stats(bool, Default False): Whether to use the global mean and
-            variance. In inference or test mode, setting use_global_stats to True
-            or is_test to True is equivalent.
-            In training mode, when use_global_stats is set to True, the global mean
-            and variance are also used during training.
-        act_alpha(float, Default 1.0): When the activation is one of ['elu', 'identity', 'leaky_relu'],
-            in-place activated batch normalization is used, and the alpha parameter of the activation
-            can be given by this parameter.
-    Returns:
-        A Variable holding a Tensor which is the result of applying batch normalization and the activation to the input,
-        with the same shape and data type as the input.
-    Examples:
-        .. code-block:: python
-            import paddle.fluid as fluid
-            x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
-            hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
-            hidden2 = fluid.layers.inplace_abn(input=hidden1)
-            hidden3 = fluid.layers.inplace_abn(input=hidden2, act='leaky_relu', act_alpha=0.2)
-    """
-    assert act in [None, 'identity', 'leaky_relu', 'elu'], (
-        "inplace_abn only support act as None, 'identity', "
-        "'leaky_relu', 'elu' currently"
-    )
-    assert (
-        bias_attr is not False
-    ), "bias_attr should not be False in inplace_abn."
-    helper = LayerHelper('inplace_abn', **locals())
-    check_variable_and_dtype(
-        input, 'input', ['float32', 'float64'], 'inplace_abn'
-    )
-    dtype = helper.input_dtype()
-    input_shape = input.shape
-    if data_layout == 'NCHW':
-        channel_num = input_shape[1]
-    else:
-        if data_layout == 'NHWC':
-            channel_num = input_shape[-1]
-        else:
-            raise ValueError("unsupported data layout:" + data_layout)
-    param_shape = [channel_num]
-    # create parameter
-    scale = helper.create_parameter(
-        attr=helper.param_attr,
-        shape=param_shape,
-        dtype=dtype,
-        default_initializer=Constant(1.0),
-    )
-    bias = helper.create_parameter(
-        attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True
-    )
-    mean = helper.create_parameter(
-        attr=ParamAttr(
-            name=moving_mean_name,
-            initializer=Constant(0.0),
-            trainable=False,
-            do_model_average=do_model_average_for_mean_and_var,
-        ),
-        shape=param_shape,
-        dtype=dtype,
-    )
-    mean.stop_gradient = True
-    variance = helper.create_parameter(
-        attr=ParamAttr(
-            name=moving_variance_name,
-            initializer=Constant(1.0),
-            trainable=False,
-            do_model_average=do_model_average_for_mean_and_var,
-        ),
-        shape=param_shape,
-        dtype=dtype,
-    )
-    variance.stop_gradient = True
-    # create output
-    # mean and mean_out share the same memory
-    mean_out = mean
-    # variance and variance out share the same memory
-    variance_out = variance
-    # batch_norm_out and input share the same memory
-    batch_norm_out = input
-    if in_dygraph_mode():
-        inputs_has_MomemtumTensor = False
-        attrs_has_momentum = False
-        tmp_tensor_type = core.eager.Tensor
-        if isinstance(momentum, tmp_tensor_type):
-            inputs_has_MomemtumTensor = True
-        else:
-            attrs_has_momentum = True
-        attrs__ = ()
-        if attrs_has_momentum:
-            attrs__ = (
-                'momentum',
-                momentum,
-                'epsilon',
-                epsilon,
-                'is_test',
-                is_test,
-                'data_layout',
-                data_layout,
-                'use_mkldnn',
-                False,
-                'fuse_with_relu',
-                False,
-                'use_global_stats',
-                use_global_stats,
-                'activation',
-                act,
-                'alpha',
-                act_alpha,
-            )
-        else:
-            attrs__ = (
-                'epsilon',
-                epsilon,
-                'is_test',
-                is_test,
-                'data_layout',
-                data_layout,
-                'use_mkldnn',
-                False,
-                'fuse_with_relu',
-                False,
-                'use_global_stats',
-                use_global_stats,
-                'activation',
-                act,
-                'alpha',
-                act_alpha,
-            )
-        if inputs_has_MomemtumTensor:
-            batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
-                input,
-                scale,
-                bias,
-                mean,
-                variance,
-                momentum,
-                mean_out,
-                variance_out,
-                *attrs__,
-            )
-            return batch_norm_out
-        else:
-            batch_norm_out, _, _, _, _, _ = _legacy_C_ops.inplace_abn_(
-                input,
-                scale,
-                bias,
-                mean,
-                variance,
-                None,
-                mean_out,
-                variance_out,
-                *attrs__,
-            )
-            return batch_norm_out
-    saved_mean = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    saved_variance = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    reserve_space = helper.create_variable_for_type_inference(
-        dtype=dtype, stop_gradient=True
-    )
-    inputs = {
-        "X": input,
-        "Scale": scale,
-        "Bias": bias,
-        "Mean": mean,
-        "Variance": variance,
-    }
-    attrs = {
-        "epsilon": epsilon,
-        "is_test": is_test,
-        "data_layout": data_layout,
-        "use_mkldnn": False,
-        "fuse_with_relu": False,
-        "use_global_stats": use_global_stats,
-        "activation": act,
-        "alpha": act_alpha,
-    }
-    if isinstance(momentum, Variable):
-        inputs['MomemtumTensor'] = momentum
-    else:
-        attrs['momentum'] = momentum
-    outputs = {
-        "Y": batch_norm_out,
-        "MeanOut": mean_out,
-        "VarianceOut": variance_out,
-        "SavedMean": saved_mean,
-        "SavedVariance": saved_variance,
-    }
-    if reserve_space is not None:
-        outputs["ReserveSpace"] = reserve_space
-    helper.append_op(
-        type="inplace_abn", inputs=inputs, outputs=outputs, attrs=attrs
-    )
-    return batch_norm_out
 def instance_norm(
     input, epsilon=1e-05, param_attr=None, bias_attr=None, name=None
 ):
...
@@ -17,7 +17,6 @@ import numpy as np
 import os
 import paddle.fluid.core as core
 import paddle.fluid as fluid
-from paddle.fluid import compiler
 import paddle
@@ -54,33 +53,21 @@ class TestInplaceANBOpTraining(unittest.TestCase):
             append_batch_size=False,
             stop_gradient=False,
         )
-        if inplace:
-            bn = fluid.layers.inplace_abn(
-                data,
-                act=activation,
-                param_attr=fluid.ParamAttr(name='bn_scale'),
-                bias_attr=fluid.ParamAttr(name='bn_bias'),
-                moving_mean_name='bn_moving_mean',
-                moving_variance_name='bn_moving_variance',
-                data_layout=layout,
-                is_test=only_forward,
-                act_alpha=alpha,
-            )
-        else:
-            bn = fluid.layers.batch_norm(
-                data,
-                param_attr=fluid.ParamAttr(name='bn_scale'),
-                bias_attr=fluid.ParamAttr(name='bn_bias'),
-                moving_mean_name='bn_moving_mean',
-                moving_variance_name='bn_moving_variance',
-                data_layout=layout,
-                is_test=only_forward,
-                in_place=inplace,
-            )
-            if activation == 'leaky_relu':
-                bn = paddle.nn.functional.leaky_relu(bn, alpha)
-            if activation == 'elu':
-                bn = paddle.nn.functional.elu(bn, alpha)
+        bn = fluid.layers.batch_norm(
+            data,
+            param_attr=fluid.ParamAttr(name='bn_scale'),
+            bias_attr=fluid.ParamAttr(name='bn_bias'),
+            moving_mean_name='bn_moving_mean',
+            moving_variance_name='bn_moving_variance',
+            data_layout=layout,
+            is_test=only_forward,
+            in_place=inplace,
+        )
+        if activation == 'leaky_relu':
+            bn = paddle.nn.functional.leaky_relu(bn, alpha)
+        if activation == 'elu':
+            bn = paddle.nn.functional.elu(bn, alpha)
         # NOTE: in inplace mode input and output of bn
         # may have same name, multiply 1. to generate
@@ -94,105 +81,6 @@ class TestInplaceANBOpTraining(unittest.TestCase):
         sgd_opt.backward(out)
         return main, startup, [out, bn]
-    def compare(self, place, layout, only_forward, activation, alpha, use_cuda):
-        seed = 10
-        os.environ['FLAGS_cudnn_deterministic'] = "1"
-        data = np.random.random(size=self.dshape).astype(self.dtype) * 4.0 - 2
-        fetch_outs = []
-        fetch_names = []
-        for inplace in [False, True]:
-            main, startup, outs = self.build_program(
-                place,
-                layout,
-                seed,
-                only_forward,
-                activation,
-                alpha,
-                inplace=inplace,
-            )
-            exe = fluid.Executor(place)
-            exe.run(startup)
-            fetch_name = [v.name for v in outs] + [
-                'bn_moving_mean',
-                'bn_moving_variance',
-                'bn_scale',
-                'bn_bias',
-            ]
-            if not only_forward:
-                others = [
-                    'inplace_abn_0.tmp_0' if inplace else 'batch_norm_0.tmp_0',
-                    'inplace_abn_0.tmp_1' if inplace else 'batch_norm_0.tmp_1',
-                    'bn_scale@GRAD',
-                    'bn_bias@GRAD',
-                    'input@GRAD',
-                ]
-                fetch_name += others
-            for nm in fetch_name:
-                fv = fluid.framework._get_var(str(nm), program=main)
-                fv.persistable = True
-            build_strategy = fluid.BuildStrategy()
-            build_strategy.sync_batch_norm = (
-                use_cuda and fluid.core.get_cuda_device_count() > 1
-            )
-            build_strategy.enable_inplace = inplace
-            exec_strategy = fluid.ExecutionStrategy()
-            exec_strategy.num_threads = 1 if os.name == 'nt' else 0
-            comp_prog1 = compiler.CompiledProgram(main).with_data_parallel(
-                outs[0].name if not only_forward else None,
-                build_strategy=build_strategy,
-                exec_strategy=exec_strategy,
-            )
-            bn_fetches = exe.run(
-                program=main, feed={'input': data}, fetch_list=fetch_name
-            )
-            fetch_outs.append(bn_fetches)
-            fetch_names.append(fetch_name)
-        for bn_val, inplace_abn_val, name1, name2 in zip(
-            *(fetch_outs + fetch_names)
-        ):
-            np.testing.assert_allclose(
-                bn_val,
-                inplace_abn_val,
-                rtol=1e-05,
-                atol=0.01,
-                err_msg='Output ('
-                + name1
-                + ':'
-                + name2
-                + ') has diff on {} with {} layout and {} activation. \n'.format(
-                    place, layout, activation
-                )
-                + '\nBN '
-                + str(bn_val)
-                + '\n'
-                + 'Inplace ABN '
-                + str(inplace_abn_val),
-            )
-    def test_op(self):
-        use_cudas = [False, True] if core.is_compiled_with_cuda() else [False]
-        # use_cudas = [False]
-        for use_cuda in use_cudas:
-            place = core.CUDAPlace(0) if use_cuda else core.CPUPlace()
-            layouts = ["NCHW", "NHWC"]
-            for layout in layouts:
-                for activation, alpha in zip(
-                    [None, 'elu', 'leaky_relu'], [0.0, 1.0, 0.02]
-                ):
-                    for infer_only in [True, False]:
-                        self.compare(
-                            place,
-                            layout,
-                            infer_only,
-                            activation,
-                            alpha,
-                            use_cuda,
-                        )
     def test_all_branches(self):
         seed = 10
         os.environ['FLAGS_cudnn_deterministic'] = "1"
@@ -212,7 +100,7 @@ class TestInplaceANBOpTraining(unittest.TestCase):
                     activation,
                     alpha,
                     use_cuda,
-                    True,
+                    False,
                 )
                 exe = fluid.Executor(place)
                 exe.run(startup)
...
@@ -3686,34 +3686,6 @@ class TestBook(LayerTest):
             out = layers.batch_norm(data, momentum=momentum)
             return out
-    def make_inplace_abn(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            data = self._get_data(
-                name='data', shape=[32, 128, 128], dtype="float32"
-            )
-            out = layers.inplace_abn(data, act='leaky_relu', act_alpha=0.2)
-            return out
-    def make_inplace_abn_momentum_variable(self):
-        with program_guard(
-            fluid.default_main_program(), fluid.default_startup_program()
-        ):
-            data = self._get_data(
-                name='data', shape=[32, 128, 128], dtype="float32"
-            )
-            momentum = self._get_data(
-                name='momentum',
-                shape=[1],
-                dtype='float32',
-                append_batch_size=False,
-            )
-            out = layers.inplace_abn(
-                data, momentum=momentum, act='elu', act_alpha=2.0
-            )
-            return out
     def make_range(self):
         with program_guard(
             fluid.default_main_program(), fluid.default_startup_program()
...