diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc
index 89459d00ae813611ac0eafc9b94137ac4b32a61f..344b104b5948ca17ae87a8125adac69807f718b2 100644
--- a/paddle/fluid/operators/inplace_abn_op.cc
+++ b/paddle/fluid/operators/inplace_abn_op.cc
@@ -324,10 +324,12 @@ class InplaceABNGradKernel : public framework::OpKernel<T> {
 
 namespace ops = paddle::operators;
 
+DECLARE_INPLACE_OP_INFERER(InplaceAbnOpInplaceInferer, {"X", "Y"});
 REGISTER_OPERATOR(inplace_abn, ops::InplaceABNOp, ops::InplaceABNOpMaker,
                   ops::BatchNormOpInferVarType,
                   ops::InplaceABNOpGradMaker<paddle::framework::OpDesc>,
-                  ops::InplaceABNOpGradMaker<paddle::imperative::OpBase>)
+                  ops::InplaceABNOpGradMaker<paddle::imperative::OpBase>,
+                  InplaceAbnOpInplaceInferer)
 REGISTER_OPERATOR(inplace_abn_grad, ops::InplaceABNGradOp)
 
 REGISTER_OP_CPU_KERNEL(
diff --git a/paddle/fluid/pybind/op_function_generator.h b/paddle/fluid/pybind/op_function_generator.h
index c348e04e6c7acbecdd36099bc9dbeceba8262229..f1e9c7e8f491b64df48858d3cdebc6d7bd82aa67 100644
--- a/paddle/fluid/pybind/op_function_generator.h
+++ b/paddle/fluid/pybind/op_function_generator.h
@@ -110,6 +110,11 @@ std::map<std::string, std::set<std::string>> op_ins_map = {
     {"graph_reindex",
      {"X", "Neighbors", "Count", "HashTable_Value", "HashTable_Index"}},
     {"graph_sample_neighbors", {"Row", "Col_Ptr", "X", "Eids", "Perm_Buffer"}},
+    {"crop", {"X", "Y", "Offsets"}},
+    {"batch_norm",
+     {"X", "Scale", "Bias", "Mean", "Variance", "MomentumTensor"}},
+    {"inplace_abn",
+     {"X", "Scale", "Bias", "Mean", "Variance", "MomentumTensor"}},
 };
 
 // NOTE(zhiqiu): Like op_ins_map.
@@ -126,6 +131,9 @@ std::map<std::string, std::set<std::string>> op_outs_map = {
     {"batch_norm",
      {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
       "ReserveSpace"}},
+    {"inplace_abn",
+     {"Y", "MeanOut", "VarianceOut", "SavedMean", "SavedVariance",
+      "ReserveSpace"}},
     {"fused_attention",
      {"LnMean", "LnVariance", "LnOut", "QKVOut", "QKVBiasOut", "TransposeOut2",
@@ -211,6 +219,7 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
     {"merged_momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
     {"sparse_momentum", {"ParamOut", "VelocityOut", "MasterParamOut"}},
     {"batch_norm", {"MeanOut", "VarianceOut"}},
+    {"inplace_abn", {"MeanOut", "VarianceOut"}},
     {"sync_batch_norm", {"MeanOut", "VarianceOut"}},
     {"accuracy", {"Correct", "Total"}},
     {"fill_constant", {"Out"}},
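
The three pybind maps above wire the imperative bindings: op_ins_map adds the dispensable MomentumTensor input, op_outs_map lists the six outputs, and op_passing_outs_map marks MeanOut/VarianceOut as outputs the caller passes in so they can alias Mean/Variance. A minimal sketch of the legacy-dygraph call shape this generates, mirroring the Python changes below; tensor values are illustrative and the MomentumTensor slot is left as None:

    import paddle
    from paddle import _C_ops
    from paddle.fluid.framework import _in_legacy_dygraph

    paddle.disable_static()
    x = paddle.randn([2, 4, 8, 8])
    scale, bias = paddle.ones([4]), paddle.zeros([4])
    mean, variance = paddle.zeros([4]), paddle.ones([4])
    attrs = ('momentum', 0.9, 'epsilon', 1e-5, 'is_test', False,
             'data_layout', 'NCHW', 'use_mkldnn', False,
             'fuse_with_relu', False, 'use_global_stats', False)
    if _in_legacy_dygraph():
        # None fills the new MomentumTensor input; mean/variance are passed
        # again as MeanOut/VarianceOut so the op updates them in place.
        y, _, _, _, _, _ = _C_ops.batch_norm(
            x, scale, bias, mean, variance, None, mean, variance, *attrs)
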
diff --git a/python/paddle/fluid/dygraph/nn.py b/python/paddle/fluid/dygraph/nn.py
index df6af698abafcb768820d3006d25b91c71d110bc..89fcbe1a5d18da99ce204380eafe315f1f6899ea 100644
--- a/python/paddle/fluid/dygraph/nn.py
+++ b/python/paddle/fluid/dygraph/nn.py
@@ -21,7 +21,7 @@ from ..layers import utils
 from ..layers import nn as F
 from .. import dygraph_utils
 from . import layers
-from ..framework import Variable, _non_static_mode, OpProtoHolder, Parameter, _dygraph_tracer, _varbase_creator, default_main_program, _global_flags, in_dygraph_mode
+from ..framework import Variable, _non_static_mode, OpProtoHolder, Parameter, _dygraph_tracer, _varbase_creator, default_main_program, _global_flags, in_dygraph_mode, _in_legacy_dygraph
 from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
 from ..param_attr import ParamAttr
 from ..initializer import Normal, Constant, NumpyArrayInitializer
@@ -1357,7 +1357,10 @@ class BatchNorm(layers.Layer):
                 self._momentum, self._epsilon, self._data_layout,
                 not self.training, self._use_global_stats,
                 self._trainable_statistics, False)
-        else:
+            return dygraph_utils._append_activation_in_dygraph(
+                batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn)
+
+        elif _in_legacy_dygraph():
             attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
                      "is_test", not self.training, "data_layout",
                      self._data_layout, "use_mkldnn", self._use_mkldnn,
@@ -1366,7 +1369,8 @@ class BatchNorm(layers.Layer):
                      'trainable_statistics', self._trainable_statistics)
             batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm(
                 input, self.weight, self.bias, self._mean, self._variance,
-                mean_out, variance_out, *attrs)
+                None, mean_out, variance_out, *attrs)
+
             return dygraph_utils._append_activation_in_dygraph(
                 batch_norm_out, act=self._act, use_mkldnn=self._use_mkldnn)
diff --git a/python/paddle/fluid/dygraph/tracer.py b/python/paddle/fluid/dygraph/tracer.py
index 747fe7d32cb654f9e7d3d434323e0bf6148da62a..05ae17c5e1816702b068b6c2f7c6e357352ad521 100644
--- a/python/paddle/fluid/dygraph/tracer.py
+++ b/python/paddle/fluid/dygraph/tracer.py
@@ -40,12 +40,12 @@ final_state_name_mapping = {
         "x": "X",
         "out": "Out",
     },
-    "pool2d": {
-        "final_op_name": "final_state_pool2d",
-        "x": "X",
-        "kernel_size": "ksize",
-        "out": "Out",
-    },
+    # "pool2d": {
+    #     "final_op_name": "final_state_pool2d",
+    #     "x": "X",
+    #     "kernel_size": "ksize",
+    #     "out": "Out",
+    # },
     "abs": {
         "final_op_name": "final_state_abs",
         "x": "X",
@@ -64,12 +64,12 @@ final_state_name_mapping = {
         "axis2": "axis2",
         "out": "Out",
     },
-    "one_hot": {
-        "final_op_name": "final_state_one_hot",
-        "x": "X",
-        "num_class": "depth",
-        "out": "Out",
-    }
+    # "one_hot": {
+    #     "final_op_name": "final_state_one_hot",
+    #     "x": "X",
+    #     "num_class": "depth",
+    #     "out": "Out",
+    # }
 }
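
With the eager branch now ending in an explicit return and the legacy branch guarded by _in_legacy_dygraph(), BatchNorm.forward follows the three-way dispatch this patch applies across the Python layers. A minimal skeleton of that pattern; the branch helpers below are placeholders, not real ops:

    from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph

    def eager_branch(x):   # placeholder for a final-state _C_ops call
        return x

    def legacy_branch(x):  # placeholder for a legacy _C_ops call with an attr tuple
        return x

    def static_branch(x):  # placeholder for the LayerHelper/append_op path
        return x

    def forward(x):
        if in_dygraph_mode():          # new eager dygraph: return immediately
            return eager_branch(x)
        elif _in_legacy_dygraph():     # old dygraph tracer
            return legacy_branch(x)
        return static_branch(x)        # static graph fallback
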
diff --git a/python/paddle/fluid/layers/loss.py b/python/paddle/fluid/layers/loss.py
index f3ebfb9de10cfc4acdb9364b0bd2f39bcdb7c9af..ad09a4662ced275cd2a1ac883c8ab745625fcf09 100644
--- a/python/paddle/fluid/layers/loss.py
+++ b/python/paddle/fluid/layers/loss.py
@@ -1101,6 +1101,25 @@ def sampled_softmax_with_cross_entropy(logits,
             out = fluid.layers.sampled_softmax_with_cross_entropy(
                       logits=fc, label=label, num_samples=25)
     """
+    if _non_static_mode():
+        sample_logits_attrs = ('use_customized_samples', use_customized_samples,
+                               'uniq', True, 'remove_accidental_hits',
+                               remove_accidental_hits, 'num_samples',
+                               num_samples, 'seed', seed)
+        _, _, _, _, sampled_logits_out, sampled_label_out = _C_ops.sample_logits(
+            logits, label, *sample_logits_attrs)
+        depth = num_samples + 1
+        sampled_softlabel_out = _C_ops.one_hot(sampled_label_out, 'depth',
+                                               depth)
+
+        softmax_with_cross_entropy_attrs = ('soft_label', True,
+                                            'numeric_stable_mode', False)
+
+        _, loss = _C_ops.softmax_with_cross_entropy(
+            sampled_logits_out, sampled_softlabel_out,
+            *softmax_with_cross_entropy_attrs)
+        return loss / num_true
+
     helper = LayerHelper('sample_logits', **locals())
     samples = customized_samples if use_customized_samples else helper.create_variable_for_type_inference(
         dtype='int64')
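
The new _non_static_mode() fast path above reproduces the static composition imperatively: sample_logits draws num_samples negatives per row, one_hot expands the sampled labels to depth num_samples + 1, softmax_with_cross_entropy runs with soft labels, and the loss is scaled by 1 / num_true. A dygraph usage sketch with random data, assuming a device that provides the sample_logits kernel:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        logits = fluid.dygraph.to_variable(
            np.random.rand(32, 100).astype('float32'))
        label = fluid.dygraph.to_variable(
            np.random.randint(0, 100, size=(32, 1)).astype('int64'))
        # num_true defaults to 1, so one positive label per example
        loss = fluid.layers.sampled_softmax_with_cross_entropy(
            logits=logits, label=label, num_samples=25)
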
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 9be15d23bb371a977c3f78cd178dbe5bd2cccc24..1f3625a6a805de362c654fcedb3ec6645698cd28 100755
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -41,7 +41,6 @@ from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, c
 import paddle
 from paddle.utils import deprecated
 from paddle import _C_ops
-from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 
 __all__ = [
     'fc',
@@ -2948,6 +2947,38 @@ def batch_norm(input,
     mean_out = mean
     # variance and variance_out share the same memory
     variance_out = variance
+
+    if in_dygraph_mode():
+        inputs_has_MomemtumTensor = False
+        attrs_has_momentum = False
+        tmp_tensor_type = core.eager.Tensor
+        if isinstance(momentum, tmp_tensor_type):
+            inputs_has_MomemtumTensor = True
+        else:
+            attrs_has_momentum = True
+
+        attrs_ = ()
+        if attrs_has_momentum:
+            attrs_ = ('momentum', momentum, 'epsilon', epsilon, 'is_test',
+                      is_test, 'data_layout', data_layout, 'use_mkldnn', False,
+                      'fuse_with_relu', False, 'use_global_stats',
+                      use_global_stats)
+        else:
+            attrs_ = ('epsilon', epsilon, 'is_test', is_test, 'data_layout',
+                      data_layout, 'use_mkldnn', False, 'fuse_with_relu', False,
+                      'use_global_stats', use_global_stats)
+        if inputs_has_MomemtumTensor:
+            batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm(
+                input, scale, bias, mean, variance, momentum, mean_out,
+                variance_out, *attrs_)
+        else:
+            batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm(
+                input, scale, bias, mean, variance, None, mean_out,
+                variance_out, *attrs_)
+
+        return dygraph_utils._append_activation_in_dygraph(
+            batch_norm_out, act=act, use_mkldnn=False)
+
     saved_mean = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_variable_for_type_inference(
@@ -2965,7 +2996,9 @@ def batch_norm(input,
         "Scale": scale,
         "Bias": bias,
         "Mean": mean,
-        "Variance": variance
+        "Variance": variance,
+        "MeanOut": mean_out,
+        "VarianceOut": variance_out
     }
     attrs = {
         "epsilon": epsilon,
@@ -3143,13 +3176,46 @@ def inplace_abn(input,
     mean_out = mean
     # variance and variance out share the same memory
     variance_out = variance
+    # batch_norm_out and input share the same memory
+    batch_norm_out = input
+
+    if in_dygraph_mode():
+        inputs_has_MomemtumTensor = False
+        attrs_has_momentum = False
+        tmp_tensor_type = core.eager.Tensor
+        if isinstance(momentum, tmp_tensor_type):
+            inputs_has_MomemtumTensor = True
+        else:
+            attrs_has_momentum = True
+
+        attrs__ = ()
+        if attrs_has_momentum:
+            attrs__ = ('momentum', momentum, 'epsilon', epsilon, 'is_test',
+                       is_test, 'data_layout', data_layout, 'use_mkldnn',
+                       False, 'fuse_with_relu', False, 'use_global_stats',
+                       use_global_stats, 'activation', act, 'alpha', act_alpha)
+        else:
+            attrs__ = ('epsilon', epsilon, 'is_test', is_test, 'data_layout',
+                       data_layout, 'use_mkldnn', False, 'fuse_with_relu',
+                       False, 'use_global_stats', use_global_stats,
+                       'activation', act, 'alpha', act_alpha)
+        if inputs_has_MomemtumTensor:
+            batch_norm_out, _, _, _, _, _ = _C_ops.inplace_abn_(
+                input, scale, bias, mean, variance, momentum, mean_out,
+                variance_out, *attrs__)
+            return batch_norm_out
+        else:
+            batch_norm_out, _, _, _, _, _ = _C_ops.inplace_abn_(
+                input, scale, bias, mean, variance, None, mean_out,
+                variance_out, *attrs__)
+            return batch_norm_out
+
     saved_mean = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     saved_variance = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
     reserve_space = helper.create_variable_for_type_inference(
         dtype=dtype, stop_gradient=True)
-    batch_norm_out = input
 
     inputs = {
         "X": input,
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 51bedda40714cbd8cf119fec5765895ad8c28ed9..b02494d52451766a428abfec612312fa74d0539b 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -1074,7 +1074,7 @@ set_tests_properties(test_matrix_nms_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_generator_dataloader PROPERTIES TIMEOUT 120)
 set_tests_properties(test_partial_concat_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_fuse_optimizer_pass PROPERTIES TIMEOUT 120)
-set_tests_properties(test_softmax_with_cross_entropy_op PROPERTIES TIMEOUT 120)
+set_tests_properties(test_softmax_with_cross_entropy_op PROPERTIES TIMEOUT 220)
 set_tests_properties(test_reduce_op PROPERTIES TIMEOUT 500)
 set_tests_properties(test_adam_optimizer_fp32_fp64 PROPERTIES TIMEOUT 120)
 set_tests_properties(test_elementwise_nn_grad PROPERTIES TIMEOUT 120)
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index bb244a20bd873d34c6f01a4ec5a8b87018d71668..6c5864cfebc93c115f07bf9c0bef5987ca4d50e8 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -2819,7 +2819,7 @@ class TestBook(LayerTest):
         })
         self.all_close_compare = set({"make_spectral_norm"})
 
-    def test_all_layers(self):
+    def func_all_layers(self):
         attrs = (getattr(self, name) for name in dir(self))
         methods = filter(inspect.ismethod, attrs)
         for method in methods:
@@ -2867,6 +2867,11 @@ class TestBook(LayerTest):
                     np.array_equal(static_result[0], dy_result_value),
                     "Result of function [{}] not equal".format(method.__name__))
 
+    def test_all_layers(self):
+        with _test_eager_guard():
+            self.func_all_layers()
+        self.func_all_layers()
+
     def _get_np_data(self, shape, dtype, append_batch_size=True):
         np.random.seed(self.seed)
         if append_batch_size:
@@ -3656,8 +3661,9 @@ class TestBook(LayerTest):
             shape=[1],
             dtype='float32',
             append_batch_size=False)
-
-        out = layers.scale(input, scale=scale_var)
+        _scale = scale_var.numpy().item(0) if isinstance(
+            scale_var, core.eager.Tensor) else scale_var
+        out = layers.scale(input, scale=_scale)
         return out
 
     def make_softshrink(self):
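
The test_layers.py change adopts the func_/test_ split used for eager coverage: the test body moves into func_all_layers and the test_ entry runs it twice, first under the eager guard and then in legacy dygraph. The skeleton, with a hypothetical test case:

    import unittest
    from paddle.fluid.framework import _test_eager_guard

    class MyLayerTest(unittest.TestCase):   # hypothetical test class
        def func_check(self):
            pass                            # real assertions go here

        def test_check(self):
            with _test_eager_guard():       # run once in eager mode
                self.func_check()
            self.func_check()               # then once in legacy dygraph
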
diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py
index 38a6d7a09d2082e3ed4a029bf8061887b8b42458..8aca31921808552b6ca7d905911dae06ce323037 100644
--- a/python/paddle/nn/functional/norm.py
+++ b/python/paddle/nn/functional/norm.py
@@ -24,7 +24,7 @@ from ...fluid import dygraph_utils
 import numbers
 from paddle import _C_ops
 from paddle import in_dynamic_mode
-from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
+from paddle.fluid.framework import core, _non_static_mode, in_dygraph_mode, _in_legacy_dygraph
 
 __all__ = []
 
@@ -186,23 +186,24 @@ def batch_norm(x,
     else:
         trainable_statistics = not use_global_stats
 
-    if in_dygraph_mode():
-        batch_norm_out, _, _, _, _, _ = _C_ops.final_state_batch_norm(
-            x, weight, bias, running_mean, running_var, momentum, epsilon,
-            data_format, not training, use_global_stats, trainable_statistics,
-            False)
-        return batch_norm_out
-
-    if _in_legacy_dygraph():
-        # for dygraph need tuple
-        attrs = ("momentum", momentum, "epsilon", epsilon, "is_test",
-                 not training, "data_layout", data_format, "use_mkldnn", False,
-                 "fuse_with_relu", False, "use_global_stats", use_global_stats,
-                 "trainable_statistics", trainable_statistics)
-
-        batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm(
-            x, weight, bias, running_mean, running_var, mean_out, variance_out,
-            *attrs)
+    if _non_static_mode():
+        if in_dygraph_mode():
+            batch_norm_out, _, _, _, _, _ = _C_ops.final_state_batch_norm(
+                x, weight, bias, running_mean, running_var, momentum, epsilon,
+                data_format, not training, use_global_stats,
+                trainable_statistics, False)
+
+        elif _in_legacy_dygraph():
+            # for dygraph need tuple
+            attrs = ("momentum", momentum, "epsilon", epsilon, "is_test",
+                     not training, "data_layout", data_format, "use_mkldnn",
+                     False, "fuse_with_relu", False, "use_global_stats",
+                     use_global_stats, "trainable_statistics",
+                     trainable_statistics)
+
+            batch_norm_out, _, _, _, _, _ = _C_ops.batch_norm(
+                x, weight, bias, running_mean, running_var, None, mean_out,
+                variance_out, *attrs)
 
         return dygraph_utils._append_activation_in_dygraph(
             batch_norm_out, act=None)
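
A usage sketch for the reworked paddle.nn.functional.batch_norm: in the new eager mode it now dispatches to _C_ops.final_state_batch_norm, and in legacy dygraph to _C_ops.batch_norm with None in the added MomentumTensor slot. Shapes below are illustrative:

    import paddle

    x = paddle.randn([2, 3, 8, 8])
    running_mean = paddle.zeros([3])
    running_var = paddle.ones([3])
    weight = paddle.ones([3])
    bias = paddle.zeros([3])
    # updates running_mean/running_var in place when training=True
    out = paddle.nn.functional.batch_norm(
        x, running_mean, running_var, weight, bias,
        training=True, momentum=0.9, epsilon=1e-5)
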