From 839fac658fa84e236440f114094c1dcf00ab34da Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Thu, 25 Aug 2022 10:52:59 +0800
Subject: [PATCH] Fix relu python call (#45082)

* add python final state

* fix bug

* fix bugs

* fix bug

* fix bug

* revert impl, final state mul not support selected rows

* fix softmax use cudnn error

* add softlable false unitest

* revert loss.py
---
 paddle/phi/api/ext/tensor_compat.h            |   2 +-
 paddle/phi/api/yaml/legacy_api.yaml           |   1 +
 python/paddle/fluid/clip.py                   |   2 +-
 python/paddle/fluid/dygraph_utils.py          |   2 +-
 python/paddle/fluid/layers/control_flow.py    |   7 +
 python/paddle/fluid/layers/nn.py              |   3 +
 python/paddle/fluid/layers/tensor.py          |   3 +-
 python/paddle/fluid/regularizer.py            |   6 +-
 .../dygraph_to_static/test_se_resnet.py       |   2 +-
 .../unittests/test_cross_entropy_loss.py      | 155 +++++++++---------
 .../unittests/test_dygraph_multi_forward.py   |   1 +
 .../fluid/tests/unittests/test_l1_loss.py     |   1 +
 python/paddle/nn/functional/activation.py     |  43 +++--
 python/paddle/nn/functional/vision.py         |   6 +-
 python/paddle/tensor/math.py                  |   2 +-
 python/paddle/tensor/random.py                |   2 +
 16 files changed, 143 insertions(+), 95 deletions(-)

diff --git a/paddle/phi/api/ext/tensor_compat.h b/paddle/phi/api/ext/tensor_compat.h
index 6de78d8c92c..2833629f0ff 100644
--- a/paddle/phi/api/ext/tensor_compat.h
+++ b/paddle/phi/api/ext/tensor_compat.h
@@ -71,7 +71,7 @@ using experimental::gather_nd;
 using experimental::gelu;
 using experimental::gumbel_softmax;
 using experimental::imag;
-using experimental::increment;
+using experimental::increment_;
 using experimental::index_sample;
 using experimental::is_empty;
 using experimental::isclose;
diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 98d75b7f2d2..2c538c27bf8 100755
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -1246,6 +1246,7 @@
     func : IncrementInferMeta
   kernel :
     func : increment
+  inplace : (x -> out)
 
 - api : index_sample
   args : (Tensor x, Tensor index)
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index c912d7d8d40..b5575928c4f 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -267,7 +267,7 @@ class ClipGradByValue(ClipGradBase):
             if getattr(p, 'need_clip', True) is False:
                 params_and_grads.append((p, g))
                 continue
-            new_grad = layers.clip(x=g, min=self.min, max=self.max)
+            new_grad = paddle.clip(x=g, min=self.min, max=self.max)
             params_and_grads.append((p, new_grad))
         return params_and_grads
 
diff --git a/python/paddle/fluid/dygraph_utils.py b/python/paddle/fluid/dygraph_utils.py
index 44d3d2f783b..d8c19c94f27 100644
--- a/python/paddle/fluid/dygraph_utils.py
+++ b/python/paddle/fluid/dygraph_utils.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from . import core
-from .framework import dygraph_only
+from .framework import dygraph_only, in_dygraph_mode
 from paddle import _C_ops
 
 
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 73a1ccfb6c5..43781665dc3 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -1554,6 +1554,9 @@ def increment(x, value=1.0, in_place=True):
             counter = fluid.layers.zeros(shape=[1], dtype='float32') # [0.]
             fluid.layers.increment(counter) # [1.]
""" + if in_dygraph_mode(): + return _C_ops.final_state_increment_(x, value) + check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'], 'increment') helper = LayerHelper("increment", **locals()) @@ -1973,6 +1976,10 @@ def equal(x, y, cond=None, name=None): out1 = fluid.layers.equal(x=label,y=limit) #out1=[True, False] out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True] """ + if in_dygraph_mode(): + default_axis = -1 + return _C_ops.final_state_equal(x, y, default_axis) + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], "equal") check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 57961efc14a..6b0fb3f62c7 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1454,6 +1454,9 @@ def softmax(input, use_cudnn=True, name=None, axis=-1): """ + if in_dygraph_mode(): + return _C_ops.final_state_softmax(input, axis) + if _non_static_mode(): return _C_ops.softmax(input, 'axis', axis, 'use_cudnn', use_cudnn) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 3a928415512..f823f98a536 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -329,8 +329,7 @@ def concat(input, axis=0, name=None): axis = axis.item(0) if not isinstance(input, Variable): input = [t for t in input if t.shape.count(0) == 0] - out = _varbase_creator() - _C_ops.concat(input, out, 'axis', axis) + out = _C_ops.final_state_concat(input, axis) return out if _in_legacy_dygraph(): diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index da0b91cc5c9..4328d824071 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -16,7 +16,7 @@ from __future__ import print_function import logging from . import framework -from .framework import _non_static_mode, _varbase_creator +from .framework import _non_static_mode, _varbase_creator, in_dygraph_mode from . 
 from paddle import _C_ops
 
@@ -252,6 +252,10 @@ class L1DecayRegularizer(WeightDecayRegularizer):
         decay = block.create_var(dtype=param.dtype,
                                  shape=param.shape,
                                  lod_level=param.lod_level)
+        if in_dygraph_mode():
+            sign = _C_ops.final_state_sign(param)
+            return _C_ops.final_state_scale(sign, self._regularization_coeff,
+                                            0.0, True)
         # Append sign op
         block.append_op(type='sign', inputs={"X": param},
                         outputs={"Out": sign})
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py
index dffc0340a6d..2a86c3f77a1 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py
@@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer):
             y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
             out = self.out(y)
 
-            softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+            softmax_out = fluid.layers.softmax(out)
             loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
             avg_loss = paddle.mean(x=loss)
 
diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
index 624f10d1e66..0c526182ab7 100644
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
@@ -560,83 +560,90 @@ class CrossEntropyLoss(unittest.TestCase):
 
     ###soft_label test 5
     def test_cross_entropy_loss_soft_2d(self):
-        self.numeric_stable_mode = False
-        self.soft_label = True
-        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
-        ) else 'float64'
-        self.axis = -1
-        self.ignore_index = -100  #should not be changed
-        self.N = 3
-        self.H = 2
-        self.W = 2
-        self.C = 5
-        self.shape = [self.N, self.H, self.W, self.C]
-        self.use_softmax = True
-        self.reduction = 'none'
-        self.weight = None
-        self.logits = getattr(
-            self, "logits",
-            np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
-        softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
-        self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
-        self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
+        def inner_cross_entropy_loss_soft_2d(soft_label):
+            self.numeric_stable_mode = False
+            self.soft_label = soft_label
+            self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+            ) else 'float64'
+            self.axis = -1
+            self.ignore_index = -100  #should not be changed
+            self.N = 3
+            self.H = 2
+            self.W = 2
+            self.C = 5
+            self.shape = [self.N, self.H, self.W, self.C]
+            self.use_softmax = True
+            self.reduction = 'none'
+            self.weight = None
+            self.logits = getattr(
+                self, "logits",
+                np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
+            softmax = np.apply_along_axis(stable_softmax, self.axis,
+                                          self.logits)
 
-        #1. numpy
-        expected = cross_entropy_soft_2d(softmax,
-                                         self.labels,
-                                         self.axis,
-                                         self.N,
-                                         self.H,
-                                         self.W,
-                                         weight=self.weight,
-                                         reduction=self.reduction,
-                                         ignore_index=self.ignore_index)
-
-        paddle.set_device("cpu")
-        paddle.disable_static()
-
-        #2. dygraph
-        paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
-            fluid.dygraph.to_variable(self.logits),
-            fluid.dygraph.to_variable(self.labels),
-            soft_label=True,
-            axis=self.axis,
-            weight=fluid.dygraph.to_variable(self.weight)
-            if self.weight is not None else None,
-            reduction=self.reduction)
-        dy_ret_value = paddle_loss_none_weight.numpy()
-
-        #3. static
-        paddle.enable_static()
-        prog = fluid.Program()
-        startup_prog = fluid.Program()
-        place = fluid.CUDAPlace(
-            0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
-        with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input',
-                               shape=[self.N, self.H, self.W, self.C],
-                               dtype=self.dtype)
-            label = fluid.data(name='label',
-                               shape=[self.N, self.H, self.W, self.C],
-                               dtype=self.dtype)
-
-            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
-                reduction=self.reduction, soft_label=True)
-            ret = cross_entropy_loss(input, label)
-            exe = fluid.Executor(place)
-            static_ret = exe.run(prog,
-                                 feed={
-                                     'input': self.logits,
-                                     'label': self.labels,
-                                 },
-                                 fetch_list=[ret])
-            self.assertIsNotNone(static_ret)
-        paddle.disable_static()
+            self.labels = np.random.uniform(0.1, 1.0,
+                                            self.shape).astype(self.dtype)
+            self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
 
-        np.testing.assert_allclose(static_ret[0], dy_ret_value, rtol=1e-05)
-        np.testing.assert_allclose(static_ret[0], expected, rtol=1e-05)
-        np.testing.assert_allclose(dy_ret_value, expected, rtol=1e-05)
+            #1. numpy
+            expected = cross_entropy_soft_2d(softmax,
+                                             self.labels,
+                                             self.axis,
+                                             self.N,
+                                             self.H,
+                                             self.W,
+                                             weight=self.weight,
+                                             reduction=self.reduction,
+                                             ignore_index=self.ignore_index)
+
+            paddle.set_device("cpu")
+            paddle.disable_static()
+
+            #2. dygraph
+            paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
+                fluid.dygraph.to_variable(self.logits),
+                fluid.dygraph.to_variable(self.labels),
+                soft_label=True,
+                axis=self.axis,
+                weight=fluid.dygraph.to_variable(self.weight)
+                if self.weight is not None else None,
+                reduction=self.reduction)
+            dy_ret_value = paddle_loss_none_weight.numpy()
+
+            #3. static
+            paddle.enable_static()
+            prog = fluid.Program()
+            startup_prog = fluid.Program()
+            place = fluid.CUDAPlace(
+                0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
+            with fluid.program_guard(prog, startup_prog):
+                input = fluid.data(name='input',
+                                   shape=[self.N, self.H, self.W, self.C],
+                                   dtype=self.dtype)
+                label = fluid.data(name='label',
+                                   shape=[self.N, self.H, self.W, self.C],
+                                   dtype=self.dtype)
+
+                cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                    reduction=self.reduction, soft_label=True)
+                ret = cross_entropy_loss(input, label)
+                exe = fluid.Executor(place)
+                static_ret = exe.run(prog,
+                                     feed={
+                                         'input': self.logits,
+                                         'label': self.labels,
+                                     },
+                                     fetch_list=[ret])
+                self.assertIsNotNone(static_ret)
+            paddle.disable_static()
+
+            np.testing.assert_allclose(static_ret[0], dy_ret_value, rtol=1e-05)
+            np.testing.assert_allclose(static_ret[0], expected, rtol=1e-05)
+            np.testing.assert_allclose(dy_ret_value, expected, rtol=1e-05)
+
+        inner_cross_entropy_loss_soft_2d(True)
+        inner_cross_entropy_loss_soft_2d(False)
 
     ###soft_label test 6
     def test_cross_entropy_loss_soft_2d_weight_mean(self):
diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py
index 9a61162bac0..ef4cbe1c2c8 100644
--- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py
+++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py
@@ -214,4 +214,5 @@ class TestDygraphMultiForward(unittest.TestCase):
 
 
 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_l1_loss.py b/python/paddle/fluid/tests/unittests/test_l1_loss.py
index 0e8221d1b4c..abe47e75db7 100644
--- a/python/paddle/fluid/tests/unittests/test_l1_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_l1_loss.py
@@ -199,4 +199,5 @@ class TestClassL1Loss(unittest.TestCase):
 
 
 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index a210f806fc4..f6898347de2 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -231,7 +231,10 @@ def hardshrink(x, threshold=0.5, name=None):
             out = F.hardshrink(x) # [-1., 0., 2.5]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_hard_shrink(x, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.hard_shrink(x, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -281,7 +284,10 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
             out = F.hardtanh(x) # [-1., 0.3, 1.]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_brelu(x, min, max)
+
+    if _in_legacy_dygraph():
         return _C_ops.brelu(x, 't_min', min, 't_max', max)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -337,7 +343,10 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
             out = F.hardsigmoid(x) # [0., 1., 0.666667]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_hard_sigmoid(x, slope, offset)
+
+    if _in_legacy_dygraph():
         return _C_ops.hard_sigmoid(x, 'slope', slope, 'offset', offset)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -744,7 +753,10 @@ def log_sigmoid(x, name=None):
             out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_logsigmoid(x)
+
+    if _in_legacy_dygraph():
         return _C_ops.logsigmoid(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1099,7 +1111,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
 
     if in_dygraph_mode():
         outs_cast = x if dtype is None \
-            else _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
+            else _C_ops.final_state_cast(x, dtype)
         return _C_ops.final_state_softmax(outs_cast, axis)
 
     if _in_legacy_dygraph():
@@ -1413,7 +1425,10 @@ def tanhshrink(x, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_tanh_shrink(x)
+
+    if _in_legacy_dygraph():
         return _C_ops.tanh_shrink(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1459,7 +1474,10 @@ def thresholded_relu(x, threshold=1.0, name=None):
             out = F.thresholded_relu(x) # [2., 0., 0.]
""" - if in_dynamic_mode(): + if in_dygraph_mode(): + return _C_ops.final_state_thresholded_relu(x, threshold) + + if _in_legacy_dygraph(): return _C_ops.thresholded_relu(x, 'threshold', threshold) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], @@ -1532,13 +1550,16 @@ def log_softmax(x, axis=-1, dtype=None, name=None): if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): dtype = convert_np_dtype_to_dtype_(dtype) - if _non_static_mode(): + if in_dygraph_mode(): if dtype is not None: - x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) - if _in_legacy_dygraph(): - return _C_ops.log_softmax(x, 'axis', axis) + x = _C_ops.final_state_cast(x, dtype) return _C_ops.final_state_log_softmax(x, axis) + if _in_legacy_dygraph(): + if dtype is not None: + x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) + return _C_ops.log_softmax(x, 'axis', axis) + if dtype is None: check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'log_softmax') diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index b94e6ec95d1..0b562e515ea 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -21,7 +21,7 @@ import numpy as np from paddle import _C_ops from ...device import is_compiled_with_rocm from paddle import in_dynamic_mode -from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph from paddle.framework import _non_static_mode __all__ = [] @@ -335,8 +335,10 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): raise ValueError( "Attr(data_format) should be 'NCHW' or 'NHWC'." "But recevie Attr(data_format): {} ".format(data_format)) + if in_dygraph_mode(): + return _C_ops.final_state_pixel_shuffle(x, upscale_factor, data_format) - if in_dynamic_mode(): + if _in_legacy_dygraph(): return _C_ops.pixel_shuffle(x, "upscale_factor", upscale_factor, "data_format", data_format) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 8c15f8633a8..3b146349675 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3522,7 +3522,7 @@ def increment(x, value=1.0, name=None): """ if in_dygraph_mode(): - return _C_ops.final_state_increment( x, value) + return _C_ops.final_state_increment_( x, value) if _in_legacy_dygraph(): return _C_ops.increment(x, 'step', value) diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index df93b30b7c2..fa3e2fa8fed 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -114,6 +114,8 @@ def poisson(x, name=None): # [5., 1., 3.]] """ + if in_dygraph_mode(): + return _C_ops.final_state_poisson(x) if paddle.in_dynamic_mode(): return _C_ops.poisson(x) -- GitLab