Unverified · Commit 839fac65 · authored by H hong, committed by GitHub

Fix relu python call (#45082)

* add python final state

* fix bug

* fix bugs

* fix bug

* fix bug

* revert impl; final-state mul does not support selected rows

* fix softmax use_cudnn error

* add soft_label=False unit test

* revert loss.py
Parent 63d9a175
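Most of the Python changes below apply the same three-way dispatch: try the new final-state (eager) C++ op first, fall back to the legacy dygraph op, and only then build a static-graph operator. A minimal sketch of that pattern, using relu as a stand-in example; the scaffolding here is illustrative, not code from this commit:

    # Illustrative sketch of the dispatch pattern used throughout this PR.
    # relu is only an example; the mode checks mirror the names in the diff.
    from paddle import _C_ops
    from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
    from paddle.fluid.layer_helper import LayerHelper

    def relu(x, name=None):
        if in_dygraph_mode():
            # New eager mode: call the generated final-state op directly.
            return _C_ops.final_state_relu(x)
        if _in_legacy_dygraph():
            # Legacy dygraph mode: call the old C op.
            return _C_ops.relu(x)
        # Static graph: append an operator to the current program.
        helper = LayerHelper('relu', **locals())
        out = helper.create_variable_for_type_inference(dtype=x.dtype)
        helper.append_op(type='relu', inputs={'X': x}, outputs={'Out': out})
        return out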
@@ -71,7 +71,7 @@ using experimental::gather_nd;
 using experimental::gelu;
 using experimental::gumbel_softmax;
 using experimental::imag;
-using experimental::increment;
+using experimental::increment_;
 using experimental::index_sample;
 using experimental::is_empty;
 using experimental::isclose;
...
@@ -1246,6 +1246,7 @@
     func : IncrementInferMeta
   kernel :
     func : increment
+  inplace : (x -> out)
 
 - api : index_sample
   args : (Tensor x, Tensor index)
...
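The `inplace : (x -> out)` entry above tells the op generator to also emit an in-place variant, `increment_`, that writes the result back into `x`; this is why the C++ using declaration and the Python call sites below switch to `final_state_increment_`. A hedged usage sketch (values are illustrative):

    import paddle

    counter = paddle.zeros(shape=[1], dtype='float32')  # [0.]
    paddle.increment(counter)  # in eager mode this now updates counter in place
    print(counter.numpy())     # [1.]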
@@ -267,7 +267,7 @@ class ClipGradByValue(ClipGradBase):
             if getattr(p, 'need_clip', True) is False:
                 params_and_grads.append((p, g))
                 continue
-            new_grad = layers.clip(x=g, min=self.min, max=self.max)
+            new_grad = paddle.clip(x=g, min=self.min, max=self.max)
             params_and_grads.append((p, new_grad))
         return params_and_grads
...
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from . import core
-from .framework import dygraph_only
+from .framework import dygraph_only, in_dygraph_mode
 from paddle import _C_ops
...
@@ -1554,6 +1554,9 @@ def increment(x, value=1.0, in_place=True):
             counter = fluid.layers.zeros(shape=[1], dtype='float32') # [0.]
             fluid.layers.increment(counter) # [1.]
     """
+    if in_dygraph_mode():
+        return _C_ops.final_state_increment_(x, value)
+
     check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'],
                              'increment')
     helper = LayerHelper("increment", **locals())
@@ -1973,6 +1976,10 @@ def equal(x, y, cond=None, name=None):
           out1 = fluid.layers.equal(x=label,y=limit) #out1=[True, False]
           out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True]
     """
+    if in_dygraph_mode():
+        default_axis = -1
+        return _C_ops.final_state_equal(x, y, default_axis)
+
     check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"],
                              "equal")
     check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"],
...
@@ -1454,6 +1454,9 @@ def softmax(input, use_cudnn=True, name=None, axis=-1):
     """
+    if in_dygraph_mode():
+        return _C_ops.final_state_softmax(input, axis)
+
     if _non_static_mode():
         return _C_ops.softmax(input, 'axis', axis, 'use_cudnn', use_cudnn)
...
@@ -329,8 +329,7 @@ def concat(input, axis=0, name=None):
             axis = axis.item(0)
         if not isinstance(input, Variable):
             input = [t for t in input if t.shape.count(0) == 0]
-        out = _varbase_creator()
-        _C_ops.concat(input, out, 'axis', axis)
+        out = _C_ops.final_state_concat(input, axis)
         return out
     if _in_legacy_dygraph():
...
@@ -16,7 +16,7 @@ from __future__ import print_function
 import logging
 
 from . import framework
-from .framework import _non_static_mode, _varbase_creator
+from .framework import _non_static_mode, _varbase_creator, in_dygraph_mode
 from . import core
 from paddle import _C_ops
@@ -252,6 +252,10 @@ class L1DecayRegularizer(WeightDecayRegularizer):
             decay = block.create_var(dtype=param.dtype,
                                      shape=param.shape,
                                      lod_level=param.lod_level)
+        if in_dygraph_mode():
+            sign = _C_ops.final_state_sign(param)
+            return _C_ops.final_state_scale(sign, self._regularization_coeff,
+                                            0.0, True)
         # Append sign op
         block.append_op(type='sign', inputs={"X": param}, outputs={"Out": sign})
...
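In eager mode the L1 decay term is now computed directly instead of appending sign and scale ops to a block. A rough equivalent written with public APIs for clarity (the diff calls the final-state C ops directly; the numbers below are made up):

    import paddle

    param = paddle.to_tensor([[0.5, -2.0], [0.0, 3.0]])
    coeff = 0.01
    l1_grad = paddle.sign(param) * coeff  # sign(param) scaled by the L1 coefficient
    print(l1_grad.numpy())                # [[ 0.01 -0.01] [ 0.    0.01]]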
@@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer):
         y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
         out = self.out(y)
-        softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+        softmax_out = fluid.layers.softmax(out)
         loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
         avg_loss = paddle.mean(x=loss)
...
@@ -560,83 +560,90 @@ class CrossEntropyLoss(unittest.TestCase):
 
     ###soft_label test 5
     def test_cross_entropy_loss_soft_2d(self):
-        self.numeric_stable_mode = False
-        self.soft_label = True
-        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
-        ) else 'float64'
-        self.axis = -1
-        self.ignore_index = -100  #should not be changed
-        self.N = 3
-        self.H = 2
-        self.W = 2
-        self.C = 5
-        self.shape = [self.N, self.H, self.W, self.C]
-        self.use_softmax = True
-        self.reduction = 'none'
-        self.weight = None
-        self.logits = getattr(
-            self, "logits",
-            np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
-        softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
-        self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
-        self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
-
-        #1. numpy
-        expected = cross_entropy_soft_2d(softmax,
-                                         self.labels,
-                                         self.axis,
-                                         self.N,
-                                         self.H,
-                                         self.W,
-                                         weight=self.weight,
-                                         reduction=self.reduction,
-                                         ignore_index=self.ignore_index)
-
-        paddle.set_device("cpu")
-        paddle.disable_static()
-
-        #2. dygraph
-        paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
-            fluid.dygraph.to_variable(self.logits),
-            fluid.dygraph.to_variable(self.labels),
-            soft_label=True,
-            axis=self.axis,
-            weight=fluid.dygraph.to_variable(self.weight)
-            if self.weight is not None else None,
-            reduction=self.reduction)
-        dy_ret_value = paddle_loss_none_weight.numpy()
-
-        #3. static
-        paddle.enable_static()
-        prog = fluid.Program()
-        startup_prog = fluid.Program()
-        place = fluid.CUDAPlace(
-            0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
-        with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input',
-                               shape=[self.N, self.H, self.W, self.C],
-                               dtype=self.dtype)
-            label = fluid.data(name='label',
-                               shape=[self.N, self.H, self.W, self.C],
-                               dtype=self.dtype)
-            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
-                reduction=self.reduction, soft_label=True)
-            ret = cross_entropy_loss(input, label)
-            exe = fluid.Executor(place)
-            static_ret = exe.run(prog,
-                                 feed={
-                                     'input': self.logits,
-                                     'label': self.labels,
-                                 },
-                                 fetch_list=[ret])
-            self.assertIsNotNone(static_ret)
-        paddle.disable_static()
-
-        np.testing.assert_allclose(static_ret[0], dy_ret_value, rtol=1e-05)
-        np.testing.assert_allclose(static_ret[0], expected, rtol=1e-05)
-        np.testing.assert_allclose(dy_ret_value, expected, rtol=1e-05)
+        def inner_cross_entropy_loss_soft_2d(soft_label):
+            self.numeric_stable_mode = False
+            self.soft_label = soft_label
+            self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+            ) else 'float64'
+            self.axis = -1
+            self.ignore_index = -100  #should not be changed
+            self.N = 3
+            self.H = 2
+            self.W = 2
+            self.C = 5
+            self.shape = [self.N, self.H, self.W, self.C]
+            self.use_softmax = True
+            self.reduction = 'none'
+            self.weight = None
+            self.logits = getattr(
+                self, "logits",
+                np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
+            softmax = np.apply_along_axis(stable_softmax, self.axis,
+                                          self.logits)
+            self.labels = np.random.uniform(0.1, 1.0,
+                                            self.shape).astype(self.dtype)
+            self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
+
+            #1. numpy
+            expected = cross_entropy_soft_2d(softmax,
+                                             self.labels,
+                                             self.axis,
+                                             self.N,
+                                             self.H,
+                                             self.W,
+                                             weight=self.weight,
+                                             reduction=self.reduction,
+                                             ignore_index=self.ignore_index)
+
+            paddle.set_device("cpu")
+            paddle.disable_static()
+
+            #2. dygraph
+            paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
+                fluid.dygraph.to_variable(self.logits),
+                fluid.dygraph.to_variable(self.labels),
+                soft_label=True,
+                axis=self.axis,
+                weight=fluid.dygraph.to_variable(self.weight)
+                if self.weight is not None else None,
+                reduction=self.reduction)
+            dy_ret_value = paddle_loss_none_weight.numpy()
+
+            #3. static
+            paddle.enable_static()
+            prog = fluid.Program()
+            startup_prog = fluid.Program()
+            place = fluid.CUDAPlace(
+                0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
+            with fluid.program_guard(prog, startup_prog):
+                input = fluid.data(name='input',
+                                   shape=[self.N, self.H, self.W, self.C],
+                                   dtype=self.dtype)
+                label = fluid.data(name='label',
+                                   shape=[self.N, self.H, self.W, self.C],
+                                   dtype=self.dtype)
+                cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                    reduction=self.reduction, soft_label=True)
+                ret = cross_entropy_loss(input, label)
+                exe = fluid.Executor(place)
+                static_ret = exe.run(prog,
+                                     feed={
+                                         'input': self.logits,
+                                         'label': self.labels,
+                                     },
+                                     fetch_list=[ret])
+                self.assertIsNotNone(static_ret)
+            paddle.disable_static()
+
+            np.testing.assert_allclose(static_ret[0], dy_ret_value, rtol=1e-05)
+            np.testing.assert_allclose(static_ret[0], expected, rtol=1e-05)
+            np.testing.assert_allclose(dy_ret_value, expected, rtol=1e-05)
+
+        inner_cross_entropy_loss_soft_2d(True)
+        inner_cross_entropy_loss_soft_2d(False)
 
     ###soft_label test 6
     def test_cross_entropy_loss_soft_2d_weight_mean(self):
...
@@ -214,4 +214,5 @@ class TestDygraphMultiForward(unittest.TestCase):
 
 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
@@ -199,4 +199,5 @@ class TestClassL1Loss(unittest.TestCase):
 
 if __name__ == "__main__":
+    paddle.enable_static()
    unittest.main()
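Both test entry points above now call paddle.enable_static() before unittest.main(), presumably because Paddle 2.x starts in dynamic mode and these tests build static fluid programs. A minimal sketch of the idiom (test body is a placeholder):

    import unittest
    import paddle

    class MyStaticTest(unittest.TestCase):
        def test_something(self):
            pass  # builds and runs a static fluid.Program

    if __name__ == '__main__':
        paddle.enable_static()  # switch out of the default dynamic mode
        unittest.main()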
@@ -231,7 +231,10 @@ def hardshrink(x, threshold=0.5, name=None):
             out = F.hardshrink(x) # [-1., 0., 2.5]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_hard_shrink(x, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.hard_shrink(x, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -281,7 +284,10 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
             out = F.hardtanh(x) # [-1., 0.3, 1.]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_brelu(x, min, max)
+
+    if _in_legacy_dygraph():
         return _C_ops.brelu(x, 't_min', min, 't_max', max)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -337,7 +343,10 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
             out = F.hardsigmoid(x) # [0., 1., 0.666667]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_hard_sigmoid(x, slope, offset)
+
+    if _in_legacy_dygraph():
         return _C_ops.hard_sigmoid(x, 'slope', slope, 'offset', offset)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -744,7 +753,10 @@ def log_sigmoid(x, name=None):
             out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_logsigmoid(x)
+
+    if _in_legacy_dygraph():
         return _C_ops.logsigmoid(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1099,7 +1111,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
     if in_dygraph_mode():
         outs_cast = x if dtype is None \
-            else _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
+            else _C_ops.final_state_cast(x, dtype)
         return _C_ops.final_state_softmax(outs_cast, axis)
 
     if _in_legacy_dygraph():
@@ -1413,7 +1425,10 @@ def tanhshrink(x, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_tanh_shrink(x)
+
+    if _in_legacy_dygraph():
         return _C_ops.tanh_shrink(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1459,7 +1474,10 @@ def thresholded_relu(x, threshold=1.0, name=None):
             out = F.thresholded_relu(x) # [2., 0., 0.]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_thresholded_relu(x, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.thresholded_relu(x, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1532,13 +1550,16 @@ def log_softmax(x, axis=-1, dtype=None, name=None):
     if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)):
         dtype = convert_np_dtype_to_dtype_(dtype)
 
-    if _non_static_mode():
+    if in_dygraph_mode():
         if dtype is not None:
-            x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
-        if _in_legacy_dygraph():
-            return _C_ops.log_softmax(x, 'axis', axis)
+            x = _C_ops.final_state_cast(x, dtype)
         return _C_ops.final_state_log_softmax(x, axis)
+
+    if _in_legacy_dygraph():
+        if dtype is not None:
+            x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
+        return _C_ops.log_softmax(x, 'axis', axis)
 
     if dtype is None:
         check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
                                  'log_softmax')
...
@@ -21,7 +21,7 @@ import numpy as np
 from paddle import _C_ops
 from ...device import is_compiled_with_rocm
 from paddle import in_dynamic_mode
-from paddle.fluid.framework import in_dygraph_mode
+from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph
 from paddle.framework import _non_static_mode
 
 __all__ = []
@@ -335,8 +335,10 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
         raise ValueError(
             "Attr(data_format) should be 'NCHW' or 'NHWC'."
             "But recevie Attr(data_format): {} ".format(data_format))
 
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_pixel_shuffle(x, upscale_factor, data_format)
+    if _in_legacy_dygraph():
         return _C_ops.pixel_shuffle(x, "upscale_factor", upscale_factor,
                                     "data_format", data_format)
...
@@ -3522,7 +3522,7 @@ def increment(x, value=1.0, name=None):
     """
     if in_dygraph_mode():
-        return _C_ops.final_state_increment( x, value)
+        return _C_ops.final_state_increment_( x, value)
 
     if _in_legacy_dygraph():
         return _C_ops.increment(x, 'step', value)
...
@@ -114,6 +114,8 @@ def poisson(x, name=None):
             #    [5., 1., 3.]]
     """
+    if in_dygraph_mode():
+        return _C_ops.final_state_poisson(x)
     if paddle.in_dynamic_mode():
         return _C_ops.poisson(x)
...