From 839fac658fa84e236440f114094c1dcf00ab34da Mon Sep 17 00:00:00 2001
From: hong <43953930+phlrain@users.noreply.github.com>
Date: Thu, 25 Aug 2022 10:52:59 +0800
Subject: [PATCH] Fix relu python call (#45082)

* add python final state

* fix bug

* fix bugs

* fix bug

* fix bug

* revert impl, final state mul not support selected rows

* fix softmax use cudnn error

* add softlable false unitest

* revert loss.py
---
 paddle/phi/api/ext/tensor_compat.h            |   2 +-
 paddle/phi/api/yaml/legacy_api.yaml           |   1 +
 python/paddle/fluid/clip.py                   |   2 +-
 python/paddle/fluid/dygraph_utils.py          |   2 +-
 python/paddle/fluid/layers/control_flow.py    |   7 +
 python/paddle/fluid/layers/nn.py              |   3 +
 python/paddle/fluid/layers/tensor.py          |   3 +-
 python/paddle/fluid/regularizer.py            |   6 +-
 .../dygraph_to_static/test_se_resnet.py       |   2 +-
 .../unittests/test_cross_entropy_loss.py      | 155 +++++++++---------
 .../unittests/test_dygraph_multi_forward.py   |   1 +
 .../fluid/tests/unittests/test_l1_loss.py     |   1 +
 python/paddle/nn/functional/activation.py     |  43 +++--
 python/paddle/nn/functional/vision.py         |   6 +-
 python/paddle/tensor/math.py                  |   2 +-
 python/paddle/tensor/random.py                |   2 +
 16 files changed, 143 insertions(+), 95 deletions(-)

diff --git a/paddle/phi/api/ext/tensor_compat.h b/paddle/phi/api/ext/tensor_compat.h
index 6de78d8c92c..2833629f0ff 100644
--- a/paddle/phi/api/ext/tensor_compat.h
+++ b/paddle/phi/api/ext/tensor_compat.h
@@ -71,7 +71,7 @@ using experimental::gather_nd;
 using experimental::gelu;
 using experimental::gumbel_softmax;
 using experimental::imag;
-using experimental::increment;
+using experimental::increment_;
 using experimental::index_sample;
 using experimental::is_empty;
 using experimental::isclose;
diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 98d75b7f2d2..2c538c27bf8 100755
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -1246,6 +1246,7 @@
     func : IncrementInferMeta
   kernel :
     func : increment
+  inplace : (x -> out)
 
 - api : index_sample
   args : (Tensor x, Tensor index)
diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py
index c912d7d8d40..b5575928c4f 100644
--- a/python/paddle/fluid/clip.py
+++ b/python/paddle/fluid/clip.py
@@ -267,7 +267,7 @@ class ClipGradByValue(ClipGradBase):
             if getattr(p, 'need_clip', True) is False:
                 params_and_grads.append((p, g))
                 continue
-            new_grad = layers.clip(x=g, min=self.min, max=self.max)
+            new_grad = paddle.clip(x=g, min=self.min, max=self.max)
             params_and_grads.append((p, new_grad))
         return params_and_grads
 
diff --git a/python/paddle/fluid/dygraph_utils.py b/python/paddle/fluid/dygraph_utils.py
index 44d3d2f783b..d8c19c94f27 100644
--- a/python/paddle/fluid/dygraph_utils.py
+++ b/python/paddle/fluid/dygraph_utils.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from . import core
-from .framework import dygraph_only
+from .framework import dygraph_only, in_dygraph_mode
 from paddle import _C_ops
 
 
diff --git a/python/paddle/fluid/layers/control_flow.py b/python/paddle/fluid/layers/control_flow.py
index 73a1ccfb6c5..43781665dc3 100755
--- a/python/paddle/fluid/layers/control_flow.py
+++ b/python/paddle/fluid/layers/control_flow.py
@@ -1554,6 +1554,9 @@ def increment(x, value=1.0, in_place=True):
             counter = fluid.layers.zeros(shape=[1], dtype='float32') # [0.]
             fluid.layers.increment(counter) # [1.]
""" + if in_dygraph_mode(): + return _C_ops.final_state_increment_(x, value) + check_variable_and_dtype(x, 'x', ['float32', 'float64', 'int32', 'int64'], 'increment') helper = LayerHelper("increment", **locals()) @@ -1973,6 +1976,10 @@ def equal(x, y, cond=None, name=None): out1 = fluid.layers.equal(x=label,y=limit) #out1=[True, False] out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True] """ + if in_dygraph_mode(): + default_axis = -1 + return _C_ops.final_state_equal(x, y, default_axis) + check_variable_and_dtype(x, "x", ["float32", "float64", "int32", "int64"], "equal") check_variable_and_dtype(y, "y", ["float32", "float64", "int32", "int64"], diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 57961efc14a..6b0fb3f62c7 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -1454,6 +1454,9 @@ def softmax(input, use_cudnn=True, name=None, axis=-1): """ + if in_dygraph_mode(): + return _C_ops.final_state_softmax(input, axis) + if _non_static_mode(): return _C_ops.softmax(input, 'axis', axis, 'use_cudnn', use_cudnn) diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 3a928415512..f823f98a536 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -329,8 +329,7 @@ def concat(input, axis=0, name=None): axis = axis.item(0) if not isinstance(input, Variable): input = [t for t in input if t.shape.count(0) == 0] - out = _varbase_creator() - _C_ops.concat(input, out, 'axis', axis) + out = _C_ops.final_state_concat(input, axis) return out if _in_legacy_dygraph(): diff --git a/python/paddle/fluid/regularizer.py b/python/paddle/fluid/regularizer.py index da0b91cc5c9..4328d824071 100644 --- a/python/paddle/fluid/regularizer.py +++ b/python/paddle/fluid/regularizer.py @@ -16,7 +16,7 @@ from __future__ import print_function import logging from . import framework -from .framework import _non_static_mode, _varbase_creator +from .framework import _non_static_mode, _varbase_creator, in_dygraph_mode from . 
 from paddle import _C_ops
 
@@ -252,6 +252,10 @@ class L1DecayRegularizer(WeightDecayRegularizer):
         decay = block.create_var(dtype=param.dtype,
                                  shape=param.shape,
                                  lod_level=param.lod_level)
+        if in_dygraph_mode():
+            sign = _C_ops.final_state_sign(param)
+            return _C_ops.final_state_scale(sign, self._regularization_coeff,
+                                            0.0, True)
         # Append sign op
         block.append_op(type='sign', inputs={"X": param},
                         outputs={"Out": sign})
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py
index dffc0340a6d..2a86c3f77a1 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py
@@ -316,7 +316,7 @@ class SeResNeXt(fluid.dygraph.Layer):
             y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
             out = self.out(y)
 
-            softmax_out = fluid.layers.softmax(out, use_cudnn=False)
+            softmax_out = fluid.layers.softmax(out)
             loss = fluid.layers.cross_entropy(input=softmax_out, label=label)
             avg_loss = paddle.mean(x=loss)
 
diff --git a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
index 624f10d1e66..0c526182ab7 100644
--- a/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_cross_entropy_loss.py
@@ -560,83 +560,90 @@ class CrossEntropyLoss(unittest.TestCase):
 
     ###soft_label test 5
     def test_cross_entropy_loss_soft_2d(self):
-        self.numeric_stable_mode = False
-        self.soft_label = True
-        self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
-        ) else 'float64'
-        self.axis = -1
-        self.ignore_index = -100  #should not be changed
-        self.N = 3
-        self.H = 2
-        self.W = 2
-        self.C = 5
-        self.shape = [self.N, self.H, self.W, self.C]
-        self.use_softmax = True
-        self.reduction = 'none'
-        self.weight = None
-        self.logits = getattr(
-            self, "logits",
-            np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
-        softmax = np.apply_along_axis(stable_softmax, self.axis, self.logits)
-        self.labels = np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype)
-        self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
+        def inner_cross_entropy_loss_soft_2d(soft_label):
+            self.numeric_stable_mode = False
+            self.soft_label = soft_label
+            self.dtype = 'float32' if fluid.core.is_compiled_with_rocm(
+            ) else 'float64'
+            self.axis = -1
+            self.ignore_index = -100  #should not be changed
+            self.N = 3
+            self.H = 2
+            self.W = 2
+            self.C = 5
+            self.shape = [self.N, self.H, self.W, self.C]
+            self.use_softmax = True
+            self.reduction = 'none'
+            self.weight = None
+            self.logits = getattr(
+                self, "logits",
+                np.random.uniform(0.1, 1.0, self.shape).astype(self.dtype))
+            softmax = np.apply_along_axis(stable_softmax, self.axis,
+                                          self.logits)
 
-        #1. numpy
-        expected = cross_entropy_soft_2d(softmax,
-                                         self.labels,
-                                         self.axis,
-                                         self.N,
-                                         self.H,
-                                         self.W,
-                                         weight=self.weight,
-                                         reduction=self.reduction,
-                                         ignore_index=self.ignore_index)
-
-        paddle.set_device("cpu")
-        paddle.disable_static()
-
-        #2. dygraph
-        paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
-            fluid.dygraph.to_variable(self.logits),
-            fluid.dygraph.to_variable(self.labels),
-            soft_label=True,
-            axis=self.axis,
-            weight=fluid.dygraph.to_variable(self.weight)
-            if self.weight is not None else None,
-            reduction=self.reduction)
-        dy_ret_value = paddle_loss_none_weight.numpy()
-
-        #3. static
-        paddle.enable_static()
-        prog = fluid.Program()
-        startup_prog = fluid.Program()
-        place = fluid.CUDAPlace(
-            0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
-        with fluid.program_guard(prog, startup_prog):
-            input = fluid.data(name='input',
-                               shape=[self.N, self.H, self.W, self.C],
-                               dtype=self.dtype)
-            label = fluid.data(name='label',
-                               shape=[self.N, self.H, self.W, self.C],
-                               dtype=self.dtype)
-
-            cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
-                reduction=self.reduction, soft_label=True)
-            ret = cross_entropy_loss(input, label)
-            exe = fluid.Executor(place)
-            static_ret = exe.run(prog,
-                                 feed={
-                                     'input': self.logits,
-                                     'label': self.labels,
-                                 },
-                                 fetch_list=[ret])
-            self.assertIsNotNone(static_ret)
-        paddle.disable_static()
+            self.labels = np.random.uniform(0.1, 1.0,
+                                            self.shape).astype(self.dtype)
+            self.labels /= np.sum(self.labels, axis=self.axis, keepdims=True)
 
-        np.testing.assert_allclose(static_ret[0], dy_ret_value, rtol=1e-05)
-        np.testing.assert_allclose(static_ret[0], expected, rtol=1e-05)
-        np.testing.assert_allclose(dy_ret_value, expected, rtol=1e-05)
+            #1. numpy
+            expected = cross_entropy_soft_2d(softmax,
+                                             self.labels,
+                                             self.axis,
+                                             self.N,
+                                             self.H,
+                                             self.W,
+                                             weight=self.weight,
+                                             reduction=self.reduction,
+                                             ignore_index=self.ignore_index)
+
+            paddle.set_device("cpu")
+            paddle.disable_static()
+
+            #2. dygraph
+            paddle_loss_none_weight = paddle.nn.functional.cross_entropy(
+                fluid.dygraph.to_variable(self.logits),
+                fluid.dygraph.to_variable(self.labels),
+                soft_label=True,
+                axis=self.axis,
+                weight=fluid.dygraph.to_variable(self.weight)
+                if self.weight is not None else None,
+                reduction=self.reduction)
+            dy_ret_value = paddle_loss_none_weight.numpy()
+
+            #3. static
+            paddle.enable_static()
+            prog = fluid.Program()
+            startup_prog = fluid.Program()
+            place = fluid.CUDAPlace(
+                0) if fluid.core.is_compiled_with_cuda() else fluid.CPUPlace()
+            with fluid.program_guard(prog, startup_prog):
+                input = fluid.data(name='input',
+                                   shape=[self.N, self.H, self.W, self.C],
+                                   dtype=self.dtype)
+                label = fluid.data(name='label',
+                                   shape=[self.N, self.H, self.W, self.C],
+                                   dtype=self.dtype)
+
+                cross_entropy_loss = paddle.nn.loss.CrossEntropyLoss(
+                    reduction=self.reduction, soft_label=True)
+                ret = cross_entropy_loss(input, label)
+                exe = fluid.Executor(place)
+                static_ret = exe.run(prog,
+                                     feed={
+                                         'input': self.logits,
+                                         'label': self.labels,
+                                     },
+                                     fetch_list=[ret])
+                self.assertIsNotNone(static_ret)
+            paddle.disable_static()
+
+            np.testing.assert_allclose(static_ret[0], dy_ret_value, rtol=1e-05)
+            np.testing.assert_allclose(static_ret[0], expected, rtol=1e-05)
+            np.testing.assert_allclose(dy_ret_value, expected, rtol=1e-05)
+
+        inner_cross_entropy_loss_soft_2d(True)
+        inner_cross_entropy_loss_soft_2d(False)
 
     ###soft_label test 6
     def test_cross_entropy_loss_soft_2d_weight_mean(self):
diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py
index 9a61162bac0..ef4cbe1c2c8 100644
--- a/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py
+++ b/python/paddle/fluid/tests/unittests/test_dygraph_multi_forward.py
@@ -214,4 +214,5 @@ class TestDygraphMultiForward(unittest.TestCase):
 
 
 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_l1_loss.py b/python/paddle/fluid/tests/unittests/test_l1_loss.py
index 0e8221d1b4c..abe47e75db7 100644
--- a/python/paddle/fluid/tests/unittests/test_l1_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_l1_loss.py
@@ -199,4 +199,5 @@ class TestClassL1Loss(unittest.TestCase):
 
 
 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index a210f806fc4..f6898347de2 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -231,7 +231,10 @@ def hardshrink(x, threshold=0.5, name=None):
             out = F.hardshrink(x) # [-1., 0., 2.5]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_hard_shrink(x, threshold)
+
+    if _in_legacy_dygraph():
         return _C_ops.hard_shrink(x, 'threshold', threshold)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -281,7 +284,10 @@ def hardtanh(x, min=-1.0, max=1.0, name=None):
             out = F.hardtanh(x) # [-1., 0.3, 1.]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_brelu(x, min, max)
+
+    if _in_legacy_dygraph():
         return _C_ops.brelu(x, 't_min', min, 't_max', max)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -337,7 +343,10 @@ def hardsigmoid(x, slope=0.1666667, offset=0.5, name=None):
             out = F.hardsigmoid(x) # [0., 1., 0.666667]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_hard_sigmoid(x, slope, offset)
+
+    if _in_legacy_dygraph():
         return _C_ops.hard_sigmoid(x, 'slope', slope, 'offset', offset)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -744,7 +753,10 @@ def log_sigmoid(x, name=None):
             out = F.log_sigmoid(x) # [-0.313262 -0.126928 -0.0485874 -0.0181499]
 
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_logsigmoid(x)
+
+    if _in_legacy_dygraph():
         return _C_ops.logsigmoid(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1099,7 +1111,7 @@ def softmax(x, axis=-1, dtype=None, name=None):
 
     if in_dygraph_mode():
         outs_cast = x if dtype is None \
-            else _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype)
+            else _C_ops.final_state_cast(x, dtype)
         return _C_ops.final_state_softmax(outs_cast, axis)
 
     if _in_legacy_dygraph():
@@ -1413,7 +1425,10 @@ def tanhshrink(x, name=None):
             x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3]))
             out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739]
     """
-    if in_dynamic_mode():
+    if in_dygraph_mode():
+        return _C_ops.final_state_tanh_shrink(x)
+
+    if _in_legacy_dygraph():
         return _C_ops.tanh_shrink(x)
 
     check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'],
@@ -1459,7 +1474,10 @@ def thresholded_relu(x, threshold=1.0, name=None):
             out = F.thresholded_relu(x) # [2., 0., 0.]
""" - if in_dynamic_mode(): + if in_dygraph_mode(): + return _C_ops.final_state_thresholded_relu(x, threshold) + + if _in_legacy_dygraph(): return _C_ops.thresholded_relu(x, 'threshold', threshold) check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], @@ -1532,13 +1550,16 @@ def log_softmax(x, axis=-1, dtype=None, name=None): if (dtype is not None) and (not isinstance(dtype, core.VarDesc.VarType)): dtype = convert_np_dtype_to_dtype_(dtype) - if _non_static_mode(): + if in_dygraph_mode(): if dtype is not None: - x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) - if _in_legacy_dygraph(): - return _C_ops.log_softmax(x, 'axis', axis) + x = _C_ops.final_state_cast(x, dtype) return _C_ops.final_state_log_softmax(x, axis) + if _in_legacy_dygraph(): + if dtype is not None: + x = _C_ops.cast(x, 'in_dtype', x.dtype, 'out_dtype', dtype) + return _C_ops.log_softmax(x, 'axis', axis) + if dtype is None: check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'log_softmax') diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index b94e6ec95d1..0b562e515ea 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -21,7 +21,7 @@ import numpy as np from paddle import _C_ops from ...device import is_compiled_with_rocm from paddle import in_dynamic_mode -from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.framework import in_dygraph_mode, _in_legacy_dygraph from paddle.framework import _non_static_mode __all__ = [] @@ -335,8 +335,10 @@ def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): raise ValueError( "Attr(data_format) should be 'NCHW' or 'NHWC'." "But recevie Attr(data_format): {} ".format(data_format)) + if in_dygraph_mode(): + return _C_ops.final_state_pixel_shuffle(x, upscale_factor, data_format) - if in_dynamic_mode(): + if _in_legacy_dygraph(): return _C_ops.pixel_shuffle(x, "upscale_factor", upscale_factor, "data_format", data_format) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 8c15f8633a8..3b146349675 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3522,7 +3522,7 @@ def increment(x, value=1.0, name=None): """ if in_dygraph_mode(): - return _C_ops.final_state_increment( x, value) + return _C_ops.final_state_increment_( x, value) if _in_legacy_dygraph(): return _C_ops.increment(x, 'step', value) diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index df93b30b7c2..fa3e2fa8fed 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -114,6 +114,8 @@ def poisson(x, name=None): # [5., 1., 3.]] """ + if in_dygraph_mode(): + return _C_ops.final_state_poisson(x) if paddle.in_dynamic_mode(): return _C_ops.poisson(x) -- GitLab