From adab3c59d8ffb0ace8abb4e733fa775c4da14e32 Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Mon, 19 Sep 2022 17:17:45 +0800 Subject: [PATCH] (cherry-pick)support some op backward refuse forward (#46201) * add unit test for sum higher level op (#45961) * support slice op backward refuse forward and add high level unit test (#45960) * support tile op backward refuse forward (#45942) * support expand_v2 op backward refuse forward (#45941) * support concat backward refuse forward (#45940) --- paddle/phi/api/yaml/legacy_backward.yaml | 22 +---- .../fluid/tests/unittests/test_concat_op.py | 81 +++++++++++++++++ .../tests/unittests/test_expand_v2_op.py | 79 +++++++++++++++- .../fluid/tests/unittests/test_slice_op.py | 89 +++++++++++++++++++ .../fluid/tests/unittests/test_sum_op.py | 72 +++++++++++++++ .../fluid/tests/unittests/test_tile_op.py | 79 +++++++++++++++- 6 files changed, 402 insertions(+), 20 deletions(-) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 6267c99e147..76d60522d86 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -404,11 +404,7 @@ forward : concat_grad (Tensor[] x, Tensor grad_out, Scalar axis) -> Tensor[](grad_x) args : (Tensor[] grad_x_grad, Scalar axis = 0) output : Tensor(grad_out_grad) - infer_meta : - func : ConcatInferMeta - param : [grad_x_grad, axis] - kernel : - func : concat + invoke : concat(grad_x_grad, axis) - backward_op : concat_grad forward : concat (Tensor[] x, Scalar axis) -> Tensor(out) @@ -771,10 +767,7 @@ forward : expand_grad (Tensor x, Tensor grad_out, IntArray shape) -> Tensor(grad_x) args : (Tensor grad_x_grad, IntArray shape) output : Tensor(grad_out_grad) - infer_meta : - func : ExpandInferMeta - kernel : - func : expand + invoke : expand(grad_x_grad, shape) - backward_op : expand_grad forward : expand (Tensor x, IntArray shape) -> Tensor(out) @@ -2145,11 +2138,7 @@ forward : slice_grad (Tensor input, Tensor grad_out, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(grad_input) args : (Tensor grad_input_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) output : Tensor(grad_out_grad) - infer_meta : - func : UnchangedInferMeta - param : [grad_input_grad] - kernel : - func : slice + invoke : slice(grad_input_grad, axes, starts, ends, infer_flags, decrease_axis) - backward_op : slice_grad forward : slice (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(out) @@ -2507,10 +2496,7 @@ forward : tile_grad (Tensor x, Tensor grad_out, IntArray repeat_times) -> Tensor(grad_x) args : (Tensor grad_x_grad, IntArray repeat_times) output : Tensor(grad_out_grad) - infer_meta : - func : TileInferMeta - kernel : - func : tile + invoke : tile(grad_x_grad, repeat_times) - backward_op : tile_grad forward : tile (Tensor x, IntArray repeat_times) -> Tensor(out) diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 0bf3d6230d8..10c74107083 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -21,6 +21,9 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard, core from paddle.fluid.framework import _test_eager_guard import paddle +import gradient_checker 
+from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestConcatOp(OpTest): @@ -451,5 +454,83 @@ class TestConcatAPIWithLoDTensorArray(unittest.TestCase): res[0], np.concatenate([self.x] * self.iter_num, axis=self.axis)) +class TestConcatDoubleGradCheck(unittest.TestCase): + + def concat_wrapper(self, x): + return paddle.concat(x) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data1 = layers.data('data1', [2, 3], False, dtype) + data1.persistable = True + data2 = layers.data('data2', [2, 3], False, dtype) + data2.persistable = True + out = paddle.concat([data1, data2]) + data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) + data2_arr = np.random.uniform(-1, 1, data2.shape).astype(dtype) + gradient_checker.double_grad_check([data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph( + self.concat_wrapper, [data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestConcatTripleGradCheck(unittest.TestCase): + + def concat_wrapper(self, x): + return paddle.concat(x, 1) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data1 = layers.data('data1', [2, 3, 4], False, dtype) + data1.persistable = True + data2 = layers.data('data2', [2, 3, 4], False, dtype) + data2.persistable = True + out = paddle.concat([data1, data2], 1) + data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) + data2_arr = np.random.uniform(-1, 1, data2.shape).astype(dtype) + gradient_checker.double_grad_check([data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph( + self.concat_wrapper, [data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 6fc6fc8f7eb..82fb8284fe7 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -18,9 +18,12 @@ import unittest import numpy as np from op_test import OpTest import paddle.fluid as fluid -from paddle.fluid import compiler, Program, program_guard +from paddle.fluid import compiler, Program, program_guard, core import paddle from paddle.fluid.framework import _test_eager_guard +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers # Situation 1: shape is a list(without tensor) @@ -284,6 +287,80 @@ class TestExpandV2DygraphAPI(unittest.TestCase): egr_expand_1.numpy()) +class TestExpandDoubleGradCheck(unittest.TestCase): + + def expand_wrapper(self, x): + return paddle.expand(x[0], [2, 3]) + + @prog_scope() + def func(self, place): + # the shape of 
input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 3], False, dtype) + data.persistable = True + out = paddle.expand(data, [2, 3]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.expand_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestExpandTripleGradCheck(unittest.TestCase): + + def expand_wrapper(self, x): + return paddle.expand(x[0], [2, 3]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 3], False, dtype) + data.persistable = True + out = paddle.expand(data, [2, 3]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.expand_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index ee5ab189bba..bc10e4f4fde 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -22,6 +22,9 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle from paddle.fluid.framework import _test_eager_guard, _enable_legacy_dygraph +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers paddle.enable_static() @@ -867,6 +870,92 @@ class TestImperativeCUDAPinnedInput(unittest.TestCase): self.assertEqual(sliced.shape, [2, 70, 80]) +class TestSliceDoubleGradCheck(unittest.TestCase): + + def slice_wrapper(self, x): + return paddle.slice(x[0], + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. 
+ eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [4, 5, 6], False, dtype) + data.persistable = True + out = paddle.slice(data, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.slice_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestSliceTripleGradCheck(unittest.TestCase): + + def slice_wrapper(self, x): + return paddle.slice(x[0], + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [4, 5, 6], False, dtype) + data.persistable = True + out = paddle.slice(data, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.slice_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index ebf5631fea9..c4d7bb7c2ba 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -663,6 +663,78 @@ class TestAddNTripleGradCheck(unittest.TestCase): self.func(p) +class TestSumDoubleGradCheck(unittest.TestCase): + + def sum_wrapper(self, x): + return paddle.sum(x[0], axis=1, keepdim=True) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 4], False, dtype) + data.persistable = True + out = paddle.sum(data, axis=1, keepdim=True) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.sum_wrapper, [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestSumTripleGradCheck(unittest.TestCase): + + def sum_wrapper(self, x): + return paddle.sum(x[0], axis=1, keepdim=True) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. 
+ eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 4], False, dtype) + data.persistable = True + out = paddle.sum(data, axis=1, keepdim=True) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.sum_wrapper, [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index c1c6820d9c1..9f694ab3319 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -19,7 +19,10 @@ import numpy as np from op_test import OpTest import paddle import paddle.fluid as fluid -from paddle.fluid import compiler, Program, program_guard +from paddle.fluid import compiler, Program, program_guard, core +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers #Situation 1: repeat_times is a list (without tensor) @@ -263,6 +266,80 @@ class TestTileAPI(unittest.TestCase): assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 3))) +class TestTileDoubleGradCheck(unittest.TestCase): + + def tile_wrapper(self, x): + return paddle.tile(x[0], [2, 1]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [1, 2], False, dtype) + data.persistable = True + out = paddle.tile(data, [2, 1]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.tile_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestTileTripleGradCheck(unittest.TestCase): + + def tile_wrapper(self, x): + return paddle.tile(x[0], [2, 1]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [1, 2], False, dtype) + data.persistable = True + out = paddle.tile(data, [2, 1]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.tile_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": paddle.enable_static() unittest.main() -- GitLab
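
Note on the YAML change above: the `invoke` entries work because concat, expand_v2, slice, sum and tile are linear in the tensor being differentiated, so each second-order backward op is exactly the forward op applied to the incoming grad-grads (e.g. `invoke : concat(grad_x_grad, axis)` replaces the separate infer_meta/kernel registration). The new unit tests verify this numerically with Paddle's `gradient_checker` helpers. Below is a minimal, self-contained sketch of the same check run outside the test harness; it assumes the `gradient_checker` module from `python/paddle/fluid/tests/unittests` is on the import path, and the shape and eps values simply mirror those used in the patch (they are illustrative, not mandated).

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    import paddle.fluid.layers as layers
    import gradient_checker  # test-suite helper under tests/unittests, not a public API

    paddle.enable_static()

    place = fluid.CPUPlace()
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        # Mirrors TestExpandDoubleGradCheck: the static shape must be fully
        # specified (no -1) so the numeric Jacobian can be constructed.
        data = layers.data('data', [2, 3], False, np.float32)
        data.persistable = True
        out = paddle.expand(data, [2, 3])
        data_arr = np.random.uniform(-1, 1, [2, 3]).astype(np.float32)

        # Compares the analytic second-order gradient (which now routes through
        # `invoke : expand(grad_x_grad, shape)`) against a finite-difference one.
        gradient_checker.double_grad_check([data], out,
                                           x_init=[data_arr],
                                           place=place,
                                           eps=0.005)

The triple-grad variants in the patch follow the same pattern with `gradient_checker.triple_grad_check`, and the dygraph checks additionally set `FLAGS_retain_grad_for_all_tensor` before calling the `*_grad_check_for_dygraph` helpers.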