From adab3c59d8ffb0ace8abb4e733fa775c4da14e32 Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Mon, 19 Sep 2022 17:17:45 +0800 Subject: [PATCH] (cherry-pick)support some op backward refuse forward (#46201) * add unit test for sum higher level op (#45961) * support slice op backward refuse forward and add high level unit test (#45960) * support tile op backward refuse forward (#45942) * support expand_v2 op backward refuse forward (#45941) * support concat backward refuse forward (#45940) --- paddle/phi/api/yaml/legacy_backward.yaml | 22 +---- .../fluid/tests/unittests/test_concat_op.py | 81 +++++++++++++++++ .../tests/unittests/test_expand_v2_op.py | 79 +++++++++++++++- .../fluid/tests/unittests/test_slice_op.py | 89 +++++++++++++++++++ .../fluid/tests/unittests/test_sum_op.py | 72 +++++++++++++++ .../fluid/tests/unittests/test_tile_op.py | 79 +++++++++++++++- 6 files changed, 402 insertions(+), 20 deletions(-) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 6267c99e147..76d60522d86 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -404,11 +404,7 @@ forward : concat_grad (Tensor[] x, Tensor grad_out, Scalar axis) -> Tensor[](grad_x) args : (Tensor[] grad_x_grad, Scalar axis = 0) output : Tensor(grad_out_grad) - infer_meta : - func : ConcatInferMeta - param : [grad_x_grad, axis] - kernel : - func : concat + invoke : concat(grad_x_grad, axis) - backward_op : concat_grad forward : concat (Tensor[] x, Scalar axis) -> Tensor(out) @@ -771,10 +767,7 @@ forward : expand_grad (Tensor x, Tensor grad_out, IntArray shape) -> Tensor(grad_x) args : (Tensor grad_x_grad, IntArray shape) output : Tensor(grad_out_grad) - infer_meta : - func : ExpandInferMeta - kernel : - func : expand + invoke : expand(grad_x_grad, shape) - backward_op : expand_grad forward : expand (Tensor x, IntArray shape) -> Tensor(out) @@ -2145,11 +2138,7 @@ forward : slice_grad (Tensor input, Tensor grad_out, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(grad_input) args : (Tensor grad_input_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) output : Tensor(grad_out_grad) - infer_meta : - func : UnchangedInferMeta - param : [grad_input_grad] - kernel : - func : slice + invoke : slice(grad_input_grad, axes, starts, ends, infer_flags, decrease_axis) - backward_op : slice_grad forward : slice (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(out) @@ -2507,10 +2496,7 @@ forward : tile_grad (Tensor x, Tensor grad_out, IntArray repeat_times) -> Tensor(grad_x) args : (Tensor grad_x_grad, IntArray repeat_times) output : Tensor(grad_out_grad) - infer_meta : - func : TileInferMeta - kernel : - func : tile + invoke : tile(grad_x_grad, repeat_times) - backward_op : tile_grad forward : tile (Tensor x, IntArray repeat_times) -> Tensor(out) diff --git a/python/paddle/fluid/tests/unittests/test_concat_op.py b/python/paddle/fluid/tests/unittests/test_concat_op.py index 0bf3d6230d8..10c74107083 100644 --- a/python/paddle/fluid/tests/unittests/test_concat_op.py +++ b/python/paddle/fluid/tests/unittests/test_concat_op.py @@ -21,6 +21,9 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard, core from paddle.fluid.framework import _test_eager_guard import paddle +import gradient_checker 
+from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestConcatOp(OpTest): @@ -451,5 +454,83 @@ class TestConcatAPIWithLoDTensorArray(unittest.TestCase): res[0], np.concatenate([self.x] * self.iter_num, axis=self.axis)) +class TestConcatDoubleGradCheck(unittest.TestCase): + + def concat_wrapper(self, x): + return paddle.concat(x) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data1 = layers.data('data1', [2, 3], False, dtype) + data1.persistable = True + data2 = layers.data('data2', [2, 3], False, dtype) + data2.persistable = True + out = paddle.concat([data1, data2]) + data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) + data2_arr = np.random.uniform(-1, 1, data2.shape).astype(dtype) + gradient_checker.double_grad_check([data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph( + self.concat_wrapper, [data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestConcatTripleGradCheck(unittest.TestCase): + + def concat_wrapper(self, x): + return paddle.concat(x, 1) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data1 = layers.data('data1', [2, 3, 4], False, dtype) + data1.persistable = True + data2 = layers.data('data2', [2, 3, 4], False, dtype) + data2.persistable = True + out = paddle.concat([data1, data2], 1) + data1_arr = np.random.uniform(-1, 1, data1.shape).astype(dtype) + data2_arr = np.random.uniform(-1, 1, data2.shape).astype(dtype) + gradient_checker.double_grad_check([data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph( + self.concat_wrapper, [data1, data2], + out, + x_init=[data1_arr, data2_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 6fc6fc8f7eb..82fb8284fe7 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -18,9 +18,12 @@ import unittest import numpy as np from op_test import OpTest import paddle.fluid as fluid -from paddle.fluid import compiler, Program, program_guard +from paddle.fluid import compiler, Program, program_guard, core import paddle from paddle.fluid.framework import _test_eager_guard +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers # Situation 1: shape is a list(without tensor) @@ -284,6 +287,80 @@ class TestExpandV2DygraphAPI(unittest.TestCase): egr_expand_1.numpy()) +class TestExpandDoubleGradCheck(unittest.TestCase): + + def expand_wrapper(self, x): + return paddle.expand(x[0], [2, 3]) + + @prog_scope() + def func(self, place): + # the shape of 
input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 3], False, dtype) + data.persistable = True + out = paddle.expand(data, [2, 3]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.expand_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestExpandTripleGradCheck(unittest.TestCase): + + def expand_wrapper(self, x): + return paddle.expand(x[0], [2, 3]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 3], False, dtype) + data.persistable = True + out = paddle.expand(data, [2, 3]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.expand_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index ee5ab189bba..bc10e4f4fde 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -22,6 +22,9 @@ import paddle.fluid as fluid import paddle.fluid.layers as layers import paddle from paddle.fluid.framework import _test_eager_guard, _enable_legacy_dygraph +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers paddle.enable_static() @@ -867,6 +870,92 @@ class TestImperativeCUDAPinnedInput(unittest.TestCase): self.assertEqual(sliced.shape, [2, 70, 80]) +class TestSliceDoubleGradCheck(unittest.TestCase): + + def slice_wrapper(self, x): + return paddle.slice(x[0], + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. 
+ eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [4, 5, 6], False, dtype) + data.persistable = True + out = paddle.slice(data, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.slice_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestSliceTripleGradCheck(unittest.TestCase): + + def slice_wrapper(self, x): + return paddle.slice(x[0], + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [4, 5, 6], False, dtype) + data.persistable = True + out = paddle.slice(data, + axes=[0, 1, 2], + starts=[-3, 0, 2], + ends=[3, 2, 4]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.slice_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index ebf5631fea9..c4d7bb7c2ba 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -663,6 +663,78 @@ class TestAddNTripleGradCheck(unittest.TestCase): self.func(p) +class TestSumDoubleGradCheck(unittest.TestCase): + + def sum_wrapper(self, x): + return paddle.sum(x[0], axis=1, keepdim=True) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 4], False, dtype) + data.persistable = True + out = paddle.sum(data, axis=1, keepdim=True) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.sum_wrapper, [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestSumTripleGradCheck(unittest.TestCase): + + def sum_wrapper(self, x): + return paddle.sum(x[0], axis=1, keepdim=True) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. 
+ eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 4], False, dtype) + data.persistable = True + out = paddle.sum(data, axis=1, keepdim=True) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.sum_wrapper, [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py index c1c6820d9c1..9f694ab3319 100644 --- a/python/paddle/fluid/tests/unittests/test_tile_op.py +++ b/python/paddle/fluid/tests/unittests/test_tile_op.py @@ -19,7 +19,10 @@ import numpy as np from op_test import OpTest import paddle import paddle.fluid as fluid -from paddle.fluid import compiler, Program, program_guard +from paddle.fluid import compiler, Program, program_guard, core +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers #Situation 1: repeat_times is a list (without tensor) @@ -263,6 +266,80 @@ class TestTileAPI(unittest.TestCase): assert np.array_equal(out_3.numpy(), np.tile(np_x, (2, 3))) +class TestTileDoubleGradCheck(unittest.TestCase): + + def tile_wrapper(self, x): + return paddle.tile(x[0], [2, 1]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [1, 2], False, dtype) + data.persistable = True + out = paddle.tile(data, [2, 1]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.tile_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestTileTripleGradCheck(unittest.TestCase): + + def tile_wrapper(self, x): + return paddle.tile(x[0], [2, 1]) + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified, not inlcude -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [1, 2], False, dtype) + data.persistable = True + out = paddle.tile(data, [2, 1]) + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.tile_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == "__main__": paddle.enable_static() unittest.main() -- GitLab
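
Note on the YAML change above: the `invoke` entries work because concat, expand_v2, slice, sum and tile are linear in the tensor being differentiated, so each second-order backward op is exactly the forward op applied to the incoming grad-grads (e.g. `invoke : concat(grad_x_grad, axis)` replaces the separate infer_meta/kernel registration). The new unit tests verify this numerically with Paddle's `gradient_checker` helpers. Below is a minimal, self-contained sketch of the same check run outside the test harness; it assumes the `gradient_checker` module from `python/paddle/fluid/tests/unittests` is on the import path, and the shape and eps values simply mirror those used in the patch (they are illustrative, not mandated).

    import numpy as np
    import paddle
    import paddle.fluid as fluid
    import paddle.fluid.layers as layers
    import gradient_checker  # test-suite helper under tests/unittests, not a public API

    paddle.enable_static()

    place = fluid.CPUPlace()
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        # Mirrors TestExpandDoubleGradCheck: the static shape must be fully
        # specified (no -1) so the numeric Jacobian can be constructed.
        data = layers.data('data', [2, 3], False, np.float32)
        data.persistable = True
        out = paddle.expand(data, [2, 3])
        data_arr = np.random.uniform(-1, 1, [2, 3]).astype(np.float32)

        # Compares the analytic second-order gradient (which now routes through
        # `invoke : expand(grad_x_grad, shape)`) against a finite-difference one.
        gradient_checker.double_grad_check([data], out,
                                           x_init=[data_arr],
                                           place=place,
                                           eps=0.005)

The triple-grad variants in the patch follow the same pattern with `gradient_checker.triple_grad_check`, and the dygraph checks additionally set `FLAGS_retain_grad_for_all_tensor` before calling the `*_grad_check_for_dygraph` helpers.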