From bc92d5f5eb4f648764fb2b32ecbbd4a82262d80c Mon Sep 17 00:00:00 2001
From: Charles-hit <56987902+Charles-hit@users.noreply.github.com>
Date: Tue, 20 Sep 2022 13:48:43 +0800
Subject: [PATCH] (cherry-pick) Support some op refuse forward and fix some bugs (#46211)

* support cast op backward refuse forward and fix some bugs (#46173)

* support cast op backward refuse forward

* Fix the bug of high order unit test framework

* support sign op backward refuse forward (#46002)
---
 paddle/phi/api/yaml/legacy_backward.yaml              | 13 ++--
 paddle/phi/api/yaml/legacy_ops.yaml                   |  1 +
 .../fluid/tests/unittests/gradient_checker.py         | 20 ++++-
 .../fluid/tests/unittests/test_cast_op.py             | 77 ++++++++++++++++++
 .../fluid/tests/unittests/test_sign_op.py             | 78 +++++++++++++++++++
 5 files changed, 181 insertions(+), 8 deletions(-)

diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 76d60522d8..2f4eff9835 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -326,12 +326,7 @@
   forward : cast (Tensor x, DataType out_dtype) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
   output : Tensor(x_grad)
-  infer_meta :
-    func : UnchangedInferMeta
-    param : [x]
-  kernel :
-    func : cast_grad
-    data_type : out_grad
+  invoke : cast (out_grad, x.dtype())
   no_need_buffer : x
 
 - backward_op : ceil_grad
@@ -2101,6 +2096,12 @@
   optional : grad_grad_out_grad
   inplace : (grad_grad_x -> fwd_grad_out_grad)
 
+- backward_op : sign_grad
+  forward : sign (Tensor x) -> Tensor(out)
+  args : (Tensor out_grad)
+  output : Tensor(x_grad)
+  invoke : scale(out_grad, 0.0, 0.0, true)
+
 - backward_op : silu_grad
   forward : silu (Tensor x) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index e69d54eb1e..cd531142e2 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -2377,6 +2377,7 @@
     func : UnchangedInferMeta
   kernel :
     func : sign
+  backward : sign_grad
 
 - op : silu
   args : (Tensor x)
diff --git a/python/paddle/fluid/tests/unittests/gradient_checker.py b/python/paddle/fluid/tests/unittests/gradient_checker.py
index e261fd8165..15320c395e 100644
--- a/python/paddle/fluid/tests/unittests/gradient_checker.py
+++ b/python/paddle/fluid/tests/unittests/gradient_checker.py
@@ -268,6 +268,9 @@ def grad_check(x,
     for v in x:
         v.stop_gradient = False
         v.persistable = True
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     if place is None:
         place = fluid.CPUPlace()
     if program is None:
@@ -364,6 +367,9 @@ def double_grad_check(x,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
 
     if program is None:
         program = fluid.default_main_program()
@@ -445,6 +451,9 @@ def triple_grad_check(x,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
 
     if program is None:
         program = fluid.default_main_program()
@@ -578,6 +587,9 @@ def get_static_double_grad(x,
     for v in x:
         v.stop_gradient = False
         v.persistable = True
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     if place is None:
         place = fluid.CPUPlace()
     if program is None:
@@ -736,7 +748,9 @@ def double_grad_check_for_dygraph(func,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
-
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     y_grads_init = []
     for yi in y:
         np_type = dtype_to_np_dtype(yi.dtype)
@@ -903,7 +917,9 @@ def triple_grad_check_for_dygraph(func,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
-
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     y_grads_init = []
     for yi in y:
         np_type = dtype_to_np_dtype(yi.dtype)
diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py
index cd67440990..a3a6805a86 100644
--- a/python/paddle/fluid/tests/unittests/test_cast_op.py
+++ b/python/paddle/fluid/tests/unittests/test_cast_op.py
@@ -23,6 +23,9 @@ import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard
 from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16
 from paddle.fluid.framework import _test_eager_guard
+import gradient_checker
+from decorator_helper import prog_scope
+import paddle.fluid.layers as layers
 
 
 class TestCastOpFp32ToFp64(OpTest):
@@ -137,6 +140,80 @@ class TestCastOpEager(unittest.TestCase):
             self.assertTrue(x.gradient().dtype == np.float16)
 
 
+class TestCastDoubleGradCheck(unittest.TestCase):
+
+    def cast_wrapper(self, x):
+        return paddle.cast(x[0], 'float64')
+
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified and not include -1.
+        eps = 0.005
+        dtype = np.float32
+
+        data = layers.data('data', [2, 3, 4], False, dtype)
+        data.persistable = True
+        out = paddle.cast(data, 'float64')
+        data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype)
+
+        gradient_checker.double_grad_check([data],
+                                           out,
+                                           x_init=[data_arr],
+                                           place=place,
+                                           eps=eps)
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        gradient_checker.double_grad_check_for_dygraph(self.cast_wrapper,
+                                                       [data],
+                                                       out,
+                                                       x_init=[data_arr],
+                                                       place=place)
+
+    def test_grad(self):
+        paddle.enable_static()
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestCastTripleGradCheck(unittest.TestCase):
+
+    def cast_wrapper(self, x):
+        return paddle.cast(x[0], 'float64')
+
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified and not include -1.
+        eps = 0.005
+        dtype = np.float32
+
+        data = layers.data('data', [2, 3, 4], False, dtype)
+        data.persistable = True
+        out = paddle.cast(data, 'float64')
+        data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype)
+
+        gradient_checker.triple_grad_check([data],
+                                           out,
+                                           x_init=[data_arr],
+                                           place=place,
+                                           eps=eps)
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        gradient_checker.triple_grad_check_for_dygraph(self.cast_wrapper,
+                                                       [data],
+                                                       out,
+                                                       x_init=[data_arr],
+                                                       place=place)
+
+    def test_grad(self):
+        paddle.enable_static()
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py
index 444675a4bb..3eda8b286c 100644
--- a/python/paddle/fluid/tests/unittests/test_sign_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sign_op.py
@@ -19,7 +19,11 @@ import numpy as np
 from op_test import OpTest
 import paddle
 import paddle.fluid as fluid
+import paddle.fluid.core as core
 from paddle.fluid import Program, program_guard
+import gradient_checker
+from decorator_helper import prog_scope
+import paddle.fluid.layers as layers
 
 
 class TestSignOp(OpTest):
@@ -91,6 +95,80 @@ class TestSignAPI(unittest.TestCase):
             paddle.sign(input4)
 
 
+class TestSignDoubleGradCheck(unittest.TestCase):
+
+    def sign_wrapper(self, x):
+        return paddle.sign(x[0])
+
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified and not include -1.
+        eps = 0.005
+        dtype = np.float32
+
+        data = layers.data('data', [1, 4], False, dtype)
+        data.persistable = True
+        out = paddle.sign(data)
+        data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype)
+
+        gradient_checker.double_grad_check([data],
+                                           out,
+                                           x_init=[data_arr],
+                                           place=place,
+                                           eps=eps)
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        gradient_checker.double_grad_check_for_dygraph(self.sign_wrapper,
+                                                       [data],
+                                                       out,
+                                                       x_init=[data_arr],
+                                                       place=place)
+
+    def test_grad(self):
+        paddle.enable_static()
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestSignTripleGradCheck(unittest.TestCase):
+
+    def sign_wrapper(self, x):
+        return paddle.sign(x[0])
+
+    @prog_scope()
+    def func(self, place):
+        # the shape of the input variable should be clearly specified and not include -1.
+        eps = 0.005
+        dtype = np.float32
+
+        data = layers.data('data', [1, 4], False, dtype)
+        data.persistable = True
+        out = paddle.sign(data)
+        data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype)
+
+        gradient_checker.triple_grad_check([data],
+                                           out,
+                                           x_init=[data_arr],
+                                           place=place,
+                                           eps=eps)
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        gradient_checker.triple_grad_check_for_dygraph(self.sign_wrapper,
+                                                       [data],
+                                                       out,
+                                                       x_init=[data_arr],
+                                                       place=place)
+
+    def test_grad(self):
+        paddle.enable_static()
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+if __name__ == "__main__":
+    paddle.enable_static()
+    unittest.main()
--
GitLab
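
A minimal dynamic-graph sketch (an illustration, not part of the patch) of what the new YAML entries mean, assuming a Paddle build that already contains this change and runs in the default dygraph mode: cast_grad is now composed from the forward cast applied to out_grad, so the gradient keeps x's dtype and is itself differentiable, while sign_grad invokes scale(out_grad, 0.0, 0.0, true), i.e. an all-zeros gradient.

import numpy as np
import paddle

x = paddle.to_tensor(np.random.uniform(-1, 1, [2, 3]).astype('float32'))
x.stop_gradient = False

# cast: the backward is just cast(out_grad, x.dtype()), so dx comes back as float32.
y = paddle.cast(x, 'float64')
(dx,) = paddle.grad(y, x, create_graph=True)
print(dx.dtype)  # expected: paddle.float32

# sign: the backward is scale(out_grad, 0.0), so the gradient is all zeros.
z = paddle.sign(x)
(dz,) = paddle.grad(z, x, create_graph=True)
print(float(dz.abs().sum()))  # expected: 0.0

Because both backwards are expressed through ordinary forward ops, higher-order graphs can be built on top of them, which is what the new double/triple gradient-check tests exercise.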