diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 76d60522d863314defaf2f3c23d4f37f1097042a..234885a9d7ee5dc7265325e5582325433fed02f1 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -326,12 +326,7 @@ forward : cast (Tensor x, DataType out_dtype) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) - infer_meta : - func : UnchangedInferMeta - param : [x] - kernel : - func : cast_grad - data_type : out_grad + invoke : cast (out_grad, x.dtype()) no_need_buffer : x - backward_op : ceil_grad diff --git a/python/paddle/fluid/tests/unittests/gradient_checker.py b/python/paddle/fluid/tests/unittests/gradient_checker.py index e7a8b8580e9cf31acfb7c59d5059d285ae4c03f3..3f143fa37ef3798b9dbfaa207332e2f6ee3d384b 100644 --- a/python/paddle/fluid/tests/unittests/gradient_checker.py +++ b/python/paddle/fluid/tests/unittests/gradient_checker.py @@ -268,6 +268,9 @@ def grad_check(x, for v in x: v.stop_gradient = False v.persistable = True + for u in y: + u.stop_gradient = False + u.persistable = True if place is None: place = fluid.CPUPlace() if program is None: @@ -364,6 +367,9 @@ def double_grad_check(x, v.stop_gradient = False v.persistable = True y = _as_list(y) + for u in y: + u.stop_gradient = False + u.persistable = True if program is None: program = fluid.default_main_program() @@ -445,6 +451,9 @@ def triple_grad_check(x, v.stop_gradient = False v.persistable = True y = _as_list(y) + for u in y: + u.stop_gradient = False + u.persistable = True if program is None: program = fluid.default_main_program() @@ -578,6 +587,9 @@ def get_static_double_grad(x, for v in x: v.stop_gradient = False v.persistable = True + for u in y: + u.stop_gradient = False + u.persistable = True if place is None: place = fluid.CPUPlace() if program is None: @@ -736,7 +748,9 @@ def double_grad_check_for_dygraph(func, v.stop_gradient = False v.persistable = True y = _as_list(y) 
- + for u in y: + u.stop_gradient = False + u.persistable = True y_grads_init = [] for yi in y: np_type = dtype_to_np_dtype(yi.dtype) @@ -903,7 +917,9 @@ def triple_grad_check_for_dygraph(func, v.stop_gradient = False v.persistable = True y = _as_list(y) - + for u in y: + u.stop_gradient = False + u.persistable = True y_grads_init = [] for yi in y: np_type = dtype_to_np_dtype(yi.dtype) diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py index cd67440990cb852a13eb9bfdab121f8a5df4db05..a3a6805a868ee5d4b4312899c347e29238d91f2d 100644 --- a/python/paddle/fluid/tests/unittests/test_cast_op.py +++ b/python/paddle/fluid/tests/unittests/test_cast_op.py @@ -23,6 +23,9 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16 from paddle.fluid.framework import _test_eager_guard +import gradient_checker +from decorator_helper import prog_scope +import paddle.fluid.layers as layers class TestCastOpFp32ToFp64(OpTest): @@ -137,6 +140,80 @@ class TestCastOpEager(unittest.TestCase): self.assertTrue(x.gradient().dtype == np.float16) +class TestCastDoubleGradCheck(unittest.TestCase): + + def cast_wrapper(self, x): + return paddle.cast(x[0], 'float64') + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified and must not include -1. 
+ eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 3, 4], False, dtype) + data.persistable = True + out = paddle.cast(data, 'float64') + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.double_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.double_grad_check_for_dygraph(self.cast_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + +class TestCastTripleGradCheck(unittest.TestCase): + + def cast_wrapper(self, x): + return paddle.cast(x[0], 'float64') + + @prog_scope() + def func(self, place): + # the shape of input variable should be clearly specified and must not include -1. + eps = 0.005 + dtype = np.float32 + + data = layers.data('data', [2, 3, 4], False, dtype) + data.persistable = True + out = paddle.cast(data, 'float64') + data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype) + + gradient_checker.triple_grad_check([data], + out, + x_init=[data_arr], + place=place, + eps=eps) + fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) + gradient_checker.triple_grad_check_for_dygraph(self.cast_wrapper, + [data], + out, + x_init=[data_arr], + place=place) + + def test_grad(self): + paddle.enable_static() + places = [fluid.CPUPlace()] + if core.is_compiled_with_cuda(): + places.append(fluid.CUDAPlace(0)) + for p in places: + self.func(p) + + if __name__ == '__main__': paddle.enable_static() unittest.main()