diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml
index 76d60522d863314defaf2f3c23d4f37f1097042a..234885a9d7ee5dc7265325e5582325433fed02f1 100755
--- a/paddle/phi/api/yaml/legacy_backward.yaml
+++ b/paddle/phi/api/yaml/legacy_backward.yaml
@@ -326,12 +326,7 @@
   forward : cast (Tensor x, DataType out_dtype) -> Tensor(out)
   args : (Tensor x, Tensor out_grad)
   output : Tensor(x_grad)
-  infer_meta :
-    func : UnchangedInferMeta
-    param : [x]
-  kernel :
-    func : cast_grad
-    data_type : out_grad
+  invoke : cast (out_grad, x.dtype())
   no_need_buffer : x
 
 - backward_op : ceil_grad
diff --git a/python/paddle/fluid/tests/unittests/gradient_checker.py b/python/paddle/fluid/tests/unittests/gradient_checker.py
index e7a8b8580e9cf31acfb7c59d5059d285ae4c03f3..3f143fa37ef3798b9dbfaa207332e2f6ee3d384b 100644
--- a/python/paddle/fluid/tests/unittests/gradient_checker.py
+++ b/python/paddle/fluid/tests/unittests/gradient_checker.py
@@ -268,6 +268,9 @@ def grad_check(x,
     for v in x:
         v.stop_gradient = False
         v.persistable = True
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     if place is None:
         place = fluid.CPUPlace()
     if program is None:
@@ -364,6 +367,9 @@ def double_grad_check(x,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
 
     if program is None:
         program = fluid.default_main_program()
@@ -445,6 +451,9 @@ def triple_grad_check(x,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
 
     if program is None:
         program = fluid.default_main_program()
@@ -578,6 +587,9 @@ def get_static_double_grad(x,
     for v in x:
         v.stop_gradient = False
         v.persistable = True
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     if place is None:
         place = fluid.CPUPlace()
     if program is None:
@@ -736,7 +748,9 @@ def double_grad_check_for_dygraph(func,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
-
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     y_grads_init = []
     for yi in y:
         np_type = dtype_to_np_dtype(yi.dtype)
@@ -903,7 +917,9 @@ def triple_grad_check_for_dygraph(func,
         v.stop_gradient = False
         v.persistable = True
     y = _as_list(y)
-
+    for u in y:
+        u.stop_gradient = False
+        u.persistable = True
     y_grads_init = []
     for yi in y:
         np_type = dtype_to_np_dtype(yi.dtype)
diff --git a/python/paddle/fluid/tests/unittests/test_cast_op.py b/python/paddle/fluid/tests/unittests/test_cast_op.py
index cd67440990cb852a13eb9bfdab121f8a5df4db05..a3a6805a868ee5d4b4312899c347e29238d91f2d 100644
--- a/python/paddle/fluid/tests/unittests/test_cast_op.py
+++ b/python/paddle/fluid/tests/unittests/test_cast_op.py
@@ -23,6 +23,9 @@ import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard
 from op_test import OpTest, convert_uint16_to_float, convert_float_to_uint16
 from paddle.fluid.framework import _test_eager_guard
+import gradient_checker
+from decorator_helper import prog_scope
+import paddle.fluid.layers as layers
 
 
 class TestCastOpFp32ToFp64(OpTest):
@@ -137,6 +140,80 @@ class TestCastOpEager(unittest.TestCase):
                 self.assertTrue(x.gradient().dtype == np.float16)
 
 
+class TestCastDoubleGradCheck(unittest.TestCase):
+
+    def cast_wrapper(self, x):
+        return paddle.cast(x[0], 'float64')
+
+    @prog_scope()
+    def func(self, place):
+        # the shape of input variable should be clearly specified, not inlcude -1.
+        eps = 0.005
+        dtype = np.float32
+
+        data = layers.data('data', [2, 3, 4], False, dtype)
+        data.persistable = True
+        out = paddle.cast(data, 'float64')
+        data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype)
+
+        gradient_checker.double_grad_check([data],
+                                           out,
+                                           x_init=[data_arr],
+                                           place=place,
+                                           eps=eps)
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        gradient_checker.double_grad_check_for_dygraph(self.cast_wrapper,
+                                                       [data],
+                                                       out,
+                                                       x_init=[data_arr],
+                                                       place=place)
+
+    def test_grad(self):
+        paddle.enable_static()
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
+class TestCastTripleGradCheck(unittest.TestCase):
+
+    def cast_wrapper(self, x):
+        return paddle.cast(x[0], 'float64')
+
+    @prog_scope()
+    def func(self, place):
+        # the shape of input variable should be clearly specified, not inlcude -1.
+        eps = 0.005
+        dtype = np.float32
+
+        data = layers.data('data', [2, 3, 4], False, dtype)
+        data.persistable = True
+        out = paddle.cast(data, 'float64')
+        data_arr = np.random.uniform(-1, 1, data.shape).astype(dtype)
+
+        gradient_checker.triple_grad_check([data],
+                                           out,
+                                           x_init=[data_arr],
+                                           place=place,
+                                           eps=eps)
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
+        gradient_checker.triple_grad_check_for_dygraph(self.cast_wrapper,
+                                                       [data],
+                                                       out,
+                                                       x_init=[data_arr],
+                                                       place=place)
+
+    def test_grad(self):
+        paddle.enable_static()
+        places = [fluid.CPUPlace()]
+        if core.is_compiled_with_cuda():
+            places.append(fluid.CUDAPlace(0))
+        for p in places:
+            self.func(p)
+
+
 if __name__ == '__main__':
     paddle.enable_static()
     unittest.main()