diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
index a2be8260e39b69fe19b8f5932fb00e498459d30b..7b251e8063a05e7d4a09238feaf1efef04739fe4 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
@@ -150,26 +150,29 @@ def custom_relu_double_grad_dynamic(func, device, dtype, np_x, use_func=True):
     t = paddle.to_tensor(np_x, dtype=dtype, stop_gradient=False)
 
     out = func(t) if use_func else paddle.nn.functional.relu(t)
-    out.stop_gradient = False
-
     dx = paddle.grad(
-        outputs=[out], inputs=[t], create_graph=True, retain_graph=True
+        outputs=out,
+        inputs=t,
+        grad_outputs=paddle.ones_like(t),
+        create_graph=True,
+        retain_graph=True,
     )
 
-    dx[0].backward()
-
-    assert dx[0].grad is not None
-    return dx[0].numpy(), dx[0].grad.numpy()
+    ddout = paddle.grad(
+        outputs=dx[0],
+        inputs=out.grad,
+        grad_outputs=paddle.ones_like(t),
+        create_graph=False,
+    )
     fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
+    assert ddout[0].numpy() is not None
+    return dx[0].numpy(), ddout[0].numpy()
 
-class TestNewCustomOpSetUpInstall(unittest.TestCase):
+
+class TestNewCustomOpXpuSetUpInstall(unittest.TestCase):
     def setUp(self):
         cur_dir = os.path.dirname(os.path.abspath(__file__))
-        # compile, install the custom op egg into site-packages under background
-        # Currently custom XPU op does not support Windows
-        if os.name == 'nt':
-            return
         cmd = 'cd {} && {} custom_relu_xpu_setup.py install'.format(
             cur_dir, sys.executable
         )
@@ -192,7 +195,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         self.custom_op = custom_relu_xpu_module_setup.custom_relu
 
         self.dtypes = ['float32', 'float64']
-        self.devices = ['xpu']
+        self.device = 'xpu'
 
         # config seed
         SEED = 2021
@@ -200,91 +203,90 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         paddle.framework.random._manual_program_seed(SEED)
 
     def test_static(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                out = custom_relu_static(self.custom_op, device, dtype, x)
-                pd_out = custom_relu_static(
-                    self.custom_op, device, dtype, x, False
-                )
-                np.testing.assert_array_equal(
-                    out,
-                    pd_out,
-                    err_msg='custom op out: {},\n paddle api out: {}'.format(
-                        out, pd_out
-                    ),
-                )
+        for dtype in self.dtypes:
+            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+            out = custom_relu_static(self.custom_op, self.device, dtype, x)
+            pd_out = custom_relu_static(
+                self.custom_op, self.device, dtype, x, False
+            )
+            np.testing.assert_array_equal(
+                out,
+                pd_out,
+                err_msg='custom op out: {},\n paddle api out: {}'.format(
+                    out, pd_out
+                ),
+            )
 
     def test_static_pe(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                out = custom_relu_static_pe(self.custom_op, device, dtype, x)
-                pd_out = custom_relu_static_pe(
-                    self.custom_op, device, dtype, x, False
-                )
-                np.testing.assert_array_equal(
-                    out,
-                    pd_out,
-                    err_msg='custom op out: {},\n paddle api out: {}'.format(
-                        out, pd_out
-                    ),
-                )
+        for dtype in self.dtypes:
+            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+            out = custom_relu_static_pe(self.custom_op, self.device, dtype, x)
+            pd_out = custom_relu_static_pe(
+                self.custom_op, self.device, dtype, x, False
+            )
+            np.testing.assert_allclose(
+                out,
+                pd_out,
+                atol=1e-2,
+                err_msg='custom op out: {},\n paddle api out: {}'.format(
+                    out, pd_out
+                ),
+            )
 
     def test_dynamic(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                out, x_grad = custom_relu_dynamic(
-                    self.custom_op, device, dtype, x
-                )
-                pd_out, pd_x_grad = custom_relu_dynamic(
-                    self.custom_op, device, dtype, x, False
-                )
-                np.testing.assert_array_equal(
-                    out,
-                    pd_out,
-                    err_msg='custom op out: {},\n paddle api out: {}'.format(
-                        out, pd_out
-                    ),
-                )
-                np.testing.assert_array_equal(
-                    x_grad,
-                    pd_x_grad,
-                    err_msg='custom op x grad: {},\n paddle api x grad: {}'.format(
-                        x_grad, pd_x_grad
-                    ),
-                )
+        for dtype in self.dtypes:
+            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+            out, x_grad = custom_relu_dynamic(
+                self.custom_op, self.device, dtype, x
+            )
+            pd_out, pd_x_grad = custom_relu_dynamic(
+                self.custom_op, self.device, dtype, x, False
+            )
+            np.testing.assert_array_equal(
+                out,
+                pd_out,
+                err_msg='custom op out: {},\n paddle api out: {}'.format(
+                    out, pd_out
+                ),
+            )
+            np.testing.assert_array_equal(
+                x_grad,
+                pd_x_grad,
+                err_msg='custom op x grad: {},\n paddle api x grad: {}'.format(
+                    x_grad, pd_x_grad
+                ),
+            )
 
     def test_static_save_and_load_inference_model(self):
         paddle.enable_static()
         np_data = np.random.random((1, 1, 28, 28)).astype("float32")
         np_label = np.random.random((1, 1)).astype("int64")
         path_prefix = "self.custom_op_inference/custom_relu"
-        for device in self.devices:
-            predict = custom_relu_static_inference(
-                self.custom_op, device, np_data, np_label, path_prefix
+
+        predict = custom_relu_static_inference(
+            self.custom_op, self.device, np_data, np_label, path_prefix
+        )
+        # load inference model
+        with static.scope_guard(static.Scope()):
+            exe = static.Executor()
+            [
+                inference_program,
+                feed_target_names,
+                fetch_targets,
+            ] = static.load_inference_model(path_prefix, exe)
+            predict_infer = exe.run(
+                inference_program,
+                feed={feed_target_names[0]: np_data},
+                fetch_list=fetch_targets,
+            )
+            np.testing.assert_allclose(
+                predict,
+                predict_infer,
+                atol=1e-2,
+                err_msg='custom op predict: {},\n custom op infer predict: {}'.format(
+                    predict, predict_infer
+                ),
             )
-            # load inference model
-            with static.scope_guard(static.Scope()):
-                exe = static.Executor()
-                [
-                    inference_program,
-                    feed_target_names,
-                    fetch_targets,
-                ] = static.load_inference_model(path_prefix, exe)
-                predict_infer = exe.run(
-                    inference_program,
-                    feed={feed_target_names[0]: np_data},
-                    fetch_list=fetch_targets,
-                )
-                np.testing.assert_array_equal(
-                    predict,
-                    predict_infer,
-                    err_msg='custom op predict: {},\n custom op infer predict: {}'.format(
-                        predict, predict_infer
-                    ),
-                )
         paddle.disable_static()
 
     def test_static_save_and_run_inference_predictor(self):
@@ -294,92 +296,97 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         path_prefix = "self.custom_op_inference/custom_relu"
         from paddle.inference import Config, create_predictor
 
-        for device in self.devices:
-            predict = custom_relu_static_inference(
-                self.custom_op, device, np_data, np_label, path_prefix
-            )
-            # load inference model
-            config = Config(
-                path_prefix + ".pdmodel", path_prefix + ".pdiparams"
+        predict = custom_relu_static_inference(
+            self.custom_op, self.device, np_data, np_label, path_prefix
+        )
+        # load inference model
+        config = Config(path_prefix + ".pdmodel", path_prefix + ".pdiparams")
+        predictor = create_predictor(config)
+        input_tensor = predictor.get_input_handle(
+            predictor.get_input_names()[0]
+        )
+        input_tensor.reshape(np_data.shape)
+        input_tensor.copy_from_cpu(np_data.copy())
+        predictor.run()
+        output_tensor = predictor.get_output_handle(
+            predictor.get_output_names()[0]
+        )
+        predict_infer = output_tensor.copy_to_cpu()
+        predict = np.array(predict).flatten()
+        predict_infer = np.array(predict_infer).flatten()
+        np.testing.assert_allclose(
+            predict,
+            predict_infer,
+            rtol=5e-5,
+            atol=1e-2,
+            err_msg="custom op predict: {},\n custom op infer predict: {}".format(
+                predict, predict_infer
+            ),
+        )
+        paddle.disable_static()
+
+    def test_func_double_grad_dynamic(self):
+        for dtype in self.dtypes:
+            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+            out, dx_grad = custom_relu_double_grad_dynamic(
+                self.custom_op, self.device, dtype, x
             )
-            predictor = create_predictor(config)
-            input_tensor = predictor.get_input_handle(
-                predictor.get_input_names()[0]
+            pd_out, pd_dx_grad = custom_relu_double_grad_dynamic(
+                self.custom_op, self.device, dtype, x, False
             )
-            input_tensor.reshape(np_data.shape)
-            input_tensor.copy_from_cpu(np_data.copy())
-            predictor.run()
-            output_tensor = predictor.get_output_handle(
-                predictor.get_output_names()[0]
+            np.testing.assert_array_equal(
+                out,
+                pd_out,
+                err_msg='custom op out: {},\n paddle api out: {}'.format(
+                    out, pd_out
+                ),
             )
-            predict_infer = output_tensor.copy_to_cpu()
-            self.assertTrue(
-                np.isclose(predict, predict_infer, rtol=5e-5).any(),
-                "custom op predict: {},\n custom op infer predict: {}".format(
-                    predict, predict_infer
+            np.testing.assert_array_equal(
+                dx_grad,
+                pd_dx_grad,
+                err_msg='custom op dx grad: {},\n paddle api dx grad: {}'.format(
+                    dx_grad, pd_dx_grad
                 ),
             )
-        paddle.disable_static()
-
-    def test_func_double_grad_dynamic(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                out, dx_grad = custom_relu_double_grad_dynamic(
-                    self.custom_op, device, dtype, x
-                )
-                pd_out, pd_dx_grad = custom_relu_double_grad_dynamic(
-                    self.custom_op, device, dtype, x, False
-                )
-                np.testing.assert_array_equal(
-                    out,
-                    pd_out,
-                    err_msg='custom op out: {},\n paddle api out: {}'.format(
-                        out, pd_out
-                    ),
-                )
-                np.testing.assert_array_equal(
-                    dx_grad,
-                    pd_dx_grad,
-                    err_msg='custom op dx grad: {},\n paddle api dx grad: {}'.format(
-                        dx_grad, pd_dx_grad
-                    ),
-                )
 
     def test_with_dataloader(self):
         paddle.disable_static()
-        for device in self.devices:
-            paddle.set_device(device)
-            # data loader
-            transform = Compose(
-                [Normalize(mean=[127.5], std=[127.5], data_format='CHW')]
-            )
-            train_dataset = paddle.vision.datasets.MNIST(
-                mode='train', transform=transform
-            )
-            train_loader = paddle.io.DataLoader(
-                train_dataset,
-                batch_size=64,
-                shuffle=True,
-                drop_last=True,
-                num_workers=0,
-            )
+        paddle.set_device(self.device)
+        # data loader
+        transform = Compose(
+            [Normalize(mean=[127.5], std=[127.5], data_format='CHW')]
+        )
+        train_dataset = paddle.vision.datasets.MNIST(
+            mode='train', transform=transform
+        )
+        train_loader = paddle.io.DataLoader(
+            train_dataset,
+            batch_size=64,
+            shuffle=True,
+            drop_last=True,
+            num_workers=0,
+        )
 
-            for batch_id, (image, _) in enumerate(train_loader()):
-                out = self.custom_op(image)
-                pd_out = paddle.nn.functional.relu(image)
-                np.testing.assert_array_equal(
-                    out,
-                    pd_out,
-                    err_msg='custom op out: {},\n paddle api out: {}'.format(
-                        out, pd_out
-                    ),
-                )
+        for batch_id, (image, _) in enumerate(train_loader()):
+            out = self.custom_op(image)
+            pd_out = paddle.nn.functional.relu(image)
+            np.testing.assert_allclose(
+                out,
+                pd_out,
+                atol=1e-2,
+                err_msg='custom op out: {},\n paddle api out: {}'.format(
+                    out, pd_out
+                ),
+            )
 
-                if batch_id == 5:
-                    break
+            if batch_id == 5:
+                break
         paddle.enable_static()
 
 
 if __name__ == '__main__':
+    # compile, install the custom op egg into site-packages under background
+    # Currently custom XPU op does not support Windows
+    if os.name == 'nt':
+        exit()
     unittest.main()