diff --git a/python/paddle/fluid/tests/custom_op/test_context_pool.py b/python/paddle/fluid/tests/custom_op/test_context_pool.py
index 1150c6706e305b2b22c8249c483675236dacba30..9e9085e91b4df40687d37eb1b79d10c6063177da 100644
--- a/python/paddle/fluid/tests/custom_op/test_context_pool.py
+++ b/python/paddle/fluid/tests/custom_op/test_context_pool.py
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -49,17 +48,12 @@ class TestContextPool(unittest.TestCase):
         if paddle.is_compiled_with_cuda():
             self.devices.append('gpu')
 
-    def use_context_pool(self):
+    def test_use_context_pool(self):
         x = paddle.ones([2, 2], dtype='float32')
         out = custom_ops.context_pool_test(x)
 
         np.testing.assert_array_equal(x.numpy(), out.numpy())
 
-    def test_using_context_pool(self):
-        with _test_eager_guard():
-            self.use_context_pool()
-        self.use_context_pool()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
index 1c923a6c809685c725d2cfcef829b3e8db63497b..c8f86ed004b1128174b7a1d7b9eefd0d3f7d2bed 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -57,7 +56,7 @@ class TestJitCustomAttrs(unittest.TestCase):
         self.int64_vec_attr = [10000000000, 10000000000, 10000000000]
         self.str_vec_attr = ["StrAttr", "StrAttr", "StrAttr"]
 
-    def func_attr_value(self):
+    def test_func_attr_value(self):
         x = paddle.ones([2, 2], dtype='float32')
         x.stop_gradient = False
         out = custom_attrs.attr_test(
@@ -77,12 +76,7 @@ class TestJitCustomAttrs(unittest.TestCase):
 
         np.testing.assert_array_equal(x.numpy(), out.numpy())
 
-    def test_attr_value(self):
-        with _test_eager_guard():
-            self.func_attr_value()
-        self.func_attr_value()
-
-    def func_const_attr_value(self):
+    def test_const_attr_value(self):
         x = paddle.ones([2, 2], dtype='float32')
         x.stop_gradient = False
         out = custom_attrs.const_attr_test(
@@ -102,11 +96,6 @@ class TestJitCustomAttrs(unittest.TestCase):
 
         np.testing.assert_array_equal(x.numpy(), out.numpy())
 
-    def test_const_attr_value(self):
-        with _test_eager_guard():
-            self.func_const_attr_value()
-        self.func_const_attr_value()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_concat.py b/python/paddle/fluid/tests/custom_op/test_custom_concat.py
index 97309be8ff81aa1fae6a846487a6c953182e6169..50479c433da6bc8e7ab42ed0dc90dcda160bfd58 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_concat.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_concat.py
@@ -20,7 +20,6 @@ from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
 import paddle.static as static
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -122,7 +121,7 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
             ),
         )
 
-    def func_dynamic(self):
+    def test_dynamic(self):
         for dtype in self.dtypes:
             for axis in self.axises:
                 out, grad_inputs = concat_dynamic(
@@ -136,11 +135,6 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
                 for x_grad, pd_x_grad in zip(grad_inputs, pd_grad_inputs):
                     self.check_output(x_grad, pd_x_grad, "x_grad")
 
-    def test_dynamic(self):
-        with _test_eager_guard():
-            self.func_dynamic()
-        self.func_dynamic()
-
     def test_static(self):
         for dtype in self.dtypes:
             for axis in self.axises:
@@ -155,7 +149,7 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
                 self.check_output(x1_grad, pd_x1_grad, "x1_grad")
                 self.check_output(x2_grad, pd_x2_grad, "x2_grad")
 
-    def func_dynamic_with_attr(self):
+    def test_dynamic_with_attr(self):
         for dtype in self.dtypes:
             for axis in self.axises:
                 out, grad_inputs = concat_dynamic(
@@ -173,11 +167,6 @@ class TestCustomConcatDynamicAxisJit(unittest.TestCase):
                 for x_grad, pd_x_grad in zip(grad_inputs, pd_grad_inputs):
                     self.check_output(x_grad, pd_x_grad, "x_grad")
 
-    def test_dynamic_with_attr(self):
-        with _test_eager_guard():
-            self.func_dynamic_with_attr()
-        self.func_dynamic_with_attr()
-
     def test_static_with_attr(self):
         for dtype in self.dtypes:
             for axis in self.axises:
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_conj.py b/python/paddle/fluid/tests/custom_op/test_custom_conj.py
index 1f94408e27cf4ede8e4e40f5c3835b6fa2c49d0a..38d5723912ac833f075837f2835b5921506c8fe3 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_conj.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_conj.py
@@ -20,7 +20,6 @@ from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
 import paddle.static as static
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -128,16 +127,11 @@ class TestCustomConjJit(unittest.TestCase):
         self.check_output(out, pd_out, "out")
         self.check_output(x_grad, pd_x_grad, "x's grad")
 
-    def func_dynamic(self):
+    def test_dynamic(self):
         for dtype in self.dtypes:
             np_input = np.random.random(self.shape).astype(dtype)
             self.run_dynamic(dtype, np_input)
 
-    def test_dynamic(self):
-        with _test_eager_guard():
-            self.func_dynamic()
-        self.func_dynamic()
-
     def test_static(self):
         for dtype in self.dtypes:
             np_input = np.random.random(self.shape).astype(dtype)
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_linear.py b/python/paddle/fluid/tests/custom_op/test_custom_linear.py
index 95570d762457b6c08b5b370239cd1f20d65cae0d..6150f69b0b173996c4c2b08e978c1920253bb858 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_linear.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_linear.py
@@ -21,7 +21,6 @@ from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 import paddle
 import paddle.nn.functional as F
 import paddle.static as static
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -140,7 +139,7 @@ class TestCustomLinearJit(unittest.TestCase):
             )
             self.check_output(phi_bias_grad, pd_bias_grad, "bias_grad")
 
-    def func_dynamic(self):
+    def test_dynamic(self):
         for device in self.devices:
             for dtype in self.dtypes:
                 (
@@ -176,11 +175,6 @@ class TestCustomLinearJit(unittest.TestCase):
                 )
                 self.check_output(phi_bias_grad, pd_bias_grad, "bias_grad")
 
-    def test_dynamic(self):
-        with _test_eager_guard():
-            self.func_dynamic()
-        self.func_dynamic()
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
index 1ce77e2069a06b38839ed02b04820c64ae3f93b2..67060c931ecf3d618b73f972c12e9634e4b47fe1 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_model.py
@@ -21,7 +21,7 @@ from utils import IS_MAC, extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
 from paddle import nn
-from paddle.fluid.framework import _in_legacy_dygraph, _test_eager_guard
+from paddle.fluid.framework import _in_legacy_dygraph
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -115,7 +115,7 @@ class TestDygraphModel(unittest.TestCase):
             shape=[None, self.in_dim], dtype='float32', name='x'
         )
 
-    def func_train_eval(self):
+    def test_train_eval(self):
         for device in self.devices:
             # set device
             paddle.set_device(device)
@@ -151,11 +151,6 @@ class TestDygraphModel(unittest.TestCase):
                     origin_relu_eval_out, custom_relu_eval_out
                 )
 
-    def test_train_eval(self):
-        with _test_eager_guard():
-            self.func_train_eval()
-        self.func_train_eval()
-
     def train_model(self, use_custom_op=False, dy2stat=False):
         # reset random seed
         paddle.seed(self.seed)
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
index dad3fbd94c6d0ae0a303b3a15212dddc49b7f4b3..05827d77914e0412c000ed311fdf8537f9da0ef1 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
@@ -20,7 +20,6 @@ from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
 from utils import IS_MAC, extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -85,7 +84,7 @@ class TestJITLoad(unittest.TestCase):
                 ),
             )
 
-    def func_dynamic(self):
+    def test_dynamic(self):
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -113,12 +112,7 @@ class TestJITLoad(unittest.TestCase):
                     ),
                 )
 
-    def test_dynamic(self):
-        with _test_eager_guard():
-            self.func_dynamic()
-        self.func_dynamic()
-
-    def func_exception(self):
+    def test_exception(self):
         caught_exception = False
         try:
             x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
@@ -143,11 +137,6 @@ class TestJITLoad(unittest.TestCase):
             self.assertTrue("custom_relu_op.cu" in str(e))
         self.assertTrue(caught_exception)
 
-    def test_exception(self):
-        with _test_eager_guard():
-            self.func_exception()
-        self.func_exception()
-
     def test_load_multiple_module(self):
         custom_module = load(
             name='custom_conj_jit',
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
index 878de3f3e0ecbfc94c0b93b1d13a630ca19cdb7c..c2cd953b47a4e5d8efcff2d8628365a7c9c16179 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
@@ -21,7 +21,7 @@ import numpy as np
 
 import paddle
 import paddle.static as static
-from paddle.fluid.framework import _test_eager_guard
+from paddle import fluid
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 from paddle.vision.transforms import Compose, Normalize
 
@@ -251,7 +251,8 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                 ),
             )
 
-    def func_dynamic(self):
+    def test_dynamic(self):
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -278,11 +279,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         x_grad, pd_x_grad
                     ),
                 )
-
-    def test_dynamic(self):
-        with _test_eager_guard():
-            self.func_dynamic()
-        self.func_dynamic()
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_static_save_and_load_inference_model(self):
         paddle.enable_static()
@@ -350,6 +347,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         paddle.disable_static()
 
     def test_func_double_grad_dynamic(self):
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 if device == 'cpu' and dtype == 'float16':
@@ -375,6 +373,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         dx_grad, pd_dx_grad
                     ),
                 )
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
     def test_with_dataloader(self):
         for device in self.devices:
@@ -395,6 +394,7 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
             )
 
             for batch_id, (image, _) in enumerate(train_loader()):
+                image = paddle.to_tensor(image)
                 out = self.custom_ops[0](image)
                 pd_out = paddle.nn.functional.relu(image)
                 np.testing.assert_array_equal(
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py b/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py
index 4113e1c650d72550fcdfdda2c670fdb02a3a32b7..166f15ca4c249a1c6c278b59ad8d7c7e7ccf75d5 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_simple_slice.py
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -43,7 +42,7 @@ custom_ops = load(
 
 
 class TestCustomSimpleSliceJit(unittest.TestCase):
-    def func_slice_output(self):
+    def test_slice_output(self):
         np_x = np.random.random((5, 2)).astype("float32")
         x = paddle.to_tensor(np_x)
         custom_op_out = custom_ops.custom_simple_slice(x, 2, 3)
@@ -56,11 +55,6 @@ class TestCustomSimpleSliceJit(unittest.TestCase):
             ),
         )
 
-    def test_slice_output(self):
-        with _test_eager_guard():
-            self.func_slice_output()
-        self.func_slice_output()
-
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py
index f7c0b5c56976b50aba342e51c98088511c99c017..bedaf36832f91ee600ea7f789ea0ff6b73366a78 100644
--- a/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_tanh_double_grad.py
@@ -20,7 +20,6 @@ from utils import extra_cc_args, extra_nvcc_args, paddle_includes
 
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -68,7 +67,8 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
         self.dtypes = ['float32', 'float64']
         self.devices = ['cpu']
 
-    def func_double_grad_dynamic(self):
+    def test_double_grad_dynamic(self):
+        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
         for device in self.devices:
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
@@ -102,12 +102,6 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
                     dout, pd_dout
                 ),
             )
-
-    def test_func_double_grad_dynamic(self):
-        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
-        with _test_eager_guard():
-            self.func_double_grad_dynamic()
-        self.func_double_grad_dynamic()
         fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
 
 
diff --git a/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py b/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
index 3b046f14bad68a57f8cf8bb208aa02e6ebeb51c3..db144675a58beae1edb96a227a6e251130a6479f 100644
--- a/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, paddle_includes
 
 import paddle
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -43,7 +42,7 @@ class TestJitDispatch(unittest.TestCase):
     def setUp(self):
         paddle.set_device('cpu')
 
-    def run_dispatch_test_impl(self, func, dtype):
+    def run_dispatch_test(self, func, dtype):
         np_x = np.ones([2, 2]).astype(dtype)
         x = paddle.to_tensor(np_x)
         out = func(x)
@@ -56,11 +55,6 @@ class TestJitDispatch(unittest.TestCase):
             err_msg='custom op x: {},\n custom op out: {}'.format(np_x, np_out),
         )
 
-    def run_dispatch_test(self, func, dtype):
-        with _test_eager_guard():
-            self.run_dispatch_test_impl(func, dtype)
-        self.run_dispatch_test_impl(func, dtype)
-
     def test_dispatch_integer(self):
         dtypes = ["int32", "int64", "int8", "uint8", "int16"]
         for dtype in dtypes:
diff --git a/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py b/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
index 7caa44c6113a63eff34b5705ec4fffd9a6806c3f..7e252e048b64c9b158fabe21b818fbccaf71a26c 100644
--- a/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
+++ b/python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
@@ -19,7 +19,6 @@ import numpy as np
 from utils import extra_cc_args, paddle_includes
 
 import paddle
-from paddle.fluid.framework import _test_eager_guard
 from paddle.utils.cpp_extension import get_build_directory, load
 from paddle.utils.cpp_extension.extension_utils import run_cmd
 
@@ -89,7 +88,7 @@ class TestMultiOutputDtypes(unittest.TestCase):
             self.check_multi_outputs(res)
         paddle.disable_static()
 
-    def func_dynamic(self):
+    def test_dynamic(self):
         for device in self.devices:
             for dtype in self.dtypes:
                 paddle.set_device(device)
@@ -100,11 +99,6 @@ class TestMultiOutputDtypes(unittest.TestCase):
                 self.assertTrue(len(outs) == 3)
                 self.check_multi_outputs(outs, True)
 
-    def test_dynamic(self):
-        with _test_eager_guard():
-            self.func_dynamic()
-        self.func_dynamic()
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/fluid/tests/custom_runtime/process_group_xccl.py b/python/paddle/fluid/tests/custom_runtime/process_group_xccl.py
index d97b889ff04c5f835e9478d70266796acb8738c4..383713e0f5ff52525ff07d42ed27a07721fb943c 100644
--- a/python/paddle/fluid/tests/custom_runtime/process_group_xccl.py
+++ b/python/paddle/fluid/tests/custom_runtime/process_group_xccl.py
@@ -20,7 +20,6 @@ import numpy as np
 import paddle
 from paddle.fluid import core
 from paddle.fluid.dygraph.parallel import ParallelEnv
-from paddle.fluid.framework import _test_eager_guard
 
 
 def init_process_group(strategy=None):
@@ -50,192 +49,187 @@ class TestProcessGroupFp32(unittest.TestCase):
         self.shape = (2, 10, 5)
 
     def test_create_process_group_xccl(self):
-        with _test_eager_guard():
-            device_id = paddle.distributed.ParallelEnv().dev_id
-            paddle.set_device('custom_cpu:%d' % device_id)
-
-            pg = init_process_group()
-
-            x = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            y = np.random.random(self.shape).astype(self.dtype)
-            tensor_y = paddle.to_tensor(y)
-
-            sum_result = tensor_x + tensor_y
-            if pg.rank() == 0:
-                task = pg.all_reduce(tensor_x, core.ReduceOp.SUM, sync_op=True)
-                task.wait()
-                # assert np.array_equal(tensor_x, sum_result)
-            else:
-                task = pg.all_reduce(tensor_y, core.ReduceOp.SUM, sync_op=True)
-                task.wait()
-                # assert np.array_equal(tensor_y, sum_result)
-
-            print("test allreduce sum api ok")
-
-            x = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            y = np.random.random(self.shape).astype(self.dtype)
-            tensor_y = paddle.to_tensor(y)
-
-            max_result = paddle.maximum(tensor_x, tensor_y)
-
-            if pg.rank() == 0:
-                task = pg.all_reduce(tensor_x, core.ReduceOp.MAX, sync_op=True)
-                task.wait()
-                # assert np.array_equal(tensor_x, max_result)
-            else:
-                task = pg.all_reduce(tensor_y, core.ReduceOp.MAX, sync_op=True)
-                task.wait()
-                # assert np.array_equal(tensor_y, max_result)
-
-            print("test allreduce max api ok")
-
-            # test broadcast
-            # rank 0
-            x = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            # rank 1
-            y = np.random.random(self.shape).astype(self.dtype)
-            tensor_y = paddle.to_tensor(y)
-
-            broadcast_result = paddle.assign(tensor_x)
-            if pg.rank() == 0:
-                task = pg.broadcast(tensor_x, 0, sync_op=True)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-                assert task.is_completed()
-                # assert np.array_equal(broadcast_result, tensor_x)
-            else:
-                task = pg.broadcast(tensor_y, 0, sync_op=True)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-                assert task.is_completed()
-                # assert np.array_equal(broadcast_result, tensor_y)
-
-            print("test broadcast api ok")
-
-            # test barrier
-            # rank 0
-            if pg.rank() == 0:
-                task = pg.barrier(device_id)
-                task.wait()
-            # rank 1
-            else:
-                task = pg.barrier(device_id)
-                task.wait()
-
-            print("test barrier api ok\n")
-            return
-
-            # test allgather
-            # rank 0
-            x = np.random.random(self.shape).astype(self.dtype)
-            y = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            tensor_y = paddle.to_tensor(y)
-            out_shape = list(self.shape)
-            out_shape[0] *= 2
-            out = np.random.random(out_shape).astype(self.dtype)
-            tensor_out = paddle.to_tensor(out)
-            if pg.rank() == 0:
-                task = pg.all_gather(tensor_out, tensor_x, sync_op=True)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            # rank 1
-            else:
-                task = pg.all_gather(tensor_out, tensor_y, sync_op=True)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2])
-            out_2 = paddle.slice(
-                tensor_out, [0], [out_shape[0] // 2], [out_shape[0]]
-            )
-            # assert np.array_equal(tensor_x, out_1)
-            # assert np.array_equal(tensor_y, out_2)
-            print("test allgather api ok\n")
-
-            # test alltoall
-            # rank 0
-            x = np.random.random(self.shape).astype(self.dtype)
-            y = np.random.random(self.shape).astype(self.dtype)
-            out1 = np.random.random(self.shape).astype(self.dtype)
-            out2 = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            tensor_y = paddle.to_tensor(y)
-            tensor_out1 = paddle.to_tensor(out1)
-            tensor_out2 = paddle.to_tensor(out2)
-            raw_tensor_x_2 = paddle.slice(
-                tensor_x, [0], [self.shape[0] // 2], [self.shape[0]]
-            )
-            raw_tensor_y_1 = paddle.slice(
-                tensor_y, [0], [0], [self.shape[0] // 2]
-            )
-            if pg.rank() == 0:
-                task = pg.alltoall(tensor_x, tensor_out1)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            # rank 1
-            else:
-                task = pg.alltoall(tensor_y, tensor_out2)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            out1_2 = paddle.slice(
-                tensor_out1, [0], [self.shape[0] // 2], [self.shape[0]]
-            )
-            out2_1 = paddle.slice(tensor_out2, [0], [0], [self.shape[0] // 2])
-            # if pg.rank() == 0:
-            #     assert np.array_equal(out1_2.numpy(), raw_tensor_y_1.numpy())
-            # else:
-            #     assert np.array_equal(out2_1, raw_tensor_x_2)
-            print("test alltoall api ok\n")
-
-            # test Reduce
-            # rank 0
-            x = np.random.random(self.shape).astype(self.dtype)
-            y = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            tensor_y = paddle.to_tensor(y)
-            sum_result = tensor_x + tensor_y
-            if pg.rank() == 0:
-                task = pg.reduce(tensor_x, 0)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            # rank 1
-            else:
-                task = pg.reduce(tensor_y, 0)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            # if pg.rank() == 0:
-            #     assert np.array_equal(tensor_x, sum_result)
-            print("test reduce sum api ok\n")
-
-            # test Scatter
-            # rank 0
-            in_shape = list(self.shape)
-            in_shape[0] *= 2
-            x = np.random.random(in_shape).astype(self.dtype)
-            y = np.random.random(self.shape).astype(self.dtype)
-            tensor_x = paddle.to_tensor(x)
-            tensor_y = paddle.to_tensor(y)
-            if pg.rank() == 0:
-                task = pg.scatter(tensor_x, tensor_y, 0)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            # rank 1
-            else:
-                task = pg.scatter(tensor_x, tensor_y, 0)
-                task.wait()
-                # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
-            out1 = paddle.slice(tensor_x, [0], [0], [self.shape[0]])
-            out2 = paddle.slice(
-                tensor_x, [0], [self.shape[0]], [self.shape[0] * 2]
-            )
-            # if pg.rank() == 0:
-            #     assert np.array_equal(tensor_y, out1)
-            # else:
-            #     assert np.array_equal(tensor_y, out2)
-            print("test scatter api ok\n")
+        device_id = paddle.distributed.ParallelEnv().dev_id
+        paddle.set_device('custom_cpu:%d' % device_id)
+
+        pg = init_process_group()
+
+        x = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        y = np.random.random(self.shape).astype(self.dtype)
+        tensor_y = paddle.to_tensor(y)
+
+        sum_result = tensor_x + tensor_y
+        if pg.rank() == 0:
+            task = pg.all_reduce(tensor_x, core.ReduceOp.SUM, sync_op=True)
+            task.wait()
+            # assert np.array_equal(tensor_x, sum_result)
+        else:
+            task = pg.all_reduce(tensor_y, core.ReduceOp.SUM, sync_op=True)
+            task.wait()
+            # assert np.array_equal(tensor_y, sum_result)
+
+        print("test allreduce sum api ok")
+
+        x = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        y = np.random.random(self.shape).astype(self.dtype)
+        tensor_y = paddle.to_tensor(y)
+
+        max_result = paddle.maximum(tensor_x, tensor_y)
+
+        if pg.rank() == 0:
+            task = pg.all_reduce(tensor_x, core.ReduceOp.MAX, sync_op=True)
+            task.wait()
+            # assert np.array_equal(tensor_x, max_result)
+        else:
+            task = pg.all_reduce(tensor_y, core.ReduceOp.MAX, sync_op=True)
+            task.wait()
+            # assert np.array_equal(tensor_y, max_result)
+
+        print("test allreduce max api ok")
+
+        # test broadcast
+        # rank 0
+        x = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        # rank 1
+        y = np.random.random(self.shape).astype(self.dtype)
+        tensor_y = paddle.to_tensor(y)
+
+        broadcast_result = paddle.assign(tensor_x)
+        if pg.rank() == 0:
+            task = pg.broadcast(tensor_x, 0, sync_op=True)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+            assert task.is_completed()
+            # assert np.array_equal(broadcast_result, tensor_x)
+        else:
+            task = pg.broadcast(tensor_y, 0, sync_op=True)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+            assert task.is_completed()
+            # assert np.array_equal(broadcast_result, tensor_y)
+
+        print("test broadcast api ok")
+
+        # test barrier
+        # rank 0
+        if pg.rank() == 0:
+            task = pg.barrier(device_id)
+            task.wait()
+        # rank 1
+        else:
+            task = pg.barrier(device_id)
+            task.wait()
+
+        print("test barrier api ok\n")
+        return
+
+        # test allgather
+        # rank 0
+        x = np.random.random(self.shape).astype(self.dtype)
+        y = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        tensor_y = paddle.to_tensor(y)
+        out_shape = list(self.shape)
+        out_shape[0] *= 2
+        out = np.random.random(out_shape).astype(self.dtype)
+        tensor_out = paddle.to_tensor(out)
+        if pg.rank() == 0:
+            task = pg.all_gather(tensor_out, tensor_x, sync_op=True)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        # rank 1
+        else:
+            task = pg.all_gather(tensor_out, tensor_y, sync_op=True)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        out_1 = paddle.slice(tensor_out, [0], [0], [out_shape[0] // 2])
+        out_2 = paddle.slice(
+            tensor_out, [0], [out_shape[0] // 2], [out_shape[0]]
+        )
+        # assert np.array_equal(tensor_x, out_1)
+        # assert np.array_equal(tensor_y, out_2)
+        print("test allgather api ok\n")
+
+        # test alltoall
+        # rank 0
+        x = np.random.random(self.shape).astype(self.dtype)
+        y = np.random.random(self.shape).astype(self.dtype)
+        out1 = np.random.random(self.shape).astype(self.dtype)
+        out2 = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        tensor_y = paddle.to_tensor(y)
+        tensor_out1 = paddle.to_tensor(out1)
+        tensor_out2 = paddle.to_tensor(out2)
+        raw_tensor_x_2 = paddle.slice(
+            tensor_x, [0], [self.shape[0] // 2], [self.shape[0]]
+        )
+        raw_tensor_y_1 = paddle.slice(tensor_y, [0], [0], [self.shape[0] // 2])
+        if pg.rank() == 0:
+            task = pg.alltoall(tensor_x, tensor_out1)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        # rank 1
+        else:
+            task = pg.alltoall(tensor_y, tensor_out2)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        out1_2 = paddle.slice(
+            tensor_out1, [0], [self.shape[0] // 2], [self.shape[0]]
+        )
+        out2_1 = paddle.slice(tensor_out2, [0], [0], [self.shape[0] // 2])
+        # if pg.rank() == 0:
+        #     assert np.array_equal(out1_2.numpy(), raw_tensor_y_1.numpy())
+        # else:
+        #     assert np.array_equal(out2_1, raw_tensor_x_2)
+        print("test alltoall api ok\n")
+
+        # test Reduce
+        # rank 0
+        x = np.random.random(self.shape).astype(self.dtype)
+        y = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        tensor_y = paddle.to_tensor(y)
+        sum_result = tensor_x + tensor_y
+        if pg.rank() == 0:
+            task = pg.reduce(tensor_x, 0)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        # rank 1
+        else:
+            task = pg.reduce(tensor_y, 0)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        # if pg.rank() == 0:
+        #     assert np.array_equal(tensor_x, sum_result)
+        print("test reduce sum api ok\n")
+
+        # test Scatter
+        # rank 0
+        in_shape = list(self.shape)
+        in_shape[0] *= 2
+        x = np.random.random(in_shape).astype(self.dtype)
+        y = np.random.random(self.shape).astype(self.dtype)
+        tensor_x = paddle.to_tensor(x)
+        tensor_y = paddle.to_tensor(y)
+        if pg.rank() == 0:
+            task = pg.scatter(tensor_x, tensor_y, 0)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        # rank 1
+        else:
+            task = pg.scatter(tensor_x, tensor_y, 0)
+            task.wait()
+            # paddle.fluid.core._custom_device_synchronize("custom_cpu", -1)
+        out1 = paddle.slice(tensor_x, [0], [0], [self.shape[0]])
+        out2 = paddle.slice(tensor_x, [0], [self.shape[0]], [self.shape[0] * 2])
+        # if pg.rank() == 0:
+        #     assert np.array_equal(tensor_y, out1)
+        # else:
+        #     assert np.array_equal(tensor_y, out2)
+        print("test scatter api ok\n")
 
 
 if __name__ == "__main__":
diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
index 0d087cab118e08302e9586c6c847d5ec9c4bbf04..3139f13127d6aeea612bcd7e1f5ce0aa8a61f7ed 100755
--- a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
+++ b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py
@@ -50,18 +50,12 @@ class TestCustomCPUPlugin(unittest.TestCase):
         del os.environ['CUSTOM_DEVICE_ROOT']
 
     def test_custom_device(self):
-        import paddle
-
-        with paddle.fluid.framework._test_eager_guard():
-            self._test_custom_device_dataloader()
-            self._test_custom_device_mnist()
-            self._test_eager_backward_api()
-            self._test_eager_copy_to()
-            self._test_fallback_kernel()
-            self._test_scalar()
-            self._test_custom_device_gradient_accumulation()
         self._test_custom_device_dataloader()
         self._test_custom_device_mnist()
+        self._test_eager_backward_api()
+        self._test_eager_copy_to()
+        self._test_fallback_kernel()
+        self._test_scalar()
 
     def _test_custom_device_dataloader(self):
         import paddle
diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_profiler_plugin.py b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_profiler_plugin.py
index faf346ff3626735eb6e5039f5517b048874f9366..9501b22299fcabf4b0bff1400df7339809ed533b 100644
--- a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_profiler_plugin.py
+++ b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_profiler_plugin.py
@@ -47,13 +47,7 @@ class TestCustomCPUProfilerPlugin(unittest.TestCase):
         self.temp_dir.cleanup()
         del os.environ['CUSTOM_DEVICE_ROOT']
 
-    def test_custom_device(self):
-        import paddle
-
-        with paddle.fluid.framework._test_eager_guard():
-            self._test_custom_profiler()
-
-    def _test_custom_profiler(self):
+    def test_custom_profiler(self):
         import paddle
         import paddle.profiler as profiler
 
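Taken together, the hunks above apply one mechanical migration: eager mode is now the default execution mode, so the `func_*`/`test_*` pairs that ran each body twice (once under `_test_eager_guard()`, once more in legacy dygraph) collapse into a single `test_*` method. A minimal sketch of the before/after shape, using a hypothetical `_custom_double` helper as a stand-in for a compiled custom op (nothing here is a real extension from the patch):

```python
import unittest

import numpy as np
import paddle


class TestMigrationShape(unittest.TestCase):
    # Hypothetical stand-in for a compiled custom op such as
    # custom_ops.context_pool_test; doubling keeps the sketch self-contained.
    def _custom_double(self, x):
        return x * 2

    # Old shape (the pattern this patch removes):
    #
    #     def func_double(self):
    #         ...test body...
    #
    #     def test_double(self):
    #         with _test_eager_guard():   # run once in eager mode
    #             self.func_double()
    #         self.func_double()          # run again in legacy dygraph
    #
    # New shape: eager is the default, so the helper simply becomes the test.
    def test_double(self):
        x = paddle.ones([2, 2], dtype='float32')
        out = self._custom_double(x)
        np.testing.assert_array_equal(
            out.numpy(), np.full([2, 2], 2.0, dtype='float32')
        )


if __name__ == '__main__':
    unittest.main()
```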
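A second recurring change: where the removed `test_*` wrappers used to toggle `FLAGS_retain_grad_for_all_tensor` around the guarded calls, the surviving test bodies (in `test_custom_relu_op_setup.py` and `test_custom_tanh_double_grad.py`) now set the flag at the top and restore it at the bottom. A minimal sketch of that set/reset pattern, assuming plain `paddle.tanh` in place of the custom tanh op from the patch:

```python
import numpy as np
import paddle
import paddle.fluid as fluid


def double_grad_check():
    x = paddle.to_tensor(np.random.uniform(-1, 1, [4, 8]).astype('float32'))
    x.stop_gradient = False
    out = paddle.tanh(x)
    # First-order grad with create_graph=True so it can be differentiated again.
    dx = paddle.grad(out, x, create_graph=True, retain_graph=True)[0]
    # Backprop through dx; the flag makes intermediate (non-leaf) tensors
    # retain their gradients during this second pass.
    dx.backward()
    assert x.grad is not None


# Mirror the tests above: toggle the flag around the body rather than using
# a context manager, restoring the default afterwards.
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
double_grad_check()
fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})
```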