PaddlePaddle / Paddle
Commit 1f829f6e (unverified)
Authored by Haohongxiang on Apr 05, 2022; committed via GitHub on Apr 05, 2022.
Parent commit: 7554f428

[Dygraph] Support process group in dp with fleet api (#41119)

* support process group in dp with fleet api
* update
* fix uts
* update
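The change is easiest to see from the fleet-API entry point it targets. Below is a minimal sketch (not taken from this commit) of a data-parallel training setup that, after this change, no longer needs to build a process group by hand; it assumes the script is started with `python -m paddle.distributed.launch` on the Paddle branch this commit targets.

    import paddle
    import paddle.distributed.fleet as fleet

    # Initialize the collective context; in eager mode this also creates the
    # default process group that DataParallel now picks up automatically.
    fleet.init(is_collective=True)

    model = paddle.nn.Linear(10, 10)
    opt = paddle.optimizer.SGD(learning_rate=0.001, parameters=model.parameters())

    # fleet wraps the model in DataParallel internally; no process_group argument.
    model = fleet.distributed_model(model)
    opt = fleet.distributed_optimizer(opt)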
Showing 19 changed files with 107 additions and 842 deletions (+107, -842).
python/paddle/distributed/parallel.py  (+1, -0)
python/paddle/fluid/dygraph/parallel.py  (+10, -22)
python/paddle/fluid/tests/unittests/CMakeLists.txt  (+6, -11)
python/paddle/fluid/tests/unittests/dygraph_fleet_api.py  (+2, -0)
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py  (+0, -137)
python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py  (+4, -24)
python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync.py  (+30, -51)
python/paddle/fluid/tests/unittests/spawn_runner_base.py  (+8, -7)
python/paddle/fluid/tests/unittests/test_dist_base.py  (+28, -206)
python/paddle/fluid/tests/unittests/test_imperative_group.py  (+17, -9)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_control_flow_in_eager_mode.py  (+0, -84)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py  (+0, -5)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py  (+1, -1)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync_in_eager_mode.py  (+0, -111)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding.py  (+0, -42)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py  (+0, -27)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py  (+0, -16)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer.py  (+0, -23)
python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables.py  (+0, -66)
python/paddle/distributed/parallel.py  (view file @ 1f829f6e)
...
@@ -217,6 +217,7 @@ def init_parallel_env():
                "required to create a process group.")
        master_addr = os.getenv("MASTER_ADDR", None)
        master_port = os.getenv("MASTER_PORT", None)
        endpoints = None
        if not master_addr or not master_port:
            endpoints = os.getenv("PADDLE_MASTER", None)
        if endpoints is None:
...
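For context, the hunk above only touches the endpoint-resolution step of init_parallel_env(). A small stand-alone sketch of that logic follows; the ValueError is illustrative, not Paddle's actual error handling.

    import os

    # Prefer MASTER_ADDR/MASTER_PORT; fall back to the PADDLE_MASTER variable.
    master_addr = os.getenv("MASTER_ADDR", None)
    master_port = os.getenv("MASTER_PORT", None)
    endpoints = None
    if not master_addr or not master_port:
        endpoints = os.getenv("PADDLE_MASTER", None)
    if endpoints is None and (not master_addr or not master_port):
        raise ValueError("set MASTER_ADDR/MASTER_PORT or PADDLE_MASTER so that "
                         "a TCP store can be created")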
python/paddle/fluid/dygraph/parallel.py  (view file @ 1f829f6e)
...
@@ -398,16 +398,6 @@ def sync_params_buffers(model,
                           'axis': 0})


@imperative_base.no_grad
@framework.dygraph_only
def sync_eager_params(model, comm_group=None, src_rank=0):
    for _, param in model._obtain_parameters_buffers().items():
        if not isinstance(param, core.eager.Tensor):
            raise TypeError("The data type of '%s' must be '%s'" %
                            (param.name, core.eager.Tensor))
        comm_group.broadcast(param, src_rank).synchronize()


class DataParallel(layers.Layer):
    """
    Run the dygraph module with data parallelism.
...
@@ -575,7 +565,7 @@ class DataParallel(layers.Layer):
                 comm_buffer_size=25,
                 last_comm_buffer_size=1,
                 find_unused_parameters=False,
                 process_group=None):
                 group=None):
        super(DataParallel,
              self).__init__(layers.full_name() + "_data_parallel")
...
@@ -585,7 +575,7 @@ class DataParallel(layers.Layer):
        self._layers = layers
        self.find_unused_parameters = find_unused_parameters
        self.grad_need_sync = True
        self.process_group = process_group
        self.group = group
        self.var_dtype = core.eager.Tensor if in_dygraph_mode() else core.VarBase
...
@@ -604,19 +594,17 @@ class DataParallel(layers.Layer):
                "ParallelContext must be initialized before. You should use init_parallel_env() before" \
                "constructing the DataParallel."

            if self.process_group is None and in_dygraph_mode():
                raise RuntimeError(
                    "Process group should be built for DataParallel in eager mode.")
            if in_dygraph_mode():
                self.group = paddle.distributed.collective._get_default_group(
                ) if self.group is None else self.group

                assert isinstance(self.group, paddle.distributed.collective.Group), \
                    "ProcessGroup must be an instance of Group in DataParallel."

            # sync buffer and params
            # TODO(liuyuhui) Currently not support xpu. xpu is
            # still broadcasting parameters when calling layer
            if not paddle.is_compiled_with_xpu():
                if in_dygraph_mode():
                    sync_eager_params(
                        self._layers, comm_group=self.process_group)
                elif _in_legacy_dygraph():
                    sync_params_buffers(self._layers)

            self.comm_buffer_size = int(comm_buffer_size * 1024 * 1024)
...
@@ -678,7 +666,7 @@ class DataParallel(layers.Layer):
            self._reducer = core.EagerReducer(
                trainable_parameters,
                list(reversed(self.group_indices)), is_sparse_gradient,
                self.process_group,
                self.group.process_group,
                [self.last_comm_buffer_size, self.comm_buffer_size],
                self.find_unused_parameters)
        elif _in_legacy_dygraph():
...
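In short, DataParallel now takes a `group` argument instead of `process_group` and resolves the default collective group itself. A hedged usage sketch for a multi-GPU launch follows; the new_group line is an aside for illustration, not part of this diff.

    import paddle
    import paddle.distributed as dist

    dist.init_parallel_env()               # creates the default process group

    layer = paddle.nn.Linear(8, 8)
    dp_layer = paddle.DataParallel(layer)  # group=None -> _get_default_group()

    # An explicit communication group can still be passed:
    # dp_layer = paddle.DataParallel(layer, group=dist.new_group([0, 1]))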
python/paddle/fluid/tests/unittests/CMakeLists.txt  (view file @ 1f829f6e)
...
@@ -39,9 +39,7 @@ if (WITH_GPU OR WITH_XPU OR WITH_ASCEND OR WITH_ASCEND_CL)
endif()
list(APPEND DIST_TEST_OPS test_parallel_dygraph_unused_variables)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_control_flow)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_control_flow_in_eager_mode)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_no_sync)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_no_sync_in_eager_mode)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_no_sync_gradient_check)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_dataparallel)
list(APPEND DIST_TEST_OPS test_parallel_dygraph_pipeline_parallel)
...
@@ -279,9 +277,7 @@ if ((NOT WITH_GPU) AND (NOT WITH_ROCM))
    LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_transformer)
    LIST(REMOVE_ITEM TEST_OPS test_parallel_dygraph_sync_batch_norm)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_control_flow)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_control_flow_in_eager_mode)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync_in_eager_mode)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_no_sync_gradient_check)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_dataparallel)
    list(REMOVE_ITEM TEST_OPS test_parallel_dygraph_pipeline_parallel)
...
@@ -1128,12 +1124,11 @@ set_tests_properties(test_cumprod_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_split_program PROPERTIES TIMEOUT 120)
if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
    set_tests_properties(test_parallel_dygraph_dataparallel PROPERTIES TIMEOUT 120)
    set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT 120)
    set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 300)
    set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 200)
    set_tests_properties(test_parallel_dygraph_control_flow_in_eager_mode PROPERTIES TIMEOUT 150)
    set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 150)
    set_tests_properties(test_parallel_dygraph_no_sync_in_eager_mode PROPERTIES TIMEOUT 150)
    set_tests_properties(test_parallel_dygraph_mnist PROPERTIES TIMEOUT 200)
    set_tests_properties(test_parallel_dygraph_se_resnext PROPERTIES TIMEOUT 200)
    set_tests_properties(test_parallel_dygraph_unused_variables PROPERTIES TIMEOUT 350)
    set_tests_properties(test_parallel_dygraph_control_flow PROPERTIES TIMEOUT 350)
    set_tests_properties(test_parallel_dygraph_no_sync PROPERTIES TIMEOUT 300)
    set_tests_properties(test_parallel_dygraph_no_sync_gradient_check PROPERTIES TIMEOUT 30)
    set_tests_properties(test_parallel_dygraph_pipeline_parallel PROPERTIES TIMEOUT 200)
    set_tests_properties(test_parallel_dygraph_tensor_parallel PROPERTIES TIMEOUT 200)
...
@@ -1155,8 +1150,8 @@ if(WITH_DISTRIBUTE AND WITH_GPU AND WITH_NCCL)
    if(${NCCL_VERSION} VERSION_GREATER_EQUAL 2212)
        set_tests_properties(test_parallel_dygraph_sparse_embedding PROPERTIES TIMEOUT 200)
        set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT 200)
        set_tests_properties(test_parallel_dygraph_sparse_embedding_over_height PROPERTIES TIMEOUT 150)
        set_tests_properties(test_parallel_dygraph_transformer PROPERTIES TIMEOUT 150)
    endif()
endif()
...
python/paddle/fluid/tests/unittests/dygraph_fleet_api.py  (view file @ 1f829f6e)
...
@@ -57,4 +57,6 @@ class TestDygraphFleetAPI(unittest.TestCase):

if __name__ == "__main__":
    with _test_eager_guard():
        pass
    unittest.main()
python/paddle/fluid/tests/unittests/parallel_dygraph_dataparallel_in_eager_mode.py  (deleted, 100644 → 0; view file @ 7554f428)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division
from __future__ import print_function

import unittest
import os
import copy
import numpy as np
import random
import socket

import paddle
import paddle.nn as nn
from paddle.fluid.dygraph.nn import Linear
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard
import paddle.distributed as dist
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.optimizer import SGD
from paddle.fluid.initializer import NumpyArrayInitializer
from test_parallel_dygraph_dataparallel import get_dist_port_from_flags


def init_process_group(strategy=None):
    nranks = ParallelEnv().nranks
    rank = ParallelEnv().local_rank
    is_master = True if rank == 0 else False
    envs = copy.copy(os.environ.copy())
    port = get_dist_port_from_flags()
    store = paddle.fluid.core.TCPStore("127.0.0.1", port, is_master, nranks)
    if 'PADDLE_DISTRI_BACKEND' in envs.keys() and envs[
            'PADDLE_DISTRI_BACKEND'] == 'gloo':
        group = core.ProcessGroupGloo(store, rank, nranks)
    else:
        group = core.ProcessGroupNCCL(store, rank, nranks)
    return group


class LinearModel(nn.Layer):
    def __init__(self, attr_list):
        super(LinearModel, self).__init__()
        self._linear1 = paddle.nn.Linear(
            50, 30, weight_attr=attr_list[0], bias_attr=False)
        self._linear2 = paddle.nn.Linear(
            30, 10, weight_attr=attr_list[1], bias_attr=False)
        self._linear3 = paddle.nn.Linear(
            10, 10, weight_attr=attr_list[2], bias_attr=False)

    def forward(self, x):
        output = self._linear1(x)
        output = self._linear2(output)
        output = self._linear3(output)
        return output


class TestDistTraning(unittest.TestCase):
    def test_multiple_gpus(self):
        process_group = init_process_group()
        self.generate_reducer("float32", process_group)
        if paddle.get_device() != "cpu":
            self.generate_reducer("float16", process_group)

    def generate_reducer(self, dtype, process_group):
        local_rank = ParallelEnv().local_rank
        np.random.seed(2022 + local_rank)
        paddle.set_default_dtype(dtype)

        w_1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(
            np.random.rand(50, 30).astype(dtype)))
        w_2 = paddle.ParamAttr(initializer=NumpyArrayInitializer(
            np.random.rand(30, 10).astype(dtype)))
        w_3 = paddle.ParamAttr(initializer=NumpyArrayInitializer(
            np.random.rand(10, 10).astype(dtype)))

        attr_list = [w_1, w_2, w_3]
        inp = np.random.rand(10, 50).astype(dtype)

        # original reducer
        params_a = self.model_train(attr_list, inp)

        # refactored reducer in eager mode
        with _test_eager_guard():
            params_b = self.model_train(
                attr_list, inp, process_group=process_group)

        for i in range(len(params_a)):
            np.testing.assert_allclose(params_a[i].numpy(), params_b[i].numpy())

    def model_train(self, attr_list, inp, process_group=None):
        model = LinearModel(attr_list)
        model = paddle.DataParallel(model, process_group=process_group)
        optimizer = SGD(learning_rate=0.0003, parameters=model.parameters())

        x = paddle.to_tensor(inp)
        x.stop_gradient = False

        for step in range(10):
            y = model(x)
            loss = y.mean()

            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

        return model.parameters()


class TestCatchErrors1(unittest.TestCase):
    def test_multiple_gpus(self):
        linear = paddle.nn.Linear(2, 4)
        with _test_eager_guard():
            self.assertRaises(RuntimeError, paddle.DataParallel, linear)


class TestCatchErrors2(unittest.TestCase):
    def test_multiple_gpus(self):
        with _test_eager_guard():
            linear = paddle.nn.Linear(2, 4)
            self.assertRaises(RuntimeError, paddle.DataParallel, linear)


if __name__ == '__main__':
    dist.init_parallel_env()
    unittest.main()
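The deleted test bootstrapped its own communicator. The sketch below isolates that bootstrap (taken from the file above) purely to document what init_parallel_env() now covers for eager-mode DataParallel; it is only meaningful under a distributed launcher.

    import paddle
    import paddle.fluid.core as core
    from paddle.fluid.dygraph.parallel import ParallelEnv

    nranks = ParallelEnv().nranks
    rank = ParallelEnv().local_rank

    # Rank 0 hosts the TCP store; every rank connects and builds a NCCL group.
    store = paddle.fluid.core.TCPStore("127.0.0.1", 6175, rank == 0, nranks)
    group = core.ProcessGroupNCCL(store, rank, nranks)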
python/paddle/fluid/tests/unittests/parallel_dygraph_gradient_check_in_eager_mode.py  (view file @ 1f829f6e)
...
@@ -36,19 +36,6 @@ in_dim = 10
out_dim = 20


def init_process_group(strategy=None):
    nranks = ParallelEnv().nranks
    rank = ParallelEnv().local_rank
    is_master = True if rank == 0 else False
    current_env = copy.copy(os.environ.copy())
    port = 6175
    if 'PADDLE_DIST_UT_PORT' in current_env.keys():
        port = int(current_env['PADDLE_DIST_UT_PORT'])
    store = paddle.fluid.core.TCPStore("127.0.0.1", port, is_master, nranks)
    group = core.ProcessGroupNCCL(store, rank, nranks)
    return group


class SimpleNet(fluid.Layer):
    def __init__(self, train_id):
        super(SimpleNet, self).__init__()
...
@@ -83,12 +70,9 @@ class SimpleNet(fluid.Layer):

class TestDistTraning(unittest.TestCase):
    def test_multiple_gpus(self):
        dist.init_parallel_env()
        self.trainer_id = dist.get_rank()
        process_group = init_process_group()
        self.pg = process_group
        with _test_eager_guard():
            self.pg = dist.init_parallel_env()

            model_a = SimpleNet(self.trainer_id)
            model_b = SimpleNet(self.trainer_id)
...
@@ -97,13 +81,9 @@ class TestDistTraning(unittest.TestCase):
            model_b.set_state_dict(state_dict)

            model_a = paddle.DataParallel(
                model_a,
                find_unused_parameters=True,
                process_group=process_group)
                model_a, find_unused_parameters=True, group=self.pg)
            model_b = paddle.DataParallel(
                model_b,
                find_unused_parameters=True,
                process_group=process_group)
                model_b, find_unused_parameters=True, group=self.pg)

            ones_input = paddle.ones(shape=(batch, in_dim))
            ones_input.stop_gradient = True
...
@@ -150,7 +130,7 @@ class TestDistTraning(unittest.TestCase):
            print(*args)

    def broadcast_param(self, param, root):
        self.pg.broadcast(param, root)
        self.pg.process_group.broadcast(param, root)
        return param

    def check_gradient(self, params):
...
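A condensed sketch of the updated broadcast path used by this test, assuming (as the diff above shows for this branch) that dist.init_parallel_env() returns a Group whose low-level handle is exposed as .process_group:

    import paddle
    import paddle.distributed as dist

    pg = dist.init_parallel_env()          # returns the default Group here
    param = paddle.ones([4, 4])
    pg.process_group.broadcast(param, 0)   # broadcast from rank 0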
python/paddle/fluid/tests/unittests/parallel_dygraph_no_sync.py  (view file @ 1f829f6e)
...
@@ -69,18 +69,6 @@ class TestNoSync(TestParallelDyGraphRunnerBase):
        loss = out.sum() / len(batch)
        return loss

    def run_trainer(self, args):
        if args.eager_mode:
            self.run_trainer_in_eager_mode(args)
        else:
            self.run_trainer_func(args)

    def run_trainer_with_spawn(self, args):
        if args.eager_mode:
            return self.run_trainer_with_spawn_in_eager_mode(args)
        else:
            return self.run_trainer_with_spawn_func(args)

    def run_trainer_func(self, args):
        if fluid.core.is_compiled_with_cuda():
            device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
...
@@ -103,41 +91,36 @@ class TestNoSync(TestParallelDyGraphRunnerBase):
            model = paddle.DataParallel(
                model, find_unused_parameters=args.find_unused_parameters)
            print_to_err(
                type(self).__name__, "model built in dygraph")
            return self.model_train(args, model, opt, train_reader)
            out_losses = self.model_train(args, model, opt, train_reader)
            print_to_out(out_losses)
            return out_losses

    def run_trainer_in_eager_mode(self, args):
        if fluid.core.is_compiled_with_cuda():
            device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
            place = fluid.CUDAPlace(device_id)
        else:
            assert ("Only support CUDAPlace for now.")

    def run_trainer_with_spawn_func(self, args):
        # 1. enable dygraph
        paddle.disable_static()
        with fluid.dygraph.guard(place):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
        # 2. init seed
        seed = 90
        paddle.static.default_startup_program().random_seed = seed
        paddle.static.default_main_program().random_seed = seed
        np.random.seed(seed)
        random.seed(seed)
        # get trainer id
        args.trainer_id = paddle.distributed.get_rank()
            with _test_eager_guard():
                model, train_reader, opt = self.get_model()
                if args.update_method == "nccl2":
                    dist.init_parallel_env()
                    print_to_err(
                        type(self).__name__,
                        "begin to prepare context in dygraph with nccl2")
        # 3. init parallel env
        if args.update_method in ["nccl2", "gloo"]:
            paddle.distributed.init_parallel_env()
                    nranks = ParallelEnv().nranks
                    rank = ParallelEnv().local_rank
                    is_master = True if rank == 0 else False
                    store = paddle.fluid.core.TCPStore(
                        "127.0.0.1", args.dist_port, is_master, nranks)
                    group = core.ProcessGroupNCCL(store, rank, nranks)
        # 4. train model
        model, train_reader, opt = self.get_model()
        if args.update_method in ["nccl2", "gloo"]:
            model = paddle.DataParallel(
                model,
                process_group=group,
                find_unused_parameters=args.find_unused_parameters)
                    print_to_err(
                        type(self).__name__, "model built in dygraph")
                return self.model_train(args, model, opt, train_reader)
                model, find_unused_parameters=args.find_unused_parameters)

        out_losses = self.model_train(args, model, opt, train_reader)
        print_to_out(out_losses)
        return out_losses

    def model_train(self, args, model, opt, train_reader):
        out_losses = []
...
@@ -157,12 +140,8 @@ class TestNoSync(TestParallelDyGraphRunnerBase):
            loss = self.run_one_loop(model, opt, data)
            loss.backward()
            opt.minimize(loss)
            print_to_err(
                type(self).__name__,
                "loss at step %d: %f" % (step_id, loss.numpy()))
            out_losses.append(loss.numpy())
            model.clear_gradients()
        print_to_out(out_losses)
        return out_losses
...
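The runner above drives the no_sync tests. A hedged sketch of the behaviour those tests exercise, independent of the runner plumbing (requires a multi-process launch):

    import numpy as np
    import paddle
    import paddle.distributed as dist

    dist.init_parallel_env()
    model = paddle.DataParallel(paddle.nn.Linear(10, 1))
    x = paddle.to_tensor(np.random.rand(4, 10).astype("float32"))

    with model.no_sync():            # gradients are accumulated locally only
        model(x).sum().backward()
    model(x).sum().backward()        # this backward triggers the allreduce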
python/paddle/fluid/tests/unittests/spawn_runner_base.py  (view file @ 1f829f6e)
...
@@ -21,7 +21,7 @@ import paddle

# used by model.run_trainer in test_dist_base
from test_dist_base import RUN_STEP
from test_parallel_dygraph_dataparallel import get_dist_port_from_flags
from paddle.fluid.framework import _test_eager_guard


# NOTE: compatible TestParallelDyGraphRunnerBase args
...
@@ -29,8 +29,6 @@ class SpawnAssistTestArgs(object):
    update_method = "local"
    trainer_id = 0
    find_unused_parameters = False
    eager_mode = False
    dist_port = get_dist_port_from_flags()


class TestDistSpawnRunner(unittest.TestCase):
...
@@ -55,14 +53,17 @@ class TestDistSpawnRunner(unittest.TestCase):
            result_list.append(res_queue.get())
        return result_list

    def _args_config(self, args):
        return

    def check_dist_result_with_spawn(self, test_class, delta=1e-3):
        with _test_eager_guard():
            self.check_dist_result_with_spawn_func(
                test_class=test_class, delta=delta)
        self.check_dist_result_with_spawn_func(
            test_class=test_class, delta=delta)

    def check_dist_result_with_spawn_func(self, test_class, delta=1e-3):
        # 0. prepare model and args
        model = test_class()
        args = SpawnAssistTestArgs()
        self._args_config(args)

        # 1. calc signal card loss
        losses = self._run(model, args)
...
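The spawn runner no longer keys off an eager_mode flag; it simply runs the same check twice. A tiny illustrative helper capturing that pattern (run_twice is hypothetical, not part of the diff):

    from paddle.fluid.framework import _test_eager_guard

    def run_twice(check, *args, **kwargs):
        # once under the eager guard, once in legacy dygraph
        with _test_eager_guard():
            check(*args, **kwargs)
        check(*args, **kwargs)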
python/paddle/fluid/tests/unittests/test_dist_base.py  (view file @ 1f829f6e)
...
@@ -36,7 +36,6 @@ import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.dygraph.parallel import DataParallel, ParallelEnv
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.incubate.fleet.collective import fleet, DistributedStrategy
import paddle.fluid.incubate.fleet.base.role_maker as role_maker
...
@@ -543,12 +542,6 @@ class TestParallelDyGraphRunnerBase(object):
        return batch

    def run_trainer(self, args):
        if args.eager_mode:
            self.run_trainer_in_eager_mode(args)
        else:
            self.run_trainer_func(args)

    def run_trainer_func(self, args):
        seed = 90
        if args.update_method == 'gloo':
            place = fluid.CPUPlace()
...
@@ -580,6 +573,7 @@ class TestParallelDyGraphRunnerBase(object):
            strategy.local_rank = args.trainer_id
            strategy.trainer_endpoints = args.endpoints.split(",")
            strategy.current_endpoint = args.current_endpoint
            paddle.distributed.init_parallel_env()
            print_to_err(
                type(self).__name__,
                "begin to prepare context in dygraph with nccl2")
...
@@ -621,82 +615,7 @@ class TestParallelDyGraphRunnerBase(object):
            model.clear_gradients()
        print_to_out(out_losses)

    def run_trainer_in_eager_mode(self, args):
        seed = 90
        if args.update_method == 'gloo':
            place = fluid.CPUPlace()
        elif fluid.core.is_compiled_with_cuda():
            device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
            place = fluid.CUDAPlace(device_id)
        elif fluid.core.is_compiled_with_xpu():
            device_id = int(os.getenv("FLAGS_selected_xpus", "0"))
            place = fluid.XPUPlace(device_id)
        elif fluid.core.is_compiled_with_npu():
            device_id = int(os.getenv("FLAGS_selected_npus", "0"))
            place = fluid.NPUPlace(device_id)
        else:
            assert ("Only support CUDAPlace or XPUPlace or CPU(Gloo) for now.")

        with _test_eager_guard():
            with fluid.dygraph.guard(place):
                fluid.default_startup_program().random_seed = seed
                fluid.default_main_program().random_seed = seed
                np.random.seed(seed)
                import random
                random.seed(seed)
                model, train_reader, opt = self.get_model()

                #if args.update_method == "nccl2":
                if args.update_method in ["nccl2", "gloo"]:
                    paddle.distributed.init_parallel_env()
                    nranks = ParallelEnv().nranks
                    rank = ParallelEnv().local_rank
                    is_master = True if rank == 0 else False
                    store = paddle.fluid.core.TCPStore(
                        "127.0.0.1", args.dist_port, is_master, nranks)
                    if args.update_method == "nccl2":
                        group = core.ProcessGroupNCCL(store, rank, nranks)
                    elif args.update_method == "gloo":
                        group = core.ProcessGroupGloo(store, rank, nranks)

                    print_to_err(
                        type(self).__name__,
                        "begin to prepare context in dygraph with nccl2")
                    model = dygraph.parallel.DataParallel(
                        model,
                        process_group=group,
                        find_unused_parameters=args.find_unused_parameters)
                    print_to_err(type(self).__name__, "model built in dygraph")

                out_losses = []
                print_to_err(
                    type(self).__name__, "begin to run dygraph training")
                for step_id, data in enumerate(train_reader()):
                    data = self._get_data(data, args)
                    if step_id == RUN_STEP:
                        break
                    loss = self.run_one_loop(model, opt, data)
                    if step_id % 10 == 0:
                        print_to_err(
                            type(self).__name__,
                            "loss at step %d: %f" % (step_id, loss.numpy()))
                    out_losses.append(loss.numpy())

                    loss.backward()

                    opt.minimize(loss)
                    if not args.accumulate_gradient:
                        model.clear_gradients()
                print_to_out(out_losses)

    def run_trainer_with_spawn(self, args):
        if args.eager_mode:
            return self.run_trainer_with_spawn_in_eager_mode(args)
        else:
            return self.run_trainer_with_spawn_func(args)

    def run_trainer_with_spawn_func(self, args):
        # 1. enable dygraph
        paddle.disable_static()
...
@@ -733,109 +652,7 @@ class TestParallelDyGraphRunnerBase(object):
            model.clear_gradients()
        return out_losses

    def run_trainer_with_spawn_in_eager_mode(self, args):
        # 1. enable dygraph
        paddle.disable_static()

        # 2. init seed
        seed = 90
        paddle.static.default_startup_program().random_seed = seed
        paddle.static.default_main_program().random_seed = seed
        np.random.seed(seed)
        random.seed(seed)
        # get trainer id
        args.trainer_id = paddle.distributed.get_rank()

        # 3. init parallel env
        if args.update_method in ["nccl2", "gloo"]:
            paddle.distributed.init_parallel_env()

        # 4. build process group
        nranks = ParallelEnv().nranks
        rank = ParallelEnv().local_rank
        is_master = True if rank == 0 else False
        store = paddle.fluid.core.TCPStore("127.0.0.1", args.dist_port,
                                           is_master, nranks)
        if args.update_method == "nccl2":
            group = core.ProcessGroupNCCL(store, rank, nranks)
        elif args.update_method == "gloo":
            group = core.ProcessGroupGloo(store, rank, nranks)

        # 5. train model
        with _test_eager_guard():
            model, train_reader, opt = self.get_model()
            if args.update_method in ["nccl2", "gloo"]:
                model = paddle.DataParallel(
                    model,
                    process_group=group,
                    find_unused_parameters=args.find_unused_parameters)

            out_losses = []
            for step_id, data in enumerate(train_reader()):
                data = self._get_data(data, args)
                if step_id == RUN_STEP:
                    break
                loss = self.run_one_loop(model, opt, data)
                out_losses.append(loss.numpy())

                loss.backward()

                opt.minimize(loss)
                model.clear_gradients()
        return out_losses

    def run_use_fleet_api_trainer(self, args):
        if args.eager_mode:
            self.run_use_fleet_api_trainer_in_eager_mode(args)
        else:
            self.run_use_fleet_api_trainer_func(args)

    def run_use_fleet_api_trainer_func(self, args):
        import paddle.distributed.fleet as fleet
        import paddle.distributed.fleet.base.role_maker as role_maker
        # 1. enable dygraph
        paddle.disable_static()

        # 2. init seed
        seed = 90
        paddle.static.default_startup_program().random_seed = seed
        paddle.static.default_main_program().random_seed = seed
        np.random.seed(seed)
        random.seed(seed)
        # get trainer id
        args.trainer_id = paddle.distributed.get_rank()

        # set strategy
        strategy = fleet.DistributedStrategy()
        if args.find_unused_parameters:
            strategy.find_unused_parameters = True

        # 3. init parallel env
        if args.update_method == "nccl2" or "bkcl" or "hccl":
            fleet.init(is_collective=True, strategy=strategy)

        # 4. train model
        model, train_reader, opt = self.get_model()
        if args.update_method == "nccl2" or "bkcl" or "hccl":
            opt = fleet.distributed_optimizer(opt)
            model = fleet.distributed_model(model)

        out_losses = []
        for step_id, data in enumerate(train_reader()):
            data = self._get_data(data, args)
            if step_id == RUN_STEP:
                break
            loss = self.run_one_loop(model, opt, data)
            out_losses.append(loss.numpy())

            loss.backward()

            opt.step()
            if not args.accumulate_gradient:
                opt.clear_grad()
        print_to_out(out_losses)

    def run_use_fleet_api_trainer_in_eager_mode(self, args):
        import paddle.distributed.fleet as fleet
        import paddle.distributed.fleet.base.role_maker as role_maker
        # 1. enable dygraph
...
@@ -860,7 +677,6 @@ class TestParallelDyGraphRunnerBase(object):
            fleet.init(is_collective=True, strategy=strategy)

        # 4. train model
        with _test_eager_guard():
            model, train_reader, opt = self.get_model()
            if args.update_method == "nccl2" or "bkcl" or "hccl":
                opt = fleet.distributed_optimizer(opt)
...
@@ -911,8 +727,6 @@ def runtime_main(test_class):
    parser.add_argument(
        '--current_endpoint', type=str, required=False, default="")
    parser.add_argument('--sync_mode', action='store_true')
    parser.add_argument('--eager_mode', action='store_true')
    parser.add_argument('--dist_port', type=int, required=False, default=6175)
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument('--use_cpu', action='store_true')
    parser.add_argument('--use_xpu', action='store_true')
...
@@ -1005,8 +819,6 @@ class TestDistBase(unittest.TestCase):
        self._port_set = set()
        self._python_interp = sys.executable
        self._sync_mode = True
        self._dist_port = 6175
        self._eager_mode = False
        self._hogwild_mode = False
        self._enforce_place = None
        self._use_reduce = False
...
@@ -1168,10 +980,6 @@ class TestDistBase(unittest.TestCase):
        if len(devices) > 1 and self._use_dgc:
            cmd += " --use_dgc"

        if self._eager_mode:
            cmd += " --eager_mode"
            cmd += " --dist_port {}".format(self._dist_port)

        if self._accumulate_gradient:
            cmd += " --accumulate_gradient"
...
@@ -1245,11 +1053,6 @@ class TestDistBase(unittest.TestCase):
        if self._sync_mode:
            tr0_cmd += " --sync_mode"
            tr1_cmd += " --sync_mode"

        if self._eager_mode:
            tr0_cmd += " --eager_mode"
            tr1_cmd += " --eager_mode"
            tr0_cmd += " --dist_port {}".format(self._dist_port)
            tr1_cmd += " --dist_port {}".format(self._dist_port)

        if self._hogwild_mode:
            tr0_cmd += " --hogwild"
            tr1_cmd += " --hogwild"
...
@@ -1356,10 +1159,6 @@ class TestDistBase(unittest.TestCase):
        assert self._use_dgc == False, "gloo not support use dgc"

        if self._eager_mode:
            tr_cmd += " --eager_mode"
            tr_cmd += " --dist_port {}".format(self._dist_port)

        if self._accumulate_gradient:
            tr_cmd += " --accumulate_gradient"
...
@@ -1437,10 +1236,6 @@ class TestDistBase(unittest.TestCase):
        if self._use_dgc:
            tr_cmd += " --use_dgc"

        if self._eager_mode:
            tr_cmd += " --eager_mode"
            tr_cmd += " --dist_port {}".format(self._dist_port)

        if self._accumulate_gradient:
            tr_cmd += " --accumulate_gradient"
...
@@ -1665,7 +1460,34 @@ class TestDistBase(unittest.TestCase):
                         check_error_log=False,
                         need_envs={},
                         log_name=""):
        if self._dygraph and (self._gloo_mode or self._nccl2_mode):
            with _test_eager_guard():
                self.check_with_place_func(
                    model_file=model_file,
                    delta=delta,
                    check_error_log=check_error_log,
                    need_envs=need_envs,
                    log_name=log_name)
            self.check_with_place_func(
                model_file=model_file,
                delta=delta,
                check_error_log=check_error_log,
                need_envs=need_envs,
                log_name=log_name)
        else:
            self.check_with_place_func(
                model_file=model_file,
                delta=delta,
                check_error_log=check_error_log,
                need_envs=need_envs,
                log_name=log_name)

    def check_with_place_func(self,
                              model_file,
                              delta=1e-3,
                              check_error_log=False,
                              need_envs={},
                              log_name=""):
        required_envs = self._get_required_envs(check_error_log, need_envs)

        if self._gloo_mode:
...
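With check_with_place() now covering both eager and legacy dygraph itself, a distributed dygraph test reduces to the shape below. This is an illustrative subclass modelled on the ones deleted elsewhere in this commit; TestParallelDygraphExample is not a real test in the tree.

    import os
    import paddle.fluid as fluid
    from test_dist_base import TestDistBase

    flag_name = os.path.splitext(__file__)[0]


    class TestParallelDygraphExample(TestDistBase):
        def _setup_config(self):
            self._sync_mode = False
            self._nccl2_mode = True
            self._dygraph = True          # note: no _eager_mode flag any more

        def test_net(self):
            if fluid.core.is_compiled_with_cuda():
                self.check_with_place(
                    "parallel_dygraph_mnist.py",
                    delta=1e-5,
                    check_error_log=True,
                    log_name=flag_name)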
python/paddle/fluid/tests/unittests/test_imperative_group.py  (view file @ 1f829f6e)
...
@@ -26,7 +26,7 @@ import paddle.fluid.dygraph as dygraph
from paddle.fluid.dygraph.nn import Linear
import paddle.fluid.core as core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.framework import _test_eager_guard, _in_legacy_dygraph, in_dygraph_mode


class TestDataParallelGroup(unittest.TestCase):
...
@@ -34,6 +34,9 @@ class TestDataParallelGroup(unittest.TestCase):
        return paddle.rand(shape=shape, dtype=dtype)

    def assign_group_by_size(self, *args):
        if in_dygraph_mode():
            return core.eager_assign_group_by_size(*args)
        elif _in_legacy_dygraph():
            return core.assign_group_by_size(*args)

    def test_construct_group0(self):
...
@@ -160,14 +163,19 @@ class TestDataParallelGroup(unittest.TestCase):
            [300], [1, 0, 2, 3])
        self.assertEqual([[1, 0], [3], [2]], res)


class TestDataParallelGroupEager(TestDataParallelGroup):
    def create_varbase(self, dtype, shape):
    def test_construct_group_in_legacy_mode(self):
        with _test_eager_guard():
        return paddle.rand(shape=shape, dtype=dtype)

    def assign_group_by_size(self, *args):
        return core.eager_assign_group_by_size(*args)
            pass
        self.test_construct_group0()
        self.test_construct_group1()
        self.test_construct_group2()
        self.test_construct_group3()
        self.test_construct_group4()
        self.test_construct_group5()
        self.test_construct_group6()
        self.test_construct_group7()
        self.test_construct_group8()
        self.test_construct_group9()


if __name__ == '__main__':
...
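The assign_group_by_size helper above dispatches on the active dygraph flavour. A small sketch of how those probes behave on this branch (hedged; the guard is internal test machinery):

    from paddle.fluid.framework import (_test_eager_guard, _in_legacy_dygraph,
                                        in_dygraph_mode)

    def which_mode():
        if in_dygraph_mode():
            return "eager"
        elif _in_legacy_dygraph():
            return "legacy"

    print(which_mode())            # "legacy" outside the guard on this branch
    with _test_eager_guard():
        print(which_mode())        # "eager" inside the guard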
python/paddle/fluid/tests/unittests/test_parallel_dygraph_control_flow_in_eager_mode.py  (deleted, 100644 → 0; view file @ 7554f428)

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import sys
import unittest

import paddle.fluid as fluid
from test_dist_base import TestDistBase
from spawn_runner_base import TestDistSpawnRunner

flag_name = os.path.splitext(__file__)[0]


class TestDygraphControlFlowSameEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._nccl2_mode = True
        self._eager_mode = True
        self._dygraph = True
        self._find_unused_parameters = True

    def test_net(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_control_flow_same.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestDygraphControlFlowSameAccGradEager(TestDygraphControlFlowSameEager):
    def _setup_config(self):
        self._sync_mode = False
        self._nccl2_mode = True
        self._eager_mode = True
        self._dygraph = True
        self._accumulate_gradient = True
        self._find_unused_parameters = True


class TestDygraphControlFlowDiffEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._nccl2_mode = True
        self._eager_mode = True
        self._dygraph = True
        self._find_unused_parameters = True

    def test_net(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_control_flow_different.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestFleetDygraphControlFlowDiffAccGradEager(TestDygraphControlFlowDiffEager):
    def _setup_config(self):
        self._sync_mode = False
        self._nccl2_mode = True
        self._eager_mode = True
        self._dygraph = True
        self._accumulate_gradient = True
        self._find_unused_parameters = True


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel.py  (view file @ 1f829f6e)
...
@@ -208,11 +208,6 @@ class TestDataParallelWithPyLayer(TestMultipleGpus):
        self.run_mnist_2gpu('parallel_dygraph_dataparallel_with_pylayer.py')


class TestDataParallelInEagerMode(TestMultipleGpus):
    def test_multiple_gpus_dynamic(self):
        self.run_mnist_2gpu('parallel_dygraph_dataparallel_in_eager_mode.py')


class TestGradientCheckInEagerMode(TestMultipleGpus):
    def test_multiple_gpus_dynamic(self):
        self.run_mnist_2gpu('parallel_dygraph_gradient_check_in_eager_mode.py')
...
python/paddle/fluid/tests/unittests/test_parallel_dygraph_dataparallel_cpuonly.py  (view file @ 1f829f6e)
...
@@ -136,7 +136,7 @@ class TestDataParallelGradientCheck(TestMultipleGpus):

class TestDataParallelGradientCheckInEagerMode(TestMultipleGpus):
    def test_multiple_gpus_dynamic(self):
        self.run_mnist_2gpu('parallel_dygraph_dataparallel_in_eager_mode.py')
        self.run_mnist_2gpu('parallel_dygraph_gradient_check_in_eager_mode.py')


if __name__ == "__main__":
...
python/paddle/fluid/tests/unittests/test_parallel_dygraph_no_sync_in_eager_mode.py  (deleted, 100644 → 0; view file @ 7554f428)

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import sys
import unittest

import paddle.fluid as fluid
from test_dist_base import TestDistBase
from spawn_runner_base import TestDistSpawnRunner
from parallel_dygraph_no_sync import TestNoSync
from parallel_dygraph_no_sync_unused_params import TestNoSyncUnusedParam
from parallel_dygraph_no_sync_control_flow import TestNoSyncControlFlow

flag_name = os.path.splitext(__file__)[0]


class TestParallelDygraphNoSync(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True
        self._find_unused_parameters = False

    def test_no_sync(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_no_sync.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphNoSyncUnusedParam(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True
        self._find_unused_parameters = True

    def test_no_sync_ununsed_param(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_no_sync_unused_params.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphNoSyncControlFlow(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True
        self._find_unused_parameters = True

    def test_no_sync_control_flow(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_no_sync_control_flow.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphNoSyncSpawn(TestDistSpawnRunner):
    def test_no_sync_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(test_class=TestNoSync, delta=1e-5)


class TestParallelDygraphNoSyncUnusedParamSpawn(TestDistSpawnRunner):
    def _args_config(self, args):
        args.find_unused_parameters = True
        args.eager_mode = True

    def test_no_sync_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(
                test_class=TestNoSyncUnusedParam, delta=1e-5)


class TestParallelDygraphNoSyncControlFlowSpawn(TestDistSpawnRunner):
    def _args_config(self, args):
        args.find_unused_parameters = True
        args.eager_mode = True

    def test_no_sync_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(
                test_class=TestNoSyncControlFlow, delta=1e-5)


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding.py  (view file @ 1f829f6e)
...
@@ -64,47 +64,5 @@ class TestParallelDygraphSparseEmdeddingSpawn(TestDistSpawnRunner):
                test_class=TestSparseEmbedding, delta=1e-5)


class TestParallelDygraphSparseEmdeddingEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._nccl2_mode = True
        self._eager_mode = True
        self._dygraph = True

    def test_sparse_embedding(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_sparse_embedding.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphSparseEmdeddingFP64Eager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_sparse_embedding_fp64(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_sparse_embedding_fp64.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphSparseEmdeddingSpawnEager(TestDistSpawnRunner):
    def _args_config(self, args):
        args.eager_mode = True

    def test_sparse_embedding_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(
                test_class=TestSparseEmbedding, delta=1e-5)


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sparse_embedding_over_height.py  (view file @ 1f829f6e)
...
@@ -48,32 +48,5 @@ class TestParallelDygraphSparseEmdeddingOverHeightSpawn(TestDistSpawnRunner):
                test_class=TestSparseEmbeddingOverHeight, delta=1e-5)


class TestParallelDygraphSparseEmdeddingOverHeightEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_sparse_embedding(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_sparse_embedding_over_height.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphSparseEmdeddingOverHeightSpawnEager(TestDistSpawnRunner):
    def _args_config(self, args):
        args.eager_mode = True

    def test_sparse_embedding_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(
                test_class=TestSparseEmbeddingOverHeight, delta=1e-5)


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_sync_batch_norm.py  (view file @ 1f829f6e)
...
@@ -36,21 +36,5 @@ class TestParallelDygraphMnist(TestDistBase):
                log_name=flag_name)


class TestParallelDygraphMnistEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_mnist(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_sync_batch_norm.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_transformer.py  (view file @ 1f829f6e)
...
@@ -41,13 +41,6 @@ class TestParallelDygraphTransformer(TestDistBase):
                log_name=flag_name)


class TestParallelDygraphTransformerSpawn(TestDistSpawnRunner):
    def test_transformer_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(
                test_class=TestTransformer, delta=1e-5)


class TestParallelDygraphTransformerAccGrad(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
...
@@ -65,21 +58,5 @@ class TestParallelDygraphTransformerAccGrad(TestDistBase):
                log_name=flag_name)


class TestParallelDygraphTransformerEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_transformer(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_transformer.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_parallel_dygraph_unused_variables.py  (view file @ 1f829f6e)
...
@@ -86,71 +86,5 @@ class TestParallelDygraphSharedUnusedVariables(TestDistBase):
                log_name=flag_name)


class TestParallelDygraphUnusedVarEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_net(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_unused_variables.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestDygraphUnusedVarEager(TestParallelDygraphUnusedVar):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True


class TestSparseEmbeddingUnusedVarsSpawnEager(TestDistSpawnRunner):
    def _args_config(self, args):
        args.eager_mode = True

    def test_mnist_with_spawn(self):
        if fluid.core.is_compiled_with_cuda() and sys.version_info >= (3, 4):
            self.check_dist_result_with_spawn(
                test_class=TestSparseEmbeddingUnusedVars, delta=1e-5)


class TestParallelDygraphNoVarEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_net(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_none_var.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


class TestParallelDygraphSharedUnusedVariablesEager(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._eager_mode = True
        self._nccl2_mode = True
        self._dygraph = True

    def test_mnist(self):
        if fluid.core.is_compiled_with_cuda():
            self.check_with_place(
                "parallel_dygraph_shared_unused_var.py",
                delta=1e-5,
                check_error_log=True,
                log_name=flag_name)


if __name__ == "__main__":
    unittest.main()