Commit cadc6a97 authored by WangXi, committed by gongweibao

fix dgc test and bug when not set trainers_endpoints_, test=develop (#20617)

Parent: 46797f53
```diff
@@ -465,8 +465,7 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(ir::Graph *result,
           new details::SparseAllReduceOpHandle(
               result->CreateEmptyNode("allreduce", ir::Node::Type::kOperation),
               scopes, places, multi_nccl_ctxs_, is_encoded,
-              static_cast<int>(strategy_.trainers_endpoints_.size()) *
-                  places_.size()));
+              strategy_.num_trainers_ * places_.size()));
     } else {
       result->Get<GraphOps>(kGraphOps).emplace_back(
           new details::AllReduceOpHandle(
...
```
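This is the core fix named in the commit title: the rank count passed to `SparseAllReduceOpHandle` was derived from `strategy_.trainers_endpoints_.size()`, which is 0 whenever `trainers_endpoints_` is never set, so DGC saw a zero world size; deriving it from `num_trainers_` avoids that. A minimal Python sketch of the failure mode (all names here are illustrative stand-ins, not Paddle's API):

```python
# Illustrative sketch of the nranks bug (stand-in names, not Paddle's API).
def nranks_old(trainers_endpoints, num_places):
    # Old behavior: depends on trainers_endpoints being populated.
    # If the caller never sets it, len(...) == 0 and nranks collapses to 0.
    return len(trainers_endpoints) * num_places

def nranks_new(num_trainers, num_places):
    # Fixed behavior: num_trainers is taken from the build strategy.
    return num_trainers * num_places

print(nranks_old([], 8))   # 0  -> broken DGC all-reduce
print(nranks_new(2, 8))    # 16 -> correct world size
```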
```diff
@@ -271,7 +271,6 @@ class CollectiveOptimizer(DistributedOptimizer):
         node_num = self._node_num()
         assert node_num >= 1, "nccl2 node_num must >= 1, now:{}" % node_num
-        self._strategy.fuse_all_reduce_ops = True
         exec_strategy = self._strategy.exec_strategy
         if node_num <= 1:
...
```
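The deleted line forced `fuse_all_reduce_ops = True` for every collective job, but fused all-reduce packs dense gradient tensors into one buffer and cannot handle the sparse, encoded gradients DGC produces; removing it leaves the decision to the caller's strategy. A hedged sketch of caller-side configuration with the fluid-era `BuildStrategy` (the `use_dgc` flag is a stand-in for the caller's own config):

```python
# Sketch: leave gradient fusion to the caller instead of forcing it on.
import paddle.fluid as fluid

use_dgc = True  # stand-in: whether this job uses DGC

build_strategy = fluid.BuildStrategy()
# Fusion concatenates dense gradients into one buffer before all-reduce;
# DGC's encoded gradients are sparse, so fusion must stay off with DGC.
build_strategy.fuse_all_reduce_ops = not use_dgc
```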
```diff
@@ -291,6 +291,10 @@ class TestDistRunnerBase(object):
             build_stra.num_trainers = 1
             build_stra.trainer_id = 0
 
+        if args.use_dgc:
+            # fuse_all_reduce_ops requires that gradients not be sparse types
+            build_stra.fuse_all_reduce_ops = False
+
         print_to_err(type(self).__name__, "begin to compile with data parallel")
         binary = compiler.CompiledProgram(trainer_prog).with_data_parallel(
             loss_name=avg_cost.name,
```
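The `use_dgc` path is what makes the guard above necessary: DGC swaps in an optimizer that communicates compressed, sparse gradients. For context, a hedged sketch of enabling DGC with the fluid 1.x-era API (hyperparameter values are illustrative only):

```python
# Sketch: enabling DGC in fluid 1.x; all values are illustrative.
import paddle.fluid as fluid

optimizer = fluid.optimizer.DGCMomentumOptimizer(
    learning_rate=0.001,
    momentum=0.9,
    rampup_begin_step=0)  # begin gradient compression at step 0
```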
```diff
@@ -852,7 +856,9 @@ class TestDistBase(unittest.TestCase):
         if check_error_log:
             required_envs["GLOG_vmodule"] = \
-                "fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10"
+                "fused_all_reduce_op_handle=10,all_reduce_op_handle=10,alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10," \
+                "alloc_continuous_space_for_grad_pass=10,fast_threaded_ssa_graph_executor=10,executor=10,operator=10," \
+                "sparse_all_reduce_op_handle=10"
             required_envs["GLOG_logtostderr"] = "1"
 
         local_losses \
...
```
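The widened `GLOG_vmodule` list adds `sparse_all_reduce_op_handle=10` so DGC's sparse all-reduce handle logs at the same verbosity as the other handles when a test fails. The same per-module glog settings can be exported when reproducing a failure by hand; a minimal sketch (glog reads these variables at load time, so they must be set before the Paddle core library is imported):

```python
# Sketch: per-module glog verbosity for debugging a DGC run.
import os

os.environ["GLOG_vmodule"] = (
    "fused_all_reduce_op_handle=10,all_reduce_op_handle=10,"
    "alloc_continuous_space_op=10,fuse_all_reduce_op_pass=10,"
    "alloc_continuous_space_for_grad_pass=10,"
    "fast_threaded_ssa_graph_executor=10,executor=10,operator=10,"
    "sparse_all_reduce_op_handle=10")
os.environ["GLOG_logtostderr"] = "1"  # route glog output to stderr

import paddle.fluid  # noqa: E402  -- import only after the env vars are set
```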