diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc index a1b913a863cc1853ea3a786d22e6e8baa8c98a02..1aa33768c80c2b60de88a5545bdc9f5a425a1dcb 100644 --- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc +++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc @@ -55,6 +55,7 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( const ProgramDesc &program) const { auto graph = new SSAGraph(); SSAGraph &result = *graph; + std::unordered_set og_has_bc; result.vars_.resize(places_.size()); bool is_forwarding = true; @@ -123,8 +124,10 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build( if (!is_forwarding) { auto var_names = op->OutputArgumentNames(); for (auto &og : var_names) { - if (grad_names_.count(og) != 0) { // is param grad - // Insert NCCL AllReduce Op + if (grad_names_.count(og) != 0 && + og_has_bc.count(og) == 0) { // is param grad + // Insert NCCL AllReduce Op + og_has_bc.insert(og); #ifdef PADDLE_WITH_CUDA result.ops_.emplace_back( new NCCLAllReduceOpHandle(local_scopes_, places_, *nccl_ctxs_));