diff --git a/paddle/fluid/framework/details/multi_devices_graph_pass.cc b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
index 11b085c5c78ba2e90f7a2dd9f325bb5016399a16..5781936cb370507be3ba91824d87dc1a9186e9e3 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_pass.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_pass.cc
@@ -458,8 +458,9 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
   use_gpu = nccl_ctxs_ != nullptr;
 #endif
 
-  if (use_gpu && strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce &&
-      !is_dist_train) {
+  if ((use_gpu &&
+       strategy_.reduce_ == BuildStrategy::ReduceStrategy::kReduce) ||
+      is_dist_train) {
     // Insert BCast Ops
     for (size_t dev_id = 0; dev_id < bcast_var_name_set.size(); ++dev_id) {
       auto &to_bcast_set = bcast_var_name_set[dev_id];
diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc
index 878828693bcbf637d8456bc1a63915bcde1f774b..7fc06f234d42a992328c0b6164f17945d8075c28 100644
--- a/paddle/fluid/framework/details/reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/reduce_op_handle.cc
@@ -27,11 +27,8 @@
 namespace framework {
 namespace details {
 
 void ReduceOpHandle::RunImpl() {
-  if (dev_ctxes_.size() > 0UL) {
-    platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
-  } else {
-    platform::RecordEvent record_event(Name(), nullptr);
-  }
+  platform::RecordEvent record_event(Name(), dev_ctxes_.begin()->second);
+  if (places_.size() == 1) return;
   // the input and output may have dummy var.
   auto in_var_handles = DynamicCast<VarHandle>(inputs_);