diff --git a/paddle/fluid/framework/details/all_reduce_op_handle.cc b/paddle/fluid/framework/details/all_reduce_op_handle.cc
index ae20338746210a5d59582d307e8134fd6c1cddc4..6b7bbf9003a57731d4d78933fe3cba933e1405d5 100644
--- a/paddle/fluid/framework/details/all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/all_reduce_op_handle.cc
@@ -107,5 +107,11 @@ void AllReduceOpHandle::RunImpl() {
         PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
             buffer, buffer, numel, static_cast<ncclDataType_t>(dtype), ncclSum,
             comm, stream));
+        // When need_group_call_ is false, wait here for the work queued on
+        // `stream` to finish. The CUDA status is routed through PADDLE_ENFORCE,
+        // like the ncclAllReduce status above, so a failed synchronize is
+        // reported instead of being silently dropped.
+        if (!nccl_ctxs_->need_group_call_) {
+          PADDLE_ENFORCE(cudaStreamSynchronize(stream));
+        }
       });
     }