diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index bbfaac7339d0becf4c83d97c8c1fecb31d75a02c..d61f1438a61fefb185c44accdddb93b22a83b997 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -340,6 +340,8 @@ ncclDataType_t ToNCCLDataType(std::type_index type) { } } +static std::mutex g_nccl_mtx_; + struct NCCLAllReduceOpHandle : public OpHandle { ParallelExecutorPrivate *member_; @@ -361,6 +363,8 @@ struct NCCLAllReduceOpHandle : public OpHandle { int dtype = -1; size_t numel = 0; + std::lock_guard g(g_nccl_mtx_); + platform::dynload::ncclGroupStart(); for (size_t i = 0; i < member_->local_scopes_.size(); ++i) {