提交 a478a11e 作者: Yu Yang

NCCL Guard for bcast

上级 f2685bed
......@@ -606,7 +606,7 @@ void ParallelExecutor::BCastParamsToGPUs(
auto &dims = main_tensor.dims();
size_t numel = main_tensor.numel();
- platform::dynload::ncclGroupStart();
+ platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i];
......@@ -624,7 +624,6 @@ void ParallelExecutor::BCastParamsToGPUs(
platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm,
nccl_ctx.stream());
}
- platform::dynload::ncclGroupEnd();
}
for (auto &stream : member_->communication_streams_) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册