提交 a478a11e 作者: Yu Yang

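NCCL Guard for bcast: in ParallelExecutor::BCastParamsToGPUs, replace the explicit platform::dynload::ncclGroupStart()/ncclGroupEnd() pair with a scoped platform::NCCLGroupGuard object.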

上级 f2685bed
...@@ -606,7 +606,7 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -606,7 +606,7 @@ void ParallelExecutor::BCastParamsToGPUs(
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
size_t numel = main_tensor.numel(); size_t numel = main_tensor.numel();
platform::dynload::ncclGroupStart(); platform::NCCLGroupGuard guard;
for (size_t i = 0; i < member_->places_.size(); ++i) { for (size_t i = 0; i < member_->places_.size(); ++i) {
auto place = member_->places_[i]; auto place = member_->places_[i];
...@@ -624,7 +624,6 @@ void ParallelExecutor::BCastParamsToGPUs( ...@@ -624,7 +624,6 @@ void ParallelExecutor::BCastParamsToGPUs(
platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm, platform::dynload::ncclBcast(buffer, numel, data_type, 0, nccl_ctx.comm,
nccl_ctx.stream()); nccl_ctx.stream());
} }
platform::dynload::ncclGroupEnd();
} }
for (auto &stream : member_->communication_streams_) { for (auto &stream : member_->communication_streams_) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册