提交 d3a4da5c 编写于 作者: Y Yancey1989

fix comment test=develop

上级 49870f50
...@@ -109,7 +109,7 @@ void AllReduceOpHandle::RunImpl() { ...@@ -109,7 +109,7 @@ void AllReduceOpHandle::RunImpl() {
buffer, buffer, numel, static_cast<ncclDataType_t>(dtype), buffer, buffer, numel, static_cast<ncclDataType_t>(dtype),
ncclSum, comm, stream)); ncclSum, comm, stream));
// TODO(Yancey1989): synchronize here can get better performance // TODO(Yancey1989): synchronize here can get better performance
// if don't use NCCL group call, but need more profileing. // if don't use NCCL group call, but need more profiling.
if (local_scopes_.size() == 1UL) cudaStreamSynchronize(stream); if (local_scopes_.size() == 1UL) cudaStreamSynchronize(stream);
}); });
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册