提交 47740ace 作者: Yancey1989

Fix performance in AllReduceOpHandle::RunImpl

上级 73edf137
......@@ -107,6 +107,7 @@ void AllReduceOpHandle::RunImpl() {
PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
buffer, buffer, numel, static_cast<ncclDataType_t>(dtype),
ncclSum, comm, stream));
if (!nccl_ctxs_->need_group_call_) cudaStreamSynchronize(stream);
});
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册