提交 3635af62 编写于 作者: M Megvii Engine Team

style(atlas): add comment for async d2d

GitOrigin-RevId: 606a56ac4ecc7a3b76f2c10a93eef10fe330564a
上级 d68d4d1d
......@@ -51,6 +51,7 @@ void AtlasComputingContext::memcpy(void* dst, const void* src,
ACL_MEMCPY_HOST_TO_DEVICE));
break;
case megcoreMemcpyDeviceToDevice:
// async d2d is always faster than sync d2d because of SDMA
acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes,
ACL_MEMCPY_DEVICE_TO_DEVICE, m_ctx.stream));
break;
......
......@@ -230,14 +230,10 @@ void AtlasCompNodeImpl::peer_copy_to(Impl* dest_impl, void* dest,
auto&& src_env = m_env.atlas_env();
activate();
if (dst_env.device == src_env.device) {
#if 1
// async d2d uses SDMA, which is faster than the sync d2d done by the ctrl cpu
MGB_ATLAS_CHECK(aclrtMemcpyAsync(dest, size, src, size,
ACL_MEMCPY_DEVICE_TO_DEVICE,
dst_env.stream));
#else
MGB_ATLAS_CHECK(aclrtMemcpy(dest, size, src, size,
ACL_MEMCPY_DEVICE_TO_DEVICE));
#endif
} else {
mgb_throw(MegBrainError,
"Atlas does not support peer copy between differents "
......
......@@ -361,7 +361,6 @@ void AtlasRuntimeOpr::scn_do_execute() {
i, output(i)->cname());
aclmdlAddDatasetBuffer(model_outputs, output_db);
}
MGB_ATLAS_CHECK(aclmdlExecute(m_model_id, model_inputs, model_outputs));
for (size_t i = 0; i < nr_inputs; ++i) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册