From 3635af627429daddf1c1f365280f05d5a5696138 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 21 Oct 2020 18:07:43 +0800 Subject: [PATCH] style(atlas): add comment for async d2d GitOrigin-RevId: 606a56ac4ecc7a3b76f2c10a93eef10fe330564a --- dnn/src/atlas/megcore/computing_context.cpp | 1 + src/core/impl/comp_node/atlas/comp_node.cpp | 6 +----- src/opr/impl/atlas_runtime_op.cpp | 1 - 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/dnn/src/atlas/megcore/computing_context.cpp b/dnn/src/atlas/megcore/computing_context.cpp index 69b4d3b62..7bda2c91b 100644 --- a/dnn/src/atlas/megcore/computing_context.cpp +++ b/dnn/src/atlas/megcore/computing_context.cpp @@ -51,6 +51,7 @@ void AtlasComputingContext::memcpy(void* dst, const void* src, ACL_MEMCPY_HOST_TO_DEVICE)); break; case megcoreMemcpyDeviceToDevice: + // async d2d is always faster than sync d2d because of SDMA acl_check(aclrtMemcpyAsync(dst, size_in_bytes, src, size_in_bytes, ACL_MEMCPY_DEVICE_TO_DEVICE, m_ctx.stream)); break; diff --git a/src/core/impl/comp_node/atlas/comp_node.cpp b/src/core/impl/comp_node/atlas/comp_node.cpp index 1f64bf0eb..f0d5fb1b1 100644 --- a/src/core/impl/comp_node/atlas/comp_node.cpp +++ b/src/core/impl/comp_node/atlas/comp_node.cpp @@ -230,14 +230,10 @@ void AtlasCompNodeImpl::peer_copy_to(Impl* dest_impl, void* dest, auto&& src_env = m_env.atlas_env(); activate(); if (dst_env.device == src_env.device) { -#if 1 + // async d2d use SDMA which is faster than sync ctrl cpu d2d MGB_ATLAS_CHECK(aclrtMemcpyAsync(dest, size, src, size, ACL_MEMCPY_DEVICE_TO_DEVICE, dst_env.stream)); -#else - MGB_ATLAS_CHECK(aclrtMemcpy(dest, size, src, size, - ACL_MEMCPY_DEVICE_TO_DEVICE)); -#endif } else { mgb_throw(MegBrainError, "Atlas does not support peer copy between differents " diff --git a/src/opr/impl/atlas_runtime_op.cpp b/src/opr/impl/atlas_runtime_op.cpp index 051a2c8f2..7853545a2 100644 --- a/src/opr/impl/atlas_runtime_op.cpp +++ b/src/opr/impl/atlas_runtime_op.cpp @@ -361,7 +361,6 @@ void AtlasRuntimeOpr::scn_do_execute() { i, output(i)->cname()); aclmdlAddDatasetBuffer(model_outputs, output_db); } - MGB_ATLAS_CHECK(aclmdlExecute(m_model_id, model_inputs, model_outputs)); for (size_t i = 0; i < nr_inputs; ++i) { -- GitLab