diff --git a/CMakeLists.txt b/CMakeLists.txt index 5437f9ff1fa6d865b4f37118ddedbe62d101d3b9..76223fd08febdfca13fe64860fb6391be24cc3c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -513,6 +513,7 @@ if(MGE_WITH_ATLAS) set(MGB_ATLAS ${MGE_WITH_ATLAS}) endif() + find_program(CCACHE_BIN ccache) if(CCACHE_BIN) set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN}) @@ -688,6 +689,7 @@ if(MGE_ARCH STREQUAL "aarch64") set(MEGDNN_AARCH64 1) set(MEGDNN_64_BIT 1) set(MARCH "-march=armv8-a") + set(MGB_AARCH64 1) if(MGE_ARMV8_2_FEATURE_FP16) message(STATUS "Enable fp16 feature support in armv8.2") if(NOT ${MGE_DISABLE_FLOAT16}) diff --git a/src/core/impl/comp_node/comp_node.cpp b/src/core/impl/comp_node/comp_node.cpp index 34a3ee8c0b6d96dfa6ab96d343c416d31c40051d..907ccda2d57f6a8397a369acdea0bb16bbb3a67a 100644 --- a/src/core/impl/comp_node/comp_node.cpp +++ b/src/core/impl/comp_node/comp_node.cpp @@ -177,11 +177,11 @@ CompNode::Locator CompNode::Locator::parse(const std::string &id) { dev_type = DeviceType::CAMBRICON; ptr += 9; } else if (ptr[0] == 'm') { - if (strncmp(ptr, "multithread", 11)) { - err(); - } - dev_type = DeviceType::MULTITHREAD; - ptr += 11; + if (strncmp(ptr, "multithread", 11)) { + err(); + } + dev_type = DeviceType::MULTITHREAD; + ptr += 11; } else { if (ptr[1] != 'p' || ptr[2] != 'u') { err(); diff --git a/src/core/impl/comp_node_env.cpp b/src/core/impl/comp_node_env.cpp index c652028810b22594ead6b9333a5f4864b140ff90..6dbe629e41950f1d6f90d17a7eaf39f5b5b70602 100644 --- a/src/core/impl/comp_node_env.cpp +++ b/src/core/impl/comp_node_env.cpp @@ -35,6 +35,7 @@ #include "megcore_atlas.h" #endif + using namespace mgb; /* =================== MegDNNHandle =================== */ @@ -101,6 +102,7 @@ MegDNNHandle::MegDNNHandle(const CompNodeEnv& env) { } #endif + if (env.property().type == CompNode::DeviceType::CPU) { megcoreCreateDeviceHandle(&m_dev_hdl, megcorePlatformCPU); megcoreCreateComputingHandleWithCPUDispatcher(&m_comp_hdl, m_dev_hdl, @@ -254,6 +256,7 @@ void CompNodeEnv::init_atlas(CompNode comp_node, const AtlasEnv& env) { #endif + #if MGB_ROCM void mgb::_on_hip_error(const char* expr, hipError_t err, const char* file, diff --git a/src/core/impl/exception.cpp b/src/core/impl/exception.cpp index 26d2c4c40feb4d606b3c836006a4291b6492bbd0..6e2efd3219d52849258207dc63e37204143a9d6e 100644 --- a/src/core/impl/exception.cpp +++ b/src/core/impl/exception.cpp @@ -77,6 +77,7 @@ AtlasError::AtlasError(const std::string &msg): } + ROCmError::ROCmError(const std::string &msg): SystemError(msg) { diff --git a/src/core/impl/graph/var_node_mem_mgr.cpp b/src/core/impl/graph/var_node_mem_mgr.cpp index 8b612c9ddd0e4d06a7e93b09b34969f1b90c8b19..235ef1c121d55b74a78c5430e0360ec08e10a980 100644 --- a/src/core/impl/graph/var_node_mem_mgr.cpp +++ b/src/core/impl/graph/var_node_mem_mgr.cpp @@ -125,7 +125,7 @@ StaticDeviceMemoryManager::make_default_impl() { #endif // MGB_THREAD_SAFE /* ==================== AsyncVarReleaser ==================== */ -#if MGB_CUDA || MGB_ATLAS +#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON class VarNodeMemManager::AsyncVarReleaser { struct WaiterParam { CompNode cn; @@ -245,18 +245,18 @@ bool VarNodeMemManager::ImpureMemPlanManager::check_need_realloc() { } /* ==================== VarNodeMemManager ==================== */ -VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph): - m_owner_graph(graph), - m_seq_mem_opt(graph) -#if MGB_CUDA || MGB_ATLAS - ,m_asyn_var_releaser(new AsyncVarReleaser) +VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl* graph) + : m_owner_graph(graph), + m_seq_mem_opt(graph) +#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON + ,m_asyn_var_releaser(new AsyncVarReleaser) #endif { auto on_comp_seq_finish = [this](const event::CompSeqExecFinished& ev) { MGB_MARK_USED_VAR(ev); // async release is only used for sync between multiple comp nodes, and // does not wait for device to finish -#if MGB_CUDA || MGB_ATLAS +#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON m_asyn_var_releaser->wait_release_finish(); #endif m_cpu_async_release_barrier.wait_zero(); @@ -297,7 +297,8 @@ VarNodeMemManager::VarNodeMemManager(ComputingGraphImpl *graph): graph->event().register_receiver_permanent( on_comp_seq_error); -#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && (MGB_CUDA || MGB_ATLAS) +#if MGB_ENABLE_VAR_DEV_MEM_DEFRAGMENTER && \ + (MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON ) auto on_mem_defrag_start = [this](const event::BeforeMemDefrag&) { m_asyn_var_releaser->wait_release_finish(); }; @@ -1448,6 +1449,13 @@ void VarNodeMemManager::decr_var_mem_refcnt( m_asyn_var_releaser->add(dispatch_cn, var); break; } +#endif +#if MGB_CAMBRICON + case DT::CAMBRICON: + { + m_asyn_var_releaser->add(dispatch_cn, var); + break; + } #endif default: mgb_throw(MegBrainError, diff --git a/src/core/impl/graph/var_node_mem_mgr.h b/src/core/impl/graph/var_node_mem_mgr.h index 271ab17be416379f999d90c8170e159c2245fc25..5eaea8578147e5b2f0f0d391a5c2dc85c4152e6f 100644 --- a/src/core/impl/graph/var_node_mem_mgr.h +++ b/src/core/impl/graph/var_node_mem_mgr.h @@ -446,7 +446,7 @@ class VarNodeMemManager { SyncableCounter m_cpu_async_release_barrier; -#if MGB_CUDA || MGB_ATLAS +#if MGB_CUDA || MGB_ATLAS || MGB_CAMBRICON //! release dynamic var on after compnode event finishes class AsyncVarReleaser; std::unique_ptr m_asyn_var_releaser; diff --git a/src/core/include/megbrain/comp_node_env.h b/src/core/include/megbrain/comp_node_env.h index ca6308a0aff46e2efb003c1b5d04344a8838bbb8..1173c05b1549b1e94a1c546f82344b49361ee3ed 100644 --- a/src/core/include/megbrain/comp_node_env.h +++ b/src/core/include/megbrain/comp_node_env.h @@ -90,6 +90,7 @@ #endif // MGB_ATLAS + #if MGB_ROCM #include "hcc_detail/hcc_defs_prologue.h" #include "megcore_rocm.h" @@ -194,6 +195,7 @@ namespace mgb { const char* file, const char* func, int line); #endif + #if MGB_CUDA [[noreturn]] void _on_cuda_error(const char* expr, cudaError_t err, const char* file, const char* func, int line); @@ -325,6 +327,7 @@ public: } #endif + } /*! @@ -426,6 +429,8 @@ public: void init_atlas(CompNode comp_node, const AtlasEnv& env); #endif + + #if MGB_ROCM struct ROCmEnv { int device = -1; @@ -485,9 +490,7 @@ public: }; static InitStatus init_status; - static void init() { - init_status.init(); - } + static void init() { init_status.init(); } void activate() const { init(); diff --git a/src/core/test/comp_node.cpp b/src/core/test/comp_node.cpp index b4420804f661299f0502cd36c68b571c30e04d79..41a0751cc73986961dd2af8d3718c2b98805cd95 100644 --- a/src/core/test/comp_node.cpp +++ b/src/core/test/comp_node.cpp @@ -62,6 +62,7 @@ TEST(TestCompNode, Parse) { ASSERT_EQ(L::parse("multithread:default:2"), make_lc(D::MULTITHREAD, L::DEVICE_MULTITHREAD_DEFAULT, 2)); + ASSERT_THROW(L::parse("apu"), MegBrainError); ASSERT_THROW(L::parse("fpgbx"), MegBrainError); ASSERT_THROW(L::parse("cab0"), MegBrainError); @@ -149,6 +150,7 @@ TEST(TestCompNode, Load) { auto atlas1 = CompNode::load("atlas1"); ASSERT_NE(atlas0, atlas1); #endif + } TEST(TestCompNode, FreeAfterFinalize) { @@ -762,6 +764,7 @@ TEST(TestCompNodeAtlas, D2DCopy) { } #endif + namespace { class CompNodeDepedentObjectInst final : public CompNodeDepedentObject { int *m_dst, *m_timer; diff --git a/src/megbrain_build_config.h.in b/src/megbrain_build_config.h.in index 6caf1c08f80679527b9ee2ef545029ddec81efe1..0db1250fbec1ff4f68d8fb4419854fd8e284e407 100644 --- a/src/megbrain_build_config.h.in +++ b/src/megbrain_build_config.h.in @@ -33,7 +33,6 @@ #cmakedefine01 MGB_ENABLE_OPR_MM #cmakedefine01 MGB_ENABLE_FBS_SERIALIZATION #cmakedefine01 MGB_IS_DEV - // DNN related flags // Platform macro's #cmakedefine01 MEGDNN_WITH_CUDA