diff --git a/CMakeLists.txt b/CMakeLists.txt index 45005245a16576f398dbe557fa33df0af48fbaaf..e92aa84b6404bf23e0a884eae7933c71fa10e66c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -798,10 +798,15 @@ endif() if(MGE_WITH_CAMBRICON) include_directories("$ENV{NEUWARE_HOME}/include") link_directories("$ENV{NEUWARE_HOME}/lib64") - include(cmake/cnrt.cmake) - include(cmake/cndev.cmake) - include(cmake/cnml.cmake) - list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml) + if (BANG_ARCH LESS 300) + include(cmake/cnml.cmake) + list(APPEND MGE_CAMBRICON_LIBS libcnml) + else() + include(cmake/cnnl.cmake) + include(cmake/cnlight.cmake) + include(cmake/magicmind.cmake) + list(APPEND MGE_CAMBRICON_LIBS libcnnl libcnnl_extra libcnlight libmagicmind libmagicmind_runtime) + endif() set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}") endif() diff --git a/cmake/cnlight.cmake b/cmake/cnlight.cmake new file mode 100644 index 0000000000000000000000000000000000000000..b32bb9ec0c18dc314ab1f8b6377d23a430a721e5 --- /dev/null +++ b/cmake/cnlight.cmake @@ -0,0 +1,40 @@ +find_library(CNLIGHT_LIBRARY + NAMES libcnlight.so + PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} + HINTS ${ALTER_LIBRARY_PATHS} + PATH_SUFFIXES lib lib64 + DOC "CNLIGHT library." ) + +if(CNLIGHT_LIBRARY STREQUAL "CNLIGHT_LIBRARY-NOTFOUND") + message(FATAL_ERROR "Can not find CNLIGHT Library") +endif() + +get_filename_component(__found_cnlight_root "${CNLIGHT_LIBRARY}/../include" REALPATH) +find_path(CNLIGHT_INCLUDE_DIR + NAMES cnlight.h + HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnlight_root} + PATH_SUFFIXES include + DOC "Path to CNLIGHT include directory." 
) + +if(CNLIGHT_INCLUDE_DIR STREQUAL "CNLIGHT_INCLUDE_DIR-NOTFOUND") + message(FATAL_ERROR "Can not find CNLIGHT Library") +endif() + +file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MAJOR REGEX "^#define CNLIGHT_MAJOR_VERSION [0-9]+.*$") +file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_MINOR REGEX "^#define CNLIGHT_MINOR_VERSION [0-9]+.*$") +file(STRINGS "${CNLIGHT_INCLUDE_DIR}/cnlight.h" CNLIGHT_PATCH REGEX "^#define CNLIGHT_PATCH_VERSION [0-9]+.*$") + +string(REGEX REPLACE "^#define CNLIGHT_MAJOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MAJOR "${CNLIGHT_MAJOR}") +string(REGEX REPLACE "^#define CNLIGHT_MINOR_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_MINOR "${CNLIGHT_MINOR}") +string(REGEX REPLACE "^#define CNLIGHT_PATCH_VERSION ([0-9]+).*$" "\\1" CNLIGHT_VERSION_PATCH "${CNLIGHT_PATCH}") +set(CNLIGHT_VERSION_STRING "${CNLIGHT_VERSION_MAJOR}.${CNLIGHT_VERSION_MINOR}.${CNLIGHT_VERSION_PATCH}") + +add_library(libcnlight SHARED IMPORTED) + +set_target_properties(libcnlight PROPERTIES + IMPORTED_LOCATION ${CNLIGHT_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${CNLIGHT_INCLUDE_DIR} +) + +message(STATUS "Found CNLIGHT: ${__found_cnlight_root} (found version: ${CNLIGHT_VERSION_STRING})") + diff --git a/cmake/cnnl.cmake b/cmake/cnnl.cmake new file mode 100644 index 0000000000000000000000000000000000000000..9797a78e97182c56e51a3e8154ea9a1b4520bc8e --- /dev/null +++ b/cmake/cnnl.cmake @@ -0,0 +1,80 @@ +find_library(CNNL_LIBRARY + NAMES libcnnl.so + PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} + HINTS ${ALTER_LIBRARY_PATHS} + PATH_SUFFIXES lib lib64 + DOC "CNNL library." 
) + +if(CNNL_LIBRARY STREQUAL "CNNL_LIBRARY-NOTFOUND") + message(FATAL_ERROR "Can not find CNNL Library") +endif() + +get_filename_component(__found_cnnl_root "${CNNL_LIBRARY}/../include" REALPATH) +find_path(CNNL_INCLUDE_DIR + NAMES cnnl.h + HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_root} + PATH_SUFFIXES include + DOC "Path to CNNL include directory." ) + +if(CNNL_INCLUDE_DIR STREQUAL "CNNL_INCLUDE_DIR-NOTFOUND") + message(FATAL_ERROR "Can not find CNNL Library") +endif() + +file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MAJOR REGEX "^#define CNNL_MAJOR [0-9]+.*$") +file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_MINOR REGEX "^#define CNNL_MINOR [0-9]+.*$") +file(STRINGS "${CNNL_INCLUDE_DIR}/cnnl.h" CNNL_PATCH REGEX "^#define CNNL_PATCHLEVEL [0-9]+.*$") + +string(REGEX REPLACE "^#define CNNL_MAJOR ([0-9]+).*$" "\\1" CNNL_VERSION_MAJOR "${CNNL_MAJOR}") +string(REGEX REPLACE "^#define CNNL_MINOR ([0-9]+).*$" "\\1" CNNL_VERSION_MINOR "${CNNL_MINOR}") +string(REGEX REPLACE "^#define CNNL_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_VERSION_PATCH "${CNNL_PATCH}") +set(CNNL_VERSION_STRING "${CNNL_VERSION_MAJOR}.${CNNL_VERSION_MINOR}.${CNNL_VERSION_PATCH}") + +add_library(libcnnl SHARED IMPORTED) + +set_target_properties(libcnnl PROPERTIES + IMPORTED_LOCATION ${CNNL_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${CNNL_INCLUDE_DIR} +) + +message(STATUS "Found CNNL: ${__found_cnnl_root} (found version: ${CNNL_VERSION_STRING})") + +find_library(CNNL_EXTRA_LIBRARY + NAMES libcnnl_extra.so + PATHS ${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} + HINTS ${ALTER_LIBRARY_PATHS} + PATH_SUFFIXES lib lib64 + DOC "CNNL_EXTRA library." 
) + +if(CNNL_EXTRA_LIBRARY STREQUAL "CNNL_EXTRA_LIBRARY-NOTFOUND") + message(FATAL_ERROR "Can not find CNNL_EXTRA Library") +endif() + +get_filename_component(__found_cnnl_extra_root "${CNNL_EXTRA_LIBRARY}/../include" REALPATH) +find_path(CNNL_EXTRA_INCLUDE_DIR + NAMES cnnl_extra.h + HINTS "$ENV{NEUWARE_HOME}/include" ${__found_cnnl_extra_root} + PATH_SUFFIXES include + DOC "Path to CNNL_EXTRA include directory." ) + +if(CNNL_EXTRA_INCLUDE_DIR STREQUAL "CNNL_EXTRA_INCLUDE_DIR-NOTFOUND") + message(FATAL_ERROR "Can not find CNNL_EXTRA Library") +endif() + +file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MAJOR REGEX "^#define CNNL_EXTRA_MAJOR [0-9]+.*$") +file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_MINOR REGEX "^#define CNNL_EXTRA_MINOR [0-9]+.*$") +file(STRINGS "${CNNL_EXTRA_INCLUDE_DIR}/cnnl_extra.h" CNNL_EXTRA_PATCH REGEX "^#define CNNL_EXTRA_PATCHLEVEL [0-9]+.*$") + +string(REGEX REPLACE "^#define CNNL_EXTRA_MAJOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MAJOR "${CNNL_EXTRA_MAJOR}") +string(REGEX REPLACE "^#define CNNL_EXTRA_MINOR ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_MINOR "${CNNL_EXTRA_MINOR}") +string(REGEX REPLACE "^#define CNNL_EXTRA_PATCHLEVEL ([0-9]+).*$" "\\1" CNNL_EXTRA_VERSION_PATCH "${CNNL_EXTRA_PATCH}") +set(CNNL_EXTRA_VERSION_STRING "${CNNL_EXTRA_VERSION_MAJOR}.${CNNL_EXTRA_VERSION_MINOR}.${CNNL_EXTRA_VERSION_PATCH}") + +add_library(libcnnl_extra SHARED IMPORTED) + +set_target_properties(libcnnl_extra PROPERTIES + IMPORTED_LOCATION ${CNNL_EXTRA_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${CNNL_EXTRA_INCLUDE_DIR} +) + +message(STATUS "Found CNNL_EXTRA: ${__found_cnnl_extra_root} (found version: ${CNNL_EXTRA_VERSION_STRING})") + diff --git a/cmake/magicmind.cmake b/cmake/magicmind.cmake new file mode 100644 index 0000000000000000000000000000000000000000..c0cf8398213178b43b1ad50233f3cbc6bf7d7d80 --- /dev/null +++ b/cmake/magicmind.cmake @@ -0,0 +1,54 @@ +find_library(MAGICMIND_LIBRARY + NAMES libmagicmind.so + PATHS 
${ALTER_LD_LIBRARY_PATHS} "$ENV{NEUWARE_HOME}/lib64" ${CMAKE_INSTALL_PREFIX} + HINTS ${ALTER_LIBRARY_PATHS} + PATH_SUFFIXES lib lib64 + DOC "MAGICMIND library." ) + +if(MAGICMIND_LIBRARY STREQUAL "MAGICMIND_LIBRARY-NOTFOUND") + message(FATAL_ERROR "Can not find MAGICMIND Library") +endif() + +get_filename_component(__found_magicmind_root "${MAGICMIND_LIBRARY}/../include" REALPATH) +find_path(MAGICMIND_INCLUDE_DIR + NAMES common.h + HINTS "$ENV{NEUWARE_HOME}/include" ${__found_magicmind_root} + PATH_SUFFIXES include + DOC "Path to MAGICMIND include directory." ) + +if(MAGICMIND_INCLUDE_DIR STREQUAL "MAGICMIND_INCLUDE_DIR-NOTFOUND") + message(FATAL_ERROR "Can not find MAGICMIND Library") +endif() + +file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MAJOR REGEX "^#define MM_MAJOR_VERSION [0-9]+.*$") +file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_MINOR REGEX "^#define MM_MINOR_VERSION [0-9]+.*$") +file(STRINGS "${MAGICMIND_INCLUDE_DIR}/common.h" MAGICMIND_PATCH REGEX "^#define MM_PATCH_VERSION [0-9]+.*$") + +string(REGEX REPLACE "^#define MM_MAJOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MAJOR "${MAGICMIND_MAJOR}") +string(REGEX REPLACE "^#define MM_MINOR_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_MINOR "${MAGICMIND_MINOR}") +string(REGEX REPLACE "^#define MM_PATCH_VERSION ([0-9]+).*$" "\\1" MAGICMIND_VERSION_PATCH "${MAGICMIND_PATCH}") +set(MAGICMIND_VERSION_STRING "${MAGICMIND_VERSION_MAJOR}.${MAGICMIND_VERSION_MINOR}.${MAGICMIND_VERSION_PATCH}") + +add_library(libmagicmind SHARED IMPORTED) + +set_target_properties(libmagicmind PROPERTIES + IMPORTED_LOCATION ${MAGICMIND_LIBRARY} + INTERFACE_INCLUDE_DIRECTORIES ${MAGICMIND_INCLUDE_DIR} +) + +message(STATUS "Found MAGICMIND: ${__found_magicmind_root} (found version: ${MAGICMIND_VERSION_STRING})") + +find_library(MAGICMIND_RUNTIME_LIBRARY + NAMES libmagicmind_runtime.so + PATHS "${__found_magicmind_root}/../lib64" "$ENV{NEUWARE_HOME}/lib64" + ) + +if(MAGICMIND_RUNTIME_LIBRARY STREQUAL 
"MAGICMIND_RUNTIME_LIBRARY-NOTFOUND") + message(FATAL_ERROR "Can not find MAGICMIND_RUNTIME Library") +else() + message(STATUS "Found MAGICMIND_RUNTIME: ${MAGICMIND_RUNTIME_LIBRARY}") +endif() +add_library(libmagicmind_runtime SHARED IMPORTED) +set_target_properties(libmagicmind_runtime PROPERTIES + IMPORTED_LOCATION ${MAGICMIND_RUNTIME_LIBRARY} +) diff --git a/src/cambricon/impl/magicmind_runtime_opr.cpp b/src/cambricon/impl/magicmind_runtime_opr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9cd9174496b952811b5572d818153b2743ed0a52 --- /dev/null +++ b/src/cambricon/impl/magicmind_runtime_opr.cpp @@ -0,0 +1,397 @@ +/** + * \file src/cambricon/impl/magicmind_runtime_opr.cpp + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +#include "megbrain/cambricon/magicmind_runtime_opr.h" +#include +#include "megbrain/common.h" +#include "megbrain/comp_node_env.h" + +#if MGB_CAMBRICON + +using namespace mgb; +using namespace opr; +using namespace magicmind; + +#define MM_CHECK(stmt) \ + do { \ + auto ret = (stmt); \ + if (ret != magicmind::Status::OK()) { \ + std::ostringstream msg; \ + msg << ret; \ + mgb_throw(MegBrainError, "mm failure(extra msg:%s)", msg.str().c_str()); \ + } \ + } while (0) + +namespace { +Dims mgb_shape_to_mm_dims(TensorShape mgb_shp) { + size_t ndim = mgb_shp.ndim; + std::vector dimensions(ndim); + for (size_t i = 0; i < ndim; ++i) { + dimensions[i] = mgb_shp[i]; + } + return Dims{dimensions}; +} +TensorShape mm_dims_to_mgb_shape(const Dims& dims) { + TensorShape ret; + ret.ndim = dims.GetDimsNum(); + auto&& dimensions = dims.GetDims(); + for (size_t i = 0; i < ret.ndim; ++i) { + ret[i] = dimensions[i]; + } + return ret; +} +DType mm_dtype_to_mgb_dtype(DataType data_type) { + switch (data_type) { + case DataType::FLOAT16: +#if !MEGDNN_DISABLE_FLOAT16 + return dtype::Float16(); +#else + mgb_throw(MegBrainError, "Float16 support is disabled at compile time."); +#endif + case DataType::FLOAT32: + return dtype::Float32(); + case DataType::INT8: + return dtype::QuantizedS8(1.f); + case DataType::INT16: + return dtype::Int16(); + case DataType::INT32: + return dtype::Int32(); + case DataType::UINT8: + return dtype::Uint8(); + //! 
TODO: check scale + case DataType::QINT8: + return dtype::QuantizedS8(1.f); + case DataType::INT4: + return dtype::QuantizedS4(1.f); + case DataType::UINT4: + return dtype::Quantized4Asymm(1.f, static_cast(8)); + default: + mgb_throw( + MegBrainError, "DataType %u is not supported by MegEngine.", + static_cast(data_type)); + } +} +DataType mgb_dtype_to_mm_dtype(DType data_type) { + switch (data_type.enumv()) { +#if !MEGDNN_DISABLE_FLOAT16 + case DTypeEnum::Float16: + return DataType::FLOAT16; +#endif + case DTypeEnum::Float32: + return DataType::FLOAT32; + case DTypeEnum::QuantizedS8: + return DataType::QINT8; + case DTypeEnum::Int8: + return DataType::INT8; + case DTypeEnum::Int32: + return DataType::INT32; + case DTypeEnum::Uint8: + return DataType::UINT8; + case DTypeEnum::QuantizedS4: + return DataType::INT4; + case DTypeEnum::Quantized4Asymm: + return DataType::UINT4; + default: + mgb_throw( + MegBrainError, + "megengine data type %s is not supported by magicmind.", + data_type.name()); + } +} +}; // namespace + +/* =========== MagicMindRuntimeOpr::CambriconAllocator =========== */ +class MagicMindRuntimeOpr::CambriconAllocator final : public IAllocator { + CompNode m_cn; + std::mutex m_ptr2size_mtx; + ThinHashMap m_ptr2size; + +public: + explicit CambriconAllocator(CompNode cn); + ~CambriconAllocator() noexcept; + + void* AllocateRaw(size_t size, size_t alignment) override; + void DeallocateRaw(void* ptr) override; + + CompNode comp_node() const { return m_cn; } +}; + +MagicMindRuntimeOpr::CambriconAllocator::CambriconAllocator(CompNode cn) : m_cn{cn} { + mgb_assert( + cn.device_type() == CompNode::DeviceType::CAMBRICON, + "invalid comp node %s for CambriconAllocator", cn.to_string().c_str()); +} + +MagicMindRuntimeOpr::CambriconAllocator::~CambriconAllocator() noexcept { + MGB_LOCK_GUARD(m_ptr2size_mtx); + if (!m_ptr2size.empty()) { + std::string msg{"there are unreleased magicmind mem buffers:\n"}; + for (auto&& i : m_ptr2size) { + msg.append(ssprintf(" %p: 
%zu\n", i.first, i.second)); + } + mgb_log_error("%sabort now", msg.c_str()); + mgb_trap(); + } +} + +void* MagicMindRuntimeOpr::CambriconAllocator::AllocateRaw( + size_t size, size_t alignment) { + static bool enable_log = getenv("MGE_LOG_MAGICMIND_MEM_ALLOC"); + mgb_assert(!(alignment & (alignment - 1)), "invalid alignment(%zu)", alignment); + auto ret = m_cn.alloc_device(size); + mgb_assert( + !(reinterpret_cast(ret) & (alignment - 1)), + "alignment not required(ptr:%p,alignment:%zu)", ret, alignment); + if (enable_log) { + mgb_log("magicmind mem alloc on %s: size=%zu, align=%zu, ptr=%p", + m_cn.to_string().c_str(), size, alignment, ret); + } + { + MGB_LOCK_GUARD(m_ptr2size_mtx); + m_ptr2size[ret] = size; + } + return ret; +} + +void MagicMindRuntimeOpr::CambriconAllocator::DeallocateRaw(void* ptr) { + // m_ptr2size is also updated by AllocateRaw under m_ptr2size_mtx + MGB_LOCK_GUARD(m_ptr2size_mtx); + auto iter = m_ptr2size.find(ptr); + mgb_assert(iter != m_ptr2size.end(), "ptr %p not found", ptr); + m_ptr2size.erase(iter); + m_cn.free_device(ptr); +} + +/* ====================== MagicMindRuntimeOpr ==================== */ +MGB_DYN_TYPE_OBJ_FINAL_IMPL(MagicMindRuntimeOpr); +MagicMindRuntimeOpr::MagicMindRuntimeOpr( + IModelPtr model, CambriconAllocatorPtr allocator, const VarNodeArray& inputs, + const OperatorNodeConfig& config) + : Super(inputs[0]->owner_graph(), config, "magic_runtime", inputs), + m_allocator{std::move(allocator)}, + m_context{nullptr}, + m_engine{nullptr}, + m_model{std::move(model)} { + mgb_assert( + inputs[0]->comp_node().device_type() == CompNode::DeviceType::CAMBRICON, + "MagicMindRuntimeOpr can only be used on cambricon comp node; " + "got %s", + inputs[0]->comp_node().to_string().c_str()); + size_t nr_inputs = m_model->GetInputNum(); + mgb_assert( + nr_inputs == inputs.size(), "input number mismatch(got:%zu,expected:%zu)", + inputs.size(), nr_inputs); + for (auto i : inputs) { + add_input({i}); + } + size_t nr_outputs = m_model->GetOutputNum(); + for (size_t i = 0; i < nr_outputs; ++i) { + add_output(m_model->GetOutputName(i)); 
+ } + IModel::EngineConfig engine_config; + engine_config.device_type = "MLU"; + engine_config.allocator = m_allocator.get(); + auto&& cnrt_env = CompNodeEnv::from_comp_node(m_allocator->comp_node()).cnrt_env(); + cnrt_env.activate(); + m_engine = { + m_model->CreateIEngine(engine_config), + magicmind_intl::MagicMindDeleter()}; + mgb_assert( + m_engine != nullptr, + "create IEngine failed, corresponding MagicMindRuntimeOpr(%s)", cname()); + cg::add_workspace_output(this); + add_equivalence_component>(m_model.get()); +}; + +void MagicMindRuntimeOpr::scn_do_execute() { + mgb_assert(m_engine != nullptr); + mgb_assert(m_context != nullptr); + auto&& cnrt_env = CompNodeEnv::from_comp_node(input(0)->comp_node()).cnrt_env(); + cnrt_env.activate(); + std::vector inputs, outputs; + MM_CHECK(CreateInputTensors(m_context.get(), &inputs)); + MM_CHECK(CreateOutputTensors(m_context.get(), &outputs)); + size_t nr_inputs = input().size(); + mgb_assert(nr_inputs == inputs.size()); + for (size_t i = 0; i < nr_inputs; ++i) { + auto&& iname = m_model->GetInputName(i); + auto tensor = FindIRTTensorByName(inputs, iname); + mgb_assert( + tensor != nullptr, "failed to find input tensor(name:%s)", + iname.c_str()); + MM_CHECK(tensor->SetDimensions(mgb_shape_to_mm_dims(input(i)->shape()))); + MM_CHECK(tensor->SetData(input(i)->dev_tensor().raw_ptr())); + } + size_t nr_outputs = output().size(); + mgb_assert(nr_outputs == outputs.size() + 1); + for (size_t i = 0; i < nr_outputs - 1; ++i) { + auto&& oname = m_model->GetOutputName(i); + auto tensor = FindIRTTensorByName(outputs, oname); + mgb_assert( + tensor != nullptr, "failed to find output tensor(name:%s)", + oname.c_str()); + MM_CHECK(tensor->SetDimensions(mgb_shape_to_mm_dims(output(i)->shape()))); + MM_CHECK(tensor->SetData(output(i)->dev_tensor().raw_ptr())); + } + auto size = output().back()->dev_tensor().layout().span().dist_byte(); + MM_CHECK(m_context->SetWorkspace(output().back()->dev_tensor().raw_ptr(), size)); + 
MM_CHECK(m_context->Enqueue(inputs, outputs, cnrt_env.queue)); + for (auto&& i : inputs) { + i->SetData(nullptr); + i->Destroy(); + } + for (auto&& o : outputs) { + o->SetData(nullptr); + o->Destroy(); + } +} + +void MagicMindRuntimeOpr::get_output_var_shape( + const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const { + mgb_assert(m_engine != nullptr); + mgb_assert(input().size() == inp_shape.size()); + auto&& cnrt_env = CompNodeEnv::from_comp_node(input(0)->comp_node()).cnrt_env(); + cnrt_env.activate(); + if (m_context == nullptr) { + m_context = { + m_engine->CreateIContext(), + magicmind_intl::MagicMindDeleter()}; + mgb_assert( + m_context != nullptr, + "failed to create IContext, corresponding MagicMindRuntimeOpr(%s)", + cname()); + } + std::vector inputs, outputs; + MM_CHECK(CreateInputTensors(m_context.get(), &inputs)); + MM_CHECK(CreateOutputTensors(m_context.get(), &outputs)); + size_t nr_inputs = input().size(); + mgb_assert(nr_inputs == inputs.size()); + for (size_t i = 0; i < nr_inputs; ++i) { + auto&& iname = m_model->GetInputName(i); + auto tensor = FindIRTTensorByName(inputs, iname); + mgb_assert( + tensor != nullptr, "failed to find input tensor(name:%s)", + iname.c_str()); + MM_CHECK(tensor->SetDimensions(mgb_shape_to_mm_dims(inp_shape[i]))); + } + if (Status::OK() == m_context->InferOutputShape(inputs, outputs)) { + size_t nr_outputs = output().size(); + mgb_assert(nr_outputs == outputs.size() + 1); + for (size_t i = 0; i < nr_outputs - 1; ++i) { + auto&& oname = m_model->GetOutputName(i); + auto tensor = FindIRTTensorByName(outputs, oname); + mgb_assert( + tensor != nullptr, "failed to find output tensor(name:%s)", + oname.c_str()); + auto&& dims = tensor->GetDimensions(); + out_shape[i] = mm_dims_to_mgb_shape(dims); + } + std::vector shape(inp_shape.size()); + for (size_t i = 0; i < nr_inputs; ++i) { + shape[i] = mgb_shape_to_mm_dims(inp_shape[i]); + } + size_t wk_size = 0; + 
MM_CHECK(m_engine->QueryContextMaxWorkspaceSize(shape, &wk_size)); + out_shape.back() = {wk_size}; + } else { + mgb_assert( + false, "static shape infer for MagicMindRuntimeOpr(%s) failed", + cname()); + } + for (auto&& i : inputs) { + i->SetData(nullptr); + i->Destroy(); + } + for (auto&& o : outputs) { + o->SetData(nullptr); + o->Destroy(); + } +} + +void MagicMindRuntimeOpr::add_input_layout_constraint() { + //! default contiguous + for (auto i : input()) { + i->add_layout_constraint_contiguous(); + } +} + +void MagicMindRuntimeOpr::init_output_dtype() { + std::vector inp_dtypes = m_model->GetInputDataTypes(); + mgb_assert( + inp_dtypes.size() == input().size(), + "input size mismatch(got:%zu,expected:%zu)", inp_dtypes.size(), + input().size()); + size_t nr_inputs = input().size(); + for (size_t i = 0; i < nr_inputs; ++i) { + auto dt_mm = mm_dtype_to_mgb_dtype(inp_dtypes[i]); + auto dt_inp = input(i)->dtype(); + MGB_MARK_USED_VAR(dt_mm); + MGB_MARK_USED_VAR(dt_inp); + mgb_assert( + dt_mm.valid() && dt_inp.valid() && dt_mm.enumv() == dt_inp.enumv(), + "input %zu's data type mismatch with that in " + "IModel: expected %s, got %s", + i, dt_mm.name(), dt_inp.name()); + } + std::vector out_dtypes = m_model->GetOutputDataTypes(); + mgb_assert( + out_dtypes.size() == output().size(), + "output size mismatch(got:%zu,expected:%zu)", out_dtypes.size(), + output().size()); + size_t nr_outputs = output().size(); + for (size_t i = 0; i < nr_outputs; ++i) { + auto dt_mm = mm_dtype_to_mgb_dtype(out_dtypes[i]); + mgb_assert( + dt_mm.valid(), "output dtype checking failed: invalid dtype returned."); + if (dt_mm.enumv() == DTypeEnum::QuantizedS8) { + mgb_assert( + output(i)->dtype().valid(), + "user should specify scale of output tensor of " + "MagicMindRuntimeOpr."); + } + if (!output(i)->dtype().valid()) + output(i)->dtype(dt_mm); + } +} + +SymbolVarArray MagicMindRuntimeOpr::make( + IModelPtr model, CambriconAllocatorPtr allocator, const SymbolVarArray& src, + const 
OperatorNodeConfig& config) { + VarNodeArray var_node_array = cg::to_var_node_array(src); + auto magicmind_runtime_opr = std::make_unique( + std::move(model), std::move(allocator), var_node_array, config); + auto ret = cg::to_symbol_var_array( + src[0].node() + ->owner_graph() + ->insert_opr(std::move(magicmind_runtime_opr)) + ->output()); + ret.pop_back(); // remove workspace + return ret; +} + +SymbolVarArray MagicMindRuntimeOpr::make( + const void* buf, size_t size, const SymbolVarArray& src, + const OperatorNodeConfig& config) { + mgb_throw_if( + !CompNode::get_device_count(CompNode::DeviceType::CAMBRICON), SystemError, + "can not create MagicMindRuntimeOpr when MagicMind is not " + "available"); + auto cambricon_allocator = + std::make_shared(src[0].node()->comp_node()); + IModelPtr model = make_model_ptr(CreateIModel()); + MM_CHECK(model->DeserializeFromMemory(const_cast(buf), size)); + return make(std::move(model), std::move(cambricon_allocator), src, config); +} +#undef MM_CHECK + +#endif // MGB_CAMBRICON + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/cambricon/impl/magicmind_runtime_opr.sereg.h b/src/cambricon/impl/magicmind_runtime_opr.sereg.h new file mode 100644 index 0000000000000000000000000000000000000000..6e9150290881627cb98538ce2a34c6424ec76505 --- /dev/null +++ b/src/cambricon/impl/magicmind_runtime_opr.sereg.h @@ -0,0 +1,77 @@ +/** + * \file src/cambricon/impl/magicmind_runtime_opr.sereg.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +#include +#include "megbrain/cambricon/magicmind_runtime_opr.h" +#include "megbrain/serialization/sereg.h" + +namespace mgb { +namespace serialization { + +#define MM_CHECK(stmt) \ + do { \ + auto ret = (stmt); \ + if (ret != magicmind::Status::OK()) { \ + std::ostringstream msg; \ + msg << ret; \ + mgb_throw(MegBrainError, "mm failure(extra msg:%s)", msg.str().c_str()); \ + } \ + } while (0) + +template <> +struct OprLoadDumpImpl { + static void dump(OprDumpContext& ctx, const cg::OperatorNodeBase& opr_) { + auto&& opr = opr_.cast_final_safe(); + auto&& model = opr.inference_model(); + size_t size = 0; + MM_CHECK(model->GetSerializedModelSize(&size)); + std::string buf; + buf.resize(size); + MM_CHECK(model->SerializeToMemory( + reinterpret_cast(buf.data()), buf.size())); + ctx.dump_buf_with_len(buf.data(), buf.size()); + } + static cg::OperatorNodeBase* load( + OprLoadContext& ctx, const cg::VarNodeArray& inputs, + const OperatorNodeConfig& config) { + auto buf = ctx.load_shared_buf_with_len(); + return opr::MagicMindRuntimeOpr::make( + reinterpret_cast(buf.data()), buf.size(), + cg::to_symbol_var_array(inputs), config) + .at(0) + .node() + ->owner_opr(); + } +}; +} // namespace serialization + +namespace opr { +cg::OperatorNodeBase* opr_shallow_copy_magicmind_runtime_opr( + const serialization::OprShallowCopyContext& ctx, + const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs, + const OperatorNodeConfig& config) { + auto&& opr = opr_.cast_final_safe(); + return MagicMindRuntimeOpr::make( + opr.inference_model(), opr.cambricon_allocator(), + cg::to_symbol_var_array(inputs), config) + .at(0) + .node() + ->owner_opr(); +} + +MGB_SEREG_OPR(MagicMindRuntimeOpr, 0); +MGB_REG_OPR_SHALLOW_COPY(MagicMindRuntimeOpr, opr_shallow_copy_magicmind_runtime_opr); + +#undef MM_CHECK +} // namespace opr +} // namespace mgb + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git 
a/src/cambricon/include/megbrain/cambricon/magicmind_runtime_opr.h b/src/cambricon/include/megbrain/cambricon/magicmind_runtime_opr.h new file mode 100644 index 0000000000000000000000000000000000000000..c2f150a7afd1990b61afb724eae485b7fbe5e182 --- /dev/null +++ b/src/cambricon/include/megbrain/cambricon/magicmind_runtime_opr.h @@ -0,0 +1,92 @@ +/** + * \file src/cambricon/include/megbrain/cambricon/magicmind_runtime_opr.h + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#pragma once + +#include "megbrain/graph.h" +#include "megbrain/serialization/file.h" + +#if MGB_CAMBRICON + +#include "interface_runtime.h" + +namespace mgb { +namespace opr { +namespace magicmind_intl { +template +struct MagicMindDeleter { + void operator()(T* p) { + if (p != nullptr) + p->Destroy(); + } +}; + +template +using MagicMindUniquePtr = std::unique_ptr>; +} // namespace magicmind_intl + +MGB_DEFINE_OPR_CLASS( + MagicMindRuntimeOpr, cg::SingleCNOutshapePureByInshapeOprBase) // { + void scn_do_execute() override; + void get_output_var_shape( + const TensorShapeArray& inp_shape, + TensorShapeArray& out_shape) const override; + void add_input_layout_constraint() override; + void init_output_dtype() override; + +public: + template + using MagicMindUniquePtr = magicmind_intl::MagicMindUniquePtr; + //! Due to the requirement of shallow copy, the IModel should be shared among + //! instances of magicmind operators. 
+ using IModelPtr = std::shared_ptr; + using IContextPtr = MagicMindUniquePtr; + using IEnginePtr = MagicMindUniquePtr; + class CambriconAllocator; + using CambriconAllocatorPtr = std::shared_ptr; + + MagicMindRuntimeOpr( + IModelPtr model, CambriconAllocatorPtr allocator, + const VarNodeArray& inputs, const OperatorNodeConfig& config); + + //! get underlying inference model + const IModelPtr& inference_model() const { return m_model; } + + //! get underlying cambricon allocator + const CambriconAllocatorPtr& cambricon_allocator() const { return m_allocator; } + + //! + static SymbolVarArray make( + IModelPtr model, CambriconAllocatorPtr allocator, const SymbolVarArray& src, + const OperatorNodeConfig& config); + + //! creator a magicmind runtime operator from a serialized memory buffer + static SymbolVarArray make( + const void* buf, size_t buf_size, const SymbolVarArray& src, + const OperatorNodeConfig& config = {}); + + static IModelPtr make_model_ptr(magicmind::IModel* model) { + return {model, magicmind_intl::MagicMindDeleter()}; + } + +private: + CambriconAllocatorPtr m_allocator; + mutable IContextPtr m_context; + IEnginePtr m_engine; + IModelPtr m_model; +}; + +} // namespace opr +} // namespace mgb + +#endif // MGB_CAMBRICON + +// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/cambricon/test/cambricon_runtime_opr.cpp b/src/cambricon/test/cambricon_runtime_opr.cpp index 4af23874746c1a3dff1cf755fcbbaf2bdc12b73c..0cfdc8d9fe020c4b1e824b10b69fdbe7928dc5c2 100644 --- a/src/cambricon/test/cambricon_runtime_opr.cpp +++ b/src/cambricon/test/cambricon_runtime_opr.cpp @@ -22,6 +22,7 @@ using namespace mgb; +#if CNRT_MAJOR_VERSION < 5 namespace { class CnmlModelContext { public: @@ -620,6 +621,7 @@ TEST(TestCambriconRuntimeOpr, CrossCNCopy) { MGB_ASSERT_TENSOR_NEAR(out_cnml, out_mgb, 1e-4); } +#endif #endif // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/src/core/impl/comp_node/cambricon/comp_node.cpp 
b/src/core/impl/comp_node/cambricon/comp_node.cpp index b1a200e595cf7cbf1c0c94b75bc43a913f7b6b73..2b7217b837d9c850ce2c7df46cd50e3da30ddb77 100644 --- a/src/core/impl/comp_node/cambricon/comp_node.cpp +++ b/src/core/impl/comp_node/cambricon/comp_node.cpp @@ -193,11 +193,19 @@ public: std::pair get_mem_status_bytes() override { m_env.cnrt_env().activate(); cndevMemoryInfo_t mem_info; +#if CNRT_MAJOR_VERSION >= 5 + mem_info.version = CNDEV_VERSION_5; +#endif MGB_CNDEV_CHECK(cndevGetMemoryUsage(&mem_info, m_env.cnrt_env().device)); size_t tot, used, free; constexpr size_t mb2size = 1024 * 1024; +#if CNRT_MAJOR_VERSION >= 5 + tot = static_cast(mem_info.physicalMemoryTotal) * mb2size; + used = static_cast(mem_info.physicalMemoryUsed) * mb2size; +#else tot = static_cast(mem_info.PhysicalMemoryTotal) * mb2size; used = static_cast(mem_info.PhysicalMemoryUsed) * mb2size; +#endif free = tot - used + m_mem_alloc->get_free_memory_dev().tot; return {tot, free}; } @@ -417,10 +425,18 @@ size_t CambriconCompNodeImpl::DeviceInfo::get_mem_reserve_size() { } size_t tot, free; cndevMemoryInfo_t mem_info; +#if CNRT_MAJOR_VERSION >= 5 + mem_info.version = CNDEV_VERSION_5; +#endif MGB_CNDEV_CHECK(cndevGetMemoryUsage(&mem_info, dev_num)); constexpr size_t mb2size = 1024 * 1024; +#if CNRT_MAJOR_VERSION >= 5 + tot = static_cast(mem_info.physicalMemoryTotal) * mb2size; + size_t used = static_cast(mem_info.physicalMemoryUsed) * mb2size; +#else tot = static_cast(mem_info.PhysicalMemoryTotal) * mb2size; size_t used = static_cast(mem_info.PhysicalMemoryUsed) * mb2size; +#endif free = tot - used; return free - get_min_system_memory(free); } else { @@ -701,11 +717,19 @@ void mgb::mem_alloc::CambriconRawAlloctor::get_mem_info(size_t& free, size_t& to } mgb_assert(device >= 0, "current device has not been initialized in static data"); cndevMemoryInfo_t mem_info; +#if CNRT_MAJOR_VERSION >= 5 + mem_info.version = CNDEV_VERSION_5; +#endif auto ret = cndevGetMemoryUsage(&mem_info, device); if (ret == 
CNDEV_SUCCESS) { constexpr size_t mb2size = 1024 * 1024; +#if CNRT_MAJOR_VERSION >= 5 + tot = static_cast(mem_info.physicalMemoryTotal) * mb2size; + size_t used = static_cast(mem_info.physicalMemoryUsed) * mb2size; +#else tot = static_cast(mem_info.PhysicalMemoryTotal) * mb2size; size_t used = static_cast(mem_info.PhysicalMemoryUsed) * mb2size; +#endif free = tot - used; return; } diff --git a/src/core/impl/comp_node_env.cpp b/src/core/impl/comp_node_env.cpp index a0130662f3f65a797588ea92bf8eb4255eb02383..e981fb5bd535c15d1fbeedec0954936576580f1c 100644 --- a/src/core/impl/comp_node_env.cpp +++ b/src/core/impl/comp_node_env.cpp @@ -318,6 +318,7 @@ void CompNodeEnv::init_rocm_async( #endif #if MGB_CAMBRICON +#if CNRT_MAJOR_VERSION < 5 const char* mgb::cnml_get_error_string(cnmlStatus_t err) { switch (err) { #define cb(_err) \ @@ -341,6 +342,7 @@ const char* mgb::cnml_get_error_string(cnmlStatus_t err) { } return "Unknown CNML error"; } +#endif void mgb::_on_cnrt_error( const char* expr, cnrtRet_t err, const char* file, const char* func, int line) { @@ -357,6 +359,7 @@ void mgb::_on_cndev_error( cndevGetErrorString(err), expr, file, func, line); } +#if CNRT_MAJOR_VERSION < 5 void mgb::_on_cnml_error( const char* expr, cnmlStatus_t err, const char* file, const char* func, int line) { @@ -365,6 +368,7 @@ void mgb::_on_cnml_error( cnml_get_error_string(err), expr, file, func, line); } #endif +#endif void CompNodeEnv::init_cpu(const CpuEnv& env, CompNode comp_node) { m_comp_node = comp_node; diff --git a/src/core/include/megbrain/comp_node_env.h b/src/core/include/megbrain/comp_node_env.h index 4d051729b80a77d559e71b436b7843a5893adabb..3f4623c345f3dab9234d388111402f575270ad67 100644 --- a/src/core/include/megbrain/comp_node_env.h +++ b/src/core/include/megbrain/comp_node_env.h @@ -116,8 +116,10 @@ #if MGB_CAMBRICON #include -#include #include +#if CNRT_MAJOR_VERSION < 5 +#include +#endif #if MGB_ENABLE_LOGGING #define MGB_CNRT_CHECK(expr) \ @@ -204,15 +206,19 @@ namespace 
mgb { #endif #if MGB_CAMBRICON +#if CNRT_MAJOR_VERSION < 5 const char* cnml_get_error_string(cnmlStatus_t err); +#endif [[noreturn]] void _on_cnrt_error( const char* expr, cnrtRet_t err, const char* file, const char* func, int line); [[noreturn]] void _on_cndev_error( const char* expr, cndevRet_t err, const char* file, const char* func, int line); +#if CNRT_MAJOR_VERSION < 5 [[noreturn]] void _on_cnml_error( const char* expr, cnmlStatus_t err, const char* file, const char* func, int line); #endif +#endif class CPUDispatcher : public MegcoreCPUDispatcher { public: @@ -456,6 +462,13 @@ public: initialized = cnrt_err == CNRT_RET_SUCCESS; auto cndev_err = cndevInit(0); initialized &= cndev_err == CNDEV_SUCCESS; +#if CNRT_MAJOR_VERSION >= 5 + mgb_throw_if( + !initialized, CnrtError, + "cnrt/cndev initialize failed: (cnrt:%d, " + "cndev:%d)", + static_cast(cnrt_err), static_cast(cndev_err)); +#else auto cnml_err = cnmlInit(0); initialized &= cnml_err == CNML_STATUS_SUCCESS; mgb_throw_if( @@ -464,11 +477,14 @@ public: "cndev:%d, cnml: %d)", static_cast(cnrt_err), static_cast(cndev_err), static_cast(cnml_err)); +#endif } } ~InitStatus() { if (initialized) { +#if CNRT_MAJOR_VERSION < 5 MGB_CNML_CHECK(cnmlExit()); +#endif MGB_CNDEV_CHECK(cndevRelease()); cnrtDestroy(); initialized = false; diff --git a/src/core/test/comp_node.cpp b/src/core/test/comp_node.cpp index 1a366cb2bedcfcf91a661b6c143fbfcdc313e894..b39a4a3f9ca0544062187be7fc67b6511ee05b7c 100644 --- a/src/core/test/comp_node.cpp +++ b/src/core/test/comp_node.cpp @@ -674,6 +674,7 @@ TEST(TestCompNodeCambricon, D2DCopy) { REQUIRE_CAMBRICON_DEVICE(1); auto cn = CompNode::load("cambricon0"); run(cn); + REQUIRE_CAMBRICON_DEVICE(2); cn = CompNode::load("cambricon1"); run(cn); } diff --git a/src/serialization/impl/sereg_caller.cpp b/src/serialization/impl/sereg_caller.cpp index 207bcae3fe3768922e94cb612d0f7a0a426ff7f0..f6331733a76d719eeb9a2e8e7ce324c3882ee9cf 100644 --- a/src/serialization/impl/sereg_caller.cpp +++ 
b/src/serialization/impl/sereg_caller.cpp @@ -43,6 +43,7 @@ void call_sereg() {} #endif #if MGB_CAMBRICON #include "../../cambricon/impl/cambricon_runtime_opr.sereg.h" +#include "../../cambricon/impl/magicmind_runtime_opr.sereg.h" #endif #if MGB_CUSTOM_OP