Commit f91881ff authored by Megvii Engine Team

MegEngine: Initial commit of MegEngine.

GitOrigin-RevId: f0c8338beb9cac953bd2d8b76710790940dc9300
# Mark generated files as binary, ignore them in git diff.
# dnn
dnn/src/cuda/conv_bias/int8/kimpl/* binary
dnn/src/cuda/conv_bias/int8_imma/kimpl/* binary
dnn/src/cuda/batch_conv_bias/int8/kimpl/* binary
---
name: Bug Issue
about: Use this template to report a problem you encountered
title: BUG Issue
labels: ''
assignees: ''
---
<!-- Please describe the problem you encountered clearly and concisely -->
## Environment
1. System environment:
2. MegEngine version:
3. Python version:
## Steps to reproduce
1.
2.
3.
## Please provide the key code snippet(s) needed to investigate the problem
## Please provide the complete log and error messages
---
name: Documentation Issue
about: Use this template to report problems found in the documentation
title: ''
labels: ''
assignees: ''
---
## Documentation link
<!-- Please paste the link to the documentation in question -->
## Problem description
<!-- Please describe your problem clearly and concisely -->
---
name: Feature Request
about: Use this template to propose a feature or suggestion
title: Feature Request
labels: ''
assignees: ''
---
<!-- Please describe your request clearly and concisely -->
## Background
<!-- Please briefly describe the scenario in which you need this feature -->
## Feature description
<!-- Please describe your request in detail and give acceptance criteria -->
---
name: Others Issue
about: If none of the categories above fits, use this template to raise your issue
title: ''
labels: ''
assignees: ''
---
## Please briefly describe your request
/build/
__pycache__/
[submodule "third_party/Halide"]
path = third_party/Halide
url = https://github.com/halide/Halide.git
[submodule "third_party/OpenBLAS"]
path = third_party/OpenBLAS
url = https://github.com/xianyi/OpenBLAS.git
[submodule "third_party/cppzmq"]
path = third_party/cppzmq
url = https://github.com/zeromq/cppzmq.git
[submodule "third_party/gtest"]
path = third_party/gtest
url = https://github.com/google/googletest.git
[submodule "third_party/mkl-dnn"]
path = third_party/intel-mkl-dnn
url = https://github.com/intel/mkl-dnn.git
[submodule "third_party/libzmq"]
path = third_party/libzmq
url = https://github.com/zeromq/libzmq.git
[submodule "third_party/protobuf"]
path = third_party/protobuf
url = https://github.com/protocolbuffers/protobuf
[submodule "third_party/MegRay"]
path = third_party/MegRay
url = https://github.com/MegEngine/MegRay.git
[submodule "third_party/flatbuffers"]
path = third_party/flatbuffers
url = https://github.com/google/flatbuffers.git
cmake_minimum_required(VERSION 3.9.0)
project(MegEngine)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
if(NOT MSVC)
set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq <TARGET> <LINK_FLAGS> <OBJECTS>")
set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)
set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
set_property(CACHE MGE_ARCH PROPERTY STRINGS AUTO
x86_64 i386
naive fallback
)
if(${MGE_ARCH} STREQUAL "AUTO")
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64")
set(MGE_ARCH "x86_64")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
set(MGE_ARCH "i386")
else()
message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
endif()
endif()
CHECK_CXX_COMPILER_FLAG(-fuse-ld=gold CXX_SUPPORT_GOLD)
if(CXX_SUPPORT_GOLD)
message("-- Using GNU gold linker.")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fuse-ld=gold")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fuse-ld=gold")
endif()
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" ON)
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
if(MGE_WITH_CUDA)
include(CheckLanguage)
check_language(CUDA)
if(NOT CMAKE_CUDA_COMPILER)
message(FATAL_ERROR "CUDA compiler not found in PATH")
endif()
enable_language(CUDA)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()
if(NOT MGE_WITH_CUDA)
message("-- Disable JIT support, as CUDA is not enabled.")
set(MGE_WITH_JIT OFF)
set(MGE_WITH_HALIDE OFF)
message("-- Disable TensorRT support, as CUDA is not enabled.")
set(MGE_WITH_TRT OFF)
endif()
find_package(PythonInterp 3 REQUIRED)
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA)
set_property(TARGET Threads::Threads
PROPERTY INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
"$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
endif()
if(CMAKE_THREAD_LIBS_INIT)
add_definitions(-DMGB_HAVE_THREAD=1)
endif()
set(MGE_BLAS MKL CACHE STRING "BLAS implementation used by MegEngine.")
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)
set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")
if(NOT CMAKE_CUDA_HOST_COMPILER)
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
endif()
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
if(MGE_DEBUG_UTIL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMGB_ENABLE_DEBUG_UTIL=1")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMGB_ENABLE_DEBUG_UTIL=0")
endif()
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()
if(NOT MGE_ENABLE_RTTI)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif()
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
if(NOT MGE_ENABLE_EXCEPTIONS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
endif()
# RTTI
if(MGE_ENABLE_RTTI)
add_definitions(-DMEGDNN_ENABLE_MANGLING=0 -DMEGDNN_ENABLE_RTTI=1)
else()
add_definitions(-DMEGDNN_ENABLE_MANGLING=1 -DMEGDNN_ENABLE_RTTI=0)
endif()
# Logging
if(MGE_ENABLE_LOGGING)
add_definitions(-DMEGDNN_ENABLE_LOGGING=1 -DMGB_ENABLE_LOGGING=1 -DMGB_ENABLE_JSON=1)
else()
add_definitions(-DMEGDNN_ENABLE_LOGGING=0 -DMGB_ENABLE_LOGGING=0 -DMGB_ENABLE_JSON=0)
endif()
# Exception
if(MGE_ENABLE_EXCEPTIONS)
add_definitions(-DMEGDNN_ENABLE_EXCEPTIONS=1)
else()
message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
add_definitions(-DMEGDNN_ENABLE_EXCEPTIONS=0)
endif()
if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
include(cmake/Halide.cmake)
add_definitions(-DMGB_JIT_HALIDE=1)
endif()
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
if(MGE_WITH_TEST)
include(cmake/gtest.cmake)
endif()
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
if(NOT MGE_WITH_CUDA)
message("-- Disable distributed support, as CUDA is not enabled.")
set(MGE_WITH_DISTRIBUTED OFF)
endif()
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_PYTHON_MODULE "Build MegEngine Python Module." ON)
if(MGE_INFERENCE_ONLY)
message("-- Disable distributed support for inference only build.")
set(MGE_WITH_DISTRIBUTED OFF)
message("-- Disable python module for inference only build.")
set(MGE_WITH_PYTHON_MODULE OFF)
message("-- Disable tests for inference only build.")
set(MGE_WITH_TEST OFF)
endif()
if(MGE_WITH_DISTRIBUTED)
include(cmake/protobuf.cmake)
include(cmake/zmq.cmake)
endif()
if(MGB_WITH_FLATBUFFERS)
include(cmake/flatbuffers.cmake)
endif()
if(MSVC)
add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG")
endif()
if(MGE_WITH_CUDA)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
get_filename_component(_NAME ${path} NAME)
if(NOT ${_NAME} STREQUAL "stubs")
list(APPEND CUDA_LINK_DIRECTORIES ${path})
endif()
endforeach()
link_directories(${CUDA_LINK_DIRECTORIES})
set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
if(NOT MGE_ENABLE_RTTI)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
endif()
if(NOT MGE_ENABLE_EXCEPTIONS)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
endif()
if(NOT MGE_CUDA_GENCODE)
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMEGDNN_THREADS_512=0")
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=compute_75")
elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "9.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "9.0.0")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=compute_70")
else()
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_35,code=sm_35")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=compute_61")
endif()
else()
message(FATAL_ERROR "Unsupported CUDA host arch.")
endif()
else()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DMEGDNN_THREADS_512=1")
endif()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")
include(cmake/cudnn.cmake)
if(MGE_WITH_TRT)
include(cmake/tensorrt.cmake)
endif()
if(MGE_CUDA_USE_STATIC)
if(MGE_WITH_TRT)
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libcudnn -Wl,--no-whole-archive)
else()
list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
endif()
list(APPEND MGE_CUDA_LIBS cusolver_static cublas_static curand_static culibos cudart_static cusparse_static)
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
list(APPEND MGE_CUDA_LIBS cublasLt_static)
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0")
# mark all symbols from liblapack_static.a as weak to avoid
# duplicated definition with mkl
find_library(
LAPACK_STATIC_PATH lapack_static
HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
if(NOT LAPACK_STATIC_PATH)
message(FATAL_ERROR "liblapack_static.a not found")
endif()
set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)
# add a target that run objcopy
add_custom_command(
OUTPUT ${LAPACK_STATIC_COPY_PATH}
COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
VERBATIM)
add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})
# create a library named "lapack_static_weak"
add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
add_dependencies(lapack_static_weak lapack_static_weak_target)
set_target_properties(
lapack_static_weak PROPERTIES
IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
endif()
else()
if(MGE_WITH_TRT)
list(APPEND MGE_CUDA_LIBS libnvinfer)
endif()
list(APPEND MGE_CUDA_LIBS libcudnn)
if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
endif()
endif()
add_subdirectory(dnn/cuda-stub)
list(APPEND MGE_CUDA_LIBS nvrtc cuda-stub nvToolsExt)
set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS}")
endif()
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
if(MGE_WITH_CUDA AND NOT ${CMAKE_VERSION} VERSION_LESS "3.10.0")
message("-- Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
endif()
endif()
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
if(${MGE_BLAS} STREQUAL "MKL")
include(cmake/mkl.cmake)
set(MGE_BLAS_LIBS libmkl)
elseif(${MGE_BLAS} STREQUAL "OpenBLAS")
include(cmake/OpenBLAS.cmake)
set(MGE_BLAS_LIBS libopenblas)
else()
message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
endif()
endif()
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support." ON)
# MKLDNN build
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
add_definitions(-DMEGDNN_X86_WITH_MKL_DNN)
include(cmake/MKL_DNN.cmake)
endif()
add_subdirectory(dnn)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DMGB_ASSERT_LOC=1")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DMGB_ASSERT_LOC=0")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -DMGB_ASSERT_LOC=1")
set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS_MINSIZEREL} -DMGB_ASSERT_LOC=0")
if(MGE_ENABLE_RTTI)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMGB_VERBOSE_TYPEINFO_NAME=1")
endif()
if(MGE_ENABLE_EXCEPTIONS)
add_definitions(-DMGB_ENABLE_EXCEPTION=1)
else()
add_definitions(-DMGB_ENABLE_EXCEPTION=0)
endif()
list(APPEND MGB_OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)
add_custom_command(
OUTPUT
${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
COMMAND ${PYTHON_EXECUTABLE} ${MGB_OPR_PARAM_DEFS_SCRIPT} ${MGB_OPR_PARAM_DEFS_SRCS}
${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
VERBATIM
)
list(APPEND MGB_OPR_PARAM_DEFS_OUTS
${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
)
install(FILES ${MGB_OPR_PARAM_DEFS_OUTS} DESTINATION include/megbrain/opr/)
list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
target_include_directories(mgb_opr_param_defs INTERFACE ${MGB_OPR_PARAM_DEFS_INC})
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
if(MGE_WITH_DISTRIBUTED)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
endif()
add_subdirectory(src)
add_subdirectory(sdk/load-and-run)
if(MGE_WITH_PYTHON_MODULE)
add_subdirectory(python_module)
endif()
if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
add_subdirectory(test)
endif()
if(TARGET _mgb)
add_custom_target(
develop
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_BINARY_DIR}/python_module/megengine/_internal/$<TARGET_FILE_NAME:_mgb>
${CMAKE_CURRENT_SOURCE_DIR}/python_module/megengine/_internal/$<TARGET_FILE_NAME:_mgb>
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_BINARY_DIR}/python_module/megengine/_internal/mgb.py
${CMAKE_CURRENT_SOURCE_DIR}/python_module/megengine/_internal/mgb.py
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_BINARY_DIR}/python_module/megengine/_internal/opr.py
${CMAKE_CURRENT_SOURCE_DIR}/python_module/megengine/_internal/opr.py
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_BINARY_DIR}/python_module/megengine/_internal/opr_param_defs.py
${CMAKE_CURRENT_SOURCE_DIR}/python_module/megengine/_internal/opr_param_defs.py
COMMAND ${CMAKE_COMMAND} -E create_symlink
${CMAKE_CURRENT_BINARY_DIR}/python_module/megengine/_internal/include
${CMAKE_CURRENT_SOURCE_DIR}/python_module/megengine/_internal/include
DEPENDS _mgb
VERBATIM
)
endif()
set(MGB_CUDA ${MGE_WITH_CUDA})
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
set(MGB_ASSERT_LOC 1)
else()
set(MGB_ASSERT_LOC 0)
endif()
set(MGB_ENABLE_DEBUG_UTIL ${MGE_DEBUG_UTIL})
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})
if(MGE_INFERENCE_ONLY)
set(MGB_ENABLE_GRAD 0)
else()
set(MGB_ENABLE_GRAD 1)
endif()
set(MGB_BUILD_SLIM_SERVING ${MGE_INFERENCE_ONLY})
configure_file(src/core/include/megbrain_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
file(READ src/core/include/megbrain_build_config.h _CONTENT)
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h ${_CONTENT})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION include)
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
## Our Standards
Examples of behavior that contributes to a positive environment for our community include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others’ private information, such as a physical or email address, without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a professional setting
All MegEngine forums and spaces are meant for professional interactions, and any behavior which could reasonably be considered inappropriate in a professional setting is unacceptable.
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at megengine@megvii.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
## Attribution
This Code of Conduct is updated from the Contributor Covenant, version 2.0, available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
# MegEngine Contributor License Agreement
In order to clarify the intellectual property license granted with Contributions from any person or entity, the open source project MegEngine ("MegEngine") must have a Contributor License Agreement (CLA) on file that has been signed by each Contributor, indicating agreement to the license terms below. This license is for your protection as a Contributor as well as the protection of MegEngine and its users; it does not change your rights to use your own Contributions for any other purpose.
This Agreement allows an individual or an entity to submit Contributions to MegEngine, to authorize Contributions submitted by its designated employees to MegEngine, and to grant copyright and patent licenses thereto. You accept and agree to the following terms and conditions for Your present and future Contributions submitted to MegEngine. Except for the license granted herein to MegEngine and recipients of software distributed by MegEngine, You reserve all right, title, and interest in and to Your Contributions.
1. **Definitions**. "You" (or "Your") shall mean the copyright owner or legal entity authorized by the copyright owner that is making this Agreement with MegEngine. For legal entities, the entity making a Contribution and all other entities that control, are controlled by, or are under common control with that entity are considered to be a single Contributor.
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"Contribution" shall mean the code, documentation or any original work of authorship, including any modifications or additions to an existing work, that is intentionally submitted by You to MegEngine for inclusion in, or documentation of, any of the products owned or managed by MegEngine (the "Work").
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to MegEngine or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, MegEngine for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by You as "Not a Contribution."
2. **Grant of Copyright License**. Subject to the terms and conditions of this Agreement, You hereby grant to MegEngine and to recipients of software distributed by MegEngine a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense, and distribute Your Contributions and such derivative works.
3. **Grant of Patent License**. Subject to the terms and conditions of this Agreement, You hereby grant to MegEngine and to recipients of software distributed by MegEngine a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by You that are necessarily infringed by Your Contribution(s) alone or by combination of Your Contribution(s) with the Work to which such Contribution(s) was submitted. If any entity institutes patent litigation against You or any other entity (including a crossclaim or counterclaim in a lawsuit) alleging that Your Contribution, or the Work to which You have contributed, constitutes direct or contributory patent infringement, then any patent licenses granted to that entity under this Agreement for that Contribution or Work shall terminate as of the date such litigation is filed.
4. You represent that You are legally entitled to grant the above license. If You are an entity, You represent further that each of Your employees designated by You is authorized to submit Contributions on Your behalf. If You are an individual and Your employer(s) has rights to intellectual property that You create that includes Your Contributions, You represent further that You have received permission to make Contributions on behalf of that employer, that Your employer has waived such rights for Your Contributions to MegEngine, or that Your employer has executed a separate CLA with MegEngine.
5. If you do post content or submit material on MegEngine and unless we indicate otherwise, you grant MegEngine a nonexclusive, royalty-free, perpetual, irrevocable, and fully sublicensable right to use, reproduce, modify, adapt, publish, perform, translate, create derivative works from, distribute, and display such content throughout the world in any media. You grant MegEngine and sublicensees the right to use your GitHub Public Profile, including but not limited to name, that you submit in connection with such content. You represent and warrant that you own or otherwise control all of the rights to the content that you post; that the content is accurate; that use of the content you supply does not violate this policy and will not cause injury to any person or entity; and that you will indemnify MegEngine for all claims resulting from content you supply. MegEngine has the right but not the obligation to monitor and edit or remove any activity or content. MegEngine takes no responsibility and assumes no liability for any content posted by you or any third party.
6. You represent that each of Your Contributions is Your original creation. Should You wish to submit work that is not Your original creation, You may submit it to MegEngine separately from any Contribution, identifying the complete details of its source and of any license or other restriction (including, but not limited to, related patents, trademarks, and license agreements) of which You are personally aware, and conspicuously marking the work as "Submitted on behalf of a third party: [named here]".
7. You are not expected to provide support for Your Contributions, except to the extent You desire to provide support. You may provide support for free, for a fee, or not at all. Unless required by applicable law or agreed to in writing, You provide Your Contributions on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE.
8. You agree to notify MegEngine of any facts or circumstances of which You become aware that would make these representations inaccurate in any respect.
9. The effective date of this Contributor License Agreement is 2020/3/23. MegEngine reserves the right to update or change this Agreement at any time, by posting the most current version of the Agreement on MegEngine, with a new effective date. All such changes in the Agreement are effective from the effective date. Your continued use of MegEngine after we post any such changes signifies your agreement to those changes. If you do not agree to the then-current Agreement, you must immediately discontinue using MegEngine.
MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
2. Grant of Copyright License.
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
3. Grant of Patent License.
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
4. Redistribution.
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
You must give any other recipients of the Work or Derivative Works a copy of this License; and
You must cause any modified files to carry prominent notices stating that You changed the files; and
You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
5. Submission of Contributions.
Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
6. Trademarks.
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty.
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
8. Limitation of Liability.
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability.
While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
# MegEngine
![MegEngine Logo](logo.png)
English | [中文](README_CN.md)
MegEngine is a fast, scalable and easy-to-use numerical evaluation framework, with auto-differentiation.
------
## Installation
**NOTE:** MegEngine currently supports Linux only, with Python 3.5 or higher. On Windows 10 you can try [WSL (Windows Subsystem for Linux)](https://docs.microsoft.com/en-us/windows/wsl) to use Linux within Windows.
### Binaries
Commands to install from binaries via pip wheels are as follows:
```bash
pip3 install megengine -f https://megengine.org.cn/whl/mge.html
```
## Build from Source
### Prerequisites
Most of the dependencies of MegEngine are located in the `third_party` directory, and you do
not need to install them yourself. You can prepare these repositories by executing:
```bash
./third_party/prepare.sh
./third_party/install-mkl.sh
```
But some dependencies must be installed manually:
* [CUDA](https://developer.nvidia.com/cuda-toolkit-archive) (>=10.1) and [cuDNN](https://developer.nvidia.com/cudnn) (>=7.6) are required when building MegEngine with CUDA support (default ON)
* [TensorRT](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html)(>=5.1.5) is required when building with TensorRT support (default ON)
* LLVM/Clang(>=6.0) is required when building with Halide JIT support (default ON)
* Python(>=3.5), Numpy, SWIG(>=3.0) are required to build Python modules. (default ON)
### Build
MegEngine prefers an `out-of-source` build and compiles in a `mostly-static` way.
Here are the instructions:
1. Make a directory for the build.
```bash
mkdir -p build
cd build
```
2. Generate build configurations by `CMake`.
For CUDA build:
```bash
cmake .. -DMGE_WITH_TEST=ON
```
For CPU only build, use `-DMGE_WITH_CUDA=OFF`:
```bash
cmake .. -DMGE_WITH_CUDA=OFF -DMGE_WITH_TEST=ON
```
For deployment with C++ only, use `-DMGE_INFERENCE_ONLY=ON`, and turn off test with `-DMGE_WITH_TEST=OFF`:
```bash
cmake .. -DMGE_INFERENCE_ONLY=ON -DMGE_WITH_TEST=OFF
```
Use `-DCMAKE_INSTALL_PREFIX=YOUR_PATH` to specify the install path.
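For example, an inference-only configuration that installs into a custom prefix (the path below is purely illustrative) could look like:
```bash
cmake .. -DMGE_INFERENCE_ONLY=ON -DMGE_WITH_TEST=OFF -DCMAKE_INSTALL_PREFIX=/opt/megengine
```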
3. Start to build.
```bash
make -j$(nproc)
```
4. [optional] Install the library if compiled for deployment at step 2.
```bash
make install
```
Here are some other useful options for the build; an example invocation combining several of them follows the list.
* `MGE_ARCH` specifies the architecture MegEngine is built for. (default AUTO)
* `MGE_WITH_DISTRIBUTED` enables multi-machine distributed support. (default ON)
* `MGE_WITH_PYTHON_MODULE` builds the Python module. (default ON)
* `MGE_BLAS` chooses `MKL` or `OpenBLAS` as the BLAS library for MegEngine. (default `MKL`)
* `MGE_CUDA_GENCODE` supplies the `-gencode` option for `nvcc`. (default: not set)
* `MGE_DISABLE_FLOAT16` disables float16 support. (default OFF)
* `MGE_ENABLE_EXCEPTIONS` enables C++ exception support. (default ON)
* `MGE_ENABLE_LOGGING` enables logging in MegEngine. (default AUTO)
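As a sketch, a build that switches the BLAS backend to OpenBLAS and limits CUDA code generation to a single architecture (the values here are illustrative, not recommendations) could be configured as:
```bash
cmake .. -DMGE_BLAS=OpenBLAS -DMGE_CUDA_GENCODE="-gencode arch=compute_61,code=sm_61" -DMGE_WITH_TEST=ON
```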
More options can be found by:
```bash
cd build
cmake -LAH .. 2>/dev/null| grep -B 1 'MGE_' | less
```
## How to Contribute
* MegEngine adopts [Contributor Covenant](https://contributor-covenant.org) to maintain our community. Please read the [Code of Conduct](CODE_OF_CONDUCT.md) to get more information.
* Every contributor of MegEngine must sign a Contributor License Agreement (CLA) to clarify the intellectual property license granted with the contributions. For more details, please refer to the [Contributor License Agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md).
* You can help MegEngine better in many ways:
* Write code.
* Improve [documentation](https://github.com/MegEngine/Docs).
* Answer questions on [MegEngine Forum](https://discuss.megengine.org.cn), or Stack Overflow.
* Contribute new models in [MegEngine Model Hub](https://github.com/megengine/hub).
* Try a new idea on [MegStudio](https://studio.brainpp.com).
* Report or investigate [bugs and issues](https://github.com/MegEngine/MegEngine/issues).
* Review [Pull Requests](https://github.com/MegEngine/MegEngine/pulls).
* Star MegEngine repo.
* Reference MegEngine in your papers and articles.
* Recommend MegEngine to your friends.
* ...
We believe we can build an open and friendly community and power humanity with AI.
## How to contact us
* Issue: [github.com/MegEngine/MegEngine/issues](https://github.com/MegEngine/MegEngine/issues)
* Email: [megengine-support@megvii.com](mailto:megengine-support@megvii.com)
* Forum: [discuss.megengine.org.cn](https://discuss.megengine.org.cn)
* QQ: 1029741705
## Resources
- [MegEngine](https://megengine.org.cn)
- [MegStudio](https://studio.brainpp.com)
- [Brain++](https://brainpp.megvii.com)
## License
MegEngine is Licensed under the Apache License, Version 2.0
Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
# MegEngine
![MegEngine Logo](logo.png)
[English](README.md) | Chinese
MegEngine is a fast, scalable, and easy-to-use numerical computation framework with support for automatic differentiation.
------
## Installation
**NOTE:** MegEngine currently supports installation on Linux only, with Python 3.5 or higher (Python 2 is not supported). Windows 10 users can try it through [WSL (Windows Subsystem for Linux)](https://docs.microsoft.com/en-us/windows/wsl).
### Install with a package manager
The command to install via pip is:
```bash
pip3 install megengine -f https://megengine.org.cn/whl/mge.html
```
## Build from Source
### Prerequisites
Most of the dependencies needed to build MegEngine are located in the `third_party` directory and can be installed automatically with:
```bash
$ ./third_party/prepare.sh
$ ./third_party/install-mkl.sh
```
However, some dependencies must be installed manually:
* [CUDA](https://developer.nvidia.com/cuda-toolkit-archive) (>=10.1) and [cuDNN](https://developer.nvidia.com/cudnn) (>=7.6), if building with CUDA support (default ON)
* [TensorRT](https://docs.nvidia.com/deeplearning/sdk/tensorrt-archived/index.html) (>=5.1.5), if building with TensorRT support (default ON)
* LLVM/Clang (>=6.0), if building with Halide JIT support (default ON)
* Python (>=3.5), NumPy, and SWIG (>=3.0), if building the Python module (default ON)
### Build
MegEngine follows the out-of-source build principle ([Out-of-source Build](https://zh.m.wikibooks.org/zh-hans/CMake_%E5%85%A5%E9%96%80/Out-of-source_Build)) and is compiled in a mostly-static way. The build steps are as follows:
1. Create a build directory:
```bash
mkdir -p build
cd build
```
2. Generate build configurations with `CMake`:
For a CUDA-enabled build:
```bash
cmake .. -DMGE_WITH_TEST=ON
```
For a CPU-only build, use the `-DMGE_WITH_CUDA=OFF` option:
```bash
cmake .. -DMGE_WITH_CUDA=OFF -DMGE_WITH_TEST=ON
```
For C++-only deployment, use `-DMGE_INFERENCE_ONLY=ON`, and optionally turn off the tests with `-DMGE_WITH_TEST=OFF`:
```bash
cmake .. -DMGE_INFERENCE_ONLY=ON -DMGE_WITH_TEST=OFF
```
Use `-DCMAKE_INSTALL_PREFIX=YOUR_PATH` to specify the install path.
3. Start the build:
```bash
make -j$(nproc)
```
4. [optional] Install the MegEngine C++ library if it will be used for deployment:
```bash
make install
```
Other commonly used build options:
* `MGE_ARCH` specifies the target architecture to build for. (default: detect the current platform automatically)
* `MGE_WITH_DISTRIBUTED` enables multi-machine distributed support. (default ON)
* `MGE_WITH_PYTHON_MODULE` builds the Python module. (default ON)
* `MGE_BLAS` chooses `MKL` or `OpenBLAS` as the BLAS backend. (default `MKL`)
* `MGE_CUDA_GENCODE` supplies the `-gencode` option passed to `nvcc`. (default: not set)
* `MGE_DISABLE_FLOAT16` disables `float16` support. (default OFF)
* `MGE_ENABLE_EXCEPTIONS` enables C++ exception support. (default ON)
* `MGE_ENABLE_LOGGING` enables MegEngine logging. (default AUTO)
More options can be listed with:
```bash
cd build
cmake -LAH .. 2>/dev/null| grep -B 1 'MGE_' | less
```
## How to Contribute
* MegEngine adopts the [Contributor Covenant](https://contributor-covenant.org) to manage its open-source community. Please read the [Code of Conduct](CODE_OF_CONDUCT.md) for more information.
* Every MegEngine contributor must sign a Contributor License Agreement (CLA) to clarify the intellectual property license granted with the contributions. For more details, please refer to the [agreement](CONTRIBUTOR_LICENSE_AGREEMENT.md).
* You are welcome to help make MegEngine better in the following ways:
* Contribute code;
* Improve the [documentation](https://github.com/MegEngine/Docs);
* Answer questions on the [MegEngine Forum](https://discuss.megengine.org.cn) and Stack Overflow;
* Contribute new models to the [MegEngine Model Hub](https://github.com/megengine/hub);
* Try new ideas on the [MegStudio](https://studio.brainpp.com) platform;
* Report [bugs and issues](https://github.com/MegEngine/MegEngine/issues) encountered during use;
* Review [Pull Requests](https://github.com/MegEngine/MegEngine/pulls);
* Star the MegEngine repo;
* Cite MegEngine in your papers and articles;
* Recommend MegEngine to your friends;
* ...
We believe we can build an open and friendly open-source community and use AI to benefit humanity.
## Contact Us
* Issues: [github.com/MegEngine/MegEngine/issues](https://github.com/MegEngine/MegEngine/issues)
* Email: [megengine-support@megvii.com](mailto:megengine-support@megvii.com)
* Forum: [discuss.megengine.org.cn](https://discuss.megengine.org.cn)
* QQ: 1029741705
## Resources
- [MegEngine](https://megengine.org.cn)
- [MegStudio](https://studio.brainpp.com)
- [Brain++](https://brainpp.megvii.com)
## License
MegEngine is licensed under the Apache License, Version 2.0.
Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
/output/
/build_image.sh
/build_wheel.sh
FROM quay.io/pypa/manylinux2010_x86_64:2020-01-31-046f791
ENV UID=1024 \
PATH=${PATH}:/usr/local/cuda/bin \
LIBRARY_PATH=${LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/opt/cudnn/lib64:/opt/tensorrt/lib \
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:/opt/cudnn/lib64:/opt/tensorrt/lib \
CPATH=${CPATH}:/usr/local/cuda/include:/opt/cudnn/include:/opt/tensorrt/include
ADD init_image.sh /tmp
RUN /tmp/init_image.sh && rm -f /tmp/init_image.sh
#!/bin/bash -e
cd $(dirname $0)
docker build -t env_manylinux2010:latest .
#!/bin/bash -e
CWD=$(dirname $0)
BASEDIR=$(readlink -f ${CWD}/../../..)
OUTPUTDIR=$(readlink -f ${CWD}/output)
USERID=$(id -u)
TMPFS_ARGS="--tmpfs /tmp:exec"
pushd ${BASEDIR}/third_party >/dev/null
./prepare.sh
popd >/dev/null
cd ${CWD}
mkdir -p ${OUTPUTDIR}
if [[ -z ${CUDA_ROOT_DIR} ]]; then
echo "Environment variable CUDA_ROOT_DIR not set."
exit -1
fi
if [[ -z ${CUDNN_ROOT_DIR} ]]; then
echo "Environment variable CUDNN_ROOT_DIR not set."
exit -1
fi
if [[ -z ${TENSORRT_ROOT_DIR} ]]; then
echo "Environment variable TENSORRT_ROOT_DIR not set."
exit -1
fi
docker run -it --rm $TMPFS_ARGS -e UID=${USERID} -e LOCAL_VERSION=${LOCAL_VERSION} -e ALL_PYTHON=${ALL_PYTHON} -v ${CUDA_ROOT_DIR}:/usr/local/cuda -v ${CUDNN_ROOT_DIR}:/opt/cudnn -v ${TENSORRT_ROOT_DIR}:/opt/tensorrt -v ${BASEDIR}:/home/code -v ${OUTPUTDIR}:/home/output:rw env_manylinux2010:latest /home/code/ci/docker_env/manylinux2010/do_build.sh
#!/bin/bash -e
ALL_PYTHON=${ALL_PYTHON}
if [[ -z ${ALL_PYTHON} ]]
then
ALL_PYTHON="35m 36m 37m 38"
fi
EXTRA_CMAKE_ARGS=
for ver in ${ALL_PYTHON}
do
python_ver=${ver:0:2}
BUILD_DIR=/tmp/build_megengine/python${python_ver}
MAJOR=${python_ver:0:1}
MINOR=${ver:1}
PYTHON_DIR=/opt/python/cp${python_ver}-cp${ver}/
EXT_NAME=_mgb.cpython-${ver}-x86_64-linux-gnu.so
mkdir -p ${BUILD_DIR}
pushd ${BUILD_DIR} >/dev/null
cmake /home/code -DMGE_WITH_DISTRIBUTED=ON -DMGE_WITH_CUDA=ON \
-DCMAKE_PREFIX_PATH=${PYTHON_DIR} \
-DMGE_WITH_TEST=ON -DCMAKE_INSTALL_PREFIX=/home/output \
-DPYTHON_LIBRARY=${PYTHON_DIR}lib/ \
-DPYTHON_INCLUDE_DIR=${PYTHON_DIR}include/python${MAJOR}.${MINOR}/ \
${EXTRA_CMAKE_ARGS}
make -j$(nproc)
make install
mkdir -p staging
mkdir -p /home/output/debug
cp -a python_module/{megengine,setup.py} staging/
pushd dnn/cuda-stub/ >/dev/null
strip -s libcuda.so
ln -sf libcuda.so libcuda.so.1
popd >/dev/null
pushd staging >/dev/null
pushd megengine/_internal >/dev/null
objcopy --only-keep-debug _mgb.so ${EXT_NAME}.dbg
strip -s _mgb.so
objcopy --add-gnu-debuglink=${EXT_NAME}.dbg _mgb.so
cp -a ${EXT_NAME}.dbg /home/output/debug
mkdir -p lib/ucx
cp -L /usr/local/cuda/lib*/libnvrtc-builtins.so lib
cp -L ${BUILD_DIR}/third_party/MegRay/third_party/ucx/lib/ucx/*.so lib/ucx/
strip -s lib/ucx/*.so
popd >/dev/null
${PYTHON_DIR}/bin/python setup.py bdist_wheel
popd >/dev/null
popd >/dev/null
pushd /home/output >/dev/null
LD_LIBRARY_PATH=${BUILD_DIR}/dnn/cuda-stub:$LD_LIBRARY_PATH auditwheel repair -L _internal/lib ${BUILD_DIR}/staging/dist/Meg*.whl
chown -R ${UID}.${UID} .
popd >/dev/null
rm -rf ${BUILD_DIR}
done
#!/bin/bash -e
GET_PIP_URL='https://bootstrap.pypa.io/get-pip.py'
SWIG_URL='https://downloads.sourceforge.net/project/swig/swig/swig-3.0.12/swig-3.0.12.tar.gz?use_mirror=autoselect'
LLVM_URL='https://github.com/llvm-mirror/llvm/archive/release_60.tar.gz'
CLANG_URL='https://github.com/llvm-mirror/clang/archive/release_60.tar.gz'
yum erase -y cmake cmake28
yum install -y python34-pip pcre-devel
pip3 install --no-cache-dir --only-binary :all: -U pip==19.1
pip3 install --no-cache-dir --only-binary :all: cmake==3.16.3
for ver in 35m 36m 37m 38
do
python_ver=${ver:0:2}
curl ${GET_PIP_URL} | /opt/python/cp${python_ver}-cp${ver}/bin/python - \
--no-cache-dir --only-binary :all:
/opt/python/cp${python_ver}-cp${ver}/bin/pip install \
--no-cache-dir --only-binary :all: numpy==1.18.1
done
pushd /home >/dev/null
curl -sSL ${SWIG_URL} | tar xz
pushd swig-3.0.12 >/dev/null
mkdir build
pushd build >/dev/null
../configure
make -j$(nproc)
make install
popd >/dev/null
popd >/dev/null
rm -rf swig-3.0.12
curl -sSL ${LLVM_URL} | tar xz
pushd llvm-release_60 >/dev/null
mkdir build
pushd build >/dev/null
cmake .. -DCMAKE_PREFIX_PATH=/opt/python/cp36-cp36m/ \
-DCMAKE_BUILD_TYPE=Release
make -j$(nproc)
make install
popd >/dev/null
popd >/dev/null
rm -rf llvm-release_60
curl -sSL ${CLANG_URL} | tar xz
pushd clang-release_60 >/dev/null
mkdir build
pushd build >/dev/null
cmake .. -DCMAKE_PREFIX_PATH=/opt/python/cp36-cp36m/ \
-DCMAKE_BUILD_TYPE=Release
make -j$(nproc)
make install
popd >/dev/null
popd >/dev/null
rm -rf clang-release_60
popd >/dev/null
pushd /tmp >/dev/null
curl -sSL https://github.com/NixOS/patchelf/archive/0.10.tar.gz | tar xz
pushd /tmp/patchelf-0.10 >/dev/null
patch -p1 <<'EOF'
diff --git a/src/patchelf.cc b/src/patchelf.cc
index 0b4965a..7aae7a4 100644
--- a/src/patchelf.cc
+++ b/src/patchelf.cc
@@ -1074,13 +1074,6 @@ void ElfFile<ElfFileParamNames>::modifySoname(sonameMode op, const std::string &
return;
}
- /* Zero out the previous SONAME */
- unsigned int sonameSize = 0;
- if (soname) {
- sonameSize = strlen(soname);
- memset(soname, 'X', sonameSize);
- }
-
debug("new SONAME is '%s'\n", newSoname.c_str());
/* Grow the .dynstr section to make room for the new SONAME. */
@@ -1264,7 +1257,6 @@ void ElfFile<ElfFileParamNames>::modifyRPath(RPathOp op,
unsigned int rpathSize = 0;
if (rpath) {
rpathSize = strlen(rpath);
- memset(rpath, 'X', rpathSize);
}
debug("new rpath is '%s'\n", newRPath.c_str());
EOF
./bootstrap.sh && ./configure && make install-strip
popd
rm -rf /tmp/patchelf-0.10
popd
yum clean all
include(ExternalProject)
find_package(LLVM 6.0 REQUIRED CONFIG)
STRING(REPLACE "." ";" LLVM_VERSION_LIST ${LLVM_PACKAGE_VERSION})
list(GET LLVM_VERSION_LIST 0 LLVM_VERSION_MAJOR)
list(GET LLVM_VERSION_LIST 1 LLVM_VERSION_MINOR)
set(HALIDE_DIR "${PROJECT_SOURCE_DIR}/third_party/Halide" CACHE STRING "halide directory")
set(HALIDE_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/Halide)
set(HALIDE_LIB ${HALIDE_BUILD_DIR}/lib/libHalide.a)
ExternalProject_add(
halide
SOURCE_DIR ${HALIDE_DIR}
PREFIX ${HALIDE_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${HALIDE_BUILD_DIR} -DWITH_APPS=OFF -DWITH_TESTS=OFF -DWITH_TUTORIALS=OFF -DHALIDE_SHARED_LIBRARY=OFF -DHALIDE_REQUIRE_LLVM_VERSION=${LLVM_VERSION_MAJOR}${LLVM_VERSION_MINOR} -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DTARGET_MIPS=OFF -DTARGET_POWERPC=OFF
BUILD_BYPRODUCTS ${HALIDE_LIB}
)
set(HALIDE_INC ${HALIDE_BUILD_DIR}/include)
file(MAKE_DIRECTORY ${HALIDE_INC})
add_library(libhalide STATIC IMPORTED GLOBAL)
add_dependencies(libhalide halide)
set_target_properties(
libhalide PROPERTIES
IMPORTED_LOCATION ${HALIDE_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${HALIDE_INC}
)
set(LLVM_COMPONENTS mcjit;bitwriter;linker;passes;X86;ARM;AArch64;Hexagon;NVPTX;AMDGPU)
llvm_map_components_to_libnames(HALIDE_LLVM_LIBS ${LLVM_COMPONENTS})
include(ExternalProject)
include(GNUInstallDirs)
set(MKLDNN_DIR "${PROJECT_SOURCE_DIR}/third_party/intel-mkl-dnn" CACHE STRING "mkldnn directory")
set(MKLDNN_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/intel-mkl-dnn)
set(MKLDNN_LIB ${MKLDNN_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libdnnl.a)
if(MGE_BLAS STREQUAL "MKL")
list(APPEND MKLDNN_BUILD_ARGS -D_DNNL_USE_MKL=ON -DMKLROOT=${MKL_ROOT_DIR})
else()
list(APPEND MKLDNN_BUILD_ARGS -D_DNNL_USE_MKL=OFF)
endif()
ExternalProject_add(
mkl_dnn
SOURCE_DIR ${MKLDNN_DIR}
PREFIX ${MKLDNN_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DDNNL_BUILD_TESTS=OFF -DDNNL_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${MKLDNN_BUILD_DIR} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DDNNL_LIBRARY_TYPE=STATIC -DDNNL_CPU_RUNTIME=DNNL_RUNTIME_SEQ ${MKLDNN_BUILD_ARGS}
BUILD_BYPRODUCTS ${MKLDNN_LIB}
)
set(MKLDNN_INC ${MKLDNN_BUILD_DIR}/include)
file(MAKE_DIRECTORY ${MKLDNN_INC})
add_library(libmkl_dnn STATIC IMPORTED GLOBAL)
add_dependencies(libmkl_dnn mkl_dnn)
set_target_properties(
libmkl_dnn PROPERTIES
IMPORTED_LOCATION ${MKLDNN_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${MKLDNN_INC}
)
# - Find the NumPy libraries
# This module finds if NumPy is installed, and sets the following variables
# indicating where it is.
#
# TODO: Update to provide the libraries and paths for linking npymath lib.
#
# NUMPY_FOUND - was NumPy found
# NUMPY_VERSION - the version of NumPy found as a string
# NUMPY_VERSION_MAJOR - the major version number of NumPy
# NUMPY_VERSION_MINOR - the minor version number of NumPy
# NUMPY_VERSION_PATCH - the patch version number of NumPy
# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601
# NUMPY_INCLUDE_DIR - path to the NumPy include files
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
if(PYTHONINTERP_FOUND)
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c"
"import numpy as n; print(n.__version__); print(n.get_include());"
RESULT_VARIABLE __result
OUTPUT_VARIABLE __output
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(__result MATCHES 0)
string(REGEX REPLACE ";" "\\\\;" __values ${__output})
string(REGEX REPLACE "\r?\n" ";" __values ${__values})
list(GET __values 0 NUMPY_VERSION)
list(GET __values 1 NUMPY_INCLUDE_DIR)
string(REGEX MATCH "^([0-9])+\\.([0-9])+\\.([0-9])+" __ver_check "${NUMPY_VERSION}")
if(NOT "${__ver_check}" STREQUAL "")
set(NUMPY_VERSION_MAJOR ${CMAKE_MATCH_1})
set(NUMPY_VERSION_MINOR ${CMAKE_MATCH_2})
set(NUMPY_VERSION_PATCH ${CMAKE_MATCH_3})
math(EXPR NUMPY_VERSION_DECIMAL
"(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}")
string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIR ${NUMPY_INCLUDE_DIR})
else()
unset(NUMPY_VERSION)
unset(NUMPY_INCLUDE_DIR)
message(STATUS "Requested NumPy version and include path, but got instead:\n${__output}\n")
endif()
endif()
else()
message(STATUS "To find NumPy Python interpretator is required to be found.")
endif()
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NumPy REQUIRED_VARS NUMPY_INCLUDE_DIR NUMPY_VERSION
VERSION_VAR NUMPY_VERSION)
if(NUMPY_FOUND)
message(STATUS "NumPy ver. ${NUMPY_VERSION} found (include: ${NUMPY_INCLUDE_DIR})")
endif()
include(ExternalProject)
include(GNUInstallDirs)
set(OPENBLAS_DIR "${PROJECT_SOURCE_DIR}/third_party/OpenBLAS" CACHE STRING "OpenBLAS directory")
set(OPENBLAS_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/OpenBLAS)
set(OPENBLAS_INC ${OPENBLAS_BUILD_DIR}/include)
set(OPENBLAS_LIB ${OPENBLAS_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libopenblas.a)
if(${CMAKE_GENERATOR} STREQUAL "Ninja")
set(MAKE_COMMAND make)
else()
set(MAKE_COMMAND "$(MAKE)")
endif()
ExternalProject_add(
openblas
SOURCE_DIR ${OPENBLAS_DIR}
PREFIX ${OPENBLAS_BUILD_DIR}
CMAKE_GENERATOR "Unix Makefiles"
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${OPENBLAS_BUILD_DIR} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_POSITION_INDEPENDENT_CODE=ON
BUILD_COMMAND ${MAKE_COMMAND}
BUILD_BYPRODUCTS ${OPENBLAS_LIB}
)
file(MAKE_DIRECTORY ${OPENBLAS_INC})
add_library(libopenblas STATIC IMPORTED GLOBAL)
add_dependencies(libopenblas openblas)
set_target_properties(
libopenblas PROPERTIES
IMPORTED_LOCATION ${OPENBLAS_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${OPENBLAS_BUILD_DIR}/include
)
find_package(PkgConfig)
if(${PkgConfig_FOUND})
pkg_check_modules(PC_CUDNN QUIET CUDNN)
endif()
if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
string(REPLACE ":" ";" SYSTEM_LIBRARY_PATHS $ENV{LIBRARY_PATH})
endif()
if(MGE_CUDA_USE_STATIC)
find_library(CUDNN_LIBRARY
NAMES libcudnn_static.a libcudnn_static.lib
PATHS $ENV{LD_LIBRARY_PATH} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
HINTS ${SYSTEM_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CUDNN library." )
else()
find_library(CUDNN_LIBRARY
NAMES libcudnn.so libcudnn.dylib cudnn64.dll
PATHS $ENV{LD_LIBRARY_PATH} ${CUDNN_ROOT_DIR} ${PC_CUDNN_LIBRARY_DIRS} ${CMAKE_INSTALL_PREFIX}
HINTS ${SYSTEM_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "CUDNN library." )
endif()
if(CUDNN_LIBRARY STREQUAL "CUDNN_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find CuDNN Library")
endif()
get_filename_component(__found_cudnn_root ${CUDNN_LIBRARY}/../.. REALPATH)
find_path(CUDNN_INCLUDE_DIR
NAMES cudnn.h
HINTS ${PC_CUDNN_INCLUDE_DIRS} ${CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_cudnn_root}
PATH_SUFFIXES include
DOC "Path to CUDNN include directory." )
if(CUDNN_INCLUDE_DIR STREQUAL "CUDNN_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find CuDNN Library")
endif()
file(READ ${CUDNN_INCLUDE_DIR}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
CUDNN_MAJOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
CUDNN_MAJOR_VERSION "${CUDNN_MAJOR_VERSION}")
string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
CUDNN_MINOR_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
CUDNN_MINOR_VERSION "${CUDNN_MINOR_VERSION}")
string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
CUDNN_PATCH_VERSION "${CUDNN_VERSION_FILE_CONTENTS}")
string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
CUDNN_PATCH_VERSION "${CUDNN_PATCH_VERSION}")
set(CUDNN_VERSION ${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION})
if(MGE_CUDA_USE_STATIC)
add_library(libcudnn STATIC IMPORTED)
else()
add_library(libcudnn SHARED IMPORTED)
endif()
set_target_properties(libcudnn PROPERTIES
IMPORTED_LOCATION ${CUDNN_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${CUDNN_INCLUDE_DIR})
message("-- Found CuDNN: ${__found_cudnn_root} (found version: ${CUDNN_VERSION})")
if (MGE_USE_SYSTEM_LIB)
find_package(FlatBuffers REQUIRED)
return()
endif()
option(FLATBUFFERS_BUILD_TESTS "" OFF)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/flatbuffers
${CMAKE_CURRENT_BINARY_DIR}/flatbuffers
EXCLUDE_FROM_ALL)
add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/gtest ${CMAKE_CURRENT_BINARY_DIR}/gtest EXCLUDE_FROM_ALL)
find_path(MKL_ROOT_DIR
include/mkl_cblas.h
PATHS
${PROJECT_SOURCE_DIR}/third_party/mkl/${MGE_ARCH}
$ENV{MKLDIR}
/opt/intel/mkl/*/
/opt/intel/cmkl/*/
/Library/Frameworks/Intel_MKL.framework/Versions/Current/lib/universal
)
if(${MKL_ROOT_DIR} STREQUAL "MKL_ROOT_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find MKL")
endif()
message("-- Build with MKL in ${MKL_ROOT_DIR}")
find_path(MKL_INCLUDE_DIR
mkl_cblas.h
PATHS
${MKL_ROOT_DIR}/include
${INCLUDE_INSTALL_DIR}
)
option(MGE_MKL_USE_STATIC "Build MegEngine with static MKL" ON)
if(MGE_MKL_USE_STATIC)
find_library(MKL_CORE_LIBRARY
NAMES libmkl_core.a libmkl_core.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
find_library(MKL_SEQUENTIAL_LIBRARY
NAMES libmkl_sequential.a libmkl_sequential.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
if(${MGE_ARCH} STREQUAL "x86_64")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_ilp64.a libmkl_intel_ilp64.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
elseif(${MGE_ARCH} STREQUAL "x86_32")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_32.a libmkl_intel_32.lib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
endif()
add_library(libmkl INTERFACE)
target_link_libraries(libmkl INTERFACE -Wl,--start-group ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY} -Wl,--end-group)
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR})
else()
find_library(MKL_CORE_LIBRARY
NAMES libmkl_core.so libmkl_core.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
find_library(MKL_SEQUENTIAL_LIBRARY
NAMES libmkl_sequential.so libmkl_sequential.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
if(${MGE_ARCH} STREQUAL "x86_64")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_ilp64.so libmkl_intel_ilp64.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
elseif(${MGE_ARCH} STREQUAL "x86_32")
find_library(MKL_IPL_LIBRARY
NAMES libmkl_intel_32.so libmkl_intel_32.dylib
PATHS ${MKL_ROOT_DIR}/lib/${MKL_ARCH_DIR} ${MKL_ROOT_DIR}/lib/)
endif()
    add_library(libmkl INTERFACE)
    target_link_libraries(libmkl INTERFACE ${MKL_CORE_LIBRARY} ${MKL_SEQUENTIAL_LIBRARY} ${MKL_IPL_LIBRARY})
target_include_directories(libmkl INTERFACE ${MKL_INCLUDE_DIR})
endif()
if(${MGE_ARCH} STREQUAL "x86_64")
target_compile_definitions(libmkl INTERFACE -DMKL_ILP64)
endif()
function(PROTOBUF_GENERATE_CPP_WITH_ROOT SRCS HDRS ROOT_DIR)
if(NOT ARGN)
message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP_WITH_ROOT() called without any proto files")
return()
endif()
set(${SRCS})
set(${HDRS})
foreach(FIL ${ARGN})
set(ABS_FIL ${ROOT_DIR}/${FIL})
get_filename_component(FIL_WE ${FIL} NAME_WE)
get_filename_component(FIL_DIR ${ABS_FIL} PATH)
file(RELATIVE_PATH REL_DIR ${ROOT_DIR} ${FIL_DIR})
list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
"${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE}
ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FIL_DIR} ${ABS_FIL} -I ${PROTOBUF_INCLUDE_DIRS}
DEPENDS ${ABS_FIL} libprotobuf
COMMENT "Running C++ protocol buffer compiler on ${FIL}"
VERBATIM)
endforeach()
set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
set(${SRCS} ${${SRCS}} PARENT_SCOPE)
set(${HDRS} ${${HDRS}} PARENT_SCOPE)
endfunction()
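# Illustrative invocation of the helper above; the proto file name and target
# are hypothetical examples, not files that exist in this tree:
#
#   protobuf_generate_cpp_with_root(PROTO_SRCS PROTO_HDRS
#       ${CMAKE_CURRENT_SOURCE_DIR} example/example.proto)
#   add_library(example_proto STATIC ${PROTO_SRCS} ${PROTO_HDRS})
#   target_link_libraries(example_proto PUBLIC libprotobuf)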
if(MGE_USE_SYSTEM_LIB)
find_package(Protobuf)
if(Protobuf_FOUND)
add_library(libprotobuf INTERFACE)
target_link_libraries(libprotobuf INTERFACE ${Protobuf_LIBRARIES})
target_include_directories(libprotobuf INTERFACE ${Protobuf_INCLUDE_DIRS})
get_filename_component(Protobuf_ROOT ${Protobuf_INCLUDE_DIR} DIRECTORY)
set(PROTOBUF_ROOT ${Protobuf_ROOT})
set(PROTOBUF_PROTOC_EXECUTABLE ${Protobuf_PROTOC_EXECUTABLE})
set(PROTOBUF_INCLUDE_DIRS ${Protobuf_INCLUDE_DIRS})
return()
endif()
endif()
include(ExternalProject)
include(GNUInstallDirs)
set(PROTOBUF_DIR "${PROJECT_SOURCE_DIR}/third_party/protobuf" CACHE STRING "protobuf directory")
set(PROTOBUF_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/protobuf)
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobufd.a)
else()
set(PROTOBUF_LIB ${PROTOBUF_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libprotobuf.a)
endif()
set(PROTOBUF_PROTOC_EXECUTABLE ${PROTOBUF_BUILD_DIR}/bin/protoc)
ExternalProject_add(
protobuf
SOURCE_DIR ${PROTOBUF_DIR}/cmake
PREFIX ${PROTOBUF_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_BUILD_DIR} -Dprotobuf_BUILD_EXAMPLES=OFF -Dprotobuf_BUILD_TESTS=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON
BUILD_BYPRODUCTS ${PROTOBUF_LIB} ${PROTOBUF_PROTOC_EXECUTABLE}
)
set(PROTOBUF_INC ${PROTOBUF_BUILD_DIR}/include)
file(MAKE_DIRECTORY ${PROTOBUF_INC})
add_library(libprotobuf STATIC IMPORTED GLOBAL)
add_dependencies(libprotobuf protobuf)
set_target_properties(
libprotobuf PROPERTIES
IMPORTED_LOCATION ${PROTOBUF_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${PROTOBUF_BUILD_DIR}/include
)
add_executable(protoc IMPORTED GLOBAL)
add_dependencies(protoc protobuf)
set_target_properties(
protoc PROPERTIES
IMPORTED_LOCATION ${PROTOBUF_BUILD_DIR}/bin/protoc
)
set(PROTOBUF_ROOT ${PROTOBUF_BUILD_DIR})
set(PROTOBUF_PROTOC_EXECUTABLE protoc)
set(PROTOBUF_INCLUDE_DIRS ${PROTOBUF_BUILD_DIR}/include)
if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
string(REPLACE ":" ";" SYSTEM_LIBRARY_PATHS $ENV{LIBRARY_PATH})
endif()
if(MGE_CUDA_USE_STATIC)
find_library(TRT_LIBRARY
NAMES libnvinfer_static.a libnvinfer_static.lib
PATHS $ENV{LD_LIBRARY_PATH} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${SYSTEM_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT library." )
else()
find_library(TRT_LIBRARY
NAMES libnvinfer.so libnvinfer.dylib
PATHS $ENV{LD_LIBRARY_PATH} ${TRT_ROOT_DIR} ${CMAKE_INSTALL_PREFIX}
HINTS ${SYSTEM_LIBRARY_PATHS}
PATH_SUFFIXES lib lib64
DOC "TRT library." )
endif()
if(TRT_LIBRARY STREQUAL "TRT_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Can not find TensorRT Library")
endif()
get_filename_component(__found_trt_root ${TRT_LIBRARY}/../.. REALPATH)
find_path(TRT_INCLUDE_DIR
NAMES NvInfer.h
HINTS ${TRT_ROOT_DIR} ${CUDA_TOOLKIT_INCLUDE} ${__found_trt_root}
PATH_SUFFIXES include
DOC "Path to TRT include directory." )
if(TRT_INCLUDE_DIR STREQUAL "TRT_INCLUDE_DIR-NOTFOUND")
message(FATAL_ERROR "Can not find TensorRT Library")
endif()
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInfer.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
if (TensorRT_MAJOR STREQUAL "")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MAJOR REGEX "^#define NV_TENSORRT_MAJOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_MINOR REGEX "^#define NV_TENSORRT_MINOR [0-9]+.*$")
file(STRINGS "${TRT_INCLUDE_DIR}/NvInferVersion.h" TensorRT_PATCH REGEX "^#define NV_TENSORRT_PATCH [0-9]+.*$")
endif()
string(REGEX REPLACE "^#define NV_TENSORRT_MAJOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MAJOR "${TensorRT_MAJOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_MINOR ([0-9]+).*$" "\\1" TensorRT_VERSION_MINOR "${TensorRT_MINOR}")
string(REGEX REPLACE "^#define NV_TENSORRT_PATCH ([0-9]+).*$" "\\1" TensorRT_VERSION_PATCH "${TensorRT_PATCH}")
set(TRT_VERSION_STRING "${TensorRT_VERSION_MAJOR}.${TensorRT_VERSION_MINOR}.${TensorRT_VERSION_PATCH}")
if(MGE_CUDA_USE_STATIC)
add_library(libnvinfer STATIC IMPORTED)
else()
add_library(libnvinfer SHARED IMPORTED)
endif()
set_target_properties(libnvinfer PROPERTIES
IMPORTED_LOCATION ${TRT_LIBRARY}
INTERFACE_INCLUDE_DIRECTORIES ${TRT_INCLUDE_DIR}
)
message("-- Found TensorRT: ${__found_trt_root} (found version: ${TRT_VERSION_STRING})")
include(ExternalProject)
include(GNUInstallDirs)
set(ZMQ_DIR ${PROJECT_SOURCE_DIR}/third_party/libzmq CACHE STRING "ZMQ directory")
set(ZMQ_BUILD_DIR ${PROJECT_BINARY_DIR}/third_party/libzmq)
set(ZMQ_LIB ${ZMQ_BUILD_DIR}/${CMAKE_INSTALL_LIBDIR}/libzmq.a)
ExternalProject_add(
zmq
SOURCE_DIR ${ZMQ_DIR}
PREFIX ${ZMQ_BUILD_DIR}
CMAKE_ARGS -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DCMAKE_INSTALL_PREFIX=${ZMQ_BUILD_DIR} -DWITH_PERF_TOOL=OFF -DZMQ_BUILD_TESTS=OFF -DENABLE_CPACK=OFF -DENABLE_CURVE=OFF
BUILD_BYPRODUCTS ${ZMQ_LIB}
)
set(ZMQ_INC ${ZMQ_BUILD_DIR}/include)
file(MAKE_DIRECTORY ${ZMQ_INC})
add_library(libzmq STATIC IMPORTED GLOBAL)
add_dependencies(libzmq zmq)
set_target_properties(
libzmq PROPERTIES
IMPORTED_LOCATION ${ZMQ_LIB}
INTERFACE_INCLUDE_DIRECTORIES ${ZMQ_INC}
)
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
if(${MGE_BLAS} STREQUAL "MKL")
add_definitions(-DMEGDNN_X86_WITH_MKL)
elseif(${MGE_BLAS} STREQUAL "OpenBLAS")
add_definitions(-DMEGDNN_X86_WITH_OPENBLAS)
endif()
endif()
# Enable Naive
if(${MGE_ARCH} STREQUAL "naive")
add_definitions(-DMEGDNN_NAIVE=1)
message(WARNING "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
else()
add_definitions(-DMEGDNN_NAIVE=0)
endif()
if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
add_definitions(-DMEGDNN_X86=1)
if(${MGE_ARCH} STREQUAL "x86_64")
add_definitions(-DMEGDNN_X86_64 -DMEGDNN_64_BIT)
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif()
else()
add_definitions(-DMEGDNN_X86_32)
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
endif()
endif()
if(NOT MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse")
endif()
endif()
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")
list(APPEND OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/scripts/opr_param_defs.py)
set(OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/scripts/gen_param_defs.py)
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/include/)
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/megdnn)
add_custom_command(
OUTPUT
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
COMMAND ${PYTHON_EXECUTABLE} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS}
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
COMMAND ${PYTHON_EXECUTABLE} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS}
/dev/null --write-cppjson ${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
VERBATIM
)
list(APPEND OPR_PARAM_DEFS_OUTS
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_defs.h
${OPR_PARAM_DEFS_OUT_DIR}/megdnn/opr_param_json.h
)
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR})
set(OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR})
file(MAKE_DIRECTORY ${OPR_PARAM_DEFS_OUT_DIR}/src/common)
add_custom_command(
OUTPUT
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
COMMAND ${PYTHON_EXECUTABLE} ${OPR_PARAM_DEFS_SCRIPT}
--enumv ${OPR_PARAM_DEFS_SRCS}
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
VERBATIM
)
list(APPEND OPR_PARAM_DEFS_OUTS
${OPR_PARAM_DEFS_OUT_DIR}/src/common/opr_param_defs_enumv.cuh
)
list(APPEND OPR_PARAM_DEFS_INC ${OPR_PARAM_DEFS_OUT_DIR})
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/megdnn DESTINATION include FILES_MATCHING PATTERN "*.h")
add_custom_target(_opr_param_defs DEPENDS ${OPR_PARAM_DEFS_OUTS})
add_library(opr_param_defs INTERFACE)
target_include_directories(opr_param_defs INTERFACE ${OPR_PARAM_DEFS_INC})
add_dependencies(opr_param_defs _opr_param_defs)
if(MGE_WITH_TEST)
# use multi threads
add_definitions (-DMEGDNN_ENABLE_MULTI_THREADS=1)
add_subdirectory(test)
endif()
add_subdirectory(src)
file (GLOB_RECURSE SOURCES src/*.cpp)
add_library (cuda-stub SHARED ${SOURCES})
set_target_properties(cuda-stub PROPERTIES OUTPUT_NAME cuda)
target_compile_definitions(cuda-stub PRIVATE __CUDA_API_VERSION_INTERNAL)
target_link_libraries(cuda-stub PRIVATE dl -Wl,--no-undefined)
/*
* LIBCUDA_PATH: candidate paths to libcuda.so; multiple paths are
 * separated by colons
**/
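// Illustrative setting (paths and program name are examples only):
//   LIBCUDA_PATH=/usr/local/cuda/lib64/stubs/libcuda.so:/usr/lib/libcuda.so ./app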
#pragma GCC visibility push(default)
#include <cstdio>
#define LOGE(fmt, v...) fprintf(stderr, "err: " fmt "\n", ##v)
extern "C" {
#include <cuda.h>
}
#include <cudaProfiler.h>
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
static const char* default_so_paths[] = {
"/usr/local/nvidia/lib64/libcuda.so",
"/usr/lib/x86_64-linux-gnu/libcuda.so",
"libcuda.so",
};
#if defined(_WIN32)
#include <io.h>
#include <windows.h>
#define F_OK 0
#define RTLD_LAZY 0
// On Windows we use a lib_filename without a full path, so the Win32 API
// "LoadLibrary" applies its standard search strategy to find the library
// module. Since we cannot check a lib_filename that has no full path,
// "access(a, b)" is stubbed out instead of being used for verification.
#define access(a, b) false
static void* dlopen(const char* file, int) {
return static_cast<void*>(LoadLibrary(file));
}
static const char* dlerror() {
    return "dlerror is not available on Windows";
}
static void* dlsym(void* handle, const char* name) {
FARPROC symbol = GetProcAddress((HMODULE)handle, name);
return reinterpret_cast<void*>(symbol);
}
#else
#include <dlfcn.h>
#include <unistd.h>
#endif
static void log_failed_load(int func_idx);
namespace {
template <typename T>
T on_init_failed(int func_idx);
template <>
CUresult on_init_failed(int func_idx) {
log_failed_load(func_idx);
return CUDA_ERROR_UNKNOWN;
}
}
#define _WRAPLIB_API_CALL CUDAAPI
#define _WRAPLIB_CALLBACK CUDA_CB
#include "./libcuda-wrap.h"
#undef _WRAPLIB_CALLBACK
#undef _WRAPLIB_API_CALL
static bool open_shared_lib(const char* path, void*& handle) {
if (!access(path, F_OK)) {
handle = dlopen(path, RTLD_LAZY);
if (handle)
return true;
LOGE("cuda lib found but can not be opened: %s err=%s", path,
dlerror());
}
return false;
}
static void* get_library_handle() {
const char* path = nullptr;
auto str_cptr = getenv("LIBCUDA_PATH");
std::string str;
void* handle = nullptr;
if (str_cptr) {
str = str_cptr;
char* p = &str[0];
const char* begin = p;
while (*p) {
if (*p == ':') {
*p = 0;
if (open_shared_lib(begin, handle)) {
path = begin;
break;
}
begin = p + 1;
}
++p;
}
if (open_shared_lib(begin, handle)) {
path = begin;
}
}
if (!path) {
for (size_t i = 0; i < (sizeof(default_so_paths) / sizeof(char*));
i++) {
if (open_shared_lib(default_so_paths[i], handle)) {
path = default_so_paths[i];
break;
}
}
}
if (!path) {
LOGE("can not find cuda");
return nullptr;
}
return handle;
}
static void log_failed_load(int func_idx) {
LOGE("failed to load cuda func: %s", g_func_name[func_idx]);
}
static void* resolve_library_func(void* handle, const char* func) {
if (!handle) {
LOGE("handle should not be nullptr!");
return nullptr;
}
auto ret = dlsym(handle, func);
if (!ret) {
LOGE("failed to load cuda func: %s", func);
}
return ret;
}
/**
* \file dnn/include/megcore.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/thin/function.h"
#include "megcore_cdefs.h"
#include <cstddef>
#include <memory>
#include "megdnn/internal/visibility_prologue.h"
namespace megcore {
/*!
* \brief a callback to dispatch computing task on desired CPU thread
*
 * This is analogous to CUDA streams. The default dispatcher on CPU executes in
* the caller thread immediately.
*/
class CPUDispatcher {
public:
using Task = megdnn::thin_function<void()>;
using MultiThreadingTask = megdnn::thin_function<void(size_t, size_t)>;
virtual ~CPUDispatcher() noexcept;
/*!
* \brief dispatch a task on the computing thread
* \param task the task that would be moved away
*/
virtual void dispatch(Task&& task) = 0;
/*!
* \brief dispatch a multithreading task on the computing thread
     * \param task the task that would be moved away
* \param parallelism the parallelism of the task.
*/
virtual void dispatch(MultiThreadingTask&& task,
size_t parallelism) = 0;
/*!
* \brief synchronize the calling thread with the computing thread
*/
virtual void sync() = 0;
/*!
     * \brief get the number of computing threads.
*/
virtual size_t nr_threads() = 0;
};
} // namespace megcore
using MegcoreCPUDispatcher = megcore::CPUDispatcher;
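/*!
 * A minimal synchronous dispatcher sketch (illustrative only, not part of the
 * MegCore API): every task runs immediately in the calling thread, mirroring
 * the documented default CPU behavior.
 *
 * \code
 * class InplaceCPUDispatcher final : public megcore::CPUDispatcher {
 * public:
 *     void dispatch(Task&& task) override { task(); }
 *     void dispatch(MultiThreadingTask&& task, size_t parallelism) override {
 *         for (size_t i = 0; i < parallelism; ++i) {
 *             task(i, 0);  // run every sub-task sequentially on worker 0
 *         }
 *     }
 *     void sync() override {}  // nothing pending: execution is synchronous
 *     size_t nr_threads() override { return 1; }
 * };
 * \endcode
 */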
/**
* \brief Layer 1: device handle
*/
struct megcoreDeviceContext;
typedef struct megcoreDeviceContext *megcoreDeviceHandle_t;
megcoreStatus_t megcoreCreateDeviceHandle(
megcoreDeviceHandle_t *handle,
megcorePlatform_t platform,
int deviceID = -1,
unsigned int flags = 0);
megcoreStatus_t megcoreDestroyDeviceHandle(
megcoreDeviceHandle_t handle);
megcoreStatus_t megcoreGetPlatform(megcoreDeviceHandle_t handle,
megcorePlatform_t *platform);
megcoreStatus_t megcoreGetDeviceID(megcoreDeviceHandle_t handle,
int *deviceID);
megcoreStatus_t megcoreGetMemAlignment(megcoreDeviceHandle_t handle,
size_t *memAlignmentInBytes);
megcoreStatus_t megcoreGetDeviceFlags(
megcoreDeviceHandle_t handle,
unsigned int *flags);
megcoreStatus_t megcoreActivate(megcoreDeviceHandle_t handle);
megcoreStatus_t megcoreMalloc(megcoreDeviceHandle_t handle,
void **devPtr, size_t sizeInBytes);
megcoreStatus_t megcoreFree(megcoreDeviceHandle_t handle,
void *devPtr);
/**
* \brief Layer 2: computing handle
*/
struct megcoreComputingContext;
typedef struct megcoreComputingContext *megcoreComputingHandle_t;
megcoreStatus_t megcoreCreateComputingHandle(
megcoreComputingHandle_t *compHandle,
megcoreDeviceHandle_t devHandle,
unsigned int flags = 0);
megcoreStatus_t megcoreCreateComputingHandleWithCPUDispatcher(
megcoreComputingHandle_t *compHandle,
megcoreDeviceHandle_t devHandle,
const std::shared_ptr<MegcoreCPUDispatcher>& dispatcher,
unsigned int flags = 0);
megcoreStatus_t megcoreDestroyComputingHandle(
megcoreComputingHandle_t handle);
megcoreStatus_t megcoreGetDeviceHandle(
megcoreComputingHandle_t compHandle,
megcoreDeviceHandle_t *devHandle);
megcoreStatus_t megcoreGetComputingFlags(
megcoreComputingHandle_t handle,
unsigned int *flags);
MegcoreCPUDispatcher* megcoreGetCPUDispatcher(megcoreComputingHandle_t handle);
megcoreStatus_t megcoreMemcpy(
megcoreComputingHandle_t handle,
void *dst, const void *src, size_t sizeInBytes,
megcoreMemcpyKind_t kind);
megcoreStatus_t megcoreMemset(
megcoreComputingHandle_t handle,
void *dst, int value, size_t sizeInBytes);
megcoreStatus_t megcoreSynchronize(megcoreComputingHandle_t handle);
/**
* \brief Miscellaneous
*/
const char *megcoreGetErrorName(megcoreStatus_t status);
#include "megdnn/internal/visibility_epilogue.h"
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megcore_cdefs.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include <stdint.h>
/**
* \brief MegCore platform types
*/
typedef enum {
megcorePlatformCPU = 1,
megcorePlatformCUDA = 4,
} megcorePlatform_t;
/**
* \brief MegCore return codes
*
 * Note: since MegCore has been merged into MegDNN and uses a C++ API with
 * exceptions, this return status only serves for backward compatibility and
 * all APIs return megcoreSuccess
*/
typedef enum {
megcoreSuccess = 0,
megcoreErrorMemoryAllocation = 1,
megcoreErrorInvalidArgument = 2,
megcoreErrorInvalidDeviceHandle = 3,
megcoreErrorInvalidComputingHandle = 4,
megcoreErrorInternalError = 5,
} megcoreStatus_t;
/**
* \brief Memcpy kind
*/
typedef enum {
megcoreMemcpyHostToDevice = 1,
megcoreMemcpyDeviceToHost = 2,
megcoreMemcpyDeviceToDevice = 3,
} megcoreMemcpyKind_t;
namespace megcore {
/*!
* \brief error reporting from asynchronous execution devices
*
* This is currently used by CUDA kernels. It is used to report errors that
* depend on input data.
*/
struct AsyncErrorInfo {
//! number of errors occurred; only detailed information of the first error
//! would be recorded
uint32_t nr_error;
//! tracker set by set_error_tracker()
void* tracker_ptr;
//! human readable message; it can contain %d which would be replaced by
//! msg_args
char msg[228];
int msg_args[4];
};
} // namespace megcore
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megcore_cuda.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "./megcore.h"
#include <cuda_runtime_api.h>
#include "megdnn/internal/visibility_prologue.h"
namespace megcore {
struct CudaContext {
cudaStream_t stream = nullptr;
//! device pointer to buffer for error reporting from kernels
AsyncErrorInfo* error_info = nullptr;
CudaContext() = default;
CudaContext(cudaStream_t s, AsyncErrorInfo* e) : stream{s}, error_info{e} {}
};
megcoreStatus_t createComputingHandleWithCUDAContext(
megcoreComputingHandle_t* compHandle, megcoreDeviceHandle_t devHandle,
unsigned int flags, const CudaContext& ctx);
megcoreStatus_t getCUDAContext(megcoreComputingHandle_t handle,
CudaContext* ctx);
} // namespace megcore
static inline megcoreStatus_t megcoreCreateComputingHandleWithCUDAStream(
megcoreComputingHandle_t* compHandle, megcoreDeviceHandle_t devHandle,
unsigned int flags, cudaStream_t stream) {
megcore::CudaContext ctx;
ctx.stream = stream;
return megcore::createComputingHandleWithCUDAContext(compHandle, devHandle,
flags, ctx);
}
static inline megcoreStatus_t megcoreGetCUDAStream(
megcoreComputingHandle_t handle, cudaStream_t* stream) {
megcore::CudaContext ctx;
auto ret = megcore::getCUDAContext(handle, &ctx);
*stream = ctx.stream;
return ret;
}
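/*!
 * Usage sketch (assumes the caller owns a CUDA device and stream; error
 * checking omitted for brevity):
 *
 * \code
 * megcoreDeviceHandle_t dev;
 * megcoreCreateDeviceHandle(&dev, megcorePlatformCUDA);
 * cudaStream_t stream;
 * cudaStreamCreate(&stream);
 * megcoreComputingHandle_t comp;
 * megcoreCreateComputingHandleWithCUDAStream(&comp, dev, 0, stream);
 * // ... run MegDNN operators on a handle bound to `comp` ...
 * megcoreDestroyComputingHandle(comp);
 * megcoreDestroyDeviceHandle(dev);
 * cudaStreamDestroy(stream);
 * \endcode
 */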
#include "megdnn/internal/visibility_epilogue.h"
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megdnn.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/version.h"
#include "megdnn/oprs.h"
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megdnn/arch.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
// include general build configurations
#include "megdnn/config/config.h"
#if defined(__GNUC__) || defined(__clang__)
#if !defined (__clang__)
// gcc specific
#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if GCC_VERSION < 40800
#error "GCC version should be at least 4.8.0."
#endif // GCC_VERSION < 40800
#endif // !defined(__clang__)
#ifndef megdnn_trap
#define megdnn_trap() __builtin_trap()
#endif
#define megdnn_likely(v) __builtin_expect(bool(v), 1)
#define megdnn_unlikely(v) __builtin_expect(bool(v), 0)
#define MEGDNN_DEPRECATED __attribute__((deprecated))
#define MEGDNN_PACKED __attribute__((packed))
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT static_assert
#define MEGDNN_FINAL final
#define MEGDNN_NORETURN __attribute__((noreturn))
#define MEGDNN_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#define MEGDNN_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#if defined(__clang_major__) && (__clang_major__ >= 7)
#define MEGDNN_LAMBDA_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#else
#define MEGDNN_LAMBDA_ATTRIBUTE_TARGET(simd) [[gnu::target(simd)]]
#endif
#define MEGDNN_NOINLINE __attribute__((noinline))
#define megdnn_isatty(x) isatty(x)
#elif defined(__INTEL_COMPILER) || defined(_MSC_VER)
#ifndef megdnn_trap
#define megdnn_trap() __debugbreak()
#endif
#define megdnn_likely(v) (bool(v))
#define megdnn_unlikely(v) (bool(v))
#define MEGDNN_DEPRECATED
#define MEGDNN_PACKED
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT static_assert
#define MEGDNN_FINAL final
#if defined(_MSC_VER)
#define MEGDNN_NORETURN __declspec(noreturn)
#define MEGDNN_NOINLINE __declspec(noinline)
#else
#define MEGDNN_NORETURN
#define MEGDNN_NOINLINE
#endif // _MSC_VER
#define MEGDNN_WARN_UNUSED_RESULT
#define megdnn_isatty(x) _isatty(x)
#else
#error "unknown compiler"
#endif // __GNUC__
// __cpp_exceptions and __cpp_rtti are referred from
// https://isocpp.org/std/standing-documents/sd-6-sg10-feature-test-recommendations
// gcc < 5 does not define __cpp_exceptions but __EXCEPTIONS,
// similar for __GXX_RTTI
// _CPPUNWIND and _CPPRTTI are used by MSVC, see
// https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=vs-2019
#ifndef MEGDNN_ENABLE_EXCEPTIONS
#if __cpp_exceptions || __EXCEPTIONS || \
(defined(_MSC_VER) && defined(_CPPUNWIND))
#define MEGDNN_ENABLE_EXCEPTIONS 1
#else
#define MEGDNN_ENABLE_EXCEPTIONS 0
#endif
#endif
#ifndef MEGDNN_ENABLE_RTTI
#if __cpp_rtti || __GXX_RTTI || (defined(_MSC_VER) && defined(_CPPRTTI))
#define MEGDNN_ENABLE_RTTI 1
#else
#define MEGDNN_ENABLE_RTTI 0
#endif
#endif
#ifdef __CUDACC__
#define MEGDNN_CC_CUDA 1
#undef MEGDNN_CONSTEXPR
#define MEGDNN_CONSTEXPR const
#if defined(__CUDACC_VER_MAJOR__)
#if __CUDACC_VER_MAJOR__ >= 9
#undef MEGDNN_STATIC_ASSERT
#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg);
#else
#undef MEGDNN_STATIC_ASSERT
#define MEGDNN_STATIC_ASSERT(cond, msg)
#endif
#endif
#define nullptr NULL
#undef MEGDNN_FINAL
#define MEGDNN_FINAL
#elif defined(__HIPCC__)
#define MEGDNN_CC_CUDA 1
#else
#define MEGDNN_CC_HOST 1
#endif // __CUDACC__
// MEGDNN_HOST and MEGDNN_DEVICE
#if MEGDNN_CC_CUDA
#define MEGDNN_HOST __host__
#define MEGDNN_DEVICE __device__
#else
#define MEGDNN_HOST
#define MEGDNN_DEVICE
#endif
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megdnn/basic_types.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/arch.h"
#include "megdnn/dtype.h"
#include "megdnn/internal/defs.h"
#if MEGDNN_CC_HOST
#include <string>
#include <type_traits>
#include <vector>
#include <cstdarg>
#include "megdnn/thin/small_vector.h"
#endif // MEGDNN_CC_HOST
#include "megdnn/internal/visibility_prologue.h"
namespace megdnn {
class ErrorHandler {
#if MEGDNN_CC_HOST
static ErrorHandler* sm_inst;
static ErrorHandler* inst();
protected:
MEGDNN_NORETURN virtual void do_on_megdnn_error(const std::string& msg) = 0;
MEGDNN_NORETURN virtual void do_on_tensor_reshape_error(
const std::string& msg) {
on_megdnn_error(msg);
}
~ErrorHandler() = default;
#endif
public:
//! called on general megdnn error
MEGDNN_NORETURN static void on_megdnn_error(const char* msg);
//! called on tensor reshape error
MEGDNN_NORETURN static void on_tensor_reshape_error(const char* msg);
#if MEGDNN_CC_HOST
MEGDNN_NORETURN static void on_megdnn_error(const std::string& msg);
MEGDNN_NORETURN static void on_tensor_reshape_error(const std::string& msg);
/*!
* \brief set the global error handler instance
*
     * This method is not thread-safe. The caller is responsible for ensuring
     * that the ErrorHandler is a global object with a sufficient lifetime.
*
* \return original error handler
*/
static void set_handler(ErrorHandler* handler);
#endif // MEGDNN_CC_HOST
};
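/*!
 * Sketch of a custom handler (illustrative; the exception type is an
 * assumption, and any other non-returning behavior such as aborting would
 * also satisfy the contract):
 *
 * \code
 * class ThrowingErrorHandler final : public ErrorHandler {
 * protected:
 *     [[noreturn]] void do_on_megdnn_error(const std::string& msg) override {
 *         throw std::runtime_error(msg);
 *     }
 * };
 * // during startup, install a long-lived instance:
 * //     static ThrowingErrorHandler g_handler;
 * //     ErrorHandler::set_handler(&g_handler);
 * \endcode
 */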
#if MEGDNN_CC_HOST
enum class LogLevel { DEBUG, INFO, WARN, ERROR };
typedef void (*LogHandler)(LogLevel level, const char* file, const char* func,
int line, const char* fmt, va_list ap);
/*!
* \brief set the callback to receive all log messages
*
* Note: the log handler can be NULL (which is also the default value). In this
* case, no log message would be recorded.
*
* \return original log handler
*/
LogHandler set_log_handler(LogHandler handler);
#endif
/**
* \brief Describing the tensor shape.
*
* Uninitialized shape: ndim == 0; total_nr_elems() is also defined to be 0
*
* Empty shape: ndim > 0 && shape[i] == 0 for 0 <= i < ndim; it is always
* considered non-contiguous.
*/
struct TensorShape {
static MEGDNN_CONSTEXPR size_t MAX_NDIM = MEGDNN_MAX_NDIM;
#if MEGDNN_CC_HOST
size_t shape[MAX_NDIM], ndim = 0;
#else
size_t shape[MAX_NDIM], ndim;
#endif
#if MEGDNN_CC_HOST
TensorShape() = default;
TensorShape(const TensorShape& rhs) = default;
TensorShape(const SmallVector<size_t>& init_shape);
TensorShape(std::initializer_list<size_t> init_shape);
std::string to_string() const;
#endif
//! total number of elements
size_t total_nr_elems() const;
//! check whether two shapes are equal
bool eq_shape(const TensorShape& rhs) const;
//! check whether the shape can be treated as a scalar
bool is_scalar() const { return ndim == 1 && shape[0] == 1; }
//! check whether ndim != 0 and at least one shape is 0
bool is_empty() const;
//! access single element, without boundary check
size_t& operator[](size_t i) { return shape[i]; }
size_t operator[](size_t i) const { return shape[i]; }
};
class Handle;
/**
* \brief Describing the tensor shape with its actual layout in memory and dtype
*
* x(i, j, ...) is stored at offset
* stride[0]*i + stride[1]*j + ..., in number of elements; physical offset needs
* to be multiplied by dtype size.
*/
struct TensorLayout : public TensorShape {
/*!
* \brief Describes min and max offsets of tensor elements with respect to
* its first element, so all tensor elements are guaranteed to be in
* the range [elem[0]+low, elem[0]+high).
*/
struct Span {
ptrdiff_t low_elem, low_byte;
size_t high_elem, high_byte;
Span(ptrdiff_t low_elem, ptrdiff_t low_byte, size_t high_elem,
size_t high_byte)
: low_elem(low_elem),
low_byte(low_byte),
high_elem(high_elem),
high_byte(high_byte) {}
size_t dist_elem() const { return high_elem - low_elem; }
size_t dist_byte() const { return high_byte - low_byte; }
};
/*!
* \brief Describing the requirements for tensor layouts
*
     * Some runtimes (e.g. OpenCL) may have alignment requirements for special
* memory types (e.g. image in texture memory). Format objects can be used
* to impose such constraints on methods related to tensor strides.
*
* Note that ImplBase is defined in tensor_format.h
*/
class Format {
public:
class ImplBase;
#if MEGDNN_CC_HOST
Format();
const ImplBase* impl() const { return m_impl; }
enum class Type;
//! get impl type; defined in tensor_format.h
inline Type type() const;
//! convert to the implementation class; exception would be raised if
//! type mismatches
template <class Impl>
const Impl& as_impl() const {
static_assert(std::is_base_of<ImplBase, Impl>::value, "bad type");
if (type() != Impl::TYPE) {
on_bad_cvt(Impl::TYPE);
}
return *static_cast<const Impl*>(m_impl);
}
//! get human-readable string description of this format
std::string to_string() const;
std::string serialize() const;
static Format deserialize(const std::string& bin, const Handle* handle);
//! whether this is the default tensor format
bool is_default() const;
bool operator==(Format rhs) const { return m_impl == rhs.m_impl; }
bool operator!=(Format rhs) const { return m_impl != rhs.m_impl; }
#endif
private:
const ImplBase* m_impl;
#if MEGDNN_CC_HOST
Format(ImplBase* impl) : m_impl{impl} {}
MEGDNN_NORETURN void on_bad_cvt(Type dst_type) const;
#endif
};
ptrdiff_t stride[MAX_NDIM];
DType dtype;
Format format;
#if MEGDNN_CC_HOST
TensorLayout();
TensorLayout(const TensorLayout& layout) = default;
//! create empty layout with given dtype
explicit TensorLayout(DType dtype_);
TensorLayout(DType dtype_, Format format);
//! create layout with given shape and contiguous stride.
TensorLayout(const TensorShape& shape, DType dtype);
TensorLayout(const TensorShape& shape, DType dtype, Format format);
//! creating layout with user-specified shape and stride.
TensorLayout(const TensorShape& shape, const std::vector<ptrdiff_t>& stride,
DType dtype);
TensorLayout(const TensorShape& shape, const std::vector<ptrdiff_t>& stride,
DType dtype, Format format);
/* =================== inplace modifiers =================== */
/*!
* \brief init stride to be contiguous
*
* Use current shape and format
*
* \return total number of elements
*/
size_t init_contiguous_stride();
/*!
* \brief init stride to be contiguous by first assigning shape
*
* Use current format.
*/
size_t init_contiguous_stride(const TensorShape& shape);
size_t init_contiguous_stride(const TensorShape& shape, Format format);
/*!
* \brief inplace version of remove_axis
*/
void remove_axis_inplace(size_t idx);
/*!
* \brief add an axis before given *axis* with given shape and stride
*
* Other shapes and strides would not be changed.
*/
void add_axis_inplace(size_t axis, size_t shape, ptrdiff_t stride);
/*!
* \brief add an axis before given *axis*, with shape 1 and contiguous
* stride
*/
void add_axis_cont_inplace(size_t axis) {
add_axis_inplace(axis, 1, stride[axis] * shape[axis]);
}
/* =================== generate new layout =================== */
/**
* \brief Returns the layout with permuted dimensions.
*
* example:
* (2, 0, 1) -> AxBxC to CxAxB
*/
TensorLayout dimshuffle(const std::vector<size_t>& dims) const
MEGDNN_WARN_UNUSED_RESULT;
/**
* \brief Remove an axis from the layout by moving later shape/stride
* elements earlier. No extra check is performed.
*/
TensorLayout remove_axis(size_t idx) const MEGDNN_WARN_UNUSED_RESULT;
/**
* \brief Returns a different view.
*
* \throw TensorReshapeError if no stride exists for target shape.
*/
TensorLayout reshape(const TensorShape& shape) const
MEGDNN_WARN_UNUSED_RESULT;
/*!
* \brief try to reshape to another view; return whether these two shapes
* are compatible
* \return true iff there exists target stride so this layout can be
* converted to target shape and the elements can match.
*/
bool try_reshape(TensorLayout& output,
const TensorShape& shape) const MEGDNN_WARN_UNUSED_RESULT;
/*!
* \brief Broadcast on dims with shape == 1 to match target *shape*.
* \throw TensorReshapeError if could not be satisfied
*/
TensorLayout broadcast(const TensorShape& shape) const
MEGDNN_WARN_UNUSED_RESULT;
/*!
* \brief Collapse consecutive axes with contiguous layout together
*
     * This transforms the tensor into a canonical form. For an empty tensor or
     * a scalar, the result is always a one-dimensional empty tensor or scalar
     * with stride 1.
*/
TensorLayout collapse_contiguous() const MEGDNN_WARN_UNUSED_RESULT;
/* =================== properties =================== */
std::string to_string() const;
#endif // MEGDNN_CC_HOST
/*!
     * \brief check whether the layout is contiguous under its format definition
*
* See is_contiguous_spec() in Format impl classes for more detail. When the
* format is default, this is equivalent to is_physical_contiguous().
*
* Note that empty tensors (i.e. with 0 shapes) are not considered as
* contiguous.
*/
bool is_contiguous() const;
//! check whether it is physically contiguous disregarding format
bool is_physical_contiguous() const;
/*!
* \brief check whether the layout is monotonous
*
* A tensor is monotonous if abs(stride[i]) >= abs(stride[i+1])*shape[i+1]
*/
bool is_abs_monotonous_allow_brdcst() const;
/*!
* \brief check whether the layout is contiguous, allowing broadcasting
*
* This checks whether the underlying storage is contiguous, where
* broadcasting is also considered to be so.
*/
bool is_contiguous_allow_brdcst() const;
/*!
* \brief if this function returns true, then no two elements can occupy the
* same memory slot
*
* Note that this test is a sufficient but not necessary condition for the
* layout being non-overlapping: when this function returns false, it is
* still possible that actually no two elements share the same memory
* location.
*/
bool is_non_overlapping_strong() const;
bool eq_layout(const TensorLayout& rhs) const;
//! get lowest and highest offset reachable from this layout
Span span() const;
};
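/*!
 * Layout manipulation sketch (host-side only; shapes and dtype chosen purely
 * for illustration):
 *
 * \code
 * TensorLayout layout({2, 3, 4}, dtype::Float32());
 * // the constructor fills in contiguous strides: {12, 4, 1}
 * auto swapped = layout.dimshuffle({2, 0, 1});  // shape becomes (4, 2, 3)
 * auto flat = layout.reshape({24});             // one contiguous axis
 * auto bcast = TensorLayout({1, 3, 1}, dtype::Float32()).broadcast({2, 3, 4});
 * // broadcast axes get stride 0
 * \endcode
 */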
/**
* \brief A simple encapsulation class for n-dimensional tensor.
*/
struct TensorND {
void* raw_ptr;
TensorLayout layout;
TensorND() : raw_ptr(NULL) {}
TensorND(void* raw_ptr_, const TensorLayout& layout_)
: raw_ptr(raw_ptr_), layout(layout_) {}
//! get typed pointer; type check is performed
template <typename T>
T* ptr() const {
layout.dtype.assert_is_ctype<T>();
return static_cast<T*>(raw_ptr);
}
//! get typed pointer of compatible type
template <typename T>
T* compatible_ptr() const {
layout.dtype.assert_is_compatible_ctype<T>();
return reinterpret_cast<T*>(raw_ptr);
}
};
#if MEGDNN_CC_HOST
using TensorFormat = TensorLayout::Format;
using TensorShapeArray = SmallVector<TensorShape>;
using TensorNDArray = SmallVector<TensorND>;
using TensorLayoutArray = SmallVector<TensorLayout>;
using TensorLayoutPtrArray = SmallVector<TensorLayout*>;
using TensorFormatArray = SmallVector<TensorFormat>;
#endif
/**
* \brief A struct representing workspace.
*
* It differs from TensorND in that workspace does not have a "layout" concept.
*/
struct Workspace {
dt_byte* raw_ptr;
size_t size;
Workspace() : raw_ptr(NULL), size(0) {}
Workspace(dt_byte* raw_ptr_, size_t size_)
: raw_ptr(raw_ptr_), size(size_) {}
template <typename T>
T* ptr(size_t offset_in_bytes = 0) const {
return static_cast<T*>(static_cast<void*>(raw_ptr + offset_in_bytes));
}
};
#if MEGDNN_CC_HOST
/*!
* \brief manage output and workspace memory for dynamic output oprs
*/
class DynOutMallocPolicy {
protected:
~DynOutMallocPolicy() = default;
public:
/*!
* \brief allocate an output var
* \param id output index, starting from 0
* \param dtype requested output data type
* \param shape requested output shape
* \param user_data extra user data passed in DynOutMallocPolicyCall
*/
virtual TensorND alloc_output(size_t id, DType dtype,
const TensorShape& shape,
void* user_data) = 0;
/*!
* \brief allocate workspace memory
* \param sz requested workspace in bytes
*/
virtual void* alloc_workspace(size_t sz, void* user_data) = 0;
/*!
* \brief free workspace memory
*
* Every operator should guarantee that alloc_workspace() and
* free_workspace() calls are matched
*/
virtual void free_workspace(void* ptr, void* user_data) = 0;
};
/*!
* \brief bind a DynOutMallocPolicy with arbitrary user data
*/
struct DynOutMallocPolicyCall {
DynOutMallocPolicy* policy;
void* user_data;
DynOutMallocPolicyCall(DynOutMallocPolicy* p = nullptr, void* ud = nullptr)
: policy{p}, user_data{ud} {}
TensorND alloc_output(size_t id, DType dtype, const TensorShape& shape) {
return policy->alloc_output(id, dtype, shape, user_data);
}
/*!
* \brief allocate workspace with return type conversion
* \tparam elem element type for size calculation
* \param nr_elem number of elements; allocated size is sizeof(elem) *
* nr_elem
*/
template <typename T = void, typename elem = T>
T* alloc_workspace(size_t nr_elem) {
using real_elem =
typename std::conditional<std::is_same<elem, void>::value,
uint8_t, elem>::type;
return static_cast<T*>(policy->alloc_workspace(
nr_elem * sizeof(real_elem), user_data));
}
void free_workspace(void* ptr) {
return policy->free_workspace(ptr, user_data);
}
};
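/*!
 * Workspace allocation sketch using the wrapper above (the surrounding
 * function and its arguments are hypothetical):
 *
 * \code
 * void run_with_scratch(DynOutMallocPolicyCall call, size_t nr_elem) {
 *     // allocates nr_elem * sizeof(float) bytes through the bound policy
 *     float* tmp = call.alloc_workspace<float>(nr_elem);
 *     // ... use tmp as scratch memory ...
 *     call.free_workspace(tmp);
 * }
 * \endcode
 */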
#endif // MEGDNN_CC_HOST
} // namespace megdnn
#include "megdnn/internal/visibility_epilogue.h"
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megdnn/config/config.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#if !defined(__CUDACC__)
// Try to detect if no architecture flags defined.
#if !defined(MEGDNN_NAIVE) && !defined(MEGDNN_X86) && \
!defined(MEGDNN_X86_64) && !defined(MEGDNN_X86_32) && \
!defined(MEGDNN_64_BIT) && !defined(MEGDNN_MIPS) && \
!defined(MEGDNN_ARMV7) && !defined(MEGDNN_AARCH64)
#if defined(__x86_64__) || defined(_M_X64)
#define MEGDNN_X86 1
#define MEGDNN_X86_64 1
#define MEGDNN_64_BIT 1
#elif defined(__i386) || defined(_M_IX86)
#define MEGDNN_X86 1
#define MEGDNN_X86_32 1
#endif
#endif
#endif // !defined(__CUDACC__)
// vim: syntax=cpp.doxygen
/**
* \file dnn/include/megdnn/cuda.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/basic_types.h"
#include <cuda_runtime_api.h>
#include <memory>
#include "megdnn/internal/visibility_prologue.h"
namespace megdnn {