CMakeLists.txt 52.2 KB
Newer Older
1
cmake_minimum_required(VERSION 3.15.2)
2 3 4 5
# Report the active generator and warn when it is not Ninja, the generator this
# project recommends. The expansion is quoted so the generator name (which may
# contain spaces, e.g. "Unix Makefiles") is always compared as one literal
# string and is never re-interpreted by if() as a variable name.
message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}")
if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
    message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
endif()
6 7 8

include (cmake/FetchMegBrainVersion.cmake)
project(MegEngine LANGUAGES C CXX VERSION ${MGB_VER_STRING})
9 10 11 12

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
13
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
14 15
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
16
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
17

18
if(NOT MSVC AND NOT APPLE AND NOT WIN32)
19 20 21 22 23
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq  <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()

24
include(GNUInstallDirs)
25
include(CheckCXXCompilerFlag)
26 27
include(CheckIPOSupported)

28 29 30 31 32
CHECK_CXX_COMPILER_FLAG(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

# Target architecture selector, stored in the cache with AUTO as the default.
# The STRINGS property lists the values offered by cmake-gui / ccmake.
set(MGE_ARCH
    AUTO
    CACHE STRING "Architecture on which MegEngine to be built.")
set_property(
    CACHE MGE_ARCH
    PROPERTY STRINGS
        AUTO
        x86_64
        i386
        armv7
        aarch64
        naive
        fallback)
36
set (MGE_EXPORT_TARGETS MegEngine-targets)
37

38 39 40 41 42 43 44 45 46 47 48 49
# Convert the colon-separated LD_LIBRARY_PATH and LIBRARY_PATH environment
# variables into CMake lists (ALTER_LD_LIBRARY_PATHS / ALTER_LIBRARY_PATHS).
# Both result variables are always defined; they are empty when the
# corresponding environment variable is unset or empty.
# The environment value is quoted so it reaches string(REPLACE) as exactly one
# input argument, whatever characters it contains.
if(NOT "$ENV{LD_LIBRARY_PATH}" STREQUAL "")
    string(REPLACE ":" ";" ALTER_LD_LIBRARY_PATHS "$ENV{LD_LIBRARY_PATH}")
else()
    set(ALTER_LD_LIBRARY_PATHS "")
endif()

if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
    string(REPLACE ":" ";" ALTER_LIBRARY_PATHS "$ENV{LIBRARY_PATH}")
else()
    set(ALTER_LIBRARY_PATHS "")
endif()

50
# User-facing build switches. Names and defaults are unchanged; only the help
# strings were cleaned up (typos, non-ASCII separators, stray punctuation).
# Several of these values are overridden further down in this file depending
# on platform and on each other.

# JIT backends
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)

# Size / profiling
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
option(MGE_WITH_MINIMUM_SIZE "Switch off MGE_ENABLE_RTTI, MGE_ENABLE_EXCEPTIONS, MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY to compile a minimum-size load_and_run." OFF)

# Numeric features
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)

# CUDA and related libraries
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
option(MGE_WITH_LITE "Build MGE with lite" ON)
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)

# Third-party / packaging
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)

# Language/runtime features
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)

# Build flavours
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module" ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support" ON)
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
option(MGE_WITH_CUSTOM_OP "Build with Custom op" OFF)
83
if(MSVC OR WIN32)
84 85 86 87 88 89 90 91 92 93
    # FIXME: statically linking the Windows VC runtime shipped with some Visual Studio
    # versions causes runtime issues on some call paths, for example
    # _imperative_rt.pyd --> megengine_shared.dll, where the C API flush cannot find
    # its fd argument; the root cause is unknown.
    # As a workaround the VC runtime is linked dynamically by default. It is still
    # linked statically when MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP or
    # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is enabled, so prefer
    # lite_static_all_in_one (lite/CMakeLists.txt) in Windows XP environments.
    # If the VC runtime is not installed in your environment, see:
    # https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160
    option(MGE_STATIC_LINK_WITH_VC_RUNTIME "Enable mge static link with Windows vc runtime" OFF)

94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
    option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)
    # special MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 for Windows XP sp2(32bit)
    # internal behavior:
    # 1: will force define MGB_HAVE_THREAD=0, which means only support single thread
    # 2: some Feature will be disable, eg: MGB_ENABLE_JSON and var sanity check, do
    #    not too many care this!!, if you want to use this Feature to 'DEBUG', you can
    #    run same model at NON-XP-SP2 env, eg Win7 or XP-SP3(build without MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    # 3: we only support MegEngine(load_and_run) and MegEngineLite API work on XP SP2
    #    some debug utils, eg, megbrain_test/megdnn_test not support run, most caused by gtest src code
    # sdk caller:
    # 1: as we remove mutex, when you use MSVC self API eg CreateThread to start several MegEngine instances
    #    in the same progress, please call MegEngine API(init/run) as serial as possible, also please
    #    do not use std::thread std::mutex/std::this_thread_id at SDK caller side!!!
    # check dll/exe can deploy on Windows XP sp2 or not:
    #    please checkout scripts/misc/check_windows_xp_sp2_deploy.py
    option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 "Enable deploy inference on Windows xp sp2" OFF)

    # PE file linked by LLVM lld can not run at Windows XP env, so we force use link.exe
    # which always locate in Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
113
    set(CMAKE_LINKER "link.exe")
114
    if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
115 116 117
        set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
        message(STATUS "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when build for Windows XP")

118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
        if(NOT ${MGE_ARCH} STREQUAL "i386")
            message(FATAL_ERROR "only support 32bit when build for Windows xp")
        endif()

        if(NOT MGE_INFERENCE_ONLY)
            message(FATAL_ERROR "only support inference when build for Windows xp")
        endif()

        if(MGE_WITH_CUDA)
            message(FATAL_ERROR "do not support CUDA when build for Windows xp")
        endif()

        # Windows XP sp3 have thread issue, Workround for it
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
        # for Windows XP type
        add_link_options("/SUBSYSTEM:CONSOLE,5.01")
        # some old lib(for example mkl for xp) use legacy stdio, so we force link legacy_stdio_definitions
        add_link_options("/DEFAULTLIB:legacy_stdio_definitions.lib")

        if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
        endif()
    else()
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0601")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0601")
    endif()
endif()
147

148
# On Windows the shared-cuDNN option is forced off.
if(WIN32 OR MSVC)
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
endif()

# MGE_WITH_ANY_CUDA_STUB is ON when at least one of the CUDA / NVRTC stubs is
# requested, OFF otherwise.
set(MGE_WITH_ANY_CUDA_STUB OFF)
if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
endif()
158

159 160 161 162 163 164 165 166
# Midout profiling build: define MIDOUT_PROFILING for C and C++, turn the
# minimum-size mode off and RTTI on. The configuration is rejected on WIN32.
if(MGE_WITH_MIDOUT_PROFILE)
    message(STATUS "build with MIDOUT PROFILE and force set MGE_WITH_MINIMUM_SIZE off and force rtti ON")
    foreach(flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
        string(APPEND ${flags_var} " -DMIDOUT_PROFILING")
    endforeach()
    set(MGE_WITH_MINIMUM_SIZE OFF)
    set(MGE_ENABLE_RTTI ON)
    if(WIN32)
        message(FATAL_ERROR "do not support midout at WIN32")
    endif()
endif()

# Header that is force-included into every C/C++ translation unit (see below).
set(BIN_REDUCE ${PROJECT_SOURCE_DIR}/src/bin_reduce_cmake.h)

# Minimum-size mode: disable RTTI, logging and exceptions and build the
# inference-only library.
if(MGE_WITH_MINIMUM_SIZE)
    message(STATUS "build with MGE_WITH_MINIMUM_SIZE bin_reduce header is: ${BIN_REDUCE}")
    set(MGE_ENABLE_RTTI OFF)
    set(MGE_ENABLE_LOGGING OFF)
    set(MGE_ENABLE_EXCEPTIONS OFF)
    set(MGE_INFERENCE_ONLY ON)
    # This mode triggers unused-parameter warnings, so silence them.
    foreach(flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
        string(APPEND ${flags_var} " -Wno-unused-parameter")
    endforeach()
endif()

# Force-include the bin_reduce header, except for midout-profile builds and on WIN32.
if(NOT (MGE_WITH_MIDOUT_PROFILE OR WIN32))
    foreach(flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
        string(APPEND ${flags_var} " -include ${BIN_REDUCE}")
    endforeach()
endif()

187 188 189 190 191 192 193 194 195
# Put every function/data item in its own section and let the linker discard
# unreferenced ones, when the compiler accepts the flags.
# The probe is skipped on APPLE: running this check there leads to a CMake crash.
if(NOT APPLE)
    check_cxx_compiler_flag(
        "-ffunction-sections -fdata-sections  -Wl,--gc-sections"
        CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
        foreach(flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
            string(APPEND ${flags_var} " -ffunction-sections -fdata-sections")
        endforeach()
        foreach(flags_var CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
            string(APPEND ${flags_var} " -Wl,--gc-sections")
        endforeach()
    endif()
endif()

# Link-time optimisation: add -flto=full to executable and shared-library link
# flags when IPO is reported as supported.
# WIN32 is excluded: LLVM on Windows reports LTO support but does not accept
# -flto=full at the link stage.
check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
if(WIN32 OR NOT IS_LTO_SUPPORT)
    message(STATUS "lto is not supported in this compiler")
else()
    message(STATUS "lto is supported in this compiler")
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -flto=full")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -flto=full")
endif()

208 209
# Apple builds are always static (needed for the Xcode framework).
if(APPLE)
    set(BUILD_SHARED_LIBS OFF)
    message(STATUS "build static for xcode framework require")
endif()

# Using system libraries turns static CUDA linking off.
if(MGE_USE_SYSTEM_LIB)
    set(MGE_CUDA_USE_STATIC OFF)
endif()

# FlatBuffers support switches FBS serialization on.
if(MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
endif()
220

221
# Cross-compilation setup. A toolchain file is treated as "we are cross
# compiling": flatc is taken from the host build directory and MGE_ARCH is
# derived from the toolchain in use (Android, iOS, RISC-V, or an explicit
# ARM_CROSS_BUILD_ARCH). Unknown settings abort the configure step.
#
# All variable expansions inside if() are quoted so that an unset/empty
# ANDROID_ARCH or IOS_ARCH reaches the intended FATAL_ERROR branch instead of
# producing an if() syntax error.
if(CMAKE_TOOLCHAIN_FILE)
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    if(ANDROID_TOOLCHAIN_ROOT)
        # ANDROID_ARCH_NAME, when provided, takes precedence over ANDROID_ARCH.
        if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
            set(ANDROID_ARCH "${ANDROID_ARCH_NAME}")
        endif()
        if("${ANDROID_ARCH}" STREQUAL "arm")
            set(MGE_ARCH "armv7")
        elseif("${ANDROID_ARCH}" STREQUAL "arm64")
            set(MGE_ARCH "aarch64")
        else()
            message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
        endif()
    elseif(IOS_TOOLCHAIN_ROOT)
        if("${IOS_ARCH}" STREQUAL "armv7"
           OR "${IOS_ARCH}" STREQUAL "armv7k"
           OR "${IOS_ARCH}" STREQUAL "armv7s")
            set(MGE_ARCH "armv7")
        elseif("${IOS_ARCH}" STREQUAL "arm64" OR "${IOS_ARCH}" STREQUAL "arm64e")
            set(MGE_ARCH "aarch64")
        else()
            message(FATAL_ERROR "Unsupported IOS_ARCH.")
        endif()
    elseif(RISCV_TOOLCHAIN_ROOT)
        set(MGE_ARCH "riscv64")
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
        set(MGE_ARCH "${ARM_CROSS_BUILD_ARCH}")
    else()
        message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
endif()
259 260

# Resolve MGE_ARCH=AUTO from CMAKE_SYSTEM_PROCESSOR.
# Expansions are quoted so that an empty CMAKE_SYSTEM_PROCESSOR (or MGE_ARCH)
# falls through to the FATAL_ERROR below instead of breaking the if() syntax.
if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "x86_64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "AMD64")
        set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i386" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "i686")
        set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "aarch64" OR "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
        # Any other processor name starting with "arm" is treated as armv7.
        set(MGE_ARCH "armv7")
    else()
        message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
endif()

274 275 276 277 278
# When neither a configuration list nor an explicit build type is given,
# fall back to RelWithDebInfo.
if(NOT (CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE))
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()

279
# Release builds for non-x86_64 targets drop RTTI, unless tests or midout
# profiling are enabled.
# CMAKE_BUILD_TYPE is quoted because it is empty when CMAKE_CONFIGURATION_TYPES
# is set (no default is assigned in that case); an unquoted empty expansion
# would make this if() a syntax error.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release"
   AND NOT MGE_WITH_TEST
   AND NOT "${MGE_ARCH}" STREQUAL "x86_64"
   AND NOT MGE_WITH_MIDOUT_PROFILE)
    set(MGE_ENABLE_RTTI OFF)
    message(STATUS "disable MGE_ENABLE_RTTI when Release/NON-x86_64/NON-MGE_WITH_MIDOUT_PROFILE mode!!")
endif()

284
if(MSVC OR WIN32)
285 286
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)
287
    set(CMAKE_OBJECT_PATH_MAX 300)
288 289 290 291 292 293 294 295 296 297
    if(MGE_BUILD_WITH_ASAN)
        set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
        message(STATUS "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when build for Windows MGE_BUILD_WITH_ASAN")
    endif()
    if(MGE_STATIC_LINK_WITH_VC_RUNTIME)
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
        else()
            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
        endif()
298
    else()
299 300 301 302 303
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL")
        else()
            set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL")
        endif()
304 305
    endif()

306
    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
307 308
    message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    if (NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang" AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang-cl")
309 310
        message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()
311 312 313
    # on windows need append VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows
    # and VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows to PATH env
    if (MGE_BUILD_WITH_ASAN)
314 315 316 317 318
        message(WARNING "please do (set)export ASAN_OPTIONS=windows_hook_rtl_allocators=true when run test after build finish, caused by we link asan dll!!")
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
            message(WARNING "Windows AddressSanitizer doesn't support linking with debug runtime libraries yet, which means do not support CMAKE_BUILD_TYPE=Debug")
            message(FATAL_ERROR "Please build with RelWithDebInfo or Release by : EXTRA_CMAKE_ARGS=\"-DMGE_BUILD_WITH_ASAN=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ...\"")
        endif()
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
        if("$ENV{VS_PATH}" STREQUAL "")
            message(FATAL_ERROR "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
        endif()
        if(${MGE_ARCH} STREQUAL "x86_64")
            set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
            set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-x86_64")
            set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
        elseif(${MGE_ARCH} STREQUAL "i386")
            set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
            set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
            set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
        else()
            message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
        endif()
        find_path(ASAN_DLL_PATH
            NAMES ${WINDOWS_ASAN_DLL_NAME}
            HINTS $ENV{VS_PATH}
            PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
            DOC "Windows asan library path" )
        if(ASAN_DLL_PATH STREQUAL "ASAN_DLL_PATH-NOTFOUND")
            message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
        endif()

        message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")
        link_directories(${ASAN_DLL_PATH})
        link_libraries(${WINDOWS_ASAN_DLL_NAME})
        link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})
        set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
347 348 349 350 351 352
        # windows Llvm asan do not take effect when /O2
        # RELWITHDEBINFO default value is /O2, so override it
        set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_C_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_CXX_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
353 354 355
    else()
        set(WIN_FLAGS "/O2")
    endif()
356
    # add flags for enable sse instruction optimize for X86, enable avx header to compile avx code
357
    set(WIN_FLAGS "${WIN_FLAGS} -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
358 359 360 361 362 363 364 365 366
    # If your CPU is in the Cascade Lake series, you can enable the following flags for better performance:
    # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # for windows build
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated  -Wno-error=documentation  -Wno-error=unreachable-code-break")
367
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /wd4819")
368 369 370 371 372
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")

    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")
373 374

    #FIXME: fix halide JIT on windows
375
    message(STATUS "disable jit, halide and mlir on windows host build...")
376 377
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
M
Megvii Engine Team 已提交
378
    set(MGE_WITH_JIT_MLIR OFF)
379
    #FIXME: fix MegRay on windows
380
    message(STATUS "Disable distributed build on windows host build...")
381
    set(MGE_WITH_DISTRIBUTED OFF)
382 383
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
384

385 386 387 388
    # NONE windows DEBUG general flags
    if(MGE_BUILD_WITH_ASAN)
        set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
        set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
389
    else()
390 391 392 393 394 395 396 397 398 399 400 401
        set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
        set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    endif()

    # NONE windows opt general flags
    if (MGE_BUILD_WITH_ASAN)
        set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
    elseif(ANDROID)
        set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
    else()
        set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
    endif()
402 403 404 405 406 407 408 409
    #remove finite-math-only opt from Ofast, caused by clang have a different
    #runtime finite math logic, this issue do not find at g++, but as a unity
    #build flags, we force add -fno-finite-math-only when compiler support
    CHECK_CXX_COMPILER_FLAG("-fno-finite-math-only"  CXX_NO_FINITE_MATH_ONLY_SUPPORT)
    if(CXX_NO_FINITE_MATH_ONLY_SUPPORT)
        message(STATUS "force add -fno-finite-math-only for this compiler")
        set(OPTIMIZE_LEVEL "${OPTIMIZE_LEVEL} -fno-finite-math-only")
    endif()
410 411 412 413 414 415 416 417 418 419 420
    set(CMAKE_C_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
    #some gnu(gcc) compiler use -static -libasan have runtime issue
    #also, when target is big, clang ld will take a long long long
    #time when use -static-libsan, so we use dynamic asan by default
    #ANDROID asan.so depends on log, so broadcast log link_libraries
    #for megengine depends target, for example flatc target
    if (MGE_BUILD_WITH_ASAN AND ANDROID)
        link_libraries(log)
421 422 423
    endif()
endif()

424 425 426 427 428 429 430
# With CUDA enabled, load the cuDNN module. Statically linking cuDNN >= 8.0.0
# (static CUDA linking on, shared cuDNN off) switches MGE_WITH_LARGE_ARCHIVE on.
if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC
       AND NOT MGE_WITH_CUDNN_SHARED
       AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
        message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
        set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
endif()
431
# Pick the common linker flag: -mcmodel=large for large-archive builds,
# otherwise -fuse-ld=gold when the compiler accepts it and the platform is not
# Android, Apple or Windows. The result is appended to the shared, module and
# executable linker flags.
check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
elseif(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    # MGE_WITH_LARGE_ARCHIVE is already known to be false in this branch.
    message(STATUS "Using GNU gold linker.")
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
endif()
foreach(link_kind SHARED MODULE EXE)
    set(CMAKE_${link_kind}_LINKER_FLAGS "${CMAKE_${link_kind}_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
endforeach()
442

443 444 445 446 447
# Halide and MLIR are JIT backends: with JIT disabled, each one that was
# requested is switched off with a warning.
if(NOT MGE_WITH_JIT)
    foreach(jit_backend MGE_WITH_HALIDE MGE_WITH_JIT_MLIR)
        if(${jit_backend})
            message(WARNING "${jit_backend} is set to OFF with MGE_WITH_JIT disabled")
            set(${jit_backend} OFF)
        endif()
    endforeach()
endif()

# FIXME: the LLVM that Halide depends on currently conflicts with the LLVM that
# MLIR depends on; this should be resolved in a later version. Until then,
# Halide cannot be combined with the imperative runtime or with the MLIR JIT.
if(MGE_WITH_HALIDE)
    if(MGE_BUILD_IMPERATIVE_RT)
        message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
    endif()
    if(MGE_WITH_JIT_MLIR)
        message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
    endif()
endif()

464
# Enable the CUDA language (C++14, required).
if(MGE_WITH_CUDA)
    # FIXME: check_language(CUDA) fails in nvcc sbsa mode, see
    # https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    # When a toolchain file is in use, the host compiler is pinned up front.
    if(CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
        message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

    include(CheckLanguage)
    check_language(CUDA)
    if(NOT CMAKE_CUDA_COMPILER)
        if(CMAKE_TOOLCHAIN_FILE)
            # Remove this fallback once CMake handles nvcc sbsa.
            set(CMAKE_CUDA_COMPILER "nvcc")
            message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
        else()
            message(FATAL_ERROR "CUDA compiler not found in PATH")
        endif()
    endif()

    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

# Without CUDA: JIT (including MLIR) is only kept on x86_64 / i386, and Halide
# and TensorRT are always switched off.
if(NOT MGE_WITH_CUDA)
    if(NOT (MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386"))
        message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
        set(MGE_WITH_JIT OFF)
        set(MGE_WITH_JIT_MLIR OFF)
    endif()
    set(MGE_WITH_HALIDE OFF)
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
    set(MGE_WITH_TRT OFF)
endif()

find_package(PythonInterp 3 REQUIRED)
501 502 503 504 505 506 507 508 509 510 511 512 513
# PYTHON3_EXECUTABLE_WITHOUT_VERSION: interpreter name for targets that do not
# depend on the Python API.
# Purpose: let such targets reuse their build outputs when the python3 version
# is switched.
# find_program() is used only to probe whether "python3" can be found; when it
# can, the variable is set to the bare name "python3" rather than a resolved
# path. Otherwise it falls back to PYTHON_EXECUTABLE.
set(PYTHON3_IN_ENV "python3")
find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
if (PYTHON3_EXECUTABLE_WITHOUT_VERSION)
    message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
    # Replace the path found above with the bare command name.
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
else()
    message(STATUS "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,\
    target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
endif()
514 515 516

# Locate the thread library, preferring the -pthread flag.
# When the result is exactly "-pthread" and CUDA is enabled, the compile option
# on Threads::Threads is rewritten so CUDA sources receive it as
# -Xcompiler=-pthread while all other languages keep plain -pthread.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
if(MGE_WITH_CUDA AND "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread")
    set_property(
        TARGET Threads::Threads
        PROPERTY INTERFACE_COMPILE_OPTIONS
            "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
endif()

set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)
set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")
# Default the CUDA host compiler to the C++ compiler when none was chosen.
# CMake variable references use ${...}; the previous $(...) form is Make syntax
# and stored the literal text "$(CMAKE_CXX_COMPILER)" instead of the compiler path.
if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
endif()

# Translate the RTTI / exceptions switches into C++ compiler flags.
if(NOT MGE_ENABLE_RTTI)
    string(APPEND CMAKE_CXX_FLAGS " -fno-rtti")
endif()

if(NOT MGE_ENABLE_EXCEPTIONS)
    string(APPEND CMAKE_CXX_FLAGS " -fno-exceptions")
endif()

# gtest is only pulled in for test builds; gflags is always included.
if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
endif()
include(cmake/gflags.cmake)

# Building the imperative runtime raises the C++ standard to 17.
if(MGE_BUILD_IMPERATIVE_RT)
    set(CMAKE_CXX_STANDARD 17)
endif()

550 551
# Distributed support is switched off when neither CUDA nor ROCm is enabled.
# The option names are passed to if() directly: expanding them with ${...}
# first makes the condition a syntax error when a value is empty, and a value
# that is itself a variable name would be dereferenced a second time.
if(NOT MGE_WITH_CUDA AND NOT MGE_WITH_ROCM)
    message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
endif()

# Inference-only builds drop distributed support and the imperative_rt Python module.
if(MGE_INFERENCE_ONLY)
    message(STATUS "Disable distributed support for inference only build.")
    set(MGE_WITH_DISTRIBUTED OFF)
    message(STATUS "Disable imperative_rt python module for inference only build.")
    set(MGE_BUILD_IMPERATIVE_RT OFF)
endif()

562
# Optional third-party modules, each included only when a feature needs it.

# llvm-project: MLIR JIT or the imperative runtime.
if(MGE_BUILD_IMPERATIVE_RT OR MGE_WITH_JIT_MLIR)
    include(cmake/llvm-project.cmake)
endif()

# protobuf + zmq: distributed support.
if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
endif()

# flatbuffers: FlatBuffers serialization.
if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
endif()

if(MGE_WITH_CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
        get_filename_component(_NAME ${path} NAME)
        if(NOT ${_NAME} STREQUAL "stubs")
            list(APPEND CUDA_LINK_DIRECTORIES ${path})
        endif()
    endforeach()
    link_directories(${CUDA_LINK_DIRECTORIES})

    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
589 590
    if(MSVC OR WIN32)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
591
        set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14")
592
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
593 594 595 596 597 598
            set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
        endif()
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
    else()
        set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
    endif()
599 600 601 602 603

    if(NOT MGE_ENABLE_RTTI)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
    endif()
    if(NOT MGE_ENABLE_EXCEPTIONS)
604
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
605 606
    endif()
    if(NOT MGE_CUDA_GENCODE)
607
        # Choose the default -gencode set for the supported CUDA host architectures.
        # Operands are quoted so an empty variable yields a false comparison
        # instead of a malformed if() expression.
        if("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386" OR "${MGE_ARCH}" STREQUAL "aarch64")
            set(MEGDNN_THREADS_512 0)
            # Each entry below is the `code=` value of one -gencode option; the
            # matching `arch=compute_NN` is derived from its numeric suffix.
            if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0" AND (NOT MGE_WITH_CUDNN_SHARED))
                message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
                set(_mge_gencode_codes sm_61 sm_70 sm_75)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.1.0")
                set(_mge_gencode_codes sm_61 sm_70 sm_75 sm_80 sm_86 compute_86)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.0.0")
                set(_mge_gencode_codes sm_61 sm_70 sm_75 sm_80 compute_80)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
                set(_mge_gencode_codes sm_52 sm_60 sm_61 sm_70 sm_75 compute_75)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0.0")
                set(_mge_gencode_codes sm_52 sm_60 sm_61 sm_70 compute_70)
            else()
                set(_mge_gencode_codes sm_35 sm_52 sm_60 sm_61 compute_61)
            endif()
            foreach(_mge_gencode_code IN LISTS _mge_gencode_codes)
                # "sm_75" -> "75", "compute_86" -> "86"
                string(REGEX REPLACE "^(sm|compute)_" "" _mge_gencode_cc "${_mge_gencode_code}")
                string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_gencode_cc},code=${_mge_gencode_code}")
            endforeach()
            unset(_mge_gencode_codes)
            unset(_mge_gencode_code)
            unset(_mge_gencode_cc)
        else()
            message(FATAL_ERROR "Unsupported CUDA host arch.")
        endif()
    else()
651
        set(MEGDNN_THREADS_512 1)
652 653 654 655 656 657 658 659
    endif()

    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")
    if(MGE_WITH_TRT)
        include(cmake/tensorrt.cmake)
    endif()
    if(MGE_CUDA_USE_STATIC)
        if(MGE_WITH_TRT)
660
            if(MSVC OR WIN32)
661
                message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
662
                list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
663
            else()                
664 665 666 667 668
                list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
            endif()
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
669 670 671 672 673
            endif()
        endif()
        
        if("${CUDNN_VERSION}" STREQUAL "7.5.0")
            if(MSVC OR WIN32)
674
                message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
675
                list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
676
            else()
677 678
                message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
                list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
679
            endif()
680
        else()
681 682 683 684 685 686
            if(MSVC OR WIN32)
                message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
                list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
            else()
                list(APPEND MGE_CUDA_LIBS libcudnn)
            endif()
687
        endif()
688
        if(MSVC OR WIN32)
689 690 691 692 693 694
            list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
        else()
            list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
        endif()
        if(MSVC OR WIN32)
            list(APPEND MGE_CUDA_LIBS cublas.lib)
695
        else()
696 697 698 699 700
            if(MGE_WITH_CUBLAS_SHARED)
                list(APPEND MGE_CUDA_LIBS cublas)
            else()
                list(APPEND MGE_CUDA_LIBS cublas_static)
            endif()
701
        endif()
702
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
703 704 705
            if(MSVC OR WIN32)
                list(APPEND MGE_CUDA_LIBS cublasLt.lib)
            else()
706 707 708
                if(MGE_WITH_CUBLAS_SHARED)
                    list(APPEND MGE_CUDA_LIBS cublasLt)
                else()
709
                    list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
710
                endif()
711
            endif()
712
        endif()
713
        if((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") AND NOT MSVC AND NOT WIN32)
714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741
            # mark all symbols from liblapack_static.a as weak to avoid
            # duplicated definition with mkl
            find_library(
                LAPACK_STATIC_PATH lapack_static
                HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
            if(NOT LAPACK_STATIC_PATH)
                message(FATAL_ERROR "liblapack_static.a not found")
            endif()
            set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

            # add a target that run objcopy
            add_custom_command(
                OUTPUT ${LAPACK_STATIC_COPY_PATH}
                COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
                VERBATIM)
            add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

            # create a library named "lapack_static_weak"
            add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
            add_dependencies(lapack_static_weak lapack_static_weak_target)
            set_target_properties(
                lapack_static_weak PROPERTIES
                IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
            list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
        endif()
    else()
        if(MGE_WITH_TRT)
            list(APPEND MGE_CUDA_LIBS libnvinfer)
742 743 744 745
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin)
            endif()
746 747 748 749 750
        endif()
        list(APPEND MGE_CUDA_LIBS libcudnn)
        # Mirror the static-link branch above: only cublasLt is gated on
        # CUDA >= 10.1, while cusolver/cublas/curand are linked for every toolkit
        # version. Previously older toolkits got none of them in shared mode.
        if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.1.0")
            list(APPEND MGE_CUDA_LIBS cublasLt)
        endif()
        list(APPEND MGE_CUDA_LIBS cusolver cublas curand)
751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772
        list(APPEND MGE_CUDA_LIBS cudart)
    endif()

    # Link the real CUDA driver / NVRTC libraries unless the corresponding stub
    # is requested. On Windows the import libraries carry a ".lib" suffix.
    if(MSVC OR WIN32)
        set(_mge_cuda_lib_suffix ".lib")
    else()
        set(_mge_cuda_lib_suffix "")
    endif()

    if(NOT MGE_WITH_CUDA_STUB)
        list(APPEND MGE_CUDA_LIBS cuda${_mge_cuda_lib_suffix})
    endif()

    if(NOT MGE_WITH_NVRTC_STUB)
        list(APPEND MGE_CUDA_LIBS nvrtc${_mge_cuda_lib_suffix})
    endif()

    if(MGE_WITH_ANY_CUDA_STUB)
        add_subdirectory(dnn/cuda-stub)
        list(APPEND MGE_CUDA_LIBS cuda-stub)
773 774
    endif()

775
    if(MSVC OR WIN32)
776
        list(APPEND MGE_CUDA_LIBS nvrtc.lib)
777
    else()
778
        list(APPEND MGE_CUDA_LIBS nvToolsExt)
779
    endif()
780 781
    
    # The flags are appended as text to the list's string form, so they become
    # part of the last list element rather than separate list items.
    string(APPEND MGE_CUDA_LIBS " -lrt")
    if(UNIX)
        string(APPEND MGE_CUDA_LIBS " -ldl")
    endif()

786 787
endif()

788
###########please add_subdirectory from here###############
789
# Optional cpuinfo runtime check, offered on x86/ARM targets except on Apple
# hosts and Windows XP SP2 deployments.
# MGE_ARCH is compared by name (as in the "MGE_ARCH related flags" section) so
# an empty value cannot produce a malformed if() expression.
if((MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386" OR MGE_ARCH STREQUAL "armv7" OR MGE_ARCH STREQUAL "aarch64") AND NOT APPLE AND NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
        message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
        add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
        include(cmake/cpuinfo.cmake)
    endif()
endif()

798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
# Cambricon backend: locate the Neuware SDK through $ENV{NEUWARE_HOME}, map the
# requested MGE_MLU_ARCH to a __BANG_ARCH__ value and assemble the cncc flags.
if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)

    # Quoted so an unset MGE_MLU_ARCH reaches the FATAL_ERROR below instead of
    # producing a malformed if() expression.
    if("${MGE_MLU_ARCH}" STREQUAL "MLU100")
        set(BANG_ARCH "100")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU1h8")
        set(BANG_ARCH "110")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU220")
        set(BANG_ARCH "220")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU270")
        set(BANG_ARCH "270")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU290")
        set(BANG_ARCH "290")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU200")
        set(BANG_ARCH "200")
    else()
        message (FATAL_ERROR "Unsupported MLU arch.")
    endif()

    string(APPEND BANG_CNCC_FLAGS " --bang-mlu-arch=${MGE_MLU_ARCH}")
    string(APPEND BANG_CNCC_FLAGS " -std=c++11 -Werror")
    string(APPEND CMAKE_CXX_FLAGS " -D__BANG_ARCH__=${BANG_ARCH}")

    # CMAKE_BUILD_TYPE may be empty (e.g. multi-config generators); quoting keeps
    # the comparison well-formed, and no optimisation flags are added then.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
        string(APPEND BANG_CNCC_FLAGS " -g -O0")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
        string(APPEND BANG_CNCC_FLAGS " -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
        string(APPEND BANG_CNCC_FLAGS " -g -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "MinSizeRel")
        string(APPEND BANG_CNCC_FLAGS " -Os")
    endif()

    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
    set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
endif()

836 837 838 839
if (MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
endif ()

840
if(MGE_WITH_ATLAS)
841 842
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
843 844 845
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
846

847 848 849 850
# Use ccache as compiler launcher when it is available on the host.
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_BIN}")
    # No CMake version guard is needed: cmake_minimum_required() at the top of
    # this file already demands 3.15.2, which is newer than the 3.10.0 the old
    # check tested for.
    if(MGE_WITH_CUDA)
        message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
        set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_BIN}")
    endif()
endif()

# Select the BLAS backend for x86 builds.
# Variables are compared by name (same form as the "MGE_ARCH related flags"
# section), so an empty MGE_BLAS reaches the FATAL_ERROR below instead of
# producing a malformed if() expression.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
        include(cmake/mkl.cmake)
        set(MGE_BLAS_LIBS libmkl)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
        include(cmake/OpenBLAS.cmake)
        set(MGE_BLAS_LIBS libopenblas)
    else()
        message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
endif()

# MKLDNN build
# MKL-DNN is only enabled for x86_64. MGE_ARCH is compared by name so an empty
# value cannot break the if() expression.
if(MGE_WITH_MKLDNN AND MGE_ARCH STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
endif()

874 875
# RTTI
# MEGDNN_ENABLE_MANGLING is always the opposite of MEGDNN_ENABLE_RTTI.
if(NOT MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_RTTI 0)
    set(MEGDNN_ENABLE_MANGLING 1)
else()
    set(MEGDNN_ENABLE_RTTI 1)
    set(MEGDNN_ENABLE_MANGLING 0)
endif()
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
883

884 885
# Logging
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
886
set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
887 888 889
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

# Exception
890
if(NOT MGE_ENABLE_EXCEPTIONS)
891 892 893
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
endif()
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
894
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
895 896 897 898 899 900

# JIT
if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
endif()
901

902
# Thread
903 904 905 906 907 908
# On Apple hosts the thread-library variables are set by hand and the JIT
# back-ends (JIT, Halide, MLIR) are switched off.
if(APPLE)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")

    message(STATUS "disable jit, halide and mlir on macos host build...")
    set(MGE_WITH_JIT_MLIR OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_HALIDE OFF)
endif()

M
Megvii Engine Team 已提交
915 916 917
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
918 919 920 921 922
# for consumer override MGB_C_OPR_INIT_FUNC symbol interface
if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
    add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
    message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
endif()
M
Megvii Engine Team 已提交
923

924 925
set(MGB_CUSTOM_OP ${MGE_WITH_CUSTOM_OP})

926 927 928 929 930 931 932
if(MSVC OR WIN32)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
endif()

933
if(CMAKE_THREAD_LIBS_INIT OR CMAKE_USE_WIN32_THREADS_INIT OR ANDROID)
934 935
    set(MGB_HAVE_THREAD 1)
endif()
936

937 938 939 940 941 942 943 944
# Windows XP SP2 deployments are built without thread and JSON support.
if((MSVC OR WIN32) AND MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    message(STATUS "disable MGB_HAVE_THREAD/MGB_ENABLE_JSON when DEPLOY ON XP SP2")
    set(MGB_ENABLE_JSON 0)
    set(MGB_HAVE_THREAD 0)
endif()

945 946 947 948 949
if(MGE_WITH_TEST)
    # use intra-op multi threads
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
endif()

950 951
# CUDA
set(MGB_CUDA ${MGE_WITH_CUDA})
952 953
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})

954 955 956 957
#ROCM
set(MGB_ROCM ${MGE_WITH_ROCM})
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})

958 959 960 961
# CAMBRICON
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})

962 963 964
# ENFLAME
set(MGB_ENFLAME ${MGE_WITH_ENFLAME})
set(MEGDNN_WITH_ENFLAME ${MGE_WITH_ENFLAME})
965

966 967 968 969 970 971 972
# Debug info
# Assertion locations and debug utilities are enabled for Debug and
# RelWithDebInfo builds only.
# CMAKE_BUILD_TYPE is quoted: when it is empty (e.g. multi-config generators)
# the unquoted form expands to `if( STREQUAL ...)`, which is a configure error.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
endif()

975 976 977 978 979 980 981
if(MSVC OR WIN32)
    if(${MGE_ARCH} STREQUAL "i386")
        set(MGB_ENABLE_DEBUG_UTIL 0)
        message(STATUS "disable MGB_ENABLE_DEBUG_UTIL at Windows i386 build")
    endif()
endif()

982 983 984 985
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

# Inference only
986
# Gradient support is kept whenever tests are built or the build is not
# inference-only; otherwise the slim serving configuration is selected.
if(MGE_WITH_TEST OR NOT MGE_INFERENCE_ONLY)
    set(MGB_BUILD_SLIM_SERVING 0)
    set(MGB_ENABLE_GRAD 1)
else()
    set(MGB_BUILD_SLIM_SERVING 1)
    set(MGB_ENABLE_GRAD 0)
endif()

994 995 996
# Distributed communication
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})

997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
# MGE_ARCH related flags
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
        set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
        set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()
endif()

# Enable Naive
if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
1009
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
endif()

# x86 targets: define the MEGDNN_X86* macros and, for non-MSVC compilers, add
# the word-size and SSE flags to CMAKE_CXX_FLAGS.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)
    if(MGE_ARCH STREQUAL "x86_64")
        set(MEGDNN_X86_64 1)
        set(MEGDNN_64_BIT 1)
        set(_mge_x86_word_flag "-m64")
    else()
        set(MEGDNN_X86_32 1)
        set(_mge_x86_word_flag "-m32")
    endif()
    if(NOT MSVC)
        string(APPEND CMAKE_CXX_FLAGS " ${_mge_x86_word_flag}")
        string(APPEND CMAKE_CXX_FLAGS " -msse4.2 -mfpmath=sse")
    endif()
endif()
1030
# dotprod is not enabled by default on APPLE; cpuinfo has some problems on APPLE
1031
# Probe for armv8.2-a dot-product support when building with Clang on a
# non-Apple host. The compiler id is quoted so an empty value, or a variable
# that happens to be named like the id, cannot change the comparison.
if(NOT APPLE AND "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    CHECK_CXX_COMPILER_FLAG("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
    if(CXX_COMPILER_SUPPORT_DOT)
        message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
        set(MGB_ENABLE_DOT 1)
    endif()
endif()
1038

1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052
# armv7 targets: select -march and add the NEON-related compiler flags.
if(MGE_ARCH STREQUAL "armv7")
    set(MEGDNN_ARMV7 1)
    set(MARCH "-march=armv7-a")
    if(ANDROID)
        string(APPEND CMAKE_CXX_FLAGS " -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables NEON auto-vectorization: NEON is not
    # fully IEEE 754 compatible, so GCC does not auto-vectorize with it by default.
    string(APPEND CMAKE_CXX_FLAGS " -funsafe-math-optimizations")
endif()

# aarch64 targets: define the architecture macros and pick -march, optionally
# enabling the armv8.2 fp16 extension.
if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
    set(MGB_AARCH64 1)
    if(MGE_ARMV8_2_FEATURE_FP16)
        message(STATUS "Enable fp16 feature support in armv8.2")
        # Test the variable by name: `${MGE_DISABLE_FLOAT16}` expands to nothing
        # when the variable is unset, leaving a bare `NOT` in the condition.
        if(NOT MGE_DISABLE_FLOAT16)
            set(MEGDNN_ENABLE_FP16_NEON 1)
        endif()
        set(MARCH "-march=armv8.2-a+fp16")
    endif()

    if(MGE_WITH_CUDA)
        message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
        when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
        for save link time(14min->1min), you may open below flags if not deploy on\
        arm a53 platform, or just build release type!")
        #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
endif()
1070

1071 1072 1073 1074 1075
if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_RISCV64 1)
    set(MEGDNN_64_BIT 1)
endif()

1076 1077
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")

1078 1079
set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld CACHE INTERNAL "Path to linker version script")

1080
# Write out megbrain_build_config.h
1081 1082
# It defines macros needed by both megbrain and dnn
configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
1083
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
1084 1085 1086

add_subdirectory(dnn)

1087 1088 1089 1090 1091 1092
list(APPEND MGB_OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)

set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)
add_custom_command(
1093 1094
    OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${MGB_OPR_PARAM_DEFS_SCRIPT} ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
1095 1096 1097 1098 1099 1100 1101 1102
    DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM
)

list(APPEND MGB_OPR_PARAM_DEFS_OUTS
    ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
)

1103
install(FILES ${MGB_OPR_PARAM_DEFS_OUTS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
1104 1105 1106 1107

list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
1108 1109 1110 1111 1112
target_include_directories(mgb_opr_param_defs
    INTERFACE
        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
        $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>
)
1113
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
1114
install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
1115

1116
# Generate param_defs.td (TableGen description of the operator parameters) for
# the MLIR JIT and the imperative runtime.
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
    # generate param_defs.td
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)

    # The generator input is dnn's opr_param_defs.py with mgb_opr_param_defs.py
    # appended to it, assembled at configure time in the build tree.
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
    # CONTENTS must be quoted: unquoted, the text is split into arguments at
    # every ';' and written back without them, corrupting the Python source.
    file(APPEND ${OPR_PARAM_DEFS_SRCS} "${CONTENTS}")
    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})
    add_custom_command(
        OUTPUT ${OPR_PARAM_DEFS_OUT}
        COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
        DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py ${OPR_PARAM_DEFS_SCRIPT}
        VERBATIM
    )

    # mlir tblgen sources
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
    set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
    add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
endif()

1141
if(MGE_WITH_DISTRIBUTED)
1142
    set(MEGRAY_WITH_NCCL ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
1143
    set(MEGRAY_WITH_SHM ${MGE_WITH_CUDA} CACHE BOOL "Override MegRay option" FORCE)
1144
    set(MEGRAY_WITH_RCCL ${MGE_WITH_ROCM} CACHE BOOL "Override MegRay option" FORCE)
1145
    set(MEGRAY_CUDA_GENCODE ${MGE_CUDA_GENCODE} CACHE STRING "Overwrite MegRay CUDA -gencode specifications" FORCE)
1146 1147 1148 1149
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
endif()

add_subdirectory(src)
1150

1151 1152
if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
1153
    message(STATUS "Enable imperative python wrapper runtime")
1154 1155 1156
endif()

if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
1157
    add_subdirectory(test)
1158 1159
endif()

1160
if(TARGET _imperative_rt)
1161 1162 1163
    add_custom_target(
        develop
        COMMAND ${CMAKE_COMMAND} -E create_symlink
1164 1165
          ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
          ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
1166 1167 1168
        COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
          ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
1169
        DEPENDS _imperative_rt
1170 1171
        VERBATIM
    )
1172 1173
endif()

1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186
# Configure and install pkg-config.
# Note that unlike the Config.cmake modules, this is not relocatable (and not
# really portable) because we have two dependencies without pkg-config
# descriptions: FlatBuffers and MKL-DNN
if (MGE_USE_SYSTEM_MKLDNN)
    set (MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
endif()
if (MGE_USE_SYSTEM_OPENBLAS)
    set (MGE_PKGCONFIG_LIBS_PRIVATE "${MGE_PKGCONFIG_LIBS_PRIVATE} -lopenblas")
endif()
configure_file(cmake/megengine.pc.in
               ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
               @ONLY)
1187
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
1188 1189 1190 1191 1192 1193
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
if (NOT MGE_WITH_DISTRIBUTED)
    include(CMakePackageConfigHelpers)
    set (MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)
1194
    configure_package_config_file(cmake/MegEngineConfig.cmake.in
1195 1196 1197 1198 1199
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
        INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR}
    )
    write_basic_package_version_file(
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
1200
        VERSION ${MGB_VER_STRING}
1201 1202 1203 1204 1205 1206 1207
        COMPATIBILITY SameMajorVersion)

    install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
                ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
            DESTINATION ${MGE_INSTALL_CMAKEDIR})
endif()
1208

1209 1210 1211 1212
if(MGE_WITH_JIT_MLIR)
    add_subdirectory(tools/mlir/mgb-opt)
    add_subdirectory(tools/mlir/mgb-file-check)
endif()
1213 1214 1215 1216 1217

# Final reminder for static cuDNN >= 8 builds.
# The suggested MGE_CUDA_GENCODE now has one `-gencode` per architecture (the
# previous hint omitted the second one), and the warning is emitted once
# instead of three identical times.
if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0" AND (NOT MGE_WITH_CUDNN_SHARED))
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" ")
endif()
1219 1220 1221 1222

if(MGE_WITH_LITE)
    add_subdirectory(lite)
endif()