CMakeLists.txt 50.4 KB
Newer Older
1
cmake_minimum_required(VERSION 3.15.2)
2 3 4 5
# Report the active generator and warn when it is not Ninja, the generator
# this project recommends.
message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}")
# The expansion is quoted so the generator name is compared as a literal
# string and is never re-interpreted by if() as a variable name.
if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
    message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
endif()
6 7 8

include (cmake/FetchMegBrainVersion.cmake)
project(MegEngine LANGUAGES C CXX VERSION ${MGB_VER_STRING})
9 10 11 12 13 14

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
15
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
16

17
if(NOT MSVC AND NOT APPLE AND NOT WIN32)
18 19 20 21 22
    # Override the rules CMake uses to create, append to and finish C++ static
    # archives. Each rule passes the `D` modifier (`-D` for ranlib), which GNU
    # binutils documents as deterministic mode (zeroed timestamps and ids).
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq  <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()

23
include(GNUInstallDirs)
24
include(CheckCXXCompilerFlag)
25 26
include(CheckIPOSupported)

27 28 29 30 31
CHECK_CXX_COMPILER_FLAG(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

# Target architecture selector. AUTO (the default) is resolved further down in
# this file, either from the cross-compiling toolchain settings or from
# CMAKE_SYSTEM_PROCESSOR.
set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
# Values offered by cmake-gui / ccmake. riscv64 is listed because the
# cross-compiling logic in this file can select it.
set_property(CACHE MGE_ARCH PROPERTY STRINGS AUTO
    x86_64 i386
    armv7 aarch64
    riscv64
    naive fallback
)
35
set (MGE_EXPORT_TARGETS MegEngine-targets)
36

37 38 39 40 41 42 43 44 45 46 47 48
# Split the colon-separated LD_LIBRARY_PATH and LIBRARY_PATH environment
# variables into CMake lists. Each result is an empty string when the
# environment variable is unset or empty.
# The $ENV{} expansions are quoted so the whole value reaches string(REPLACE)
# as a single input argument.
set(ALTER_LD_LIBRARY_PATHS "")
if(NOT "$ENV{LD_LIBRARY_PATH}" STREQUAL "")
    string(REPLACE ":" ";" ALTER_LD_LIBRARY_PATHS "$ENV{LD_LIBRARY_PATH}")
endif()

set(ALTER_LIBRARY_PATHS "")
if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
    string(REPLACE ":" ";" ALTER_LIBRARY_PATHS "$ENV{LIBRARY_PATH}")
endif()

49
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
M
Megvii Engine Team 已提交
50
option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
51
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)
52 53
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF)
54
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
55 56 57
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
58
option(MGE_WITH_LITE "Build MGE with lite" ON)
59
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
60 61
option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
62 63
option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)
64 65
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
66
option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
67
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
68
option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
69 70 71 72 73 74
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
75
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
76 77 78
option(MGE_BUILD_SDK "Build load_and_run" ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
79
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
80
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
81
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
82
option(MGE_WITH_CUSTOM_OP "Build with Custom op" OFF)
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
if(MSVC OR WIN32)
    option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)
    # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is a special mode for Windows XP SP2 (32-bit).
    # Internal behavior:
    # 1: MGB_HAVE_THREAD is forced to 0, so only single-threaded execution is supported.
    # 2: Some features are disabled, e.g. MGB_ENABLE_JSON and the var sanity check. This
    #    rarely matters; if you need these features for debugging, run the same model in a
    #    non-XP-SP2 environment, e.g. Win7 or XP SP3 (built without MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2).
    # 3: Only MegEngine (load_and_run) and the MegEngineLite API are supported on XP SP2.
    #    Some debug utilities, e.g. megbrain_test/megdnn_test, cannot run, mostly because of the gtest source code.
    # Notes for SDK callers:
    # 1: Because mutexes are removed, when you use native MSVC APIs such as CreateThread to start
    #    several MegEngine instances in the same process, call the MegEngine API (init/run) as
    #    serially as possible, and do not use std::thread, std::mutex or std::this_thread_id on the SDK caller side.
    # To check whether a dll/exe can be deployed on Windows XP SP2,
    #    see scripts/misc/check_windows_xp_sp2_deploy.py
    option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 "Enable deploy inference on Windows xp sp2" OFF)

    # PE file linked by LLVM lld can not run at Windows XP env, so we force use link.exe
    # which always locate in Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
103
    set(CMAKE_LINKER "link.exe")
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
    # Windows XP deployments are limited to 32-bit, inference-only builds
    # without CUDA; every other Windows build targets _WIN32_WINNT 0x0601.
    if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
        # Quoted so MGE_ARCH is always compared as a literal string.
        if(NOT "${MGE_ARCH}" STREQUAL "i386")
            message(FATAL_ERROR "only support 32bit when build for Windows xp")
        endif()

        if(NOT MGE_INFERENCE_ONLY)
            message(FATAL_ERROR "only support inference when build for Windows xp")
        endif()

        if(MGE_WITH_CUDA)
            message(FATAL_ERROR "do not support CUDA when build for Windows xp")
        endif()

        # Windows XP SP3 has a thread issue; work around it by disabling
        # thread-safe static initialization.
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
        # Mark the produced binaries as Windows XP (5.01) console programs.
        add_link_options("/SUBSYSTEM:CONSOLE,5.01")
        # Some old libraries (for example MKL for XP) use legacy stdio, so
        # legacy_stdio_definitions is always linked.
        add_link_options("/DEFAULTLIB:legacy_stdio_definitions.lib")

        if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
            set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
        endif()
    else()
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0601")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0601")
    endif()
endif()
134

135
if(MSVC OR WIN32)
136 137
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
138 139
endif()

140 141 142 143 144
# MGE_WITH_ANY_CUDA_STUB is ON when at least one of the CUDA / NVRTC stub
# options is enabled, and OFF otherwise.
set(MGE_WITH_ANY_CUDA_STUB OFF)
if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
endif()
145

146
# When MGE_BIN_REDUCE is given, its value is force-included (-include) into
# every C and C++ translation unit. With MGE_WITH_MINIMUM_SIZE also set, RTTI,
# logging and exceptions are switched off and the build becomes inference-only.
# The expansion is quoted: unquoted, an unset MGE_BIN_REDUCE degenerates to
# `if(NOT STREQUAL "")`, which no longer tests the variable at all.
if(NOT "${MGE_BIN_REDUCE}" STREQUAL "")
    message(STATUS "build with BIN REDUCE")
    if(MGE_WITH_MINIMUM_SIZE)
        set(MGE_ENABLE_RTTI OFF)
        set(MGE_ENABLE_LOGGING OFF)
        set(MGE_ENABLE_EXCEPTIONS OFF)
        set(MGE_INFERENCE_ONLY ON)
    endif()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${MGE_BIN_REDUCE}")
endif()

158 159 160 161 162 163 164 165 166
# Place functions and data in their own sections and ask the linker to discard
# unreferenced ones, when the compiler accepts the flags.
# The probe is skipped on APPLE, where it is reported to crash cmake.
if(NOT APPLE)
    CHECK_CXX_COMPILER_FLAG("-ffunction-sections -fdata-sections  -Wl,--gc-sections"  CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
        foreach(_mge_compile_flags IN ITEMS CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
            string(APPEND ${_mge_compile_flags} " -ffunction-sections -fdata-sections")
        endforeach()
        foreach(_mge_link_flags IN ITEMS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
            string(APPEND ${_mge_link_flags} " -Wl,--gc-sections")
        endforeach()
    endif()
endif()

# Probe for interprocedural-optimization support. When it is available, add
# -flto=full to the linker flags of executables and shared libraries.
check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
if(NOT IS_LTO_SUPPORT)
    message(STATUS "lto is not supported in this compiler")
else()
    message(STATUS "lto is supported in this compiler")
    string(APPEND CMAKE_EXE_LINKER_FLAGS " -flto=full")
    string(APPEND CMAKE_SHARED_LINKER_FLAGS " -flto=full")
endif()

if(MGE_WITH_MIDOUT_PROFILE)
179
    message(STATUS "build with MIDOUT PROFILE")
180 181 182 183
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMIDOUT_PROFILING")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMIDOUT_PROFILING")
endif()

184 185
if (APPLE)
    set (BUILD_SHARED_LIBS OFF)
186
    message(STATUS "build static for xcode framework require")
187 188
endif()

189 190 191 192 193 194 195
if (MGE_USE_SYSTEM_LIB)
    set (MGE_CUDA_USE_STATIC OFF)
endif()

if (MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
endif()
196

197
# Cross compiling (a toolchain file is in use): point flatc at the host build
# and derive MGE_ARCH from the toolchain-specific variables.
if(CMAKE_TOOLCHAIN_FILE)
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    # Every *_ARCH expansion below is quoted so that an unset variable falls
    # through to the FATAL_ERROR branch instead of producing a malformed if().
    if(ANDROID_TOOLCHAIN_ROOT)
        # ANDROID_ARCH_NAME, when provided, takes precedence over ANDROID_ARCH.
        if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
            set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
        endif()
        if("${ANDROID_ARCH}" STREQUAL "arm")
            set(MGE_ARCH "armv7")
        elseif("${ANDROID_ARCH}" STREQUAL "arm64")
            set(MGE_ARCH "aarch64")
        else()
            message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
        endif()
    elseif(IOS_TOOLCHAIN_ROOT)
        # armv7 / armv7k / armv7s map to armv7; arm64 / arm64e map to aarch64.
        if("${IOS_ARCH}" MATCHES "^(armv7|armv7k|armv7s)$")
            set(MGE_ARCH "armv7")
        elseif("${IOS_ARCH}" MATCHES "^(arm64|arm64e)$")
            set(MGE_ARCH "aarch64")
        else()
            message(FATAL_ERROR "Unsupported IOS_ARCH.")
        endif()
    elseif(RISCV_TOOLCHAIN_ROOT)
        set(MGE_ARCH "riscv64")
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
        set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
        message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
endif()
235 236

# Resolve MGE_ARCH=AUTO from CMAKE_SYSTEM_PROCESSOR.
# Expansions are quoted so an empty CMAKE_SYSTEM_PROCESSOR reaches the
# FATAL_ERROR branch instead of producing a malformed if() expression.
if("${MGE_ARCH}" STREQUAL "AUTO")
    if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(x86_64|AMD64)$")
        set(MGE_ARCH "x86_64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(i386|i686)$")
        set(MGE_ARCH "i386")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(aarch64|arm64)$")
        set(MGE_ARCH "aarch64")
    elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
        # Any remaining processor name starting with "arm" is treated as armv7.
        set(MGE_ARCH "armv7")
    else()
        message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
    endif()
endif()

250 251 252 253 254 255 256 257
# Default to RelWithDebInfo when neither a multi-config generator nor an
# explicit CMAKE_BUILD_TYPE is in use.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()

# Release builds for non-x86_64 targets drop RTTI unless tests are enabled.
# CMAKE_BUILD_TYPE is quoted because it stays empty under multi-config
# generators, which would otherwise make this if() malformed.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release" AND NOT MGE_WITH_TEST AND NOT "${MGE_ARCH}" STREQUAL "x86_64")
    set(MGE_ENABLE_RTTI OFF)
    message(STATUS "disable MGE_ENABLE_RTTI when Release/NON-x86_64 mode!!")
endif()

260
if(MSVC OR WIN32)
261 262
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)
263
    set(CMAKE_OBJECT_PATH_MAX 300)
264 265 266 267 268 269
    # Link the static MSVC runtime: the debug flavour for Debug builds, the
    # release flavour otherwise. CMAKE_BUILD_TYPE is quoted so an empty value
    # selects the release runtime instead of breaking the if() expression.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()

270
    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
271 272
    message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    if (NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang" AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang-cl")
273 274
        message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()
275 276 277
    # on windows need append VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows
    # and VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows to PATH env
    if (MGE_BUILD_WITH_ASAN)
278 279 280 281 282
        message(WARNING "please do (set)export ASAN_OPTIONS=windows_hook_rtl_allocators=true when run test after build finish, caused by we link asan dll!!")
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
            message(WARNING "Windows AddressSanitizer doesn't support linking with debug runtime libraries yet, which means do not support CMAKE_BUILD_TYPE=Debug")
            message(FATAL_ERROR "Please build with RelWithDebInfo or Release by : EXTRA_CMAKE_ARGS=\"-DMGE_BUILD_WITH_ASAN=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ...\"")
        endif()
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310
        # Locate the clang AddressSanitizer import libraries under the Visual
        # Studio installation named by the VS_PATH environment variable, then
        # link them into every target defined after this point.
        if("$ENV{VS_PATH}" STREQUAL "")
            message(FATAL_ERROR "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
        endif()
        # Library names and search sub-directory depend on the target arch.
        # Both thunk names carry the .lib suffix so the branches are consistent.
        if("${MGE_ARCH}" STREQUAL "x86_64")
            set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
            set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-x86_64.lib")
            set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
        elseif("${MGE_ARCH}" STREQUAL "i386")
            set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
            set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
            set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
        else()
            message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
        endif()
        find_path(ASAN_DLL_PATH
            NAMES ${WINDOWS_ASAN_DLL_NAME}
            HINTS $ENV{VS_PATH}
            PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
            DOC "Windows asan library path" )
        # find_path() stores a value ending in -NOTFOUND on failure, which
        # if() evaluates as false.
        if(NOT ASAN_DLL_PATH)
            message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
        endif()

        message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")
        link_directories(${ASAN_DLL_PATH})
        link_libraries(${WINDOWS_ASAN_DLL_NAME})
        link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})
        set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
311 312 313 314 315 316
        # windows Llvm asan do not take effect when /O2
        # RELWITHDEBINFO default value is /O2, so override it
        set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_C_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_CXX_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
317 318 319
    else()
        set(WIN_FLAGS "/O2")
    endif()
320
    # add flags for enable sse instruction optimize for X86, enable avx header to compile avx code
321
    set(WIN_FLAGS "${WIN_FLAGS} -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
322 323 324 325 326 327 328 329 330
    # If your CPU belongs to the Cascade Lake series, you can enable the following for performance:
    # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # for windows build
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated  -Wno-error=documentation  -Wno-error=unreachable-code-break")
331
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /wd4819")
332 333 334 335 336
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")

    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")
337 338

    #FIXME: fix halide JIT on windows
339
    message(STATUS "disable jit, halide and mlir on windows host build...")
340 341
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
M
Megvii Engine Team 已提交
342
    set(MGE_WITH_JIT_MLIR OFF)
343
    #FIXME: fix MegRay on windows
344
    message(STATUS "Disable distributed build on windows host build...")
345
    set(MGE_WITH_DISTRIBUTED OFF)
346 347
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
348

349 350 351 352
    # NONE windows DEBUG general flags
    if(MGE_BUILD_WITH_ASAN)
        set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
        set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
353
    else()
354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
        set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
        set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    endif()

    # NONE windows opt general flags
    # Choose one flag string for optimised builds: ASAN builds use -O0 with
    # the sanitizer, Android uses -Ofast, everything else -O3.
    set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
    if(MGE_BUILD_WITH_ASAN)
        set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
    elseif(ANDROID)
        set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
    endif()
    # The same flags apply to C and C++ in both Release and RelWithDebInfo.
    foreach(_mge_lang IN ITEMS C CXX)
        foreach(_mge_config IN ITEMS RELEASE RELWITHDEBINFO)
            set(CMAKE_${_mge_lang}_FLAGS_${_mge_config} "${OPTIMIZE_LEVEL}")
        endforeach()
    endforeach()
    #some gnu(gcc) compiler use -static -libasan have runtime issue
    #also, when target is big, clang ld will take a long long long
    #time when use -static-libsan, so we use dynamic asan by default
    #ANDROID asan.so depends on log, so broadcast log link_libraries
    #for megengine depends target, for example flatc target
    if (MGE_BUILD_WITH_ASAN AND ANDROID)
        link_libraries(log)
377 378 379
    endif()
endif()

380 381 382 383 384 385 386
# With CUDA enabled, load the cuDNN detection module. Statically linking
# cuDNN 8.0.0 or newer switches MGE_WITH_LARGE_ARCHIVE on.
if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0" AND NOT MGE_WITH_CUDNN_SHARED)
        message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
        set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
endif()
387
CHECK_CXX_COMPILER_FLAG(-fuse-ld=gold CXX_SUPPORT_GOLD)
388 389 390 391
# Choose the extra flag stored in MGE_COMMON_LINKER_FLAGS: -mcmodel=large for
# large-archive builds, otherwise the gold linker when the compiler supports it
# and the platform is none of Android, Apple or Windows.
if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
elseif(CXX_SUPPORT_GOLD AND NOT (ANDROID OR APPLE OR MSVC OR WIN32))
    message(STATUS "Using GNU gold linker.")
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
endif()
395 396 397
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
398

399 400 401 402 403
# Halide and MLIR are JIT backends: with MGE_WITH_JIT disabled, each one that
# is still enabled is switched off with a warning.
if(NOT MGE_WITH_JIT)
    foreach(_mge_jit_backend IN ITEMS MGE_WITH_HALIDE MGE_WITH_JIT_MLIR)
        if(${_mge_jit_backend})
            message(WARNING "${_mge_jit_backend} is set to OFF with MGE_WITH_JIT disabled")
            set(${_mge_jit_backend} OFF)
        endif()
    endforeach()
endif()

# FIXME At present, there are some conflicts between the LLVM that halide
# depends on and the LLVM that MLIR depends on. Should be fixed in subsequent
# versions.
413 414
if(MGE_BUILD_IMPERATIVE_RT AND MGE_WITH_HALIDE)
    message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
415
endif()
416 417
if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE)
    message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
418 419
endif()

420
if(MGE_WITH_CUDA)
421 422 423 424 425 426 427
    # FIXME: check_language(CUDA) failed when sbsa mode!
    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    if(CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
        message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

428 429
    include(CheckLanguage)
    check_language(CUDA)
430
    if(NOT CMAKE_CUDA_COMPILER AND NOT CMAKE_TOOLCHAIN_FILE)
431 432
        message(FATAL_ERROR "CUDA compiler not found in PATH")
    endif()
433 434 435 436 437 438 439

    # remove this after CMAKE fix nvcc sbsa
    if(NOT CMAKE_CUDA_COMPILER AND CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
    endif()

440 441 442 443 444 445
    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

if(NOT MGE_WITH_CUDA)
M
Megvii Engine Team 已提交
446
    if(NOT MGE_ARCH STREQUAL "x86_64" AND NOT MGE_ARCH STREQUAL "i386")
447
        message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
M
Megvii Engine Team 已提交
448 449 450
        set(MGE_WITH_JIT OFF)
        set(MGE_WITH_JIT_MLIR OFF)
    endif()
451
    set(MGE_WITH_HALIDE OFF)
452
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
453 454 455 456
    set(MGE_WITH_TRT OFF)
endif()

find_package(PythonInterp 3 REQUIRED)
457 458 459 460 461 462 463 464 465 466 467 468 469
# NOTICE: just use for target, which do not depend on python api
# PURPOSE: reuse target obj when switch python3 version
# will fallback to PYTHON_EXECUTABLE if can not find in PATH env
set(PYTHON3_IN_ENV "python3")
find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
if (PYTHON3_EXECUTABLE_WITHOUT_VERSION)
    message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
else()
    message(STATUS "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,\
    target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
endif()
470 471 472

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
473 474 475 476 477 478
if(NOT "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "")
    if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA)
        set_property(TARGET Threads::Threads
            PROPERTY INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
    endif()
479 480 481 482 483 484 485 486 487 488 489 490 491 492
endif()

set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)
set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")
# Default the CUDA host compiler to the C++ compiler when none was chosen.
# CMake variable references are spelled ${...}; $(...) is not expanded by
# CMake and would be stored as the literal text "$(CMAKE_CXX_COMPILER)".
if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
endif()

if(NOT MGE_ENABLE_RTTI)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif()

if(NOT MGE_ENABLE_EXCEPTIONS)
493
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
494 495 496 497 498 499
endif()

if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
endif()

500
if(MGE_BUILD_IMPERATIVE_RT)
501 502 503
    set(CMAKE_CXX_STANDARD 17)
endif()

504 505
# Distributed support is switched off when neither CUDA nor ROCm is enabled.
# The option names are passed to if() directly so it evaluates the variables
# itself and stays well-formed even if one of them is unset.
if(NOT MGE_WITH_CUDA AND NOT MGE_WITH_ROCM)
    message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
endif()

if(MGE_INFERENCE_ONLY)
510
    message(STATUS "Disable distributed support for inference only build.")
511
    set(MGE_WITH_DISTRIBUTED OFF)
512
    message(STATUS "Disable imperative_rt python module for inference only build.")
513
    set(MGE_BUILD_IMPERATIVE_RT OFF)
514 515
endif()

516
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
M
Megvii Engine Team 已提交
517 518 519
    include(cmake/llvm-project.cmake)
endif()

520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
endif()

if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
endif()

if(MGE_WITH_CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    # Add the CUDA host implicit link directories to the link search path,
    # skipping any directory whose last path component is "stubs".
    foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
        get_filename_component(_NAME "${path}" NAME)
        # Quoted so an empty component name is compared as a string.
        if(NOT "${_NAME}" STREQUAL "stubs")
            list(APPEND CUDA_LINK_DIRECTORIES "${path}")
        endif()
    endforeach()
    link_directories(${CUDA_LINK_DIRECTORIES})

    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
543 544
    # Base nvcc flags. On Windows the host-compiler options are collected in
    # CCBIN_FLAG and forwarded through --compiler-options.
    if(MSVC OR WIN32)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
        set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14")
        # CMAKE_BUILD_TYPE is quoted so an empty value cannot break the if().
        if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
            set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
        endif()
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
    else()
        # NOTE(review): unlike the Windows branch, this replaces any existing
        # CMAKE_CUDA_FLAGS value instead of appending to it.
        set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
    endif()
553 554 555 556 557

    if(NOT MGE_ENABLE_RTTI)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
    endif()
    if(NOT MGE_ENABLE_EXCEPTIONS)
558
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
559 560
    endif()
    # Build the default -gencode list when the user supplied none.
    # MEGDNN_THREADS_512 is 0 when the defaults are used and 1 when
    # MGE_CUDA_GENCODE was supplied by the user.
    if(NOT MGE_CUDA_GENCODE)
        if("${MGE_ARCH}" MATCHES "^(x86_64|i386|aarch64)$")
            set(MEGDNN_THREADS_512 0)
            # _mge_sm_list: architectures compiled to SASS (code=sm_XX).
            # _mge_ptx_arch: architecture additionally emitted as PTX
            # (code=compute_XX); empty when no PTX entry is wanted.
            if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0" AND NOT MGE_WITH_CUDNN_SHARED)
                message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
                set(_mge_sm_list 61 70 75)
                set(_mge_ptx_arch "")
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.1.0")
                set(_mge_sm_list 61 70 75 80 86)
                set(_mge_ptx_arch 86)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.0.0")
                set(_mge_sm_list 61 70 75 80)
                set(_mge_ptx_arch 80)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
                set(_mge_sm_list 52 60 61 70 75)
                set(_mge_ptx_arch 75)
            elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0.0")
                set(_mge_sm_list 52 60 61 70)
                set(_mge_ptx_arch 70)
            else()
                set(_mge_sm_list 35 52 60 61)
                set(_mge_ptx_arch 61)
            endif()
            # Each entry is appended with a leading space, so the resulting
            # string has the form " -gencode ... -gencode ...".
            foreach(_mge_sm IN LISTS _mge_sm_list)
                string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_sm},code=sm_${_mge_sm}")
            endforeach()
            if(NOT "${_mge_ptx_arch}" STREQUAL "")
                string(APPEND MGE_CUDA_GENCODE " -gencode arch=compute_${_mge_ptx_arch},code=compute_${_mge_ptx_arch}")
            endif()
            unset(_mge_sm_list)
            unset(_mge_ptx_arch)
        else()
            message(FATAL_ERROR "Unsupported CUDA host arch.")
        endif()
    else()
        set(MEGDNN_THREADS_512 1)
    endif()

    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")
    if(MGE_WITH_TRT)
        include(cmake/tensorrt.cmake)
    endif()
    # Assemble MGE_CUDA_LIBS, the list of CUDA-related link items.
    #   * MGE_CUDA_USE_STATIC=ON : link the *_static toolkit archives (or the
    #     .lib names on Windows).
    #   * otherwise              : link the regular (non-static) library names.
    # The order of the list(APPEND ...) calls is the link order; keep it.
    if(MGE_CUDA_USE_STATIC)
        # TensorRT (+ myelin libraries, which are appended for TensorRT >= 7).
        if(MGE_WITH_TRT)
            if(MSVC OR WIN32)
                message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
                list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
            else()
                list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
            endif()
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
            endif()
        endif()

        # cuDNN. Windows always links ${CUDNN_LIBRARY}; elsewhere cuDNN 7.5.0
        # needs --whole-archive, every other version links libcudnn directly.
        if(MSVC OR WIN32)
            message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
            list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
        elseif("${CUDNN_VERSION}" STREQUAL "7.5.0")
            message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
            list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
        else()
            list(APPEND MGE_CUDA_LIBS libcudnn)
        endif()

        # cuSOLVER / cuRAND / cudart / cuSPARSE, followed by cuBLAS.
        if(MSVC OR WIN32)
            list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
            list(APPEND MGE_CUDA_LIBS cublas.lib)
        else()
            list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
            if(MGE_WITH_CUBLAS_SHARED)
                list(APPEND MGE_CUDA_LIBS cublas)
            else()
                list(APPEND MGE_CUDA_LIBS cublas_static)
            endif()
        endif()

        # cuBLASLt is only appended for CUDA >= 10.1.
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
            if(MSVC OR WIN32)
                list(APPEND MGE_CUDA_LIBS cublasLt.lib)
            elseif(MGE_WITH_CUBLAS_SHARED)
                list(APPEND MGE_CUDA_LIBS cublasLt)
            else()
                list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
            endif()
        endif()

        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0.0" AND NOT MSVC AND NOT WIN32)
            # mark all symbols from liblapack_static.a as weak to avoid
            # duplicated definition with mkl
            find_library(
                LAPACK_STATIC_PATH lapack_static
                HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
            if(NOT LAPACK_STATIC_PATH)
                message(FATAL_ERROR "liblapack_static.a not found")
            endif()
            set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

            # build rule that runs objcopy to produce the weakened copy
            add_custom_command(
                OUTPUT ${LAPACK_STATIC_COPY_PATH}
                COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
                VERBATIM)
            add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

            # imported library "lapack_static_weak" that points at the copy
            add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
            add_dependencies(lapack_static_weak lapack_static_weak_target)
            set_target_properties(
                lapack_static_weak PROPERTIES
                IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
            list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
        endif()
    else()
        if(MGE_WITH_TRT)
            list(APPEND MGE_CUDA_LIBS libnvinfer)
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin)
            endif()
        endif()
        list(APPEND MGE_CUDA_LIBS libcudnn)
        # NOTE(review): cusolver/cublas/curand are only appended for
        # CUDA >= 10.1 in this branch -- confirm that older toolkits get them
        # from somewhere else.
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
            list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
        endif()
        list(APPEND MGE_CUDA_LIBS cudart)
    endif()

    # Link the CUDA driver and NVRTC libraries directly unless the matching
    # stub option is on. Windows uses the .lib names.
    if(MSVC OR WIN32)
        if(NOT MGE_WITH_CUDA_STUB)
            list(APPEND MGE_CUDA_LIBS cuda.lib)
        endif()
        if(NOT MGE_WITH_NVRTC_STUB)
            list(APPEND MGE_CUDA_LIBS nvrtc.lib)
        endif()
    else()
        if(NOT MGE_WITH_CUDA_STUB)
            list(APPEND MGE_CUDA_LIBS cuda)
        endif()
        if(NOT MGE_WITH_NVRTC_STUB)
            list(APPEND MGE_CUDA_LIBS nvrtc)
        endif()
    endif()

    # When any stub is requested, build dnn/cuda-stub and link it as well.
    if(MGE_WITH_ANY_CUDA_STUB)
        add_subdirectory(dnn/cuda-stub)
        list(APPEND MGE_CUDA_LIBS cuda-stub)
    endif()

729
    # Non-Windows builds link nvToolsExt; Windows builds append nvrtc.lib.
    # NOTE(review): on Windows nvrtc.lib may already have been appended by the
    # MGE_WITH_NVRTC_STUB check earlier in this file -- confirm whether a
    # different library was intended here.
    if(NOT (MSVC OR WIN32))
        list(APPEND MGE_CUDA_LIBS nvToolsExt)
    else()
        list(APPEND MGE_CUDA_LIBS nvrtc.lib)
    endif()

    # These are plain string appends, so the flags are glued onto the LAST
    # element of the list ("<last> -lrt -ldl") instead of becoming list items.
    # NOTE(review): -lrt is appended on every platform, including Windows --
    # confirm this is intended.
    string(APPEND MGE_CUDA_LIBS " -lrt")
    if(UNIX)
        string(APPEND MGE_CUDA_LIBS " -ldl")
    endif()

740 741
endif()

742
###########please add_subdirectory from here###############
743
# cpuinfo runtime check: offered for x86_64 / i386 / armv7 / aarch64 targets,
# except on Apple and on Windows XP SP2 inference deployments.
# MGE_ARCH is quoted so an empty value cannot turn the condition into a
# malformed if() expression.
if(("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386" OR "${MGE_ARCH}" STREQUAL "armv7" OR "${MGE_ARCH}" STREQUAL "aarch64") AND NOT APPLE AND NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
        message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
        add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
        include(cmake/cpuinfo.cmake)
    endif()
endif()

752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
# Cambricon (MLU) backend.
# Reads NEUWARE_HOME from the environment at configure time, maps MGE_MLU_ARCH
# to the numeric __BANG_ARCH__ value, derives the cncc flags from the build
# type and collects the Cambricon runtime libraries in MGE_CAMBRICON_LIBS.
if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)

    # MGE_MLU_ARCH is quoted so that an unset/empty value reaches the explicit
    # "Unsupported MLU arch." error instead of breaking the if() expression.
    if("${MGE_MLU_ARCH}" STREQUAL "MLU100")
        set(BANG_ARCH "100")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU1h8")
        set(BANG_ARCH "110")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU220")
        set(BANG_ARCH "220")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU270")
        set(BANG_ARCH "270")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU290")
        set(BANG_ARCH "290")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU200")
        set(BANG_ARCH "200")
    else()
        message (FATAL_ERROR "Unsupported MLU arch.")
    endif()

    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} --bang-mlu-arch=${MGE_MLU_ARCH}")
    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -std=c++11 -Werror")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__BANG_ARCH__=${BANG_ARCH}")

    # Optimisation/debug flags per build type. CMAKE_BUILD_TYPE is quoted
    # because it can be empty; in that case no extra flag is added.
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O0")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "MinSizeRel")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -Os")
    endif()

    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
    set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
endif()

790 791 792 793
# ROCm backend: its configuration is included from cmake/rocm.cmake.
if(MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
endif()

# Atlas backend: build dnn/atlas-stub, link against it and export MGB_ATLAS.
if(MGE_WITH_ATLAS)
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
800

801 802 803 804
# Use ccache as the compiler launcher when it is found on the system.
# Only the CXX (and, with CUDA enabled, the CUDA) launcher is set here.
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
    if(MGE_WITH_CUDA AND "${CMAKE_VERSION}" VERSION_GREATER_EQUAL "3.10.0")
        message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
        set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
    endif()
endif()

# BLAS backend for x86 targets: MGE_BLAS selects MKL or OpenBLAS; any other
# value is a configuration error. MGE_ARCH / MGE_BLAS are quoted so that an
# empty value is reported through the FATAL_ERROR below rather than producing
# a malformed if() expression.
if("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386")
    if("${MGE_BLAS}" STREQUAL "MKL")
        include(cmake/mkl.cmake)
        set(MGE_BLAS_LIBS libmkl)
    elseif("${MGE_BLAS}" STREQUAL "OpenBLAS")
        include(cmake/OpenBLAS.cmake)
        set(MGE_BLAS_LIBS libopenblas)
    else()
        message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
endif()

# MKLDNN build (x86_64 only)
if(MGE_WITH_MKLDNN AND "${MGE_ARCH}" STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
endif()

828 829
# RTTI
# MEGDNN_ENABLE_MANGLING is always the opposite of MEGDNN_ENABLE_RTTI:
# start from the "RTTI off" defaults and flip both when RTTI is requested.
set(MEGDNN_ENABLE_RTTI 0)
set(MEGDNN_ENABLE_MANGLING 1)
if(MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_RTTI 1)
    set(MEGDNN_ENABLE_MANGLING 0)
endif()
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
837

838 839
# Logging: MGB/MegDNN logging and JSON support all follow MGE_ENABLE_LOGGING.
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

# Exception: both MGB and MegDNN follow MGE_ENABLE_EXCEPTIONS.
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
if(NOT MGE_ENABLE_EXCEPTIONS)
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
endif()

# JIT: Halide is only configured when both JIT and Halide are enabled.
if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
endif()
855

856
# Thread
# On Apple hosts the thread-library variables are set by hand (pthreads), and
# JIT, Halide and MLIR are switched off.
if(APPLE)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)

    message(STATUS "disable jit, halide and mlir on macos host build...")
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)
    set(MGE_WITH_HALIDE OFF)
endif()

M
Megvii Engine Team 已提交
869 870 871
# Export the (possibly overridden) JIT switches and the custom-op switch
# under their MGB_* names.
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_CUSTOM_OP ${MGE_WITH_CUSTOM_OP})

# Lets a consumer override the MGB_C_OPR_INIT_FUNC symbol name by passing
# CUSTOM_C_OPR_INIT_FUNC; nothing is defined when it is empty.
if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
    add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
    message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
endif()

880 881 882 883 884 885 886
# Windows: set the thread-library variables by hand.
if(MSVC OR WIN32)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
endif()

# Threads are considered available when a thread library was configured
# above, or when building for Android.
if(CMAKE_THREAD_LIBS_INIT OR CMAKE_USE_WIN32_THREADS_INIT OR ANDROID)
    set(MGB_HAVE_THREAD 1)
endif()

# Windows XP SP2 deployments run without threads and without JSON support.
if((MSVC OR WIN32) AND MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    message(STATUS "disable MGB_HAVE_THREAD/MGB_ENABLE_JSON when DEPLOY ON XP SP2")
    set(MGB_HAVE_THREAD 0)
    set(MGB_ENABLE_JSON 0)
endif()

# Test builds use intra-op multi threads.
if(MGE_WITH_TEST)
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
endif()

904 905
# Backend switches: every MGE_WITH_<backend> option is mirrored into the
# MGB_<backend> and MEGDNN_WITH_<backend> variables.

# CUDA
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})
set(MGB_CUDA ${MGE_WITH_CUDA})

# ROCM
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})
set(MGB_ROCM ${MGE_WITH_ROCM})

# CAMBRICON
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})

# ENFLAME
set(MEGDNN_WITH_ENFLAME ${MGE_WITH_ENFLAME})
set(MGB_ENFLAME ${MGE_WITH_ENFLAME})
919

920 921 922 923 924 925 926
# Debug info
# Assertion locations and the debug utilities are enabled for Debug and
# RelWithDebInfo builds only. CMAKE_BUILD_TYPE is quoted because it may be
# empty (e.g. with multi-config generators); an empty build type selects the
# "disabled" branch instead of breaking the if() expression.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
endif()

# The debug utilities are always switched off for Windows i386 builds.
if(MSVC OR WIN32)
    if("${MGE_ARCH}" STREQUAL "i386")
        set(MGB_ENABLE_DEBUG_UTIL 0)
        message(STATUS "disable MGB_ENABLE_DEBUG_UTIL at Windows i386 build")
    endif()
endif()

936 937 938 939
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

# Inference only
# Gradients stay enabled (and slim serving stays off) unless an
# inference-only build without tests is requested.
set(MGB_ENABLE_GRAD 1)
set(MGB_BUILD_SLIM_SERVING 0)
if(MGE_INFERENCE_ONLY AND NOT MGE_WITH_TEST)
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
endif()

# Distributed communication
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})

951 952 953 954 955 956 957 958 959 960 961 962
# MGE_ARCH related flags
# x86: record which BLAS backend MegDNN is built with.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
        set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
        set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()
endif()

# Enable Naive
if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
endif()

# x86: word-size macros plus (for non-MSVC compilers) -m64/-m32 and SSE4.2.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)
    if(MGE_ARCH STREQUAL "x86_64")
        set(MEGDNN_X86_64 1)
        set(MEGDNN_64_BIT 1)
        if(NOT MSVC)
            string(APPEND CMAKE_CXX_FLAGS " -m64")
        endif()
    else()
        set(MEGDNN_X86_32 1)
        if(NOT MSVC)
            string(APPEND CMAKE_CXX_FLAGS " -m32")
        endif()
    endif()
    if(NOT MSVC)
        string(APPEND CMAKE_CXX_FLAGS " -msse4.2 -mfpmath=sse")
    endif()
endif()
984
# dotprod is not enabled by default on Apple; cpuinfo has some problems there.
# Elsewhere, when the C compiler is Clang, probe whether the C++ compiler
# accepts -march=armv8.2-a+dotprod and enable MGB_ENABLE_DOT if it does.
# CMAKE_C_COMPILER_ID is quoted so that an empty compiler id simply skips the
# probe instead of producing a malformed if() expression.
if(NOT APPLE AND "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    check_cxx_compiler_flag("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
    if(CXX_COMPILER_SUPPORT_DOT)
        message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
        set(MGB_ENABLE_DOT 1)
    endif()
endif()
992

993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006
# armv7 target flags.
if(MGE_ARCH STREQUAL "armv7")
    if(ANDROID)
        string(APPEND CMAKE_CXX_FLAGS " -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables NEON auto-vectorization: NEON is not
    # fully IEEE 754 compatible, so GCC does not turn it on by default.
    string(APPEND CMAKE_CXX_FLAGS " -funsafe-math-optimizations")
    set(MARCH "-march=armv7-a")
    set(MEGDNN_ARMV7 1)
endif()

# aarch64 target flags: 64-bit macros, -march selection (armv8-a, or
# armv8.2-a+fp16 when MGE_ARMV8_2_FEATURE_FP16 is on) and a link-time warning
# for CUDA builds.
if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
    set(MGB_AARCH64 1)
    if(MGE_ARMV8_2_FEATURE_FP16)
        message(STATUS "Enable fp16 feature support in armv8.2")
        # Test the variable by name: expanding it (`NOT ${...}`) leaves a bare
        # `NOT` behind when MGE_DISABLE_FLOAT16 is empty or undefined.
        if(NOT MGE_DISABLE_FLOAT16)
            set(MEGDNN_ENABLE_FP16_NEON 1)
        endif()
        set(MARCH "-march=armv8.2-a+fp16")
    endif()

    if(MGE_WITH_CUDA)
        message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
        when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
        for save link time(14min->1min), you may open below flags if not deploy on\
        arm a53 platform, or just build release type!")
        #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
endif()
1024

1025 1026 1027 1028 1029
# riscv64 target: 64-bit macros only.
if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_RISCV64 1)
    set(MEGDNN_64_BIT 1)
endif()

# Append the -march value chosen by the architecture blocks above (MARCH is
# empty when none of them set it).
string(APPEND CMAKE_CXX_FLAGS " ${MARCH}")

set(MGE_VERSION_SCRIPT
    ${PROJECT_SOURCE_DIR}/src/version.ld
    CACHE INTERNAL "Path to linker version script")

1034
# Generate megbrain_build_config.h from its template and install it.
# The header carries the macros needed by both megbrain and dnn, so it is
# produced before the dnn subdirectory is added.
configure_file(
    src/megbrain_build_config.h.in
    ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

add_subdirectory(dnn)

1041 1042 1043 1044 1045 1046
# Generate megbrain/opr/param_defs.h with dnn/scripts/gen_param_defs.py and
# expose it through the INTERFACE library `mgb_opr_param_defs`.

# Inputs of the generator.
list(APPEND MGB_OPR_PARAM_DEFS_SRCS
    ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
set(MGB_OPR_PARAM_DEFS_SCRIPT
    ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)

# Output location inside the build tree.
set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)

# Build rule: re-run the script whenever the script or its inputs change.
add_custom_command(
    OUTPUT
        ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND
        ${PYTHON3_EXECUTABLE_WITHOUT_VERSION}
        ${MGB_OPR_PARAM_DEFS_SCRIPT}
        ${MGB_OPR_PARAM_DEFS_SRCS}
        ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    DEPENDS
        ${MGB_OPR_PARAM_DEFS_SRCS}
        ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM
)

list(APPEND MGB_OPR_PARAM_DEFS_OUTS
    ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
)

install(
    FILES ${MGB_OPR_PARAM_DEFS_OUTS}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)

# `_mgb_opr_param_defs` drives the generation; the INTERFACE library depends
# on it and publishes the include directory to its consumers.
list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
target_include_directories(mgb_opr_param_defs
    INTERFACE
        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
        $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>
)
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
1069

1070
# TableGen parameter definitions, needed by the MLIR JIT and by the imperative
# runtime. At configure time dnn/scripts/opr_param_defs.py is copied into the
# build tree and tools/param_defs/mgb_opr_param_defs.py is appended to the
# copy; at build time gen_tablegen.py turns that file into param_defs.td.
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
    # generate param_defs.td
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
    # Quote the content: unquoted, it would be split on every ';' and the
    # pieces written back without the separators, corrupting the Python source.
    file(APPEND ${OPR_PARAM_DEFS_SRCS} "${CONTENTS}")
    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})
    add_custom_command(
        OUTPUT ${OPR_PARAM_DEFS_OUT}
        COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
        DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py ${OPR_PARAM_DEFS_SCRIPT}
        VERBATIM
    )
    # mlir tblgen sources: include directories (as -I flags) and the .td files
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
    set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
    add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
endif()

1095
# Distributed training: force MegRay's backend options to follow the MegEngine
# backend switches, then build the bundled MegRay.
if(MGE_WITH_DISTRIBUTED)
    set(MEGRAY_WITH_NCCL ${MGE_WITH_CUDA}
        CACHE BOOL "Override MegRay option" FORCE)
    set(MEGRAY_WITH_SHM ${MGE_WITH_CUDA}
        CACHE BOOL "Override MegRay option" FORCE)
    set(MEGRAY_WITH_RCCL ${MGE_WITH_ROCM}
        CACHE BOOL "Override MegRay option" FORCE)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
endif()

# Core library.
add_subdirectory(src)

# Optional components.
if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
endif()

if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
    message(STATUS "Enable imperative python wrapper runtime")
endif()

# Tests are only added when RTTI is enabled as well.
if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
    add_subdirectory(test)
endif()

1117
# `develop` target: once _imperative_rt is built, symlink the built extension
# module and the generated version.py from the build tree into the source
# tree's python package directory.
if(TARGET _imperative_rt)
    add_custom_target(
        develop
        # built extension module
        COMMAND
            ${CMAKE_COMMAND} -E create_symlink
            ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
            ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
        # generated version.py
        COMMAND
            ${CMAKE_COMMAND} -E create_symlink
            ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
            ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
        DEPENDS _imperative_rt
        VERBATIM
    )
endif()

1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143
# Configure and install the pkg-config file.
# Unlike the Config.cmake modules this is not relocatable (and not really
# portable), because two dependencies ship no pkg-config description:
# FlatBuffers and MKL-DNN. System-provided MKL-DNN / OpenBLAS are therefore
# listed as private link flags.
if(MGE_USE_SYSTEM_MKLDNN)
    set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
endif()
if(MGE_USE_SYSTEM_OPENBLAS)
    string(APPEND MGE_PKGCONFIG_LIBS_PRIVATE " -lopenblas")
endif()

configure_file(
    cmake/megengine.pc.in
    ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    @ONLY)
install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

# CMake package files (MegEngineConfig.cmake, its version file and the
# exported targets). Do not export targets if MGE_WITH_DISTRIBUTED is on:
# MegRay is not ready.
if(NOT MGE_WITH_DISTRIBUTED)
    include(CMakePackageConfigHelpers)
    set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)

    configure_package_config_file(
        cmake/MegEngineConfig.cmake.in
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
        INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR}
    )
    write_basic_package_version_file(
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
        VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
        COMPATIBILITY SameMajorVersion
    )

    install(
        EXPORT ${MGE_EXPORT_TARGETS}
        DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(
        FILES
            ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
            ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
        DESTINATION ${MGE_INSTALL_CMAKEDIR})
endif()
1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179

# Windows: add /MT (/MTd for Debug) as compile options, and rewrite any /MD
# found in the cached C/C++ flag variables to /MT.
if(MSVC OR WIN32)
    add_compile_options(
        $<$<CONFIG:>:/MT>
        $<$<CONFIG:Debug>:/MTd>
        $<$<CONFIG:Release>:/MT>
    )
    foreach(flag_var
            CMAKE_C_FLAGS
            CMAKE_C_FLAGS_DEBUG
            CMAKE_C_FLAGS_RELEASE
            CMAKE_C_FLAGS_MINSIZEREL
            CMAKE_C_FLAGS_RELWITHDEBINFO
            CMAKE_CXX_FLAGS
            CMAKE_CXX_FLAGS_DEBUG
            CMAKE_CXX_FLAGS_RELEASE
            CMAKE_CXX_FLAGS_MINSIZEREL
            CMAKE_CXX_FLAGS_RELWITHDEBINFO)
        # if() dereferences the expanded name, so this tests the flag value.
        if(${flag_var} MATCHES "/MD")
            string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
            # FORCE so the rewritten flags replace the cached value.
            set(${flag_var} "${${flag_var}}" CACHE STRING "msvc compiler flags" FORCE)
            message(VERBOSE "MSVC flags: ${flag_var}:${${flag_var}}")
        endif()
    endforeach()
endif()
1184 1185 1186 1187 1188

# MLIR helper tools are only built together with the MLIR JIT.
if(MGE_WITH_JIT_MLIR)
    add_subdirectory(tools/mlir/mgb-opt)
    add_subdirectory(tools/mlir/mgb-file-check)
endif()

# Statically linking cuDNN >= 8 with many SM architectures does not work; tell
# the user which options avoid the problem. The suggested MGE_CUDA_GENCODE
# value repeats -gencode for each architecture, matching how the gencode
# options are built elsewhere in this file.
if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0" AND NOT MGE_WITH_CUDNN_SHARED)
    # The warning is emitted three times, as before -- presumably so that it
    # stands out in the configure log.
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" ")
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" ")
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" ")
endif()

# MegEngine Lite is optional.
if(MGE_WITH_LITE)
    add_subdirectory(lite)
endif()