CMakeLists.txt 51.6 KB
Newer Older
1
cmake_minimum_required(VERSION 3.15.2)
2 3 4 5
# Report the active generator and warn when it is anything other than Ninja.
message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}")
# The expansion is quoted so the generator name is always compared as a literal
# string; an unquoted value could be re-interpreted by if() as a variable name.
if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
    message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
endif()
6 7 8

include (cmake/FetchMegBrainVersion.cmake)
project(MegEngine LANGUAGES C CXX VERSION ${MGB_VER_STRING})
9 10 11 12

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
13
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
14 15
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
16
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
17

18
if(NOT MSVC AND NOT APPLE AND NOT WIN32)
19 20 21 22 23
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq  <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()

24
include(GNUInstallDirs)
25
include(CheckCXXCompilerFlag)
26 27
include(CheckIPOSupported)

28 29 30 31 32
CHECK_CXX_COMPILER_FLAG(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
set_property(CACHE MGE_ARCH PROPERTY STRINGS AUTO
    x86_64 i386
33
    armv7 aarch64
34 35
    naive fallback
)
36
set (MGE_EXPORT_TARGETS MegEngine-targets)
37

38 39 40 41 42 43 44 45 46 47 48 49
# Convert the colon-separated LD_LIBRARY_PATH and LIBRARY_PATH environment
# variables into CMake lists; each result is an empty string when the
# corresponding variable is unset or empty.
# The $ENV{} expansions are quoted so the whole value reaches string(REPLACE)
# as a single input. Unquoted, a value containing ';' would be split into
# several inputs, which string(REPLACE) concatenates back without the ';'.
if(NOT "$ENV{LD_LIBRARY_PATH}" STREQUAL "")
    string(REPLACE ":" ";" ALTER_LD_LIBRARY_PATHS "$ENV{LD_LIBRARY_PATH}")
else()
    set(ALTER_LD_LIBRARY_PATHS "")
endif()

if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
    string(REPLACE ":" ";" ALTER_LIBRARY_PATHS "$ENV{LIBRARY_PATH}")
else()
    set(ALTER_LIBRARY_PATHS "")
endif()

50
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
M
Megvii Engine Team 已提交
51
option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
52
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)
53
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
54
option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run." OFF)
55
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
56 57 58
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
59
option(MGE_WITH_LITE "Build MGE with lite" ON)
60
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
61 62
option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
63 64
option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)
65 66
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
67
option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
68
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
69
option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
70 71 72 73 74 75
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
76
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
77 78 79
option(MGE_BUILD_SDK "Build load_and_run" ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
80
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
81
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
82
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
83
option(MGE_WITH_CUSTOM_OP "Build with Custom op" OFF)
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
if(MSVC OR WIN32)
    option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)
    # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is a special option for Windows XP SP2 (32-bit).
    # Internal behavior:
    # 1: MGB_HAVE_THREAD is forced to 0, which means only a single thread is supported.
    # 2: Some features are disabled, e.g. MGB_ENABLE_JSON and the var sanity check. This
    #    rarely matters; if you need these features for debugging, run the same model in a
    #    non-XP-SP2 environment, e.g. Win7 or XP SP3 (built without MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2).
    # 3: Only MegEngine (load_and_run) and the MegEngineLite API are supported on XP SP2;
    #    some debug utilities, e.g. megbrain_test/megdnn_test, cannot run, mostly because of the gtest source code.
    # Notes for SDK callers:
    # 1: Because mutexes are removed, when you use native MSVC APIs such as CreateThread to start
    #    several MegEngine instances in the same process, call the MegEngine API (init/run) as serially
    #    as possible, and do not use std::thread, std::mutex or std::this_thread_id on the SDK caller side!
    # To check whether a dll/exe can be deployed on Windows XP SP2,
    #    see scripts/misc/check_windows_xp_sp2_deploy.py
    option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 "Enable deploy inference on Windows xp sp2" OFF)

    # PE files linked by LLVM lld cannot run on Windows XP, so we force the use of link.exe,
    # which is always located at Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
104
    set(CMAKE_LINKER "link.exe")
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
    # Pick the _WIN32_WINNT level for the build. Windows XP deployments are
    # additionally validated and receive XP-specific compile and link options.
    if(NOT (MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2))
        string(APPEND CMAKE_CXX_FLAGS " /D_WIN32_WINNT=0x0601")
        string(APPEND CMAKE_C_FLAGS " /D_WIN32_WINNT=0x0601")
    else()
        # An XP build must be 32-bit, inference-only and without CUDA; the
        # first violated requirement aborts the configuration.
        if(NOT ${MGE_ARCH} STREQUAL "i386")
            message(FATAL_ERROR "only support 32bit when build for Windows xp")
        elseif(NOT MGE_INFERENCE_ONLY)
            message(FATAL_ERROR "only support inference when build for Windows xp")
        elseif(MGE_WITH_CUDA)
            message(FATAL_ERROR "do not support CUDA when build for Windows xp")
        endif()

        # Workaround for a thread issue on Windows XP SP3.
        string(APPEND CMAKE_CXX_FLAGS " /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
        string(APPEND CMAKE_C_FLAGS " /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
        if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
            string(APPEND CMAKE_CXX_FLAGS " -D__DEPLOY_ON_XP_SP2__=1")
            string(APPEND CMAKE_C_FLAGS " -D__DEPLOY_ON_XP_SP2__=1")
        endif()

        # First option: Windows XP subsystem version.
        # Second option: some old libraries (for example mkl for XP) use legacy
        # stdio, so legacy_stdio_definitions is force-linked.
        add_link_options(
            "/SUBSYSTEM:CONSOLE,5.01"
            "/DEFAULTLIB:legacy_stdio_definitions.lib")
    endif()
endif()
135

136
if(MSVC OR WIN32)
137 138
    message(STATUS "windows force cudnn static link")
    set(MGE_WITH_CUDNN_SHARED OFF)
139 140
endif()

141 142 143 144 145
# MGE_WITH_ANY_CUDA_STUB is ON when at least one of the CUDA / NVRTC stub
# options is enabled, and OFF otherwise.
set(MGE_WITH_ANY_CUDA_STUB OFF)
if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
endif()
146

147 148 149 150 151 152 153 154
if(MGE_WITH_MIDOUT_PROFILE)
    message(STATUS "build with MIDOUT PROFILE and force set MGE_WITH_MINIMUM_SIZE off and force rtti ON")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMIDOUT_PROFILING")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMIDOUT_PROFILING")
    set(MGE_WITH_MINIMUM_SIZE OFF)
    set(MGE_ENABLE_RTTI ON)
    if(WIN32)
        message(FATAL_ERROR "do not support midout at WIN32")
155
    endif()
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
endif()

# Path of the bin_reduce header; it is reported in the status message below.
set(BIN_REDUCE ${PROJECT_SOURCE_DIR}/src/bin_reduce_cmake.h)

# Minimum-size build: switch off RTTI, logging and exceptions, and force an
# inference-only build.
if(MGE_WITH_MINIMUM_SIZE)
    message(STATUS "build with MGE_WITH_MINIMUM_SIZE bin_reduce header is: ${BIN_REDUCE}")
    foreach(_mge_feature RTTI LOGGING EXCEPTIONS)
        set(MGE_ENABLE_${_mge_feature} OFF)
    endforeach()
    set(MGE_INFERENCE_ONLY ON)
    # MGE_WITH_MINIMUM_SIZE triggers unused-parameter warnings, so silence them.
    string(APPEND CMAKE_C_FLAGS " -Wno-unused-parameter")
    string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-parameter")
endif()

if(NOT MGE_WITH_MIDOUT_PROFILE AND NOT WIN32)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${BIN_REDUCE}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${BIN_REDUCE}")
173 174
endif()

175 176 177 178 179 180 181 182 183
if (NOT APPLE)
    # checking CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT on APPLE leads to a cmake crash
    CHECK_CXX_COMPILER_FLAG("-ffunction-sections -fdata-sections  -Wl,--gc-sections"  CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffunction-sections -fdata-sections")
        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ffunction-sections -fdata-sections")
        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")
        set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gc-sections")
    endif()
184 185 186
endif()

check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
187 188
# LLVM on Windows report support LTO, but do not support -flto=full at link stage
if(IS_LTO_SUPPORT AND NOT WIN32)
189
    message(STATUS "lto is supported in this compiler")
190 191
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full")
192 193
else()
    message(STATUS "lto is not supported in this compiler")
194 195
endif()

196 197
if (APPLE)
    set (BUILD_SHARED_LIBS OFF)
198
    message(STATUS "build static for xcode framework require")
199 200
endif()

201 202 203 204 205 206 207
if (MGE_USE_SYSTEM_LIB)
    set (MGE_CUDA_USE_STATIC OFF)
endif()

if (MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
endif()
208

209
if(CMAKE_TOOLCHAIN_FILE)
210 211
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    if(ANDROID_TOOLCHAIN_ROOT)
        if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
            set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
        endif()
        if(${ANDROID_ARCH} STREQUAL "arm")
            set(MGE_ARCH "armv7")
        elseif(${ANDROID_ARCH} STREQUAL "arm64")
            set(MGE_ARCH "aarch64")
        else()
            message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
        endif()
    elseif(IOS_TOOLCHAIN_ROOT)
        if(${IOS_ARCH} STREQUAL "armv7")
            set(MGE_ARCH "armv7")
        elseif(${IOS_ARCH} STREQUAL "arm64")
            set(MGE_ARCH "aarch64")
        elseif(${IOS_ARCH} STREQUAL "armv7k")
            set(MGE_ARCH "armv7")
        elseif(${IOS_ARCH} STREQUAL "arm64e")
            set(MGE_ARCH "aarch64")
        elseif(${IOS_ARCH} STREQUAL "armv7s")
            set(MGE_ARCH "armv7")
        else()
            message(FATAL_ERROR "Unsupported IOS_ARCH.")
        endif()
238 239
    elseif(RISCV_TOOLCHAIN_ROOT)
        set(MGE_ARCH "riscv64")
240 241 242 243 244
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
        set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
        message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
245
    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
246
endif()
247 248

if(${MGE_ARCH} STREQUAL "AUTO")
249
    if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
250 251 252
        set(MGE_ARCH "x86_64")
    elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
        set(MGE_ARCH "i386")
253 254 255 256
    elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
    elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm")
        set(MGE_ARCH "armv7")
257
    else()
L
llehtahw 已提交
258
        message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
259 260 261
    endif()
endif()

262 263 264 265 266
# When neither CMAKE_CONFIGURATION_TYPES nor CMAKE_BUILD_TYPE is set, default
# the build type to RelWithDebInfo.
if(NOT (CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE))
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
endif()

267
# RTTI is switched off for Release builds targeting a non-x86_64 architecture,
# unless tests or midout profiling are enabled.
# Both expansions are quoted: CMAKE_BUILD_TYPE can be empty (for example when
# CMAKE_CONFIGURATION_TYPES is set), and an unquoted empty expansion would
# leave STREQUAL without a left-hand operand and abort the configuration.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release"
   AND NOT MGE_WITH_TEST
   AND NOT "${MGE_ARCH}" STREQUAL "x86_64"
   AND NOT MGE_WITH_MIDOUT_PROFILE)
    set(MGE_ENABLE_RTTI OFF)
    message(STATUS "disable MGE_ENABLE_RTTI when Release/NON-x86_64/NON-MGE_WITH_MIDOUT_PROFILE mode!!")
endif()

272
if(MSVC OR WIN32)
273 274
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)
275
    set(CMAKE_OBJECT_PATH_MAX 300)
276 277 278 279 280 281
    if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()

282
    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
283 284
    message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    if (NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang" AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang-cl")
285 286
        message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()
287 288 289
    # on windows need append VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows
    # and VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows to PATH env
    if (MGE_BUILD_WITH_ASAN)
290 291 292 293 294
        message(WARNING "please do (set)export ASAN_OPTIONS=windows_hook_rtl_allocators=true when run test after build finish, caused by we link asan dll!!")
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
            message(WARNING "Windows AddressSanitizer doesn't support linking with debug runtime libraries yet, which means do not support CMAKE_BUILD_TYPE=Debug")
            message(FATAL_ERROR "Please build with RelWithDebInfo or Release by : EXTRA_CMAKE_ARGS=\"-DMGE_BUILD_WITH_ASAN=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ...\"")
        endif()
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
        if("$ENV{VS_PATH}" STREQUAL "")
            message(FATAL_ERROR "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
        endif()
        if(${MGE_ARCH} STREQUAL "x86_64")
            set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
            set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-x86_64")
            set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
        elseif(${MGE_ARCH} STREQUAL "i386")
            set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
            set(WINDOWS_ASAN_RUNTIME_THUNK_NAME "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
            set(WINDOWS_ASAN_PATH_SUFFIXES "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
        else()
            message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
        endif()
        find_path(ASAN_DLL_PATH
            NAMES ${WINDOWS_ASAN_DLL_NAME}
            HINTS $ENV{VS_PATH}
            PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
            DOC "Windows asan library path" )
        if(ASAN_DLL_PATH STREQUAL "ASAN_DLL_PATH-NOTFOUND")
            message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
        endif()

        message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")
        link_directories(${ASAN_DLL_PATH})
        link_libraries(${WINDOWS_ASAN_DLL_NAME})
        link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})
        set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
323 324 325 326 327 328
        # windows Llvm asan do not take effect when /O2
        # RELWITHDEBINFO default value is /O2, so override it
        set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_C_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
        set(CMAKE_CXX_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
329 330 331
    else()
        set(WIN_FLAGS "/O2")
    endif()
332
    # add flags for enable sse instruction optimize for X86, enable avx header to compile avx code
333
    set(WIN_FLAGS "${WIN_FLAGS} -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
334 335 336 337 338 339 340 341 342
    # If your CPU is from the Cascade Lake series, you can enable the following for performance:
    # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # for windows build
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated  -Wno-error=documentation  -Wno-error=unreachable-code-break")
343
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /wd4819")
344 345 346 347 348
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")

    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")
349 350

    #FIXME: fix halide JIT on windows
351
    message(STATUS "disable jit, halide and mlir on windows host build...")
352 353
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
M
Megvii Engine Team 已提交
354
    set(MGE_WITH_JIT_MLIR OFF)
355
    #FIXME: fix MegRay on windows
356
    message(STATUS "Disable distributed build on windows host build...")
357
    set(MGE_WITH_DISTRIBUTED OFF)
358 359
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
360

361 362 363 364
    # NONE windows DEBUG general flags
    if(MGE_BUILD_WITH_ASAN)
        set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
        set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
365
    else()
366 367 368 369 370 371 372 373 374 375 376 377
        set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
        set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    endif()

    # NONE windows opt general flags
    if (MGE_BUILD_WITH_ASAN)
        set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
    elseif(ANDROID)
        set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
    else()
        set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
    endif()
378 379 380 381 382 383 384 385
    #remove finite-math-only opt from Ofast, caused by clang have a different
    #runtime finite math logic, this issue do not find at g++, but as a unity
    #build flags, we force add -fno-finite-math-only when compiler support
    CHECK_CXX_COMPILER_FLAG("-fno-finite-math-only"  CXX_NO_FINITE_MATH_ONLY_SUPPORT)
    if(CXX_NO_FINITE_MATH_ONLY_SUPPORT)
        message(STATUS "force add -fno-finite-math-only for this compiler")
        set(OPTIMIZE_LEVEL "${OPTIMIZE_LEVEL} -fno-finite-math-only")
    endif()
386 387 388 389 390 391 392 393 394 395 396
    set(CMAKE_C_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
    #some gnu(gcc) compiler use -static -libasan have runtime issue
    #also, when target is big, clang ld will take a long long long
    #time when use -static-libsan, so we use dynamic asan by default
    #ANDROID asan.so depends on log, so broadcast log link_libraries
    #for megengine depends target, for example flatc target
    if (MGE_BUILD_WITH_ASAN AND ANDROID)
        link_libraries(log)
397 398 399
    endif()
endif()

400 401 402 403 404 405 406
# With CUDA enabled, include cmake/cudnn.cmake. When CUDA is linked statically,
# cuDNN is not linked as a shared library and CUDNN_VERSION is 8.0.0 or newer,
# MGE_WITH_LARGE_ARCHIVE is switched on.
if(MGE_WITH_CUDA)
    include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC
       AND NOT MGE_WITH_CUDNN_SHARED
       AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0")
        set(MGE_WITH_LARGE_ARCHIVE ON)
        message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
    endif()
endif()
407
CHECK_CXX_COMPILER_FLAG(-fuse-ld=gold CXX_SUPPORT_GOLD)
408 409 410 411
if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
elseif(CXX_SUPPORT_GOLD AND NOT ANDROID AND NOT APPLE AND NOT MSVC AND NOT WIN32 AND NOT MGE_WITH_LARGE_ARCHIVE)
412
    message(STATUS "Using GNU gold linker.")
413
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")    
414
endif()
415 416 417
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
418

419 420 421 422 423
if(NOT MGE_WITH_JIT)
    if(MGE_WITH_HALIDE)
        message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
        set(MGE_WITH_HALIDE OFF)
    endif()
M
Megvii Engine Team 已提交
424 425 426 427 428 429 430 431 432
    if(MGE_WITH_JIT_MLIR)
        message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
        set(MGE_WITH_JIT_MLIR OFF)
    endif()
endif()

# FIXME At present, there are some conflicts between the LLVM that halide
# depends on and the LLVM that MLIR depends on. Should be fixed in subsequent
# versions.
433 434
if(MGE_BUILD_IMPERATIVE_RT AND MGE_WITH_HALIDE)
    message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
435
endif()
436 437
if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE)
    message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
438 439
endif()

440
if(MGE_WITH_CUDA)
441 442 443 444 445 446 447
    # FIXME: check_language(CUDA) failed when sbsa mode!
    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    if(CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
        message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

448 449
    include(CheckLanguage)
    check_language(CUDA)
450
    if(NOT CMAKE_CUDA_COMPILER AND NOT CMAKE_TOOLCHAIN_FILE)
451 452
        message(FATAL_ERROR "CUDA compiler not found in PATH")
    endif()
453 454 455 456 457 458 459

    # remove this after CMAKE fix nvcc sbsa
    if(NOT CMAKE_CUDA_COMPILER AND CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
    endif()

460 461 462 463 464 465
    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

if(NOT MGE_WITH_CUDA)
M
Megvii Engine Team 已提交
466
    if(NOT MGE_ARCH STREQUAL "x86_64" AND NOT MGE_ARCH STREQUAL "i386")
467
        message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
M
Megvii Engine Team 已提交
468 469 470
        set(MGE_WITH_JIT OFF)
        set(MGE_WITH_JIT_MLIR OFF)
    endif()
471
    set(MGE_WITH_HALIDE OFF)
472
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
473 474 475 476
    set(MGE_WITH_TRT OFF)
endif()

find_package(PythonInterp 3 REQUIRED)
477 478 479 480 481 482 483 484 485 486 487 488 489
# PYTHON3_EXECUTABLE_WITHOUT_VERSION selects the Python command for targets
# that do not depend on the Python API.
# Purpose: let such targets reuse their build objects when the python3 version
# is switched.
# Falls back to PYTHON_EXECUTABLE when "python3" cannot be found.
set(PYTHON3_IN_ENV "python3")
# find_program() is used here only as an existence probe for "python3".
find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
if (PYTHON3_EXECUTABLE_WITHOUT_VERSION)
    message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
    # Store the bare command name ("python3") rather than the path found above.
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
else()
    # Not found: use PYTHON_EXECUTABLE instead; per the message below, dependent
    # targets are then rebuilt when python3 is switched.
    message(STATUS "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,\
    target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3")
    set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
endif()
490 491 492

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
493 494 495 496 497 498
if(NOT "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "")
    if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA)
        set_property(TARGET Threads::Threads
            PROPERTY INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
    endif()
499 500 501 502 503 504 505 506 507 508 509 510 511 512
endif()

set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)
set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")
# Default the CUDA host compiler to the C++ compiler when none has been set.
# CMake variable references are written ${...}; a Make-style $(...) reference
# is not expanded by CMake and would be stored as literal text.
if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
endif()

if(NOT MGE_ENABLE_RTTI)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif()

if(NOT MGE_ENABLE_EXCEPTIONS)
513
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
514 515 516 517 518 519
endif()

if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
endif()

520
if(MGE_BUILD_IMPERATIVE_RT)
521 522 523
    set(CMAKE_CXX_STANDARD 17)
endif()

524 525
# Distributed support is turned off when neither CUDA nor ROCm is enabled.
# The options are tested by name, as elsewhere in this file, so an empty or
# unusual option value cannot turn the condition into a malformed expression.
if(NOT MGE_WITH_CUDA AND NOT MGE_WITH_ROCM)
    message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
    set(MGE_WITH_DISTRIBUTED OFF)
endif()

if(MGE_INFERENCE_ONLY)
530
    message(STATUS "Disable distributed support for inference only build.")
531
    set(MGE_WITH_DISTRIBUTED OFF)
532
    message(STATUS "Disable imperative_rt python module for inference only build.")
533
    set(MGE_BUILD_IMPERATIVE_RT OFF)
534 535
endif()

536
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
M
Megvii Engine Team 已提交
537 538 539
    include(cmake/llvm-project.cmake)
endif()

540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562
if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
endif()

if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
endif()

if(MGE_WITH_CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
        get_filename_component(_NAME ${path} NAME)
        if(NOT ${_NAME} STREQUAL "stubs")
            list(APPEND CUDA_LINK_DIRECTORIES ${path})
        endif()
    endforeach()
    link_directories(${CUDA_LINK_DIRECTORIES})

    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
563 564
    if(MSVC OR WIN32)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
565
        set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14")
566
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
567 568 569 570 571 572
            set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
        endif()
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
    else()
        set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
    endif()
573 574 575 576 577

    if(NOT MGE_ENABLE_RTTI)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
    endif()
    if(NOT MGE_ENABLE_EXCEPTIONS)
578
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
579 580
    endif()
    if(NOT MGE_CUDA_GENCODE)
581
        if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "aarch64")
582
            set(MEGDNN_THREADS_512 0)
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601
            if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
                message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "11.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "11.1.0")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=sm_80")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_86,code=sm_86")                
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_86,code=compute_86")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "11.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "11.0.0")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=sm_80")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=compute_80")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0")
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=compute_75")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "9.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "9.0.0")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=compute_70")
            else()
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_35,code=sm_35")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=compute_61")
            endif()
        else()
            message(FATAL_ERROR "Unsupported CUDA host arch.")
        endif()
    else()
625
        set(MEGDNN_THREADS_512 1)
626 627 628 629 630 631 632 633
    endif()

    # Hand the collected -gencode switches to the CUDA compiler.
    string(APPEND CMAKE_CUDA_FLAGS " ${MGE_CUDA_GENCODE}")
    # The TensorRT module is only loaded when TensorRT support is requested.
    if(MGE_WITH_TRT)
        include(cmake/tensorrt.cmake)
    endif()
    if(MGE_CUDA_USE_STATIC)
        # TensorRT libraries for the static CUDA link.
        if(MGE_WITH_TRT)
            if(NOT MSVC AND NOT WIN32)
                # libnvinfer is wrapped in --whole-archive / --no-whole-archive.
                list(APPEND MGE_CUDA_LIBS
                    -Wl,--whole-archive
                    libnvinfer
                    -Wl,--no-whole-archive)
            else()
                message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
                list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
            endif()
            # From TensorRT 7 on, the myelin libraries are linked as well.
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS
                    libmyelin_compiler
                    libmyelin_executor
                    libmyelin_pattern_runtime
                    libmyelin_pattern_library)
            endif()
        endif()
        
        # cuDNN for the static CUDA link. Windows always links the located
        # CUDNN_LIBRARY; elsewhere only cuDNN 7.5.0 needs the whole-archive
        # workaround.
        if(MSVC OR WIN32)
            message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
            list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
        elseif("${CUDNN_VERSION}" STREQUAL "7.5.0")
            message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
            list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
        else()
            list(APPEND MGE_CUDA_LIBS libcudnn)
        endif()
662
        # cusolver / curand / cudart / cusparse followed by cuBLAS.
        if(MSVC OR WIN32)
            list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
            list(APPEND MGE_CUDA_LIBS cublas.lib)
        else()
            list(APPEND MGE_CUDA_LIBS cusolver_static curand_static culibos cudart_static cusparse_static)
            # cuBLAS may be linked shared even though the rest is static.
            if(MGE_WITH_CUBLAS_SHARED)
                list(APPEND MGE_CUDA_LIBS cublas)
            else()
                list(APPEND MGE_CUDA_LIBS cublas_static)
            endif()
        endif()
676
        # cublasLt is linked for CUDA 10.1 and newer.
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
            if(MSVC OR WIN32)
                list(APPEND MGE_CUDA_LIBS cublasLt.lib)
            elseif(MGE_WITH_CUBLAS_SHARED)
                list(APPEND MGE_CUDA_LIBS cublasLt)
            else()
                list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
            endif()
        endif()
687
        # CUDA >= 10.0 on non-Windows hosts: link a copy of liblapack_static.a
        # in which every symbol has been marked weak, to avoid duplicated
        # definitions with mkl.
        if(NOT MSVC AND NOT WIN32 AND ${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.0.0")
            find_library(
                LAPACK_STATIC_PATH lapack_static
                HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
            if(NOT LAPACK_STATIC_PATH)
                message(FATAL_ERROR "liblapack_static.a not found")
            endif()
            set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

            # Build rule: objcopy writes the weakened copy into the build tree.
            add_custom_command(
                OUTPUT ${LAPACK_STATIC_COPY_PATH}
                COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
                VERBATIM)
            add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

            # Imported library "lapack_static_weak" pointing at that copy; the
            # dependency makes sure the copy exists before anything links it.
            add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
            set_target_properties(
                lapack_static_weak PROPERTIES
                IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
            add_dependencies(lapack_static_weak lapack_static_weak_target)

            list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
        endif()
    else()
        # Shared (non-static) CUDA link: TensorRT, cuDNN, cuBLAS family, cudart.
        if(MGE_WITH_TRT)
            list(APPEND MGE_CUDA_LIBS libnvinfer)
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin)
            endif()
        endif()

        list(APPEND MGE_CUDA_LIBS libcudnn)

        # NOTE(review): cusolver/cublas/curand are only added together with
        # cublasLt for CUDA >= 10.1 -- confirm older toolkits get them elsewhere.
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "10.1.0")
            list(APPEND MGE_CUDA_LIBS
                cublasLt
                cusolver
                cublas
                curand)
        endif()

        list(APPEND MGE_CUDA_LIBS cudart)
    endif()

    # Link the CUDA driver library unless MGE_WITH_CUDA_STUB is set.
    if(NOT MGE_WITH_CUDA_STUB)
        if(NOT MSVC AND NOT WIN32)
            list(APPEND MGE_CUDA_LIBS cuda)
        else()
            list(APPEND MGE_CUDA_LIBS cuda.lib)
        endif()
    endif()

    # Same for NVRTC and MGE_WITH_NVRTC_STUB.
    if(NOT MGE_WITH_NVRTC_STUB)
        if(NOT MSVC AND NOT WIN32)
            list(APPEND MGE_CUDA_LIBS nvrtc)
        else()
            list(APPEND MGE_CUDA_LIBS nvrtc.lib)
        endif()
    endif()

    # Build and link the in-tree stub library when any CUDA stub is enabled.
    if(MGE_WITH_ANY_CUDA_STUB)
        add_subdirectory(dnn/cuda-stub)
        list(APPEND MGE_CUDA_LIBS cuda-stub)
    endif()

    # NOTE(review): the Windows branch adds nvrtc.lib regardless of
    # MGE_WITH_NVRTC_STUB -- confirm this is intended.
    if(NOT MSVC AND NOT WIN32)
        list(APPEND MGE_CUDA_LIBS nvToolsExt)
    else()
        list(APPEND MGE_CUDA_LIBS nvrtc.lib)
    endif()

    # Trailing system libraries are glued on as raw linker flags.
    string(APPEND MGE_CUDA_LIBS " -lrt")
    if(UNIX)
        string(APPEND MGE_CUDA_LIBS " -ldl")
    endif()

760 761
endif()

762
###########please add_subdirectory from here###############
763
# cpuinfo is offered on x86_64 / i386 / armv7 / aarch64, except on APPLE and
# for Windows XP SP2 deployments. MGE_ARCH is quoted so that an empty value or
# a value that names another variable cannot break or skew the comparison.
if(("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386" OR "${MGE_ARCH}" STREQUAL "armv7" OR "${MGE_ARCH}" STREQUAL "aarch64") AND NOT APPLE AND NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
        message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
        add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
        include(cmake/cpuinfo.cmake)
    endif()
endif()

772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809
# Cambricon backend: headers and libraries come from $NEUWARE_HOME, the BANG
# compiler is configured for the selected MLU architecture, and the cnrt /
# cndev / cnml runtime libraries are collected in MGE_CAMBRICON_LIBS.
if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)

    # Map the MLU architecture name to the numeric __BANG_ARCH__ value.
    # The variable is quoted so that an empty MGE_MLU_ARCH reaches the
    # "Unsupported MLU arch." error instead of an if() syntax error.
    if("${MGE_MLU_ARCH}" STREQUAL "MLU100")
        set(BANG_ARCH "100")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU1h8")
        set(BANG_ARCH "110")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU220")
        set(BANG_ARCH "220")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU270")
        set(BANG_ARCH "270")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU290")
        set(BANG_ARCH "290")
    elseif("${MGE_MLU_ARCH}" STREQUAL "MLU200")
        set(BANG_ARCH "200")
    else()
        message (FATAL_ERROR "Unsupported MLU arch.")
    endif()

    string(APPEND BANG_CNCC_FLAGS " --bang-mlu-arch=${MGE_MLU_ARCH}")
    string(APPEND BANG_CNCC_FLAGS " -std=c++11 -Werror")
    string(APPEND CMAKE_CXX_FLAGS " -D__BANG_ARCH__=${BANG_ARCH}")

    # Optimisation / debug switches per build type. An empty or unknown
    # CMAKE_BUILD_TYPE adds nothing (quoted so that it cannot fail to parse).
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
        string(APPEND BANG_CNCC_FLAGS " -g -O0")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
        string(APPEND BANG_CNCC_FLAGS " -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
        string(APPEND BANG_CNCC_FLAGS " -g -O3")
    elseif("${CMAKE_BUILD_TYPE}" STREQUAL "MinSizeRel")
        string(APPEND BANG_CNCC_FLAGS " -Os")
    endif()

    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
    set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
endif()

810 811 812 813
# ROCm backend: all of its configuration lives in cmake/rocm.cmake.
if (MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
endif ()

814
# Atlas backend: built against the in-tree stub library.
if(MGE_WITH_ATLAS)
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    # Mirror the option into MGB_ATLAS.
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
820

821 822 823 824
# Route C++ (and, when enabled, CUDA) compilations through ccache if found.
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER "${CCACHE_BIN}")
    # cmake_minimum_required() at the top of this file already demands a CMake
    # newer than 3.10, so no extra version guard is needed for the CUDA launcher.
    if(MGE_WITH_CUDA)
        message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
        set(CMAKE_CUDA_COMPILER_LAUNCHER "${CCACHE_BIN}")
    endif()
endif()

# BLAS backend for x86 hosts: MKL or OpenBLAS, anything else is fatal.
# The variables are quoted so that an empty MGE_BLAS produces the
# "Unknown BLAS implementation" error rather than an if() syntax error.
if("${MGE_ARCH}" STREQUAL "x86_64" OR "${MGE_ARCH}" STREQUAL "i386")
    if("${MGE_BLAS}" STREQUAL "MKL")
        include(cmake/mkl.cmake)
        set(MGE_BLAS_LIBS libmkl)
    elseif("${MGE_BLAS}" STREQUAL "OpenBLAS")
        include(cmake/OpenBLAS.cmake)
        set(MGE_BLAS_LIBS libopenblas)
    else()
        message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
endif()

# MKLDNN build: only on x86_64. MGE_ARCH is quoted so that an empty value
# cannot break the condition.
if(MGE_WITH_MKLDNN AND "${MGE_ARCH}" STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
    set(MEGDNN_X86_WITH_MKL_DNN 1)
endif()

848 849
# RTTI: mangling is enabled exactly when RTTI is disabled.
set(MEGDNN_ENABLE_MANGLING 1)
set(MEGDNN_ENABLE_RTTI 0)
if(MGE_ENABLE_RTTI)
    set(MEGDNN_ENABLE_MANGLING 0)
    set(MEGDNN_ENABLE_RTTI 1)
endif()
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
857

858 859
# Logging: the MGB / MEGDNN logging switches and the JSON switch all follow
# MGE_ENABLE_LOGGING.
foreach(_mge_logging_macro MGB_ENABLE_LOGGING MEGDNN_ENABLE_LOGGING MGB_ENABLE_JSON)
    set(${_mge_logging_macro} ${MGE_ENABLE_LOGGING})
endforeach()
unset(_mge_logging_macro)

# Exception: mirror the option and tell the user what disabling it implies.
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
if(NOT MGE_ENABLE_EXCEPTIONS)
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
endif()
869 870 871 872 873 874

# JIT
# Halide is only set up together with the JIT. The APPLE branch below
# force-disables JIT and Halide on macOS hosts, so the Halide build is not
# configured there in the first place.
if(MGE_WITH_JIT AND MGE_WITH_HALIDE AND NOT APPLE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
endif()
875

876
# Thread
# On APPLE hosts the thread-library variables are preset to pthreads, and the
# JIT back ends are switched off.
if(APPLE)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)

    message(STATUS "disable jit, halide and mlir on macos host build...")
    foreach(_mge_jit_option MGE_WITH_HALIDE MGE_WITH_JIT MGE_WITH_JIT_MLIR)
        set(${_mge_jit_option} OFF)
    endforeach()
    unset(_mge_jit_option)
endif()

M
Megvii Engine Team 已提交
889 890 891
# Mirror the (possibly overridden) JIT options into their MGB_* counterparts.
set(MGB_JIT         ${MGE_WITH_JIT})
set(MGB_JIT_MLIR    ${MGE_WITH_JIT_MLIR})
set(MGB_JIT_HALIDE  ${MGE_WITH_HALIDE})

# Consumers may override the MGB_C_OPR_INIT_FUNC symbol name by passing
# CUSTOM_C_OPR_INIT_FUNC; it is forwarded as a compile definition.
if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
    add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
    message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
endif()

# Custom-op support follows MGE_WITH_CUSTOM_OP.
set(MGB_CUSTOM_OP ${MGE_WITH_CUSTOM_OP})

900 901 902 903 904 905 906
# Preset the thread-library result variables on Windows builds.
# NOTE(review): CMAKE_USE_PTHREADS_INIT is set to 1 here alongside
# CMAKE_USE_WIN32_THREADS_INIT -- confirm that is intended on Windows.
if(MSVC OR WIN32)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
endif()

907
# Threads are considered available on Android, with Win32 threads, or when a
# thread library has been recorded in CMAKE_THREAD_LIBS_INIT.
if(ANDROID OR CMAKE_USE_WIN32_THREADS_INIT OR CMAKE_THREAD_LIBS_INIT)
    set(MGB_HAVE_THREAD 1)
endif()

# Windows XP SP2 deployments run without threads and without JSON.
if((MSVC OR WIN32) AND MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    message(STATUS "disable MGB_HAVE_THREAD/MGB_ENABLE_JSON when DEPLOY ON XP SP2")
    set(MGB_HAVE_THREAD 0)
    set(MGB_ENABLE_JSON 0)
endif()

919 920 921 922 923
# Test builds turn on MEGDNN_ENABLE_MULTI_THREADS.
if(MGE_WITH_TEST)
    # use intra-op multi threads
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
endif()

924 925
# Device back ends (CUDA, ROCM, CAMBRICON, ENFLAME): each MGE_WITH_<X> option
# is mirrored into MGB_<X> and MEGDNN_WITH_<X>.
foreach(_mge_backend CUDA ROCM CAMBRICON ENFLAME)
    set(MGB_${_mge_backend} ${MGE_WITH_${_mge_backend}})
    set(MEGDNN_WITH_${_mge_backend} ${MGE_WITH_${_mge_backend}})
endforeach()
unset(_mge_backend)
939

940 941 942 943 944 945 946
# Debug info: assertion locations and debug utilities are enabled for Debug
# and RelWithDebInfo builds only. CMAKE_BUILD_TYPE is quoted so that an empty
# build type selects the "off" branch instead of failing to parse.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR "${CMAKE_BUILD_TYPE}" STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
endif()

949 950 951 952 953 954 955
# Debug utilities are always off for Windows i386 builds. MGE_ARCH is quoted
# so that an empty value cannot break the comparison.
if((MSVC OR WIN32) AND "${MGE_ARCH}" STREQUAL "i386")
    set(MGB_ENABLE_DEBUG_UTIL 0)
    message(STATUS "disable MGB_ENABLE_DEBUG_UTIL at Windows i386 build")
endif()

956 957 958 959
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

# Inference only: gradients are dropped and the slim serving build is selected
# when inference-only is requested and tests are not built.
set(MGB_ENABLE_GRAD 1)
set(MGB_BUILD_SLIM_SERVING 0)
if(MGE_INFERENCE_ONLY AND NOT MGE_WITH_TEST)
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
endif()

# Distributed communication
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})

971 972 973 974 975 976 977 978 979 980 981 982
# MGE_ARCH related flags
# On x86 hosts, record which BLAS implementation was selected so that exactly
# one of MEGDNN_X86_WITH_MKL / MEGDNN_X86_WITH_OPENBLAS is set.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
        set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
        set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()
endif()

# Enable Naive: the "naive" architecture selects MEGDNN_NAIVE and warns about
# the performance cost.
if(MGE_ARCH STREQUAL "naive")
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
    set(MEGDNN_NAIVE 1)
endif()

# x86 family: set the MEGDNN_X86* macros and, for non-MSVC compilers, the
# word-size and SSE flags.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)
    if(MGE_ARCH STREQUAL "x86_64")
        set(MEGDNN_X86_64 1)
        set(MEGDNN_64_BIT 1)
        set(_mge_x86_width_flag "-m64")
    else()
        set(MEGDNN_X86_32 1)
        set(_mge_x86_width_flag "-m32")
    endif()
    if(NOT MSVC)
        string(APPEND CMAKE_CXX_FLAGS " ${_mge_x86_width_flag} -msse4.2 -mfpmath=sse")
    endif()
    unset(_mge_x86_width_flag)
endif()
1004
# dotprod is not enabled by default on APPLE, and cpuinfo has problems there,
# so the probe is limited to non-APPLE Clang builds. The compiler id is quoted
# so that an empty value cannot break the comparison.
if(NOT APPLE AND "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    check_cxx_compiler_flag("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
    if(CXX_COMPILER_SUPPORT_DOT)
        message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
        set(MGB_ENABLE_DOT 1)
    endif()
endif()
1012

1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
# armv7: NEON-related compiler flags plus the -march selection.
if(MGE_ARCH STREQUAL "armv7")
    if(ANDROID)
        string(APPEND CMAKE_CXX_FLAGS " -mfloat-abi=softfp -mfpu=neon")
    endif()
    # -funsafe-math-optimizations enables neon auto-vectorization (since neon
    # is not fully IEEE 754 compatible, GCC does not turn on neon
    # auto-vectorization by default).
    string(APPEND CMAKE_CXX_FLAGS " -funsafe-math-optimizations")
    set(MEGDNN_ARMV7 1)
    set(MARCH "-march=armv7-a")
endif()

# aarch64: 64-bit macros, -march selection and the optional armv8.2 fp16 path.
if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
    set(MGB_AARCH64 1)
    if(MGE_ARMV8_2_FEATURE_FP16)
        message(STATUS "Enable fp16 feature support in armv8.2")
        # Test the variable by name: expanding it first would leave a bare
        # "NOT" (a configure error) whenever MGE_DISABLE_FLOAT16 is unset.
        if(NOT MGE_DISABLE_FLOAT16)
            set(MEGDNN_ENABLE_FP16_NEON 1)
        endif()
        set(MARCH "-march=armv8.2-a+fp16")
    endif()

    if(MGE_WITH_CUDA)
        message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
        when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
        for save link time(14min->1min), you may open below flags if not deploy on\
        arm a53 platform, or just build release type!")
        # Opt-in switch referred to by the warning above; left disabled on purpose.
        #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
endif()
1044

1045 1046 1047 1048 1049
# riscv64: set the architecture macro and mark the build as 64-bit.
if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_RISCV64 1)
    set(MEGDNN_64_BIT 1)
endif()

1050 1051
# Append the -march value chosen by the architecture blocks (may be empty).
string(APPEND CMAKE_CXX_FLAGS " ${MARCH}")

# Linker version script, published as an internal cache entry.
set(MGE_VERSION_SCRIPT
    ${PROJECT_SOURCE_DIR}/src/version.ld
    CACHE INTERNAL "Path to linker version script")

1054
# Write out megbrain_build_config.h
# It defines macros needed by both megbrain and dnn; the generated header is
# also installed next to the public headers.
set(_mge_build_config_header ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
configure_file(src/megbrain_build_config.h.in ${_mge_build_config_header})
install(
    FILES ${_mge_build_config_header}
    DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
unset(_mge_build_config_header)

add_subdirectory(dnn)

1061 1062 1063 1064 1065 1066
# Generate megbrain/opr/param_defs.h from the Python parameter definitions and
# expose it through the INTERFACE library mgb_opr_param_defs.
list(APPEND MGB_OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)
set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)

# Path of the generated header inside the build tree.
set(_mgb_param_defs_header ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)

file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)
add_custom_command(
    OUTPUT ${_mgb_param_defs_header}
    COMMAND
        ${PYTHON3_EXECUTABLE_WITHOUT_VERSION}
        ${MGB_OPR_PARAM_DEFS_SCRIPT}
        ${MGB_OPR_PARAM_DEFS_SRCS}
        ${_mgb_param_defs_header}
    DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM
)

list(APPEND MGB_OPR_PARAM_DEFS_OUTS ${_mgb_param_defs_header})
unset(_mgb_param_defs_header)

install(FILES ${MGB_OPR_PARAM_DEFS_OUTS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)

# The custom target drives the generation; the INTERFACE library carries the
# include directories and depends on that target.
list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
target_include_directories(mgb_opr_param_defs
    INTERFACE
        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
        $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>
)
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
1089

1090
# TableGen parameter definitions, needed by the MLIR JIT and by the imperative
# runtime.
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
    # generate param_defs.td
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)

    # The generator input is the dnn definitions followed by the mgb ones.
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
    # Quoted on purpose: an unquoted expansion is split at every ';' and the
    # separators would be lost from the appended Python source.
    file(APPEND ${OPR_PARAM_DEFS_SRCS} "${CONTENTS}")

    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})
    add_custom_command(
        OUTPUT ${OPR_PARAM_DEFS_OUT}
        COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
        DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py ${OPR_PARAM_DEFS_SCRIPT}
        VERBATIM
    )

    # mlir tblgen sources
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
    set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
    add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
endif()

1115
# Distributed training builds MegRay; its transport options are forced to
# follow the CUDA / ROCm switches of this build.
if(MGE_WITH_DISTRIBUTED)
    set(MEGRAY_WITH_NCCL ${MGE_WITH_CUDA}
        CACHE BOOL "Override MegRay option" FORCE)
    set(MEGRAY_WITH_SHM ${MGE_WITH_CUDA}
        CACHE BOOL "Override MegRay option" FORCE)
    set(MEGRAY_WITH_RCCL ${MGE_WITH_ROCM}
        CACHE BOOL "Override MegRay option" FORCE)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
endif()

# Core library first, then the optional components.
add_subdirectory(src)

# load-and-run SDK
if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
endif()

# imperative runtime
if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
    message(STATUS "Enable imperative python wrapper runtime")
endif()

# Tests are only built when RTTI is enabled as well.
if(MGE_ENABLE_RTTI AND MGE_WITH_TEST)
    add_subdirectory(test)
endif()

1137
# "develop" target: symlink the built extension module and the generated
# version.py from the build tree into the source-tree Python package.
if(TARGET _imperative_rt)
    set(_mge_py_pkg_subdir imperative/python/${PACKAGE_NAME})
    add_custom_target(
        develop
        COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${CMAKE_CURRENT_BINARY_DIR}/${_mge_py_pkg_subdir}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
          ${CMAKE_CURRENT_SOURCE_DIR}/${_mge_py_pkg_subdir}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
        COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${CMAKE_CURRENT_BINARY_DIR}/${_mge_py_pkg_subdir}/version.py
          ${CMAKE_CURRENT_SOURCE_DIR}/${_mge_py_pkg_subdir}/version.py
        DEPENDS _imperative_rt
        VERBATIM
    )
    unset(_mge_py_pkg_subdir)
endif()

1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163
# Configure and install the pkg-config description.
# Unlike the Config.cmake modules this file is not relocatable (and not really
# portable), because two dependencies ship without pkg-config descriptions:
# FlatBuffers and MKL-DNN.
if(MGE_USE_SYSTEM_MKLDNN)
    set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
endif()
if(MGE_USE_SYSTEM_OPENBLAS)
    string(APPEND MGE_PKGCONFIG_LIBS_PRIVATE " -lopenblas")
endif()

configure_file(
    cmake/megengine.pc.in
    ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    @ONLY)
install(
    FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
if(NOT MGE_WITH_DISTRIBUTED)
    include(CMakePackageConfigHelpers)
    set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)

    # Generated package files, both written into the build tree.
    set(_mge_pkg_config ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake)
    set(_mge_pkg_version ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake)

    configure_package_config_file(
        cmake/MegEngineConfig.cmake.in
        ${_mge_pkg_config}
        INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR}
    )
    write_basic_package_version_file(
        ${_mge_pkg_version}
        VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
        COMPATIBILITY SameMajorVersion)

    install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(
        FILES ${_mge_pkg_config} ${_mge_pkg_version}
        DESTINATION ${MGE_INSTALL_CMAKEDIR})

    unset(_mge_pkg_config)
    unset(_mge_pkg_version)
endif()
1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199

# Windows: select the static runtime (/MT, /MTd for Debug) and rewrite any
# /MD found in the C / C++ flag variables to /MT, forcing the result into the
# cache.
if(MSVC OR WIN32)
    add_compile_options(
        $<$<CONFIG:>:/MT>
        $<$<CONFIG:Debug>:/MTd>
        $<$<CONFIG:Release>:/MT>
        )
    foreach(_mge_flag_var
            CMAKE_C_FLAGS
            CMAKE_C_FLAGS_DEBUG
            CMAKE_C_FLAGS_RELEASE
            CMAKE_C_FLAGS_MINSIZEREL
            CMAKE_C_FLAGS_RELWITHDEBINFO
            CMAKE_CXX_FLAGS
            CMAKE_CXX_FLAGS_DEBUG
            CMAKE_CXX_FLAGS_RELEASE
            CMAKE_CXX_FLAGS_MINSIZEREL
            CMAKE_CXX_FLAGS_RELWITHDEBINFO)
        if("${${_mge_flag_var}}" MATCHES "/MD")
            string(REPLACE "/MD" "/MT" ${_mge_flag_var} "${${_mge_flag_var}}")
            set(${_mge_flag_var} "${${_mge_flag_var}}" CACHE STRING "msvc compiler flags" FORCE)
            message(VERBOSE "MSVC flags: ${_mge_flag_var}:${${_mge_flag_var}}")
        endif()
    endforeach()
endif()
1204 1205 1206 1207 1208

# MLIR helper tools are only built together with the MLIR JIT.
if(MGE_WITH_JIT_MLIR)
    add_subdirectory(tools/mlir/mgb-opt)
    add_subdirectory(tools/mlir/mgb-file-check)
endif()
1209 1210 1211 1212 1213

# Statically linking cuDNN >= 8 together with many SM targets does not work.
# The warning is emitted three times so that it stands out in the configure
# log. The suggested MGE_CUDA_GENCODE value carries one -gencode per
# architecture, which is the form nvcc accepts.
if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0" AND NOT MGE_WITH_CUDNN_SHARED)
    foreach(_mge_warning_round RANGE 1 3)
        message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" ")
    endforeach()
    unset(_mge_warning_round)
endif()
1215 1216 1217 1218

# The lite sub-project is added when MGE_WITH_LITE is set.
if(MGE_WITH_LITE)
    add_subdirectory(lite)
endif()