CMakeLists.txt 39.5 KB
Newer Older
1
cmake_minimum_required(VERSION 3.15.2)
2 3 4

include (cmake/FetchMegBrainVersion.cmake)
project(MegEngine LANGUAGES C CXX VERSION ${MGB_VER_STRING})
5 6 7 8 9 10

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
11
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
12

13
if(NOT MSVC AND NOT APPLE AND NOT WIN32)
14 15 16 17 18
    set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq  <TARGET> <LINK_FLAGS> <OBJECTS>")
    set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()

19
include(GNUInstallDirs)
20 21 22 23 24 25
include(CheckCXXCompilerFlag)
CHECK_CXX_COMPILER_FLAG(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

set(MGE_ARCH AUTO CACHE STRING "Architecture on which MegEngine to be built.")
set_property(CACHE MGE_ARCH PROPERTY STRINGS AUTO
    x86_64 i386
26
    armv7 aarch64
27 28
    naive fallback
)
29
set (MGE_EXPORT_TARGETS MegEngine-targets)
30

31
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
M
Megvii Engine Team 已提交
32
option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
33
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)
34 35
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
option(MGE_WITH_MINIMUM_SIZE "Swith off MGE_ENABLE_RTTI、MGE_ENABLE_EXCEPTIONS、MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run. Take effect only when MGE_BIN_REDUCE was set" OFF)
36 37
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
option(MGE_ARMV8_2_FEATURE_DOTPROD "enable armv8.2-a+dotprod support" OFF)
38 39 40 41
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
42 43 44
option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." OFF)
45 46
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
47
option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
48
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
49
option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
50 51 52 53 54 55
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
56
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
57 58 59
option(MGE_BUILD_SDK "Build load_and_run" ON)
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
60
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
61 62 63 64 65 66 67 68
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)


if(MGE_WITH_NVRTC_STUB OR MGE_WITH_CUDA_STUB)
    set(MGE_WITH_ANY_CUDA_STUB ON)
else()
    set(MGE_WITH_ANY_CUDA_STUB OFF)
endif()
69

70
if(NOT ${MGE_BIN_REDUCE} STREQUAL "")
71
    message(STATUS "build with BIN REDUCE")
72 73 74 75 76 77 78 79 80 81 82 83 84
    if(MGE_WITH_MINIMUM_SIZE)
        set(MGE_ENABLE_RTTI OFF)
        set(MGE_ENABLE_LOGGING OFF)
        set(MGE_ENABLE_EXCEPTIONS OFF)
        set(MGE_INFERENCE_ONLY ON)
    endif()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -include ${MGE_BIN_REDUCE}")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full")
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full")
endif()

if(MGE_WITH_MIDOUT_PROFILE)
85
    message(STATUS "build with MIDOUT PROFILE")
86 87 88 89
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMIDOUT_PROFILING")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DMIDOUT_PROFILING")
endif()

90 91
if (APPLE)
    set (BUILD_SHARED_LIBS OFF)
92
    message(STATUS "build static for xcode framework require")
93 94
endif()

95 96 97 98 99 100 101
if (MGE_USE_SYSTEM_LIB)
    set (MGE_CUDA_USE_STATIC OFF)
endif()

if (MGB_WITH_FLATBUFFERS)
    set(MGB_ENABLE_FBS_SERIALIZATION ON)
endif()
102

103
if(CMAKE_TOOLCHAIN_FILE)
104 105
    message(STATUS "We are cross compiling.")
    message(STATUS "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
    set(FLATBUFFERS_FLATC_EXECUTABLE "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
    if(ANDROID_TOOLCHAIN_ROOT)
        if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
            set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
        endif()
        if(${ANDROID_ARCH} STREQUAL "arm")
            set(MGE_ARCH "armv7")
        elseif(${ANDROID_ARCH} STREQUAL "arm64")
            set(MGE_ARCH "aarch64")
        else()
            message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
        endif()
    elseif(IOS_TOOLCHAIN_ROOT)
        if(${IOS_ARCH} STREQUAL "armv7")
            set(MGE_ARCH "armv7")
        elseif(${IOS_ARCH} STREQUAL "arm64")
            set(MGE_ARCH "aarch64")
        elseif(${IOS_ARCH} STREQUAL "armv7k")
            set(MGE_ARCH "armv7")
        elseif(${IOS_ARCH} STREQUAL "arm64e")
            set(MGE_ARCH "aarch64")
        elseif(${IOS_ARCH} STREQUAL "armv7s")
            set(MGE_ARCH "armv7")
        else()
            message(FATAL_ERROR "Unsupported IOS_ARCH.")
        endif()
132 133
    elseif(RISCV_TOOLCHAIN_ROOT)
        set(MGE_ARCH "riscv64")
134 135 136 137 138
    elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
        set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
    else()
        message(FATAL_ERROR "Unknown cross-compiling settings.")
    endif()
139
    message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
140
endif()
141 142

if(${MGE_ARCH} STREQUAL "AUTO")
143
    if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
144 145 146
        set(MGE_ARCH "x86_64")
    elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i686")
        set(MGE_ARCH "i386")
147 148 149 150
    elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64" OR ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
        set(MGE_ARCH "aarch64")
    elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm")
        set(MGE_ARCH "armv7")
151
    else()
L
llehtahw 已提交
152
        message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
153 154 155
    endif()
endif()

156
if((${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "armv7" OR ${MGE_ARCH} STREQUAL "aarch64") AND NOT APPLE)
157 158
    option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
    if(MGE_ENABLE_CPUINFO)
159
        message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
160 161 162 163 164
        add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
        include(cmake/cpuinfo.cmake)
    endif()
endif()

165
if(MSVC OR WIN32)
166 167
    # for cmake after 3.15.2
    cmake_policy(SET CMP0091 NEW)
168
    set(CMAKE_OBJECT_PATH_MAX 300)
169 170 171 172 173 174
    if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
        set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()

175
    add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
176 177
    message(STATUS "into windows build...")
    message(VERBOSE "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
178 179 180
    if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang-cl")
        message(FATAL_ERROR "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md")
    endif()
181 182
    # add flags for enable sse instruction optimize for X86, enable avx header to compile avx code
    set(WIN_FLAGS "-msse4.2 -O2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
183 184 185 186 187 188 189 190 191
    # if u CPU is cascadelake series, u can enable for performance
    # set(WIN_FLAGS "{WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
    # set(WIN_FLAGS "{WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

    # for windows build
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default")
    set(WIN_FLAGS "${WIN_FLAGS} -Wno-error=deprecated  -Wno-error=documentation  -Wno-error=unreachable-code-break")
192
    set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /D_WIN32_WINNT=0x0601 /wd4819")
193 194 195 196 197
    set(WIN_FLAGS "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj")
    set(WIN_FLAGS "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport")

    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")
198 199

    #FIXME: fix halide JIT on windows
200
    message(STATUS "disable jit, halide and mlir on windows host build...")
201 202
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
M
Megvii Engine Team 已提交
203
    set(MGE_WITH_JIT_MLIR OFF)
204
    #FIXME: fix MegRay on windows
205
    message(STATUS "Disable distributed build on windows host build...")
206
    set(MGE_WITH_DISTRIBUTED OFF)
207 208 209 210 211
else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
    if(ANDROID)
        set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -DNDEBUG")
212
        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -DNDEBUG -g")
213

214 215
    else()
        set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
216
        set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -DNDEBUG -g")
217 218 219
    endif()
endif()

220 221 222 223 224 225 226
if(MGE_WITH_CUDA)
include(cmake/cudnn.cmake)
    if(MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
        message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
        set(MGE_WITH_LARGE_ARCHIVE ON)
    endif()
endif()
227
CHECK_CXX_COMPILER_FLAG(-fuse-ld=gold CXX_SUPPORT_GOLD)
228 229 230 231
if(MGE_WITH_LARGE_ARCHIVE)
    message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
    set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
elseif(CXX_SUPPORT_GOLD AND NOT ANDROID AND NOT APPLE AND NOT MSVC AND NOT WIN32 AND NOT MGE_WITH_LARGE_ARCHIVE)
232
    message(STATUS "Using GNU gold linker.")
233
    set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")    
234
endif()
235 236 237
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
238

239

240 241 242 243 244
if(NOT MGE_WITH_JIT)
    if(MGE_WITH_HALIDE)
        message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
        set(MGE_WITH_HALIDE OFF)
    endif()
M
Megvii Engine Team 已提交
245 246 247 248 249 250 251 252 253
    if(MGE_WITH_JIT_MLIR)
        message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
        set(MGE_WITH_JIT_MLIR OFF)
    endif()
endif()

# FIXME At present, there are some conflicts between the LLVM that halide
# depends on and the LLVM that MLIR depends on. Should be fixed in subsequent
# versions.
254 255
if(MGE_BUILD_IMPERATIVE_RT AND MGE_WITH_HALIDE)
    message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
256
endif()
257 258
if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE)
    message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
259 260
endif()

261
if(MGE_WITH_CUDA)
262 263 264 265 266 267 268
    # FIXME: check_language(CUDA) failed when sbsa mode!
    # detail: https://gitlab.kitware.com/cmake/cmake/-/issues/20676
    if(CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
        message(WARNING "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!")
    endif()

269 270
    include(CheckLanguage)
    check_language(CUDA)
271
    if(NOT CMAKE_CUDA_COMPILER AND NOT CMAKE_TOOLCHAIN_FILE)
272 273
        message(FATAL_ERROR "CUDA compiler not found in PATH")
    endif()
274 275 276 277 278 279 280

    # remove this after CMAKE fix nvcc sbsa
    if(NOT CMAKE_CUDA_COMPILER AND CMAKE_TOOLCHAIN_FILE)
        set(CMAKE_CUDA_COMPILER "nvcc")
        message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
    endif()

281 282 283 284 285 286
    enable_language(CUDA)
    set(CMAKE_CUDA_STANDARD 14)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()

if(NOT MGE_WITH_CUDA)
M
Megvii Engine Team 已提交
287
    if(NOT MGE_ARCH STREQUAL "x86_64" AND NOT MGE_ARCH STREQUAL "i386")
288
        message(STATUS "Disable JIT support, as the MGE_ARCH is not X86 and CUDA is not enabled.")
M
Megvii Engine Team 已提交
289 290 291
        set(MGE_WITH_JIT OFF)
        set(MGE_WITH_JIT_MLIR OFF)
    endif()
292
    set(MGE_WITH_HALIDE OFF)
293
    message(STATUS "Disable TensorRT support, as CUDA is not enabled.")
294 295 296 297 298 299 300
    set(MGE_WITH_TRT OFF)
endif()

find_package(PythonInterp 3 REQUIRED)

set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
301 302 303 304 305 306
if(NOT "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "")
    if(${CMAKE_THREAD_LIBS_INIT} STREQUAL "-pthread" AND MGE_WITH_CUDA)
        set_property(TARGET Threads::Threads
            PROPERTY INTERFACE_COMPILE_OPTIONS "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>"
            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>")
    endif()
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
endif()

set(MGE_BLAS MKL CACHE STRING "BLAS implementaion used by MegEngine.")
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)
set(MGE_CUDA_GENCODE "" CACHE STRING "Overwrite -gencode specifications for CUDA")
if(NOT CMAKE_CUDA_HOST_COMPILER)
    set(CMAKE_CUDA_HOST_COMPILER $(CMAKE_CXX_COMPILER))
endif()

if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
    message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
    set(CMAKE_BUILD_TYPE RelWithDebInfo)
endif()

if(NOT MGE_ENABLE_RTTI)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
endif()

if(NOT MGE_ENABLE_EXCEPTIONS)
326
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
327 328 329 330 331 332
endif()

if(MGE_WITH_TEST)
    include(cmake/gtest.cmake)
endif()

333
if(MGE_BUILD_IMPERATIVE_RT)
334 335 336
    set(CMAKE_CXX_STANDARD 17)
endif()

337
if(NOT MGE_WITH_CUDA)
338
    message(STATUS "Disable distributed support, as CUDA is not enabled.")
339 340 341 342
    set(MGE_WITH_DISTRIBUTED OFF)
endif()

if(MGE_INFERENCE_ONLY)
343
    message(STATUS "Disable distributed support for inference only build.")
344
    set(MGE_WITH_DISTRIBUTED OFF)
345
    message(STATUS "Disable imperative_rt python module for inference only build.")
346
    set(MGE_BUILD_IMPERATIVE_RT OFF)
347 348
endif()

349
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
M
Megvii Engine Team 已提交
350 351 352
    include(cmake/llvm-project.cmake)
endif()

353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
if(MGE_WITH_DISTRIBUTED)
    include(cmake/protobuf.cmake)
    include(cmake/zmq.cmake)
endif()

if(MGB_WITH_FLATBUFFERS)
    include(cmake/flatbuffers.cmake)
endif()

if(MGE_WITH_CUDA)
    include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
    foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
        get_filename_component(_NAME ${path} NAME)
        if(NOT ${_NAME} STREQUAL "stubs")
            list(APPEND CUDA_LINK_DIRECTORIES ${path})
        endif()
    endforeach()
    link_directories(${CUDA_LINK_DIRECTORIES})

    set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
376 377 378
    if(MSVC OR WIN32)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
        set(CCBIN_FLAG "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068")
379
        if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
380 381 382 383 384 385
            set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
        endif()
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
    else()
        set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
    endif()
386 387 388 389 390

    if(NOT MGE_ENABLE_RTTI)
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
    endif()
    if(NOT MGE_ENABLE_EXCEPTIONS)
391
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
392 393
    endif()
    if(NOT MGE_CUDA_GENCODE)
394
        if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386" OR ${MGE_ARCH} STREQUAL "aarch64")
395
            set(MEGDNN_THREADS_512 0)
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
            if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND ("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
                message(WARNING "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "11.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "11.1.0")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=sm_80")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_86,code=sm_86")                
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_86,code=compute_86")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "11.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "11.0.0")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=sm_80")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_80,code=compute_80")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0")
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=sm_75")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_75,code=compute_75")
            elseif(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "9.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "9.0.0")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=sm_70")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_70,code=compute_70")
            else()
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_35,code=sm_35")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_52,code=sm_52")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_60,code=sm_60")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=sm_61")
                set(MGE_CUDA_GENCODE "${MGE_CUDA_GENCODE} -gencode arch=compute_61,code=compute_61")
            endif()
        else()
            message(FATAL_ERROR "Unsupported CUDA host arch.")
        endif()
    else()
438
        set(MEGDNN_THREADS_512 1)
439 440 441 442 443 444 445 446
    endif()

    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")
    if(MGE_WITH_TRT)
        include(cmake/tensorrt.cmake)
    endif()
    if(MGE_CUDA_USE_STATIC)
        if(MGE_WITH_TRT)
447
            if(MSVC OR WIN32)
448
                message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
449
                list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY})
450
            else()                
451 452 453 454 455
                list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer -Wl,--no-whole-archive)
            endif()
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor libmyelin_pattern_runtime libmyelin_pattern_library)
456 457 458 459 460
            endif()
        endif()
        
        if("${CUDNN_VERSION}" STREQUAL "7.5.0")
            if(MSVC OR WIN32)
461
                message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
462
                list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
463
            else()
464 465
                message(STATUS "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html")
                list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
466
            endif()
467
        else()
468 469 470 471 472 473
            if(MSVC OR WIN32)
                message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
                list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
            else()
                list(APPEND MGE_CUDA_LIBS libcudnn)
            endif()
474
        endif()
475 476 477 478 479
        if(MSVC OR WIN32)
            list(APPEND MGE_CUDA_LIBS cusolver.lib cublas.lib curand.lib cudart_static.lib cusparse.lib)
        else()
            list(APPEND MGE_CUDA_LIBS cusolver_static cublas_static curand_static culibos cudart_static cusparse_static)
        endif()
480
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
481 482 483 484 485
            if(MSVC OR WIN32)
                list(APPEND MGE_CUDA_LIBS cublasLt.lib)
            else()
                list(APPEND MGE_CUDA_LIBS cublasLt_static)
            endif()
486
        endif()
487
        if((${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.0.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.0.0") AND NOT MSVC AND NOT WIN32)
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
            # mark all symbols from liblapack_static.a as weak to avoid
            # duplicated definition with mkl
            find_library(
                LAPACK_STATIC_PATH lapack_static
                HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
            if(NOT LAPACK_STATIC_PATH)
                message(FATAL_ERROR "liblapack_static.a not found")
            endif()
            set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

            # add a target that run objcopy
            add_custom_command(
                OUTPUT ${LAPACK_STATIC_COPY_PATH}
                COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
                VERBATIM)
            add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

            # create a library named "lapack_static_weak"
            add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
            add_dependencies(lapack_static_weak lapack_static_weak_target)
            set_target_properties(
                lapack_static_weak PROPERTIES
                IMPORTED_LOCATION ${LAPACK_STATIC_COPY_PATH})
            list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
        endif()
    else()
        if(MGE_WITH_TRT)
            list(APPEND MGE_CUDA_LIBS libnvinfer)
516 517 518 519
            if(TensorRT_VERSION_MAJOR GREATER_EQUAL 7)
                message(STATUS "handle trt myelin lib after trt7")
                list(APPEND MGE_CUDA_LIBS libmyelin)
            endif()
520 521 522 523 524
        endif()
        list(APPEND MGE_CUDA_LIBS libcudnn)
        if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0" OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
            list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
        endif()
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546
        list(APPEND MGE_CUDA_LIBS cudart)
    endif()

    if(NOT MGE_WITH_CUDA_STUB)
        if(MSVC OR WIN32)
            list(APPEND MGE_CUDA_LIBS cuda.lib)
        else()
            list(APPEND MGE_CUDA_LIBS cuda)
        endif()
    endif()

    if(NOT MGE_WITH_NVRTC_STUB)
        if(MSVC OR WIN32)
            list(APPEND MGE_CUDA_LIBS nvrtc.lib)
        else()
            list(APPEND MGE_CUDA_LIBS nvrtc)
        endif()
    endif()

    if(MGE_WITH_ANY_CUDA_STUB)
        add_subdirectory(dnn/cuda-stub)
        list(APPEND MGE_CUDA_LIBS cuda-stub)
547 548
    endif()

549
    if(MSVC OR WIN32)
550
        list(APPEND MGE_CUDA_LIBS nvrtc.lib)
551
    else()
552
        list(APPEND MGE_CUDA_LIBS nvToolsExt)
553
    endif()
554 555
    
    set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -lrt")
556 557 558 559
    if(UNIX)
        set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -ldl")
    endif()

560 561
endif()

562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
if(MGE_WITH_CAMBRICON)
    include_directories("$ENV{NEUWARE_HOME}/include")
    link_directories("$ENV{NEUWARE_HOME}/lib64")
    include(cmake/FindBANG/FindBANG.cmake)
    if (${MGE_MLU_ARCH} STREQUAL "MLU100")
        set(BANG_ARCH "100")
    elseif (${MGE_MLU_ARCH} STREQUAL "MLU1h8")
        set(BANG_ARCH "110")
    elseif (${MGE_MLU_ARCH} STREQUAL "MLU220")
        set(BANG_ARCH "220")
    elseif (${MGE_MLU_ARCH} STREQUAL "MLU270")
        set(BANG_ARCH "270")
    elseif (${MGE_MLU_ARCH} STREQUAL "MLU290")
        set(BANG_ARCH "290")
    elseif (${MGE_MLU_ARCH} STREQUAL "MLU200")
        set(BANG_ARCH "200")
    else()
        message (FATAL_ERROR "Unsupported MLU arch.")
    endif()
    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} --bang-mlu-arch=${MGE_MLU_ARCH}")
    set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -std=c++11 -Werror")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__BANG_ARCH__=${BANG_ARCH}")
    if (${CMAKE_BUILD_TYPE} STREQUAL "Debug")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O0")
    elseif (${CMAKE_BUILD_TYPE} STREQUAL "Release")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -O3")
    elseif (${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -g -O3")
    elseif (${CMAKE_BUILD_TYPE} STREQUAL "MinSizeRel")
        set(BANG_CNCC_FLAGS "${BANG_CNCC_FLAGS} -Os")
    endif()
    include(cmake/cnrt.cmake)
    include(cmake/cndev.cmake)
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev libcnml)
    set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
endif()

600 601 602 603
if (MGE_WITH_ROCM)
    include(cmake/rocm.cmake)
endif ()

604 605

if(MGE_WITH_ATLAS)
606 607
    add_subdirectory(dnn/atlas-stub)
    list(APPEND MGE_ATLAS_LIBS atlas-stub)
608 609 610
    set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
    set(MGB_ATLAS ${MGE_WITH_ATLAS})
endif()
611

612

613 614 615 616
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
    if(MGE_WITH_CUDA AND NOT ${CMAKE_VERSION} VERSION_LESS "3.10.0")
617
        message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636
        set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
    endif()
endif()

if(${MGE_ARCH} STREQUAL "x86_64" OR ${MGE_ARCH} STREQUAL "i386")
    if(${MGE_BLAS} STREQUAL "MKL")
        include(cmake/mkl.cmake)
        set(MGE_BLAS_LIBS libmkl)
    elseif(${MGE_BLAS} STREQUAL "OpenBLAS")
        include(cmake/OpenBLAS.cmake)
        set(MGE_BLAS_LIBS libopenblas)
    else()
        message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
    endif()
endif()

# MKLDNN build
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
    include(cmake/MKL_DNN.cmake)
637
    set(MEGDNN_X86_WITH_MKL_DNN 1)
638 639
endif()

640 641
# RTTI
if(MGE_ENABLE_RTTI)
642 643
    set(MEGDNN_ENABLE_MANGLING 0)
    set(MEGDNN_ENABLE_RTTI 1)
644
else()
645 646
    set(MEGDNN_ENABLE_MANGLING 1)
    set(MEGDNN_ENABLE_RTTI 0)
647 648
endif()
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
649

650 651
# Logging
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
652
set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
653 654 655
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

# Exception
656
if(NOT MGE_ENABLE_EXCEPTIONS)
657 658 659
    message(STATUS "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception.")
endif()
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
660
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
661 662 663 664 665 666

# JIT
if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
    set(HALIDE_SHARED_LIBRARY OFF CACHE BOOL "Build as a shared library")
    include(cmake/Halide.cmake)
endif()
667

668
# Thread
669 670 671 672 673 674
IF(APPLE)
    set(CMAKE_THREAD_LIBS_INIT "-lpthread")
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 0)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
675
    message(STATUS "disable jit, halide and mlir on macos host build...")
M
Megvii Engine Team 已提交
676 677 678
    set(MGE_WITH_HALIDE OFF)
    set(MGE_WITH_JIT OFF)
    set(MGE_WITH_JIT_MLIR OFF)
679 680
ENDIF()

M
Megvii Engine Team 已提交
681 682 683 684
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})

685 686 687 688 689 690 691 692
if(MSVC OR WIN32)
    set(CMAKE_HAVE_THREADS_LIBRARY 1)
    set(CMAKE_USE_WIN32_THREADS_INIT 1)
    set(CMAKE_USE_PTHREADS_INIT 1)
    set(THREADS_PREFER_PTHREAD_FLAG ON)
endif()

if(CMAKE_THREAD_LIBS_INIT OR CMAKE_USE_WIN32_THREADS_INIT)
693 694
    set(MGB_HAVE_THREAD 1)
endif()
695

696 697 698 699 700
if(MGE_WITH_TEST)
    # use intra-op multi threads
    set(MEGDNN_ENABLE_MULTI_THREADS 1)
endif()

701 702
# CUDA
set(MGB_CUDA ${MGE_WITH_CUDA})
703 704
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})

705

706 707 708 709 710
#ROCM
set(MGB_ROCM ${MGE_WITH_ROCM})
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})


711 712 713 714
# CAMBRICON
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
set(MEGDNN_WITH_CAMBRICON ${MGE_WITH_CAMBRICON})

715 716 717
# ENFLAME
set(MGB_ENFLAME ${MGE_WITH_ENFLAME})
set(MEGDNN_WITH_ENFLAME ${MGE_WITH_ENFLAME})
718

719 720 721 722 723 724 725
# Debug info
if(${CMAKE_BUILD_TYPE} STREQUAL "Debug" OR ${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo")
    set(MGB_ASSERT_LOC 1)
    set(MGB_ENABLE_DEBUG_UTIL 1)
else()
    set(MGB_ASSERT_LOC 0)
    set(MGB_ENABLE_DEBUG_UTIL 0)
726 727
endif()

728 729 730 731
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

# Inference only
732
if(MGE_INFERENCE_ONLY AND NOT MGE_WITH_TEST)
733 734
    set(MGB_ENABLE_GRAD 0)
    set(MGB_BUILD_SLIM_SERVING 1)
735
else()
736 737
    set(MGB_ENABLE_GRAD 1)
    set(MGB_BUILD_SLIM_SERVING 0)
738 739
endif()

740 741 742
# Distributed communication
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})

743 744 745 746 747 748 749 750 751 752 753 754
# MGE_ARCH related flags
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    if(MGE_BLAS STREQUAL "MKL")
        set(MEGDNN_X86_WITH_MKL 1)
    elseif(MGE_BLAS STREQUAL "OpenBLAS")
        set(MEGDNN_X86_WITH_OPENBLAS 1)
    endif()
endif()

# Enable Naive
if(MGE_ARCH STREQUAL "naive")
    set(MEGDNN_NAIVE 1)
755
    message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776
endif()

if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
    set(MEGDNN_X86 1)
    if(MGE_ARCH STREQUAL "x86_64")
        set(MEGDNN_X86_64 1)
        set(MEGDNN_64_BIT 1)
        if(NOT MSVC)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
        endif()
    else()
        set(MEGDNN_X86_32 1)
        if(NOT MSVC)
            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
        endif()
    endif()
    if(NOT MSVC)
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse")
    endif()
endif()

777 778 779 780 781 782 783 784 785 786 787 788 789 790
if(MGE_ARCH STREQUAL "armv7")
    # -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default.
    if(ANDROID)
        set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon")
    endif()
    set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsafe-math-optimizations")
    set (MARCH "-march=armv7-a")
    set (MEGDNN_ARMV7 1)
endif()

if(MGE_ARCH STREQUAL "aarch64")
    set(MEGDNN_AARCH64 1)
    set(MEGDNN_64_BIT 1)
    set(MARCH "-march=armv8-a")
791
    set(MGB_AARCH64 1)
792
    if(MGE_ARMV8_2_FEATURE_FP16)
793
        message(STATUS "Enable fp16 feature support in armv8.2")
794 795 796 797 798 799 800
        if(NOT ${MGE_DISABLE_FLOAT16})
            set(MEGDNN_ENABLE_FP16_NEON 1)
        endif()
        set(MARCH "-march=armv8.2-a+fp16")
    endif()

    if(MGE_ARMV8_2_FEATURE_DOTPROD)
801
        message(STATUS "Enable dotprod feature support in armv8.2")
802 803 804 805 806 807 808
        if(MGE_ARMV8_2_FEATURE_FP16)
            set(MARCH "-march=armv8.2-a+fp16+dotprod")
        else()
            set(MARCH "-march=armv8.2-a+dotprod")
        endif()
    endif()

809 810 811 812 813 814 815
    if(MGE_WITH_CUDA)
        message(WARNING "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
        when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
        for save link time(14min->1min), you may open below flags if not deploy on\
        arm a53 platform, or just build release type!")
        #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419 -mno-fix-cortex-a53-835769")
    endif()
816
endif()
817

818 819 820 821 822
if(MGE_ARCH STREQUAL "riscv64")
    set(MEGDNN_RISCV64 1)
    set(MEGDNN_64_BIT 1)
endif()

823 824
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")

825 826
set(MGE_VERSION_SCRIPT ${PROJECT_SOURCE_DIR}/src/version.ld CACHE INTERNAL "Path to linker version script")

827
# Write out megbrain_build_config.h
828 829
# It defines macros needed by both megbrain and dnn
configure_file(src/megbrain_build_config.h.in ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
830
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
831 832 833

add_subdirectory(dnn)

834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
list(APPEND MGB_OPR_PARAM_DEFS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)

set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)
add_custom_command(
    OUTPUT
        ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    COMMAND ${PYTHON_EXECUTABLE} ${MGB_OPR_PARAM_DEFS_SCRIPT} ${MGB_OPR_PARAM_DEFS_SRCS}
        ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
    DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    VERBATIM
)

list(APPEND MGB_OPR_PARAM_DEFS_OUTS
    ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
)

852
install(FILES ${MGB_OPR_PARAM_DEFS_OUTS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
853 854 855 856

list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
857 858 859 860 861
target_include_directories(mgb_opr_param_defs
    INTERFACE
        $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
        $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>
)
862
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
863
install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
864

865
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
866 867
    # generate param_defs.td
    set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
868
    set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
869 870
    set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
    set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
871
    set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)
872 873 874
    file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py DESTINATION ${MGE_GENFILE_DIR})
    file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
    file(APPEND ${OPR_PARAM_DEFS_SRCS} ${CONTENTS})
875
    file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})
876 877 878 879 880 881 882
    add_custom_target(param_defs_tblgen
        COMMAND ${PYTHON_EXECUTABLE} ${OPR_PARAM_DEFS_SCRIPT} ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
        DEPENDS ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_SCRIPT}
        VERBATIM
    )
    # mlir tblgen sources
    set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
883
    set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
884 885 886 887
    list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
    file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
endif()

888 889 890 891 892
if(MGE_WITH_DISTRIBUTED)
    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
endif()

add_subdirectory(src)
893 894 895 896

if(MGE_BUILD_SDK)
    add_subdirectory(sdk/load-and-run)
endif()
897

898 899 900

if(MGE_BUILD_IMPERATIVE_RT)
    add_subdirectory(imperative)
901
    message(STATUS "Enable imperative python wrapper runtime")
902 903 904
endif()

if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
905
    add_subdirectory(test)
906 907
endif()

908
if(TARGET _imperative_rt)
909 910 911
    add_custom_target(
        develop
        COMMAND ${CMAKE_COMMAND} -E create_symlink
912 913
          ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
          ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
914 915 916
        COMMAND ${CMAKE_COMMAND} -E create_symlink
          ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
          ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
917
        DEPENDS _imperative_rt
918 919
        VERBATIM
    )
920 921
endif()

922 923 924 925 926 927 928 929 930 931 932 933 934
# Configure and install pkg-config.
# Note that unlike the Config.cmake modules, this is not relocatable (and not
# really portable) because we have two dependencies without pkg-config
# descriptions: FlatBuffers and MKL-DNN
if (MGE_USE_SYSTEM_MKLDNN)
    set (MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
endif()
if (MGE_USE_SYSTEM_OPENBLAS)
    set (MGE_PKGCONFIG_LIBS_PRIVATE "${MGE_PKGCONFIG_LIBS_PRIVATE} -lopenblas")
endif()
configure_file(cmake/megengine.pc.in
               ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
               @ONLY)
935
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
936 937 938 939 940 941
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
if (NOT MGE_WITH_DISTRIBUTED)
    include(CMakePackageConfigHelpers)
    set (MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)
942
    configure_package_config_file(cmake/MegEngineConfig.cmake.in
943 944 945 946 947 948 949 950 951 952 953 954 955
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
        INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR}
    )
    write_basic_package_version_file(
        ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
        VERSION ${MGB_VER_MAJOR}.${MGB_VER_MINOR}.${MGB_VER_PATCH}
        COMPATIBILITY SameMajorVersion)

    install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
                ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
            DESTINATION ${MGE_INSTALL_CMAKEDIR})
endif()
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970

if(MSVC OR WIN32)
    add_compile_options(
        $<$<CONFIG:>:/MT>
        $<$<CONFIG:Debug>:/MTd>
        $<$<CONFIG:Release>:/MT>
        )
    foreach (CompilerFlag
            CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
            CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
            CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
            CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
        if(${CompilerFlag} MATCHES "/MD")
            string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
            set(${CompilerFlag} "${${CompilerFlag}}" CACHE STRING "msvc compiler flags" FORCE)
971
            message(VERBOSE "MSVC flags: ${CompilerFlag}:${${CompilerFlag}}")
972 973 974
        endif()
    endforeach()
endif()
975 976 977 978 979

if(MGE_WITH_JIT_MLIR)
    add_subdirectory(tools/mlir/mgb-opt)
    add_subdirectory(tools/mlir/mgb-file-check)
endif()
980 981 982 983 984

if(MGE_WITH_CUDA AND MGE_CUDA_USE_STATIC AND("${CUDNN_VERSION}" VERSION_GREATER "8.0.0" OR "${CUDNN_VERSION}" VERSION_EQUAL "8.0.0") AND (NOT MGE_WITH_CUDNN_SHARED))
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
    message(WARNING "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 arch=compute_75,code=sm_75\" ")
985
endif()
986