CMakeLists.txt 56.0 KB
Newer Older
1
cmake_minimum_required(VERSION 3.15.2)
2 3 4
message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}")
# Ninja is the recommended generator; any other generator only triggers a warning.
# The expansion is quoted so that if() compares the literal generator name and never
# re-interprets it as the name of another variable.
if(NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")
  message(WARNING "CMAKE_GENERATOR NOT EQUAL Ninja, which we do not recommend")
endif()
6

7 8 9 10 11
# Declare the MegEngine project (C and C++ only at this point).
# NOTE(review): MGB_VER_STRING is not set in the visible part of this file before
# project(); presumably cmake/FetchMegBrainVersion.cmake defines it - confirm.
include(cmake/FetchMegBrainVersion.cmake)
project(
  MegEngine
  LANGUAGES C CXX
  VERSION ${MGB_VER_STRING})
12 13 14 15

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
16
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
17 18
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
19
set(CMAKE_POLICY_DEFAULT_CMP0048 NEW)
20

21 22 23 24 25 26
# Outside of MSVC, Apple and WIN32 toolchains, override the C++ static-archive rules
# so that `ar` and `ranlib` are invoked with their D (deterministic) modifier.
if(NOT (MSVC OR APPLE OR WIN32))
  set(CMAKE_CXX_ARCHIVE_CREATE "<CMAKE_AR> Dqc <TARGET> <LINK_FLAGS> <OBJECTS>")
  set(CMAKE_CXX_ARCHIVE_APPEND "<CMAKE_AR> Dq  <TARGET> <LINK_FLAGS> <OBJECTS>")
  set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -D <TARGET>")
endif()

29
include(GNUInstallDirs)
30
include(CheckCXXCompilerFlag)
31
include(CheckIPOSupported)
32
include(CMakeDependentOption)
33

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
check_cxx_compiler_flag(-Wclass-memaccess CXX_SUPPORT_WCLASS_MEMACCESS)

set(MGE_ARCH
    AUTO
    CACHE STRING "Architecture on which MegEngine to be built.")
set_property(
  CACHE MGE_ARCH
  PROPERTY STRINGS
           AUTO
           x86_64
           i386
           armv7
           aarch64
           naive
           fallback)
set(MGE_EXPORT_TARGETS MegEngine-targets)
50

51
# Turn the colon-separated LD_LIBRARY_PATH and LIBRARY_PATH environment variables
# into CMake lists (ALTER_LD_LIBRARY_PATHS / ALTER_LIBRARY_PATHS); both default to an
# empty string when the environment variable is unset or empty.
# The environment value is quoted so that it reaches string(REPLACE) as ONE input
# argument: unquoted, a value containing ';' would be split into several inputs that
# string(REPLACE) concatenates back together without any separator.
if(NOT "$ENV{LD_LIBRARY_PATH}" STREQUAL "")
  string(REPLACE ":" ";" ALTER_LD_LIBRARY_PATHS "$ENV{LD_LIBRARY_PATH}")
else()
  set(ALTER_LD_LIBRARY_PATHS "")
endif()

if(NOT "$ENV{LIBRARY_PATH}" STREQUAL "")
  string(REPLACE ":" ";" ALTER_LIBRARY_PATHS "$ENV{LIBRARY_PATH}")
else()
  set(ALTER_LIBRARY_PATHS "")
endif()

63
option(MGE_WITH_JIT "Build MegEngine with JIT." ON)
M
Megvii Engine Team 已提交
64
option(MGE_WITH_JIT_MLIR "Build MegEngine with MLIR JIT." OFF)
65
option(MGE_WITH_HALIDE "Build MegEngine with Halide JIT" OFF)
66
option(MGE_WITH_MIDOUT_PROFILE "Build MegEngine with Midout profile." OFF)
67 68 69 70
# Minimum-size build switch. When ON, this file later turns MGE_ENABLE_RTTI,
# MGE_ENABLE_LOGGING and MGE_ENABLE_EXCEPTIONS off and MGE_INFERENCE_ONLY on.
option(
  MGE_WITH_MINIMUM_SIZE
  "Switch off MGE_ENABLE_RTTI, MGE_ENABLE_EXCEPTIONS, MGE_ENABLE_LOGGING and switch on MGE_INFERENCE_ONLY so that compile minimum load_and_run."
  OFF)
71
option(MGE_ARMV8_2_FEATURE_FP16 "Enable armv8.2-a+fp16 support" OFF)
72 73 74
option(MGE_DISABLE_FLOAT16 "Disable MegEngine float16 support." OFF)
option(MGE_WITH_CUDA "Enable MegEngine CUDA support." ON)
option(MGE_CUDA_USE_STATIC "Enable MegEngine CUDA static linking." ON)
75
option(MGE_WITH_LITE "Build MGE with lite" ON)
76
option(MGE_WITH_TRT "Build MegEngine with TensorRT." ON)
77 78
option(MGE_WITH_CUDA_STUB "Build MegEngine with CUDA stub." ON)
option(MGE_WITH_NVRTC_STUB "Build MegEngine with NVRTC stub." OFF)
79 80
option(MGE_WITH_CUDNN_SHARED "Build MegEngine with CUDNN shared." ON)
option(MGE_WITH_CUBLAS_SHARED "Build MegEngine with CUBLAS shared." OFF)
81 82
option(MGE_USE_SYSTEM_LIB "Build MegEngine with system libraries." OFF)
option(MGB_WITH_FLATBUFFERS "Build MegBrain with FlatBuffers serialization support." ON)
83
option(MGE_WITH_CAMBRICON "Build MegEngine with Cambricon support" OFF)
84
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
85
option(MGE_WITH_ATLAS "Build MegEngine with Atlas support" OFF)
86 87 88 89 90
option(MGE_ENABLE_RTTI "Build with RTTI" ON)
option(MGE_ENABLE_LOGGING "Build with logging" ON)
option(MGE_DEBUG_UTIL "Enable debug utility" ON)
option(MGE_ENABLE_EXCEPTIONS "Build with exceptions" ON)
option(MGE_WITH_TEST "Enable test for MegEngine." OFF)
91
option(MGE_WITH_BENCHMARK "Enable DNN BENCHMARK" OFF)
92
option(MGE_WITH_DISTRIBUTED "Build with distributed support" ON)
93
option(MGE_BUILD_IMPERATIVE_RT "Build _imperative_rt Python Module " ON)
94 95
option(MGE_INFERENCE_ONLY "Build inference only library." OFF)
option(MGE_WITH_MKLDNN "Enable Intel MKL_DNN support," ON)
96
option(MGE_WITH_ROCM "Enable ROCM support" OFF)
97
option(MGE_WITH_LARGE_ARCHIVE "Enable big archive link support" OFF)
98
option(MGE_BUILD_WITH_ASAN "Enable build with ASAN, need compiler support" OFF)
99
option(MGE_WITH_CUSTOM_OP "Build with Custom op" OFF)
100
option(MGE_SYNC_THIRD_PARTY "help sync third_party submodule" OFF)
101 102 103 104 105
option(MGE_PROFILE_COMPILE_TIME "help profile compile time per file" OFF)

# When compile-time profiling is requested, set the global RULE_LAUNCH_COMPILE
# property so that every compile command is launched through `cmake -E time`.
# The launcher is the bare name `cmake`, so it is resolved through PATH at build time.
if(MGE_PROFILE_COMPILE_TIME)
  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "cmake -E time")
endif()
106

107
# TODO: add windows support
108
# MGE_WITH_CUPTI is only user-selectable (default OFF) when CUDA and the imperative
# runtime are enabled on a non-MSVC, non-WIN32 toolchain; otherwise it is forced OFF.
cmake_dependent_option(
  MGE_WITH_CUPTI "Build with CUPTI" OFF
  "MGE_WITH_CUDA;MGE_BUILD_IMPERATIVE_RT;NOT MSVC;NOT WIN32" OFF)

set(MGB_CUPTI ${MGE_WITH_CUPTI})

113
if(MSVC OR WIN32)
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
  # FIXME: statically linking the Windows VC runtime from some Visual Studio versions
  # causes runtime issues on certain call paths, for example _imperative_rt.pyd -->
  # megengine_shared.dll, where the C API flush can not find the fd args. The root
  # cause is unknown, so as a workaround the VC runtime is linked dynamically by
  # default. The VC runtime is still linked statically when
  # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP/MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is
  # enabled, so please use lite_static_all_in_one (lite/CMakeLists.txt) in a Windows XP
  # environment whenever possible. If the VC runtime is not installed in your
  # environment, refer to:
  # https://docs.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist?view=msvc-160
  option(MGE_STATIC_LINK_WITH_VC_RUNTIME
         "Enable mge static link with Windows vc runtime" OFF)

  option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP "Enable deploy inference on Windows xp" OFF)
  # MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2 is a special mode for Windows XP SP2 (32bit).
  # Internal behavior:
  #   1. MGB_HAVE_THREAD=0 is force-defined, which means only a single thread is
  #      supported.
  #   2. some features are disabled, e.g. MGB_ENABLE_JSON and var sanity check. Do not
  #      worry too much about this: if you need these features to debug, run the same
  #      model in a non-XP-SP2 environment, e.g. Win7 or XP SP3 (built without
  #      MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2).
  #   3. only MegEngine (load_and_run) and the MegEngineLite API are supported on XP
  #      SP2; some debug utilities, e.g. megbrain_test/megdnn_test, can not run, mostly
  #      because of the gtest source code.
  # Notes for SDK callers:
  #   1. as the mutex is removed, when you use MSVC's own API (e.g. CreateThread) to
  #      start several MegEngine instances in the same process, please call the
  #      MegEngine API (init/run) as serially as possible, and do not use std::thread,
  #      std::mutex or std::this_thread_id on the SDK caller side!
  # To check whether a dll/exe can be deployed on Windows XP SP2, see
  # scripts/misc/check_windows_xp_sp2_deploy.py
  option(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2
         "Enable deploy inference on Windows xp sp2" OFF)

  # PE file linked by LLVM lld can not run at Windows XP env, so we force use link.exe
  # which always locate in Microsoft Visual Studio/*/*/VC/Tools/MSVC/*/bin/*/*/link.exe
  set(CMAKE_LINKER "link.exe")
  if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP OR MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
    message(
      STATUS "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when build for Windows XP")

    # Windows XP deployment is 32-bit only, so MGE_ARCH has to be i386 at this point.
    # The expansion is quoted so the comparison is always a plain string comparison.
    if(NOT "${MGE_ARCH}" STREQUAL "i386")
      message(FATAL_ERROR "only support 32bit when build for Windows xp")
    endif()
154

155 156 157
    if(NOT MGE_INFERENCE_ONLY)
      message(FATAL_ERROR "only support inference when build for Windows xp")
    endif()
158

159 160 161
    if(MGE_WITH_CUDA)
      message(FATAL_ERROR "do not support CUDA when build for Windows xp")
    endif()
162

163 164 165 166 167 168 169 170 171 172 173 174
    # Windows XP sp3 have thread issue, Workround for it
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0501 /Zc:threadSafeInit-")
    # for Windows XP type
    add_link_options("/SUBSYSTEM:CONSOLE,5.01")
    # some old lib(for example mkl for xp) use legacy stdio, so we force link
    # legacy_stdio_definitions
    add_link_options("/DEFAULTLIB:legacy_stdio_definitions.lib")

    if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__DEPLOY_ON_XP_SP2__=1")
175
    endif()
176 177 178 179
  else()
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D_WIN32_WINNT=0x0601")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /D_WIN32_WINNT=0x0601")
  endif()
180
endif()
181

182
if(MSVC OR WIN32)
183 184
  message(STATUS "windows force cudnn static link")
  set(MGE_WITH_CUDNN_SHARED OFF)
185 186
endif()

187
# MGE_WITH_ANY_CUDA_STUB is ON as soon as either the CUDA stub or the NVRTC stub is
# enabled, and OFF otherwise.
set(MGE_WITH_ANY_CUDA_STUB OFF)
if(MGE_WITH_CUDA_STUB OR MGE_WITH_NVRTC_STUB)
  set(MGE_WITH_ANY_CUDA_STUB ON)
endif()
192

193
# Midout profiling build: defines MIDOUT_PROFILING for C and C++, keeps RTTI on,
# turns the minimum-size mode off, and is rejected on WIN32.
if(MGE_WITH_MIDOUT_PROFILE)
  message(
    STATUS
      "build with MIDOUT PROFILE and force set MGE_WITH_MINIMUM_SIZE off and force rtti ON"
  )
  string(APPEND CMAKE_CXX_FLAGS " -DMIDOUT_PROFILING")
  string(APPEND CMAKE_C_FLAGS " -DMIDOUT_PROFILING")
  set(MGE_ENABLE_RTTI ON)
  set(MGE_WITH_MINIMUM_SIZE OFF)
  if(WIN32)
    message(FATAL_ERROR "do not support midout at WIN32")
  endif()
endif()

# bin_reduce_cmake.h is force-included (-include) into every C and C++ translation
# unit, except for midout-profile builds and WIN32 builds.
set(BIN_REDUCE ${PROJECT_SOURCE_DIR}/src/bin_reduce_cmake.h)
if(MGE_WITH_MINIMUM_SIZE)
  message(STATUS "build with MGE_WITH_MINIMUM_SIZE bin_reduce header is: ${BIN_REDUCE}")
  # a minimum-size build is inference only, without RTTI, logging and exceptions
  set(MGE_INFERENCE_ONLY ON)
  foreach(disabled_feature MGE_ENABLE_RTTI MGE_ENABLE_LOGGING MGE_ENABLE_EXCEPTIONS)
    set(${disabled_feature} OFF)
  endforeach()
  # MGE_WITH_MINIMUM_SIZE triggers unused-parameter warnings, so silence them
  string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-parameter")
  string(APPEND CMAKE_C_FLAGS " -Wno-unused-parameter")
endif()

if(NOT (MGE_WITH_MIDOUT_PROFILE OR WIN32))
  string(APPEND CMAKE_CXX_FLAGS " -include ${BIN_REDUCE}")
  string(APPEND CMAKE_C_FLAGS " -include ${BIN_REDUCE}")
endif()

# Put every function and data item in its own section and let the linker drop the
# unreferenced ones, when the compiler accepts the flags.
if(NOT APPLE)
  # probing CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT on APPLE leads to a cmake crash,
  # so the check is skipped there
  check_cxx_compiler_flag("-ffunction-sections -fdata-sections  -Wl,--gc-sections"
                          CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
  if(CXX_FUNCTION_DATA_GC_SECTIONS_SUPPORT)
    foreach(compile_flags_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
      string(APPEND ${compile_flags_var} " -ffunction-sections -fdata-sections")
    endforeach()
    foreach(link_flags_var CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
      string(APPEND ${link_flags_var} " -Wl,--gc-sections")
    endforeach()
  endif()
endif()

check_ipo_supported(RESULT IS_LTO_SUPPORT OUTPUT output_info)
237 238
# LLVM on Windows report support LTO, but do not support -flto=full at link stage
if(IS_LTO_SUPPORT AND NOT WIN32)
239 240 241
  message(STATUS "lto is supported in this compiler")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -flto=full")
  set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -flto=full")
242
else()
243
  message(STATUS "lto is not supported in this compiler")
244 245
endif()

246 247 248
if(APPLE)
  set(BUILD_SHARED_LIBS OFF)
  message(STATUS "build static for xcode framework require")
249 250
endif()

251 252
if(MGE_USE_SYSTEM_LIB)
  set(MGE_CUDA_USE_STATIC OFF)
253 254
endif()

255 256
if(MGB_WITH_FLATBUFFERS)
  set(MGB_ENABLE_FBS_SERIALIZATION ON)
257
endif()
258

259
# Cross compiling (a toolchain file is given): point flatc at the host build and
# derive MGE_ARCH from the toolchain in use (Android, iOS, RISC-V or a generic ARM
# cross build selected through ARM_CROSS_BUILD_ARCH).
# All architecture variables are quoted when compared, so an unset or empty variable
# ends in the intended FATAL_ERROR branch instead of an if() syntax error.
if(CMAKE_TOOLCHAIN_FILE)
  message(STATUS "We are cross compiling.")
  message(
    STATUS
      "config FLATBUFFERS_FLATC_EXECUTABLE to: ${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc"
  )
  set(FLATBUFFERS_FLATC_EXECUTABLE
      "${PROJECT_SOURCE_DIR}/build_dir/host_flatc/install/bin/flatc")
  if(ANDROID_TOOLCHAIN_ROOT)
    # ANDROID_ARCH_NAME, when set, takes precedence over ANDROID_ARCH
    if(NOT "${ANDROID_ARCH_NAME}" STREQUAL "")
      set(ANDROID_ARCH ${ANDROID_ARCH_NAME})
    endif()
    if("${ANDROID_ARCH}" STREQUAL "arm")
      set(MGE_ARCH "armv7")
    elseif("${ANDROID_ARCH}" STREQUAL "arm64")
      set(MGE_ARCH "aarch64")
    else()
      message(FATAL_ERROR "DO NOT SUPPORT ANDROID ARCH NOW")
    endif()
  elseif(IOS_TOOLCHAIN_ROOT)
    # armv7 / armv7k / armv7s map to armv7, arm64 / arm64e map to aarch64
    if("${IOS_ARCH}" MATCHES "^(armv7|armv7k|armv7s)$")
      set(MGE_ARCH "armv7")
    elseif("${IOS_ARCH}" MATCHES "^(arm64|arm64e)$")
      set(MGE_ARCH "aarch64")
    else()
      message(FATAL_ERROR "Unsupported IOS_ARCH.")
    endif()
  elseif(RISCV_TOOLCHAIN_ROOT)
    set(MGE_ARCH "riscv64")
  elseif(NOT "${ARM_CROSS_BUILD_ARCH}" STREQUAL "")
    set(MGE_ARCH ${ARM_CROSS_BUILD_ARCH})
  else()
    message(FATAL_ERROR "Unknown cross-compiling settings.")
  endif()
  message(STATUS "CONFIG MGE_ARCH TO ${MGE_ARCH}")
endif()
301 302

# Resolve MGE_ARCH=AUTO from CMAKE_SYSTEM_PROCESSOR.
# The processor name is quoted so that an empty CMAKE_SYSTEM_PROCESSOR ends in the
# FATAL_ERROR below instead of an if() syntax error.
if("${MGE_ARCH}" STREQUAL "AUTO")
  if("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(x86_64|AMD64)$")
    set(MGE_ARCH "x86_64")
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(i386|i686)$")
    set(MGE_ARCH "i386")
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^(aarch64|arm64)$")
    set(MGE_ARCH "aarch64")
  elseif("${CMAKE_SYSTEM_PROCESSOR}" MATCHES "^arm")
    # any other arm* processor is treated as armv7
    set(MGE_ARCH "armv7")
  else()
    message(FATAL_ERROR "Unknown machine architecture for MegEngine.")
  endif()
endif()

319
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
320 321
  message(STATUS "Setting build type to 'RelWithDebInfo' as none was specified.")
  set(CMAKE_BUILD_TYPE RelWithDebInfo)
322 323
endif()

324 325 326 327 328 329 330 331 332
# Release builds for non-x86_64 targets drop RTTI, unless tests or midout profiling
# are enabled.
# CMAKE_BUILD_TYPE is quoted because it stays empty under multi-config generators
# (the default above is only applied when CMAKE_CONFIGURATION_TYPES is unset); an
# unquoted empty expansion would make this if() a syntax error.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Release"
   AND NOT MGE_WITH_TEST
   AND NOT "${MGE_ARCH}" STREQUAL "x86_64"
   AND NOT MGE_WITH_MIDOUT_PROFILE)
  set(MGE_ENABLE_RTTI OFF)
  message(
    STATUS
      "disable MGE_ENABLE_RTTI when Release/NON-x86_64/NON-MGE_WITH_MIDOUT_PROFILE mode!!"
  )
endif()

335
if(MSVC OR WIN32)
336 337 338 339 340 341 342 343 344 345 346 347 348
  # for cmake after 3.15.2
  cmake_policy(SET CMP0091 NEW)
  set(CMAKE_OBJECT_PATH_MAX 300)
  if(MGE_BUILD_WITH_ASAN)
    set(MGE_STATIC_LINK_WITH_VC_RUNTIME ON)
    message(
      STATUS
        "Force set MGE_STATIC_LINK_WITH_VC_RUNTIME ON when build for Windows MGE_BUILD_WITH_ASAN"
    )
  endif()
  # Select the MSVC runtime library: static or DLL flavour depending on
  # MGE_STATIC_LINK_WITH_VC_RUNTIME, debug variant for Debug builds.
  # CMAKE_BUILD_TYPE is quoted so an empty value (multi-config generators) selects
  # the non-debug runtime instead of breaking the if() expression.
  if(MGE_STATIC_LINK_WITH_VC_RUNTIME)
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebug")
    else()
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded")
    endif()
  else()
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDebugDLL")
    else()
      set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreadedDLL")
    endif()
  endif()

  add_compile_definitions(NOMINMAX=1 _USE_MATH_DEFINES=1 WIN32=1)
  message(STATUS "into windows build CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
  # Windows builds are only supported with clang / clang-cl. The compiler id is
  # quoted so that an empty id (unidentified compiler) reaches the FATAL_ERROR below
  # instead of breaking the if() expression.
  if(NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang"
     AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang-cl")
    message(
      FATAL_ERROR
        "only support clang-cl for windows build, pls check detail: scripts/cmake-build/BUILD_README.md"
    )
  endif()
  # on windows need append
  # VS_PATH/VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows and
  # VS_PATH/VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows to PATH
  # env
  # AddressSanitizer setup for Windows builds.
  # With MGE_BUILD_WITH_ASAN: rejects Debug builds, requires the VS_PATH environment
  # variable, locates the clang asan import library for x86_64 or i386 under
  # VS_PATH, links it (plus the runtime thunk) into every target and forces /Od for
  # the RelWithDebInfo and Release configurations. Without ASAN: WIN_FLAGS starts
  # as /O2.
  if(MGE_BUILD_WITH_ASAN)
    message(
      WARNING
        "please do (set)export ASAN_OPTIONS=windows_hook_rtl_allocators=true when run test after build finish, caused by we link asan dll!!"
    )
    if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
      message(
        WARNING
          "Windows AddressSanitizer doesn't support linking with debug runtime libraries yet, which means do not support CMAKE_BUILD_TYPE=Debug"
      )
      message(
        FATAL_ERROR
          "Please build with RelWithDebInfo or Release by : EXTRA_CMAKE_ARGS=\"-DMGE_BUILD_WITH_ASAN=ON -DCMAKE_BUILD_TYPE=RelWithDebInfo ...\""
      )
    endif()
    if("$ENV{VS_PATH}" STREQUAL "")
      message(
        FATAL_ERROR
          "can not find VS_PATH, please export Visual Studio root dir to VS_PATH env")
    endif()
    # both architectures name the import libraries with an explicit .lib suffix
    if("${MGE_ARCH}" STREQUAL "x86_64")
      set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-x86_64.lib")
      set(WINDOWS_ASAN_RUNTIME_THUNK_NAME
          "clang_rt.asan_dynamic_runtime_thunk-x86_64.lib")
      set(WINDOWS_ASAN_PATH_SUFFIXES
          "VC/Tools/Llvm/x64/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
    elseif("${MGE_ARCH}" STREQUAL "i386")
      set(WINDOWS_ASAN_DLL_NAME "clang_rt.asan_dynamic-i386.lib")
      set(WINDOWS_ASAN_RUNTIME_THUNK_NAME
          "clang_rt.asan_dynamic_runtime_thunk-i386.lib")
      set(WINDOWS_ASAN_PATH_SUFFIXES
          "VC/Tools/Llvm/lib/clang/${CMAKE_CXX_COMPILER_VERSION}/lib/windows")
    else()
      message(FATAL_ERROR "unsupport asan ARCH: ${MGE_ARCH} on Windows")
    endif()
    find_path(
      ASAN_DLL_PATH
      NAMES ${WINDOWS_ASAN_DLL_NAME}
      HINTS "$ENV{VS_PATH}"
      PATH_SUFFIXES ${WINDOWS_ASAN_PATH_SUFFIXES}
      DOC "Windows asan library path")
    # find_path() leaves a value ending in -NOTFOUND on failure, which if() treats
    # as false
    if(NOT ASAN_DLL_PATH)
      message(FATAL_ERROR "can not find asan dll, please upgrade you LLVM")
    endif()

    message(STATUS "Windows asan dll path: ${ASAN_DLL_PATH}")
    link_directories(${ASAN_DLL_PATH})
    link_libraries(${WINDOWS_ASAN_DLL_NAME})
    link_libraries(${WINDOWS_ASAN_RUNTIME_THUNK_NAME})
    set(WIN_FLAGS "/Od -DNDEBUG -fsanitize=address")
    # Windows LLVM asan does not take effect with /O2, which is the RELWITHDEBINFO
    # default, so override the per-config flags
    set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_C_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
    set(CMAKE_CXX_FLAGS_RELEASE "/Zi /Od /Ob1 /DNDEBUG")
  else()
    set(WIN_FLAGS "/O2")
  endif()
  # add flags for enable sse instruction optimize for X86, enable avx header to compile
  # avx code
  set(WIN_FLAGS "${WIN_FLAGS} -msse4.2 -D_AVX_ -D_AVX2_ -D__AVX__ -D__AVX2__ -D__FMA__")
  # if your CPU is from the cascadelake series, you can enable the following for
  # performance:
  # set(WIN_FLAGS "${WIN_FLAGS} -march=cascadelake -mtune=cascadelake")
  # set(WIN_FLAGS "${WIN_FLAGS} -mavx512cd -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vnni")

  # for windows build
  set(WIN_FLAGS
      "${WIN_FLAGS} -Wno-error=implicit-int-conversion -Wno-error=double-promotion")
  set(WIN_FLAGS
      "${WIN_FLAGS} -Wno-error=zero-as-null-pointer-constant -Wno-error=implicit-int-conversion"
  )
  set(WIN_FLAGS
      "${WIN_FLAGS} -Wno-error=float-conversion -Wno-error=shadow-field -Wno-error=covered-switch-default"
  )
  set(WIN_FLAGS
      "${WIN_FLAGS} -Wno-error=deprecated  -Wno-error=documentation  -Wno-error=unreachable-code-break"
  )
  set(WIN_FLAGS "${WIN_FLAGS} /DWIN32 -Wno-macro-redefined /wd4819")
  set(WIN_FLAGS
      "${WIN_FLAGS} /D_CRT_SECURE_NO_DEPRECATE /D_CRT_SECURE_NO_WARNINGS /DNOGDI /D_USE_MATH_DEFINES /bigobj"
  )
  set(WIN_FLAGS
      "${WIN_FLAGS} /Zm500 /EHs /wd4351 /wd4291 /wd4250 /wd4996 /wd4819 -Wno-inconsistent-dllimport"
  )

  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${WIN_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${WIN_FLAGS}")

461 462
  # FIXME: fix halide/mlir JIT backends on windows
  message(STATUS "disable halide and mlir jit backends on windows host build...")
463 464
  set(MGE_WITH_HALIDE OFF)
  set(MGE_WITH_JIT_MLIR OFF)
465 466 467 468
  # TODO: imp ExecutableHelperImpl@src/jit/impl/utils.cpp build with Windows, then
  # enable base jit on Windows
  message(STATUS "disable base jit on windows host build...")
  set(MGE_WITH_JIT OFF)
469 470 471
  # FIXME: fix MegRay on windows
  message(STATUS "Disable distributed build on windows host build...")
  set(MGE_WITH_DISTRIBUTED OFF)
M
Megvii Engine Team 已提交
472 473 474 475 476 477 478 479 480 481
  # 32-bit Debug builds on Windows replace the Debug flags with /Z7 as a workaround
  # for linker error LNK1318. Both expansions are quoted so an empty
  # CMAKE_BUILD_TYPE (multi-config generators) does not break the if() expression.
  if("${MGE_ARCH}" STREQUAL "i386" AND "${CMAKE_BUILD_TYPE}" STREQUAL "Debug")
    # https://docs.microsoft.com/en-us/cpp/build/reference/z7-zi-zi-debug-information-format?view=msvc-170
    # Workaround for error LNK1318
    message(
      STATUS
        "force use full symbolic debugging with build for 32bit for Windows with Debug mode"
    )
    set(CMAKE_C_FLAGS_DEBUG "/Z7")
    set(CMAKE_CXX_FLAGS_DEBUG "/Z7")
  endif()
482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
else()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")

  # NONE windows DEBUG general flags
  if(MGE_BUILD_WITH_ASAN)
    set(CMAKE_C_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g -fsanitize=address -fno-omit-frame-pointer")
  else()
    set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
    set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g")
  endif()

  # NONE windows opt general flags
  if(MGE_BUILD_WITH_ASAN)
    set(OPTIMIZE_LEVEL "-g -O0 -DNDEBUG -fsanitize=address -fno-omit-frame-pointer")
  elseif(ANDROID)
    set(OPTIMIZE_LEVEL "-g -Ofast -DNDEBUG")
  else()
    set(OPTIMIZE_LEVEL "-g -O3 -DNDEBUG")
  endif()
  # remove finite-math-only opt from Ofast, caused by clang have a different runtime
  # finite math logic, this issue do not find at g++, but as a unity build flags, we
  # force add -fno-finite-math-only when compiler support
  check_cxx_compiler_flag("-fno-finite-math-only" CXX_NO_FINITE_MATH_ONLY_SUPPORT)
  if(CXX_NO_FINITE_MATH_ONLY_SUPPORT)
    message(STATUS "force add -fno-finite-math-only for this compiler")
    set(OPTIMIZE_LEVEL "${OPTIMIZE_LEVEL} -fno-finite-math-only")
  endif()
  set(CMAKE_C_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
  set(CMAKE_CXX_FLAGS_RELEASE "${OPTIMIZE_LEVEL}")
  set(CMAKE_C_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
  set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${OPTIMIZE_LEVEL}")
  # some gnu(gcc) compiler use -static -libasan have runtime issue also, when target is
  # big, clang ld will take a long long long time when use -static-libsan, so we use
  # dynamic asan by default ANDROID asan.so depends on log, so broadcast log
  # link_libraries for megengine depends target, for example flatc target
  if(MGE_BUILD_WITH_ASAN AND ANDROID)
    link_libraries(log)
  endif()
521 522
endif()

523
# With CUDA enabled, load the cuDNN configuration. Statically linking cuDNN >= 8.0.0
# (static CUDA, cuDNN not shared) automatically turns on MGE_WITH_LARGE_ARCHIVE.
if(MGE_WITH_CUDA)
  include(cmake/cudnn.cmake)
  if(MGE_CUDA_USE_STATIC
     AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
     AND NOT MGE_WITH_CUDNN_SHARED)
    message(WARNING "Static link CUDNN8 will auto enable MGE_WITH_LARGE_ARCHIVE=ON")
    set(MGE_WITH_LARGE_ARCHIVE ON)
  endif()
endif()
# Pick the common linker flags: -mcmodel=large for large-archive builds, otherwise
# the gold linker when the compiler supports it and the target is not Android,
# Apple, Windows or riscv64. The flags are then appended to the shared, module and
# executable linker flags.
check_cxx_compiler_flag(-fuse-ld=gold CXX_SUPPORT_GOLD)
if(MGE_WITH_LARGE_ARCHIVE)
  message(STATUS "Set -mcmodel=large and disable -fuse-ld=gold")
  set(MGE_COMMON_LINKER_FLAGS "-mcmodel=large")
elseif(
  # MGE_WITH_LARGE_ARCHIVE is already known to be false in this branch
  CXX_SUPPORT_GOLD
  AND NOT ANDROID
  AND NOT APPLE
  AND NOT MSVC
  AND NOT WIN32
  AND NOT "${MGE_ARCH}" STREQUAL "riscv64")
  message(STATUS "Using GNU gold linker.")
  set(MGE_COMMON_LINKER_FLAGS "-fuse-ld=gold")
endif()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MGE_COMMON_LINKER_FLAGS}")
551

552 553 554 555 556 557
if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  # x86 cpu jit backends only support MLIR now, but MLIR runtime do not support at xp
  # sp2
  message(WARNING "disable MGE_WITH_JIT when build for windows xp sp2")
  set(MGE_WITH_JIT OFF)
endif()
558
if(NOT MGE_WITH_JIT)
559 560 561 562 563 564 565 566
  if(MGE_WITH_HALIDE)
    message(WARNING "MGE_WITH_HALIDE is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_HALIDE OFF)
  endif()
  if(MGE_WITH_JIT_MLIR)
    message(WARNING "MGE_WITH_JIT_MLIR is set to OFF with MGE_WITH_JIT disabled")
    set(MGE_WITH_JIT_MLIR OFF)
  endif()
M
Megvii Engine Team 已提交
567 568
endif()

569 570
# FIXME At present, there are some conflicts between the LLVM that halide depends on and
# the LLVM that MLIR depends on. Should be fixed in subsequent versions.
571
if(MGE_BUILD_IMPERATIVE_RT AND MGE_WITH_HALIDE)
572
  message(FATAL_ERROR "cannot use HALIDE when building IMPERATIVE_RT")
573
endif()
574
if(MGE_WITH_JIT_MLIR AND MGE_WITH_HALIDE)
575
  message(FATAL_ERROR "cannot use HALIDE with MGE_WITH_JIT_MLIR enabled")
576 577
endif()

578
if(MGE_WITH_CUDA)
579 580 581 582 583 584 585 586 587
  # FIXME: check_language(CUDA) failed when sbsa mode! detail:
  # https://gitlab.kitware.com/cmake/cmake/-/issues/20676
  if(CMAKE_TOOLCHAIN_FILE)
    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
    message(
      WARNING
        "force set CMAKE_CUDA_HOST_COMPILER to CMAKE_CXX_COMPILER when nvcc sbsa mode!!"
    )
  endif()
588

589 590 591 592 593
  include(CheckLanguage)
  check_language(CUDA)
  if(NOT CMAKE_CUDA_COMPILER AND NOT CMAKE_TOOLCHAIN_FILE)
    message(FATAL_ERROR "CUDA compiler not found in PATH")
  endif()
594

595 596 597 598 599
  # remove this after CMAKE fix nvcc sbsa
  if(NOT CMAKE_CUDA_COMPILER AND CMAKE_TOOLCHAIN_FILE)
    set(CMAKE_CUDA_COMPILER "nvcc")
    message(WARNING "force set CMAKE_CUDA_COMPILER to nvcc when nvcc sbsa mode!!")
  endif()
600

601
  find_package(CUDA)
602 603 604
  enable_language(CUDA)
  set(CMAKE_CUDA_STANDARD 14)
  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
605 606
endif()

607 608 609 610 611 612 613 614 615
if(CMAKE_TOOLCHAIN_FILE)
  # TODO: fix cross build mlir-linalg-ods-gen for enable cross build with MLIR
  message(
    STATUS
      "Disable MLIR jit backends support, as we do not support cross build MLIR module caused by mlir-linalg-ods-gen, if you really need this, try build at host env, for example Android termux env for android, arm-linux env for arm with linux board"
  )
  set(MGE_WITH_JIT_MLIR OFF)
endif()

616
if(NOT MGE_WITH_CUDA)
617
  message(STATUS "Disable TensorRT support and disable HALIDE, as CUDA is not enabled.")
618 619
  set(MGE_WITH_HALIDE OFF)
  set(MGE_WITH_TRT OFF)
620 621 622
endif()

find_package(PythonInterp 3 REQUIRED)
623 624 625
# NOTICE: only use this for targets which do not depend on the python api. PURPOSE:
# reuse target objects when switching the python3 version. Falls back to
# PYTHON_EXECUTABLE if python3 can not be found in the PATH env
626 627
set(PYTHON3_IN_ENV "python3")
find_program(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
628 629 630
if(PYTHON3_EXECUTABLE_WITHOUT_VERSION)
  message(STATUS "use ${PYTHON3_IN_ENV} as PYTHON3_EXECUTABLE_WITHOUT_VERSION")
  set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON3_IN_ENV})
631
else()
632 633 634 635 636 637
  message(
    STATUS
      "fallback ${PYTHON_EXECUTABLE} as PYTHON3_EXECUTABLE_WITHOUT_VERSION,\
    target which depend on PYTHON3_EXECUTABLE_WITHOUT_VERSION will be rebuild when switch python3"
  )
  set(PYTHON3_EXECUTABLE_WITHOUT_VERSION ${PYTHON_EXECUTABLE})
638
endif()
639 640 641

# Locate the thread library, preferring the -pthread flag. When that flag is what
# CMake selected and CUDA is enabled, rewrite the compile options of
# Threads::Threads so CUDA sources receive it as -Xcompiler=-pthread while all other
# languages keep the plain -pthread.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads)
if(MGE_WITH_CUDA AND "${CMAKE_THREAD_LIBS_INIT}" STREQUAL "-pthread")
  set_target_properties(
    Threads::Threads
    PROPERTIES
      INTERFACE_COMPILE_OPTIONS
      "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-pthread>;$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-pthread>"
  )
endif()

set(MGE_BLAS
    MKL
    CACHE STRING "BLAS implementaion used by MegEngine.")
655
set_property(CACHE MGE_BLAS PROPERTY STRINGS MKL OpenBLAS)
656 657 658
set(MGE_CUDA_GENCODE
    ""
    CACHE STRING "Overwrite -gencode specifications for CUDA")
659
# Default the CUDA host compiler to the C++ compiler when none was chosen.
# CMake variable references use ${...}; the Make-style $(...) form is not expanded
# by CMake and would store the literal text instead of the compiler path.
if(NOT CMAKE_CUDA_HOST_COMPILER)
  set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}")
endif()

if(NOT MGE_ENABLE_RTTI)
664
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
665 666 667
endif()

if(NOT MGE_ENABLE_EXCEPTIONS)
668
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
669 670
endif()

M
Megvii Engine Team 已提交
671 672
if(MGE_BUILD_IMPERATIVE_RT OR ANDROID)
  message(STATUS "config cxx standard to 17.")
673
  set(CMAKE_CXX_STANDARD 17)
674 675
endif()

676
# Distributed support is switched off when neither CUDA nor ROCm is enabled.
# The options are tested by name: if() evaluates the variable itself, so the check
# stays well-formed whatever value the options hold.
if(NOT MGE_WITH_CUDA AND NOT MGE_WITH_ROCM)
  message(STATUS "Disable distributed support, as both CUDA and ROCm are disabled.")
  set(MGE_WITH_DISTRIBUTED OFF)
endif()

if(MGE_INFERENCE_ONLY)
682 683 684 685
  message(STATUS "Disable distributed support for inference only build.")
  set(MGE_WITH_DISTRIBUTED OFF)
  message(STATUS "Disable imperative_rt python module for inference only build.")
  set(MGE_BUILD_IMPERATIVE_RT OFF)
686 687
endif()

688 689 690 691 692 693 694 695 696 697 698
# please do any include(cmake/* after do this execute_process
if(MGE_SYNC_THIRD_PARTY)
  include(cmake/third_party_sync.cmake)
endif()

if(MGE_WITH_TEST)
  include(cmake/gtest.cmake)
endif()

include(cmake/gflags.cmake)

699
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
700
  include(cmake/llvm-project.cmake)
M
Megvii Engine Team 已提交
701 702
endif()

703 704 705 706
if(MGE_BUILD_IMPERATIVE_RT)
  set(MGE_WITH_CUSTOM_OP ON)
endif()

707
if(MGE_WITH_DISTRIBUTED)
708 709
  include(cmake/protobuf.cmake)
  include(cmake/zmq.cmake)
710 711 712
endif()

if(MGB_WITH_FLATBUFFERS)
713
  include(cmake/flatbuffers.cmake)
714 715
endif()

716 717 718 719
if(MGE_WITH_CUPTI)
  include(cmake/cupti.cmake)
endif()

720
if(MGE_WITH_CUDA)
721 722 723 724 725
  include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
  foreach(path ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
    get_filename_component(_NAME ${path} NAME)
    if(NOT ${_NAME} STREQUAL "stubs")
      list(APPEND CUDA_LINK_DIRECTORIES ${path})
726
    endif()
727 728 729 730 731 732 733 734 735 736 737 738 739 740
  endforeach()
  link_directories(${CUDA_LINK_DIRECTORIES})

  set(CMAKE_CUDA_FLAGS_DEBUG "-O0 -g")
  set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
  set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O3 -g")
  set(CMAKE_CUDA_FLAGS_MINSIZEREL "-Os")
  if(MSVC OR WIN32)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin -compress-all")
    set(CCBIN_FLAG
        "${CCBIN_FLAG} /wd4819 /wd4334 /wd4267 /wd4002 /wd4244 /wd4068 /std:c++14 /bigobj"
    )
    if(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
      set(CCBIN_FLAG "${CCBIN_FLAG} -D_ITERATOR_DEBUG_LEVEL=2 -MTd")
741
    endif()
742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options \" ${CCBIN_FLAG} \" ")
  else()
    set(CMAKE_CUDA_FLAGS "-Xcompiler -Wall,-Wextra -Xfatbin -compress-all")
  endif()

  if(NOT MGE_ENABLE_RTTI)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-rtti")
  endif()
  if(NOT MGE_ENABLE_EXCEPTIONS)
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -fno-exceptions")
  endif()
  # Choose the default -gencode list when MGE_CUDA_GENCODE was not supplied.
  #
  # Each branch names the SM architectures that get native code (code=sm_XX)
  # and, optionally, one architecture that is also embedded as PTX
  # (code=compute_XX). The list depends on the CUDA compiler version; the static
  # cuDNN >= 8 case uses a reduced list and no PTX entry. MEGDNN_THREADS_512 is
  # 0 on this default path and 1 when the user provides MGE_CUDA_GENCODE.
  #
  # All version/arch expansions are quoted so that an empty value makes the
  # comparison false instead of producing an if() syntax error.
  if(NOT MGE_CUDA_GENCODE)
    if("${MGE_ARCH}" STREQUAL "x86_64"
       OR "${MGE_ARCH}" STREQUAL "i386"
       OR "${MGE_ARCH}" STREQUAL "aarch64")
      set(MEGDNN_THREADS_512 0)
      if(MGE_WITH_CUDA
         AND MGE_CUDA_USE_STATIC
         AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
         AND NOT MGE_WITH_CUDNN_SHARED)
        message(
          WARNING
            "Static link CUDNN8 with many sm is unworkable, we only enable sm61 sm70 sm75 by default, and enable MGE_WITH_LARGE_ARCHIVE=ON"
        )
        set(_mge_cuda_sm_list 61 70 75)
        set(_mge_cuda_ptx_arch "")
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.8.0")
        set(_mge_cuda_sm_list 61 70 75 80 86 89)
        set(_mge_cuda_ptx_arch 89)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.1.0")
        set(_mge_cuda_sm_list 61 70 75 80 86)
        set(_mge_cuda_ptx_arch 86)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.0.0")
        set(_mge_cuda_sm_list 61 70 75 80)
        set(_mge_cuda_ptx_arch 80)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0")
        set(_mge_cuda_sm_list 52 60 61 70 75)
        set(_mge_cuda_ptx_arch 75)
      elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "9.0.0")
        set(_mge_cuda_sm_list 52 60 61 70)
        set(_mge_cuda_ptx_arch 70)
      else()
        set(_mge_cuda_sm_list 35 52 60 61)
        set(_mge_cuda_ptx_arch 61)
      endif()

      # Emit the native-code entries in list order, then the PTX entry (if any),
      # each preceded by a single space exactly like the hand-written ladder.
      foreach(_mge_cuda_sm IN LISTS _mge_cuda_sm_list)
        string(APPEND MGE_CUDA_GENCODE
               " -gencode arch=compute_${_mge_cuda_sm},code=sm_${_mge_cuda_sm}")
      endforeach()
      if(NOT "${_mge_cuda_ptx_arch}" STREQUAL "")
        string(
          APPEND
          MGE_CUDA_GENCODE
          " -gencode arch=compute_${_mge_cuda_ptx_arch},code=compute_${_mge_cuda_ptx_arch}"
        )
      endif()
      unset(_mge_cuda_sm)
      unset(_mge_cuda_sm_list)
      unset(_mge_cuda_ptx_arch)
    else()
      message(FATAL_ERROR "Unsupported CUDA host arch.")
    endif()
  else()
    set(MEGDNN_THREADS_512 1)
  endif()

  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${MGE_CUDA_GENCODE}")
  if(MGE_WITH_TRT)
    include(cmake/tensorrt.cmake)
  endif()
  if(MGE_CUDA_USE_STATIC)
834
    if(MGE_WITH_TRT)
835 836 837 838
      if(MSVC OR WIN32)
        message(STATUS "windows TRT_LIBRARY: ${TRT_LIBRARY}")
        list(APPEND MGE_CUDA_LIBS ${TRT_LIBRARY} ${TRT_PLUGIN_LIBRARY})
      else()
839
        if(TensorRT_VERSION_MAJOR GREATER_EQUAL 8)
840
          list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
841 842 843 844
        else()
          list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libnvinfer libnvinfer_plugin
               -Wl,--no-whole-archive)
        endif()
845
      endif()
846
      if(TensorRT_VERSION_MAJOR STREQUAL 7)
847 848 849 850
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS libmyelin_compiler libmyelin_executor
             libmyelin_pattern_runtime libmyelin_pattern_library)
      endif()
851
    endif()
852 853 854 855 856 857 858 859 860 861 862 863

    # Select the cuDNN item appended to MGE_CUDA_LIBS for the static CUDA build:
    # Windows always links the located CUDNN_LIBRARY; elsewhere cuDNN 7.5.0 is
    # wrapped in --whole-archive, and every other version links libcudnn plainly.
    if(MSVC OR WIN32)
      message(STATUS "windows CUDNN_LIBRARY: ${CUDNN_LIBRARY}")
      list(APPEND MGE_CUDA_LIBS ${CUDNN_LIBRARY})
    elseif("${CUDNN_VERSION}" STREQUAL "7.5.0")
      message(
        STATUS
          "cudnn 7.5.0 has bug in cudnnConvolutionBiasActivationForward, need --whole-archive to workaround, ref https://docs.nvidia.com/deeplearning/cudnn/release-notes/rel_7xx.html"
      )
      list(APPEND MGE_CUDA_LIBS -Wl,--whole-archive libcudnn -Wl,--no-whole-archive)
    else()
      list(APPEND MGE_CUDA_LIBS libcudnn)
    endif()
872 873 874 875 876 877 878 879 880 881 882
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS cusolver.lib curand.lib cudart_static.lib cusparse.lib)
    else()
      list(
        APPEND
        MGE_CUDA_LIBS
        cusolver_static
        curand_static
        culibos
        cudart_static
        cusparse_static)
883
    endif()
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS cublas.lib)
    else()
      if(MGE_WITH_CUBLAS_SHARED)
        list(APPEND MGE_CUDA_LIBS cublas)
      else()
        list(APPEND MGE_CUDA_LIBS cublas_static)
      endif()
    endif()
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0"
       OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
      if(MSVC OR WIN32)
        list(APPEND MGE_CUDA_LIBS cublasLt.lib)
      else()
        if(MGE_WITH_CUBLAS_SHARED)
          list(APPEND MGE_CUDA_LIBS cublasLt)
900
        else()
901
          list(APPEND MGE_CUDA_LIBS cublasLt_static culibos)
902
        endif()
903
      endif()
904
    endif()
905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930
    # For CUDA >= 10.0 on non-Windows hosts, link a copy of liblapack_static.a
    # in which every symbol has been made weak, to avoid duplicated definitions
    # with MKL.
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "10.0.0"
       AND NOT MSVC
       AND NOT WIN32)
      find_library(LAPACK_STATIC_PATH lapack_static
                   HINTS ${CMAKE_CUDA_HOST_IMPLICIT_LINK_DIRECTORIES})
      if(NOT LAPACK_STATIC_PATH)
        message(FATAL_ERROR "liblapack_static.a not found")
      endif()
      set(LAPACK_STATIC_COPY_PATH ${CMAKE_CURRENT_BINARY_DIR}/liblapack_static_copy.a)

      # Build rule that runs objcopy: -w enables wildcards and -W* weakens all
      # symbols. DEPENDS makes the copy regenerate if the input archive changes.
      add_custom_command(
        OUTPUT ${LAPACK_STATIC_COPY_PATH}
        COMMAND ${CMAKE_OBJCOPY} -w -W* ${LAPACK_STATIC_PATH} ${LAPACK_STATIC_COPY_PATH}
        DEPENDS ${LAPACK_STATIC_PATH}
        VERBATIM)
      add_custom_target(lapack_static_weak_target DEPENDS ${LAPACK_STATIC_COPY_PATH})

      # Imported library "lapack_static_weak" pointing at the weakened copy; the
      # dependency ensures the objcopy rule runs before anything links it.
      add_library(lapack_static_weak STATIC IMPORTED GLOBAL)
      add_dependencies(lapack_static_weak lapack_static_weak_target)
      set_target_properties(lapack_static_weak PROPERTIES IMPORTED_LOCATION
                                                          ${LAPACK_STATIC_COPY_PATH})
      list(APPEND MGE_CUDA_LIBS lapack_static_weak ${LAPACK_STATIC_COPY_PATH})
    endif()
932 933 934
  else()
    if(MGE_WITH_TRT)
      list(APPEND MGE_CUDA_LIBS libnvinfer libnvinfer_plugin)
935
      if(TensorRT_VERSION_MAJOR STREQUAL 7)
936 937 938 939 940 941 942 943 944 945 946
        message(STATUS "handle trt myelin lib after trt7")
        list(APPEND MGE_CUDA_LIBS libmyelin)
      endif()
    endif()
    list(APPEND MGE_CUDA_LIBS libcudnn)
    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER "10.1.0"
       OR ${CMAKE_CUDA_COMPILER_VERSION} VERSION_EQUAL "10.1.0")
      list(APPEND MGE_CUDA_LIBS cublasLt cusolver cublas curand)
    endif()
    list(APPEND MGE_CUDA_LIBS cudart)
  endif()
947

948
  if(NOT MGE_WITH_CUDA_STUB)
949
    if(MSVC OR WIN32)
950
      list(APPEND MGE_CUDA_LIBS cuda.lib)
951
    else()
952
      list(APPEND MGE_CUDA_LIBS cuda)
953
    endif()
954
  endif()
955

956 957 958 959 960
  if(NOT MGE_WITH_NVRTC_STUB)
    if(MSVC OR WIN32)
      list(APPEND MGE_CUDA_LIBS nvrtc.lib)
    else()
      list(APPEND MGE_CUDA_LIBS nvrtc)
961
    endif()
962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
  endif()

  if(MGE_WITH_ANY_CUDA_STUB)
    add_subdirectory(dnn/cuda-stub)
    list(APPEND MGE_CUDA_LIBS cuda-stub)
  endif()

  if(MSVC OR WIN32)
    list(APPEND MGE_CUDA_LIBS nvrtc.lib)
  else()
    list(APPEND MGE_CUDA_LIBS nvToolsExt)
  endif()

  set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -lrt")
  if(UNIX)
    set(MGE_CUDA_LIBS "${MGE_CUDA_LIBS} -ldl")
  endif()

endif()

# ##########please add_subdirectory from here###############
# Offer the cpuinfo runtime check on x86_64/i386/armv7/aarch64, except on APPLE
# and on Windows XP SP2 deployments. MGE_ARCH is referenced by name (not via an
# unquoted ${} expansion) so an empty value cannot break the condition, matching
# the other MGE_ARCH tests later in this file.
if((MGE_ARCH STREQUAL "x86_64"
    OR MGE_ARCH STREQUAL "i386"
    OR MGE_ARCH STREQUAL "armv7"
    OR MGE_ARCH STREQUAL "aarch64"
   )
   AND NOT APPLE
   AND NOT MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  option(MGE_ENABLE_CPUINFO "Build cpuinfo library for check runtime." ON)
  if(MGE_ENABLE_CPUINFO)
    message(STATUS "Enable cpuinfo runtime check and little kernel optimize.")
    # Directory-scoped definition: applies to every target added after this point.
    add_definitions(-DMGB_ENABLE_CPUINFO_CHECK)
    include(cmake/cpuinfo.cmake)
  endif()
endif()

998
if(MGE_WITH_CAMBRICON)
999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022
  include_directories("$ENV{NEUWARE_HOME}/include")
  link_directories("$ENV{NEUWARE_HOME}/lib64")
  list(APPEND MGE_CAMBRICON_LIBS libcnrt libcndev)
  if(CNRT_VERSION_STRING VERSION_GREATER "5.0.0")
    include(cmake/cnnl.cmake)
    include(cmake/cnlight.cmake)
    include(cmake/magicmind.cmake)
    list(
      APPEND
      MGE_CAMBRICON_LIBS
      libcnnl
      libcnnl_extra
      libcnlight
      libmagicmind
      libmagicmind_runtime)
  else()
    include(cmake/cnml.cmake)
    list(APPEND MGE_CAMBRICON_LIBS libcnml)
  endif()
  set(MGE_CAMBRICON_LIBS "${MGE_CAMBRICON_LIBS}")
endif()

if(MGE_WITH_ROCM)
  include(cmake/rocm.cmake)
1023 1024 1025
endif()

if(MGE_WITH_ATLAS)
1026 1027 1028 1029
  add_subdirectory(dnn/atlas-stub)
  list(APPEND MGE_ATLAS_LIBS atlas-stub)
  set(MGE_ATLAS_LIBS "${MGE_ATLAS_LIBS}")
  set(MGB_ATLAS ${MGE_WITH_ATLAS})
1030
endif()
1031

1032 1033
# Route compilation through ccache when the tool is found on this machine.
find_program(CCACHE_BIN ccache)
if(CCACHE_BIN)
  set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_BIN})
  # The CUDA launcher is only set for CUDA builds on CMake 3.10.0 or newer.
  if(MGE_WITH_CUDA)
    if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.10.0")
      message(STATUS "Using ccache as CMAKE_CUDA_COMPILER_LAUNCHER")
      set(CMAKE_CUDA_COMPILER_LAUNCHER ${CCACHE_BIN})
    endif()
  endif()
endif()

# On x86 targets, pull in the BLAS backend named by MGE_BLAS and record the
# library name to link in MGE_BLAS_LIBS. Variables are referenced by name so an
# empty MGE_BLAS reaches the FATAL_ERROR below instead of failing with an if()
# syntax error, and a value such as "MKL" is never re-dereferenced as a variable.
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
  if(MGE_BLAS STREQUAL "MKL")
    include(cmake/mkl.cmake)
    set(MGE_BLAS_LIBS libmkl)
  elseif(MGE_BLAS STREQUAL "OpenBLAS")
    include(cmake/OpenBLAS.cmake)
    set(MGE_BLAS_LIBS libopenblas)
  else()
    message(FATAL_ERROR "Unknown BLAS implementation ${MGE_BLAS}")
  endif()
endif()

# MKLDNN build
if(MGE_WITH_MKLDNN AND ${MGE_ARCH} STREQUAL "x86_64")
1055 1056
  include(cmake/MKL_DNN.cmake)
  set(MEGDNN_X86_WITH_MKL_DNN 1)
1057 1058
endif()

1059 1060
# RTTI
if(MGE_ENABLE_RTTI)
1061 1062
  set(MEGDNN_ENABLE_MANGLING 0)
  set(MEGDNN_ENABLE_RTTI 1)
1063
else()
1064 1065
  set(MEGDNN_ENABLE_MANGLING 1)
  set(MEGDNN_ENABLE_RTTI 0)
1066 1067
endif()
set(MGB_VERBOSE_TYPEINFO_NAME ${MGE_ENABLE_RTTI})
1068

1069 1070
# Logging
set(MGB_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
1071
set(MEGDNN_ENABLE_LOGGING ${MGE_ENABLE_LOGGING})
1072 1073 1074
set(MGB_ENABLE_JSON ${MGE_ENABLE_LOGGING})

# Exception
1075
if(NOT MGE_ENABLE_EXCEPTIONS)
1076 1077 1078 1079
  message(
    STATUS
      "Exceptions disabled; MegEngine would kill itself when it is supposed to throw an exception."
  )
1080 1081
endif()
set(MGB_ENABLE_EXCEPTION ${MGE_ENABLE_EXCEPTIONS})
1082
set(MEGDNN_ENABLE_EXCEPTIONS ${MGE_ENABLE_EXCEPTIONS})
1083 1084 1085

# JIT
if(MGE_WITH_JIT AND MGE_WITH_HALIDE)
1086 1087 1088 1089
  set(HALIDE_SHARED_LIBRARY
      OFF
      CACHE BOOL "Build as a shared library")
  include(cmake/Halide.cmake)
1090
endif()
1091

1092 1093
include(cmake/cpp_redis.cmake)

1094
# Thread
1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105
# On APPLE hosts: hard-code the thread-related variables to a pthread
# configuration, and switch off JIT, Halide and MLIR JIT for the rest of the
# configure run.
# NOTE(review): the earlier include() calls in this file that are guarded by
# MGE_WITH_JIT_MLIR and by MGE_WITH_JIT AND MGE_WITH_HALIDE run before these
# variables are turned off here - confirm that ordering is intended.
if(APPLE)
  set(CMAKE_THREAD_LIBS_INIT "-lpthread")
  set(CMAKE_HAVE_THREADS_LIBRARY 1)
  set(CMAKE_USE_WIN32_THREADS_INIT 0)
  set(CMAKE_USE_PTHREADS_INIT 1)
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  message(STATUS "disable jit, halide and mlir on macos host build...")
  set(MGE_WITH_HALIDE OFF)
  set(MGE_WITH_JIT OFF)
  set(MGE_WITH_JIT_MLIR OFF)
endif()
1106

1107 1108 1109 1110 1111 1112 1113 1114 1115 1116
# riscv64: hard-code the thread-related variables to a pthread configuration,
# because (per the status message below) CMake's own detection fails for this
# target. MGE_ARCH is referenced by name, consistent with the other riscv64
# check in this file, so an empty value cannot break the condition.
if(MGE_ARCH STREQUAL "riscv64")
  set(CMAKE_THREAD_LIBS_INIT "-lpthread")
  set(CMAKE_HAVE_THREADS_LIBRARY 1)
  set(CMAKE_USE_WIN32_THREADS_INIT 0)
  set(CMAKE_USE_PTHREADS_INIT 1)
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  message(STATUS "force config thread when build riscv64, as CMAKE detect failed")
endif()

M
Megvii Engine Team 已提交
1117 1118 1119
set(MGB_JIT ${MGE_WITH_JIT})
set(MGB_JIT_MLIR ${MGE_WITH_JIT_MLIR})
set(MGB_JIT_HALIDE ${MGE_WITH_HALIDE})
1120 1121
# for consumer override MGB_C_OPR_INIT_FUNC symbol interface
if(NOT "${CUSTOM_C_OPR_INIT_FUNC}" STREQUAL "")
1122 1123
  add_compile_definitions(MGB_C_OPR_INIT_FUNC=${CUSTOM_C_OPR_INIT_FUNC})
  message(STATUS "override MGB_C_OPR_INIT_FUNC to ${CUSTOM_C_OPR_INIT_FUNC}")
1124
endif()
M
Megvii Engine Team 已提交
1125

1126 1127
set(MGB_CUSTOM_OP ${MGE_WITH_CUSTOM_OP})

1128
if(MSVC OR WIN32)
1129 1130 1131 1132
  set(CMAKE_HAVE_THREADS_LIBRARY 1)
  set(CMAKE_USE_WIN32_THREADS_INIT 1)
  set(CMAKE_USE_PTHREADS_INIT 1)
  set(THREADS_PREFER_PTHREAD_FLAG ON)
1133 1134
endif()

1135 1136 1137 1138
if(CMAKE_THREAD_LIBS_INIT
   OR CMAKE_USE_WIN32_THREADS_INIT
   OR ANDROID)
  set(MGB_HAVE_THREAD 1)
1139
endif()
1140

1141
if(MSVC OR WIN32)
1142 1143 1144 1145 1146
  if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
    message(STATUS "disable MGB_HAVE_THREAD/MGB_ENABLE_JSON when DEPLOY ON XP SP2")
    set(MGB_HAVE_THREAD 0)
    set(MGB_ENABLE_JSON 0)
  endif()
1147 1148
endif()

1149
if(MGE_WITH_TEST)
1150 1151
  # use intra-op multi threads
  set(MEGDNN_ENABLE_MULTI_THREADS 1)
1152 1153
endif()

1154 1155 1156 1157 1158
# benchmark
if(MGE_WITH_BENCHMARK)
  set(MEGDNN_WITH_BENCHMARK ${MGE_WITH_BENCHMARK})
endif()

1159 1160
# CUDA
set(MGB_CUDA ${MGE_WITH_CUDA})
1161 1162
set(MEGDNN_WITH_CUDA ${MGE_WITH_CUDA})

1163
# ROCM
1164 1165 1166
set(MGB_ROCM ${MGE_WITH_ROCM})
set(MEGDNN_WITH_ROCM ${MGE_WITH_ROCM})

1167 1168
# CAMBRICON
set(MGB_CAMBRICON ${MGE_WITH_CAMBRICON})
1169
# Debug info
1170 1171 1172 1173
# Turn on MGB_ASSERT_LOC and MGB_ENABLE_DEBUG_UTIL for Debug and RelWithDebInfo
# builds, and off for every other build type. The expansion is quoted so that an
# empty CMAKE_BUILD_TYPE selects the "off" branch instead of producing an if()
# syntax error.
# NOTE(review): presumably consumed by megbrain_build_config.h.in - confirm.
if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug" OR "${CMAKE_BUILD_TYPE}" STREQUAL
                                             "RelWithDebInfo")
  set(MGB_ASSERT_LOC 1)
  set(MGB_ENABLE_DEBUG_UTIL 1)
else()
  set(MGB_ASSERT_LOC 0)
  set(MGB_ENABLE_DEBUG_UTIL 0)
endif()

1179
if(MSVC OR WIN32)
1180 1181 1182 1183
  if(${MGE_ARCH} STREQUAL "i386")
    set(MGB_ENABLE_DEBUG_UTIL 0)
    message(STATUS "disable MGB_ENABLE_DEBUG_UTIL at Windows i386 build")
  endif()
1184 1185
endif()

1186 1187 1188
# TensorRT
set(MGB_ENABLE_TENSOR_RT ${MGE_WITH_TRT})

1189 1190 1191 1192 1193 1194
# inference need jit now, also keep same build logic with bazel
if(MGE_DEPLOY_INFERENCE_ON_WINDOWS_XP_SP2)
  set(MGB_BUILD_SLIM_SERVING 1)
else()
  set(MGB_BUILD_SLIM_SERVING 0)
endif()
1195
# Inference only
1196
if(MGE_INFERENCE_ONLY AND NOT MGE_WITH_TEST)
1197
  set(MGB_ENABLE_GRAD 0)
1198
else()
1199
  set(MGB_ENABLE_GRAD 1)
1200 1201
endif()

1202 1203 1204
# Distributed communication
set(MGB_ENABLE_OPR_MM ${MGE_WITH_DISTRIBUTED})

1205 1206
# MGE_ARCH related flags
if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
1207 1208 1209 1210 1211
  if(MGE_BLAS STREQUAL "MKL")
    set(MEGDNN_X86_WITH_MKL 1)
  elseif(MGE_BLAS STREQUAL "OpenBLAS")
    set(MEGDNN_X86_WITH_OPENBLAS 1)
  endif()
1212 1213 1214 1215
endif()

# Enable Naive
if(MGE_ARCH STREQUAL "naive")
1216 1217
  set(MEGDNN_NAIVE 1)
  message(STATUS "MEGDNN_NAIVE is enabled; MegDNN performance is degraded.")
1218 1219 1220
endif()

if(MGE_ARCH STREQUAL "x86_64" OR MGE_ARCH STREQUAL "i386")
1221 1222 1223 1224 1225 1226
  set(MEGDNN_X86 1)
  if(MGE_ARCH STREQUAL "x86_64")
    set(MEGDNN_X86_64 1)
    set(MEGDNN_64_BIT 1)
    if(NOT MSVC)
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
1227
    endif()
1228 1229
  else()
    set(MEGDNN_X86_32 1)
1230
    if(NOT MSVC)
1231
      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32")
1232
    endif()
1233 1234 1235 1236
  endif()
  if(NOT MSVC)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2 -mfpmath=sse")
  endif()
1237
endif()
1238
# dotprod is not enable by default on APPLE, cpuinfo has some problem on APPLE
1239
# Probe for armv8.2-a dot-product support on non-APPLE Clang builds and set
# MGB_ENABLE_DOT when the compiler accepts the flag. The compiler id is quoted
# so an empty CMAKE_C_COMPILER_ID compares as an empty string instead of
# breaking the condition.
# NOTE(review): the gate tests the C compiler id while the probe uses the CXX
# compiler - confirm both are always the same toolchain.
if(NOT APPLE AND "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
  check_cxx_compiler_flag("-march=armv8.2-a+dotprod" CXX_COMPILER_SUPPORT_DOT)
  if(CXX_COMPILER_SUPPORT_DOT)
    message(STATUS "Enable dotprod feature in armv8.2-a using MGB_ENABLE_DOT")
    set(MGB_ENABLE_DOT 1)
  endif()
endif()
1246

1247
if(MGE_ARCH STREQUAL "armv7")
1248 1249 1250 1251 1252 1253 1254 1255
  # -funsafe-math-optimizations to enable neon auto-vectorization (since neon is not
  # fully IEEE 754 compatible, GCC does not turn on neon auto-vectorization by default.
  if(ANDROID)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfloat-abi=softfp -mfpu=neon")
  endif()
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -funsafe-math-optimizations")
  set(MARCH "-march=armv7-a")
  set(MEGDNN_ARMV7 1)
1256 1257 1258
endif()

# aarch64: set the MegDNN/MegBrain architecture macros and pick the -march value
# (armv8-a by default, armv8.2-a+fp16 when MGE_ARMV8_2_FEATURE_FP16 is on).
if(MGE_ARCH STREQUAL "aarch64")
  set(MEGDNN_AARCH64 1)
  set(MEGDNN_64_BIT 1)
  set(MARCH "-march=armv8-a")
  set(MGB_AARCH64 1)
  if(MGE_ARMV8_2_FEATURE_FP16)
    message(STATUS "Enable fp16 feature support in armv8.2")
    # Tested by variable name: an unquoted ${MGE_DISABLE_FLOAT16} that expands
    # to nothing would leave a bare "NOT" in the condition and never enable the
    # fp16 NEON kernels.
    if(NOT MGE_DISABLE_FLOAT16)
      set(MEGDNN_ENABLE_FP16_NEON 1)
    endif()
    set(MARCH "-march=armv8.2-a+fp16")
  endif()

  if(MGE_WITH_CUDA)
    message(
      WARNING
        "aarch64 ld will add -mfix-cortex-a53-843419 and -mfix-cortex-a53-835769,\
        when cuda enable and CMAKE with DEBUG build type,ld will take about 14min+,\
        for save link time(14min->1min), you may open below flags if not deploy on\
        arm a53 platform, or just build release type!")
    # The flags referred to by the warning above; intentionally left disabled.
    # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mno-fix-cortex-a53-843419
    # -mno-fix-cortex-a53-835769")
  endif()
endif()
1282

1283
if(MGE_ARCH STREQUAL "riscv64")
1284 1285
  set(MEGDNN_RISCV64 1)
  set(MEGDNN_64_BIT 1)
1286 1287
endif()

1288
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MARCH}")
1289

1290 1291 1292
set(MGE_VERSION_SCRIPT
    ${PROJECT_SOURCE_DIR}/src/version.ld
    CACHE INTERNAL "Path to linker version script")
1293

1294 1295 1296 1297 1298 1299
# Capture the full hash of the most recent commit (git log -1 --format=%H), run
# from the directory containing this file, into GIT_FULL_HASH with trailing
# whitespace stripped. The exit status is not checked here.
# NOTE(review): presumably substituted into git_full_hash_header.h.in - confirm.
execute_process(
  COMMAND git log -1 --format=%H
  WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
  OUTPUT_VARIABLE GIT_FULL_HASH
  OUTPUT_STRIP_TRAILING_WHITESPACE)

1300
# Write out megbrain_build_config.h It defines macros needed by both megbrain and dnn
1301 1302 1303
# please don't put the configuration that is easy to change at
# megbrain_build_config.h.in for example cuda_sm_gen.h.in and git_full_hash_header.h.in,
# which will lead to CMake build dirty file issue
1304 1305 1306 1307
configure_file(src/megbrain_build_config.h.in
               ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/megbrain_build_config.h
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318
if(MGE_WITH_CUDA)
  configure_file(src/cuda_sm_gen.h.in
                 ${CMAKE_CURRENT_BINARY_DIR}/genfiles/cuda_sm_gen.h)
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/cuda_sm_gen.h
          DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

configure_file(src/git_full_hash_header.h.in
               ${CMAKE_CURRENT_BINARY_DIR}/genfiles/git_full_hash_header.h)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/genfiles/git_full_hash_header.h
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
1319 1320 1321

add_subdirectory(dnn)

1322 1323
list(APPEND MGB_OPR_PARAM_DEFS_SRCS
     ${CMAKE_CURRENT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py)
1324 1325 1326 1327 1328
set(MGB_OPR_PARAM_DEFS_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/dnn/scripts/gen_param_defs.py)

set(MGB_OPR_PARAM_DEFS_OUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/src/opr/include/)
file(MAKE_DIRECTORY ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr)
add_custom_command(
1329 1330 1331 1332 1333 1334
  OUTPUT ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  COMMAND
    ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${MGB_OPR_PARAM_DEFS_SCRIPT}
    ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h
  DEPENDS ${MGB_OPR_PARAM_DEFS_SRCS} ${MGB_OPR_PARAM_DEFS_SCRIPT}
  VERBATIM)
1335 1336

list(APPEND MGB_OPR_PARAM_DEFS_OUTS
1337
     ${MGB_OPR_PARAM_DEFS_OUT_DIR}/megbrain/opr/param_defs.h)
1338

1339 1340
install(FILES ${MGB_OPR_PARAM_DEFS_OUTS}
        DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/megbrain/opr/)
1341 1342 1343 1344

list(APPEND MGB_OPR_PARAM_DEFS_INC ${MGB_OPR_PARAM_DEFS_OUT_DIR})
add_custom_target(_mgb_opr_param_defs DEPENDS ${MGB_OPR_PARAM_DEFS_OUTS})
add_library(mgb_opr_param_defs INTERFACE)
1345 1346 1347
target_include_directories(
  mgb_opr_param_defs INTERFACE $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
                               $<BUILD_INTERFACE:${MGB_OPR_PARAM_DEFS_INC}>)
1348
add_dependencies(mgb_opr_param_defs _mgb_opr_param_defs)
1349
install(TARGETS mgb_opr_param_defs EXPORT ${MGE_EXPORT_TARGETS})
1350

1351
if(MGE_WITH_JIT_MLIR OR MGE_BUILD_IMPERATIVE_RT)
1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376
  # generate param_defs.td
  set(MGE_GENFILE_DIR ${PROJECT_BINARY_DIR}/src/genfiles)
  set(MGE_GEN_IR_DIR ${PROJECT_BINARY_DIR}/src/core/include/megbrain/ir)
  set(OPR_PARAM_DEFS_SRCS ${MGE_GENFILE_DIR}/opr_param_defs.py)
  set(OPR_PARAM_DEFS_SCRIPT ${PROJECT_SOURCE_DIR}/dnn/scripts/gen_tablegen.py)
  set(OPR_PARAM_DEFS_OUT ${MGE_GEN_IR_DIR}/param_defs.td)
  file(COPY ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
       DESTINATION ${MGE_GENFILE_DIR})
  file(READ ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py CONTENTS)
  file(APPEND ${OPR_PARAM_DEFS_SRCS} ${CONTENTS})
  file(MAKE_DIRECTORY ${MGE_GEN_IR_DIR})
  add_custom_command(
    OUTPUT ${OPR_PARAM_DEFS_OUT}
    COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} ${OPR_PARAM_DEFS_SCRIPT}
            ${OPR_PARAM_DEFS_SRCS} ${OPR_PARAM_DEFS_OUT}
    DEPENDS ${PROJECT_SOURCE_DIR}/dnn/scripts/opr_param_defs.py
            ${PROJECT_SOURCE_DIR}/tools/param_defs/mgb_opr_param_defs.py
            ${OPR_PARAM_DEFS_SCRIPT}
    VERBATIM)
  # mlir tblgen sources
  set(MGE_IR_DIR ${PROJECT_SOURCE_DIR}/src/core/include/megbrain/ir)
  set(MGE_IR_INCLUDE_DIRS ${MLIR_LLVM_INCLUDE_DIR} ${MGE_IR_DIR} ${MGE_GEN_IR_DIR})
  list(TRANSFORM MGE_IR_INCLUDE_DIRS PREPEND "-I")
  file(GLOB_RECURSE MGE_IR_TDS ${MGE_IR_DIR}/*.td)
  add_custom_target(param_defs_tblgen DEPENDS ${OPR_PARAM_DEFS_OUT})
1377 1378
endif()

1379
if(MGE_WITH_DISTRIBUTED)
1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392
  set(MEGRAY_WITH_NCCL
      ${MGE_WITH_CUDA}
      CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_WITH_SHM
      ${MGE_WITH_CUDA}
      CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_WITH_RCCL
      ${MGE_WITH_ROCM}
      CACHE BOOL "Override MegRay option" FORCE)
  set(MEGRAY_CUDA_GENCODE
      ${MGE_CUDA_GENCODE}
      CACHE STRING "Overwrite MegRay CUDA -gencode specifications" FORCE)
  add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/MegRay)
1393 1394 1395
endif()

add_subdirectory(src)
1396

1397
if(MGE_BUILD_IMPERATIVE_RT)
1398 1399
  add_subdirectory(imperative)
  message(STATUS "Enable imperative python wrapper runtime")
1400 1401 1402
endif()

if(MGE_WITH_TEST AND MGE_ENABLE_RTTI)
1403
  add_subdirectory(test)
1404 1405
endif()

1406
if(TARGET _imperative_rt)
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425
  add_custom_target(
    develop
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink
      ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/$<TARGET_FILE_NAME:${MODULE_NAME}>
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink
      ${CMAKE_CURRENT_BINARY_DIR}/imperative/python/${PACKAGE_NAME}/version.py
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/version.py
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/src/custom/include
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/include
    COMMAND ${CMAKE_COMMAND} -E make_directory
            ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib
    COMMAND
      ${CMAKE_COMMAND} -E create_symlink
      ${CMAKE_CURRENT_BINARY_DIR}/src/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}>
      ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python/${PACKAGE_NAME}/core/lib/$<TARGET_FILE_NAME:${MGE_SHARED_LIB}>
1426
    DEPENDS ${develop_depends}
1427
    VERBATIM)
1428
  add_dependencies(develop _imperative_rt)
1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445

  # generate stub file for _imperative_rt
  #
  # At configure time, ask the Python interpreter whether mypy is importable and
  # its version string compares >= '0.982'. The process result is stored in
  # NOT_HAVING_MYPY_STUBGEN.
  execute_process(
    COMMAND ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} -c
            "import mypy.version; assert mypy.version.__version__ >= '0.982'"
    RESULT_VARIABLE NOT_HAVING_MYPY_STUBGEN)
  # Only an exit status of exactly 0 enables the hook. The comparison is quoted
  # because RESULT_VARIABLE may hold an error string (for example when the
  # interpreter cannot be launched), which must not be spliced into if().
  if("${NOT_HAVING_MYPY_STUBGEN}" STREQUAL "0")
    # After the "develop" target builds, run mypy's stubgen for the extension
    # module and write the stubs into the imperative/python source tree.
    add_custom_command(
      TARGET develop
      POST_BUILD
      COMMAND
        ${PYTHON3_EXECUTABLE_WITHOUT_VERSION} -c "from mypy.stubgen import main; main()"
        -p ${PACKAGE_NAME}.core.${MODULE_NAME} -o
        ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python
      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/imperative/python
      VERBATIM)
  endif()
1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457
endif()

# Configure and install pkg-config. Note that unlike the Config.cmake modules, this is
# not relocatable (and not really portable) because we have two dependencies without
# pkg-config descriptions: FlatBuffers and MKL-DNN
if(MGE_USE_SYSTEM_MKLDNN)
  set(MGE_PKGCONFIG_LIBS_PRIVATE "-ldnnl")
endif()
if(MGE_USE_SYSTEM_OPENBLAS)
  set(MGE_PKGCONFIG_LIBS_PRIVATE "${MGE_PKGCONFIG_LIBS_PRIVATE} -lopenblas")
endif()
configure_file(cmake/megengine.pc.in ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc @ONLY)
1458
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/megengine.pc
1459 1460 1461
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)

# Do not export targets if MGE_WITH_DISTRIBUTED is on. MegRay is not ready.
1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474
if(NOT MGE_WITH_DISTRIBUTED)
  include(CMakePackageConfigHelpers)
  set(MGE_INSTALL_CMAKEDIR ${CMAKE_INSTALL_LIBDIR}/cmake/MegEngine)
  configure_package_config_file(
    cmake/MegEngineConfig.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
    INSTALL_DESTINATION ${MGE_INSTALL_CMAKEDIR})
  write_basic_package_version_file(
    ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
    VERSION ${MGB_VER_STRING}
    COMPATIBILITY SameMajorVersion)

  install(EXPORT ${MGE_EXPORT_TARGETS} DESTINATION ${MGE_INSTALL_CMAKEDIR})
  install(FILES ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfig.cmake
1475
                ${CMAKE_CURRENT_BINARY_DIR}/MegEngineConfigVersion.cmake
1476
          DESTINATION ${MGE_INSTALL_CMAKEDIR})
1477
endif()
1478

1479
if(MGE_WITH_JIT_MLIR)
1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500
  add_subdirectory(tools/mlir/mgb-opt)
  add_subdirectory(tools/mlir/mgb-file-check)
endif()

# Warn when cuDNN >= 8 is linked statically (CUDA static build without
# MGE_WITH_CUDNN_SHARED). The suggested MGE_CUDA_GENCODE value now carries one
# -gencode per architecture so it can be pasted as-is; the previous hint omitted
# the second -gencode.
if(MGE_WITH_CUDA
   AND MGE_CUDA_USE_STATIC
   AND "${CUDNN_VERSION}" VERSION_GREATER_EQUAL "8.0.0"
   AND NOT MGE_WITH_CUDNN_SHARED)
  set(_mge_cudnn8_static_hint
      "Static link CUDNN8 with many sm is unworkable, please use -DMGE_WITH_CUDNN_SHARED=ON or -DMGE_WITH_LARGE_ARCHIVE=ON -DMGE_CUDA_GENCODE=\"-gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75\" "
  )
  # The warning is emitted three times, as before (presumably for emphasis at
  # the end of the configure log).
  foreach(_mge_cudnn8_repeat RANGE 1 3)
    message(WARNING "${_mge_cudnn8_static_hint}")
  endforeach()
  unset(_mge_cudnn8_repeat)
  unset(_mge_cudnn8_static_hint)
endif()
1502 1503

if(MGE_WITH_LITE)
1504
  add_subdirectory(lite)
1505
endif()
1506 1507 1508 1509 1510 1511 1512

if(ANDROID)
  message(
    WARNING
      "MegEngine project use thread_local, if you want to deploy MegEngine at dlopen/dlclose scene, please build with c++_shared by -DANDROID_STL=c++_shared, detail at https://github.com/android-ndk/ndk/issues/789 for example: EXTRA_CMAKE_ARGS=\" -DANDROID_STL=c++_shared\" ./scripts/cmake-build/cross_build_android_arm_inference.sh "
  )
endif()