cuda.cmake 12.0 KB
Newer Older
1
# Everything in this module configures the CUDA toolchain, so there is nothing
# to do when the build is not a GPU build.
if(NOT WITH_GPU)
  return()
endif()

# Note (risemeup1): since CMake 3.18, CMAKE_CUDA_ARCHITECTURES has to be given a
# value, otherwise CMake may print a large number of warnings. It is set to OFF
# here; the -gencode flags are assembled by select_nvcc_arch_flags() instead.
set(CMAKE_CUDA_ARCHITECTURES OFF)

8
# Candidate GPU architectures, chosen by packaging mode.
#   paddle_known_gpu_archs    - default list
#   paddle_known_gpu_archs10  - list used with CUDA 10.x
#   paddle_known_gpu_archs11  - list used with CUDA 11.x
#   paddle_known_gpu_archs12  - list used with CUDA 12.x
# The version-specific list replaces the default one further down in this
# file, once the CUDA compiler version has been inspected.
if(WITH_NV_JETSON)
  # NVIDIA Jetson boards.
  add_definitions(-DWITH_NV_JETSON)
  set(paddle_known_gpu_archs10 "53 62 72")
  set(paddle_known_gpu_archs11 "53 62 72 87")
  set(paddle_known_gpu_archs12 "53 62 72 87 90")
  set(paddle_known_gpu_archs "53 62 72")
elseif(NEW_RELEASE_ALL)
  # Release package covering all architectures.
  message("Using New Release Strategy - All Arches Packge")
  add_definitions(-DNEW_RELEASE_ALL)
  set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "50 60 61 70 75 80")
  set(paddle_known_gpu_archs12 "50 60 61 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
elseif(NEW_RELEASE_PYPI)
  # Release package shipping cubins; no CUDA 10.x architectures are listed.
  message("Using New Release Strategy - Cubin Packge")
  add_definitions(-DNEW_RELEASE_PYPI)
  set(paddle_known_gpu_archs10 "")
  set(paddle_known_gpu_archs11 "61 70 75 80")
  set(paddle_known_gpu_archs12 "61 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
elseif(NEW_RELEASE_JIT)
  # Release package relying on JIT; select_nvcc_arch_flags() turns the chosen
  # architectures into PTX targets in this mode.
  message("Using New Release Strategy - JIT Packge")
  add_definitions(-DNEW_RELEASE_JIT)
  set(paddle_known_gpu_archs10 "50 60 70 75")
  set(paddle_known_gpu_archs11 "50 60 70 75 80")
  set(paddle_known_gpu_archs12 "50 60 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
else()
  # Regular build.
  set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
  set(paddle_known_gpu_archs12 "52 60 61 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 90")
endif()
41 42 43 44 45 46 47 48 49

######################################################################################
# Detect the compute capabilities of the GPUs installed on the build machine
# (used when autodetection is enabled).
#
# A small CUDA program is written into the build tree and executed through
# `nvcc --run`. It prints "<major>.<minor> " for every device whose properties
# can be queried. The last line of that output is stored in the internal cache
# entry CUDA_gpu_detect_output, so the probe is skipped on later runs for as
# long as that entry holds a value.
#
# Usage:
#   detect_installed_gpus(out_variable)
#
# Arguments:
#   out_variable - name of the variable, set in the caller's scope, that
#                  receives the detected capabilities, or
#                  ${paddle_known_gpu_archs} when detection fails.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    file(
      WRITE ${cufile}
      ""
      "#include \"stdio.h\"\n"
      "#include \"cuda.h\"\n"
      "#include \"cuda_runtime.h\"\n"
      "int main() {\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device) {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin=${CMAKE_C_COMPILER}" "--run"
              "${cufile}"
      WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
      RESULT_VARIABLE nvcc_res
      OUTPUT_VARIABLE nvcc_out
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    # The probe can exit with 0 without printing anything (every
    # cudaGetDeviceProperties call failed). list(GET) on an empty list is a
    # fatal error, so an empty output is handled as a failed detection and the
    # fallback below is used.
    if(nvcc_res EQUAL 0 AND NOT "${nvcc_out}" STREQUAL "")
      # only keep the last line of nvcc_out: escape literal semicolons, then
      # turn the newlines into list separators and take the final element
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
      list(GET nvcc_out -1 nvcc_out)
      # report 2.1 devices in BIN(PTX) form: binary for 2.1, PTX for 2.0
      string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
      set(CUDA_gpu_detect_output
          ${nvcc_out}
          CACHE INTERNAL
                "Returned GPU architetures from detect_installed_gpus tool"
                FORCE)
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(
      STATUS
        "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable}
        ${paddle_known_gpu_archs}
        PARENT_SCOPE)
  else()
    set(${out_variable}
        ${CUDA_gpu_detect_output}
        PARENT_SCOPE)
  endif()
endfunction()

########################################################################
# Select the nvcc -gencode flags for the architectures named by CUDA_ARCH_NAME.
#
# Usage:
#   select_nvcc_arch_flags(out_variable out_arch_bin)
#
# Arguments:
#   out_variable - name of the caller-scope variable that receives the
#                  -gencode flag string. Two companion variables are set too:
#                    ${out_variable}_readable   - the selected sm_XX /
#                                                 compute_XX targets
#                    ${out_variable}_real_archs - comma-joined binary archs
#   out_arch_bin - name of the caller-scope variable that receives the list of
#                  binary architectures (entries may keep the BIN(PTX) form).
#
# Cache variables:
#   CUDA_ARCH_NAME - one of the names in archs_names below; default "Auto".
#   CUDA_ARCH_BIN, CUDA_ARCH_PTX - only exist while CUDA_ARCH_NAME is "Manual".
function(select_nvcc_arch_flags out_variable out_arch_bin)
  # List of arch names
  set(archs_names
      "Maxwell"
      "Pascal"
      "Volta"
      "Turing"
      "Ampere"
      "Hopper"
      "All"
      "Manual")
  set(archs_name_default "Auto")
  list(APPEND archs_names "Auto")

  # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
  set(CUDA_ARCH_NAME
      ${archs_name_default}
      CACHE STRING "Select target NVIDIA GPU achitecture.")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value
  if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
    string(REPLACE ";" ", " archs_names "${archs_names}")
    message(
      FATAL_ERROR "Only ${archs_names} architectures names are supported.")
  endif()

  # CUDA_ARCH_NAME is quoted in every comparison below so that its value is
  # compared as a string and is not looked up as a variable name.
  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
    set(CUDA_ARCH_BIN
        ${paddle_known_gpu_archs}
        CACHE
          STRING
          "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
    )
    set(CUDA_ARCH_PTX
        ""
        CACHE
          STRING
          "Specify 'virtual' PTX architectures to build PTX intermediate code for"
    )
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "53")
    else()
      set(cuda_arch_bin "50")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "62")
    else()
      set(cuda_arch_bin "60 61")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "72")
    else()
      set(cuda_arch_bin "70")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Turing")
    set(cuda_arch_bin "75")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "87")
    else()
      # VERSION_LESS compares component-wise; LESS would read the version
      # string as a single decimal number.
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.1") # CUDA 11.0
        set(cuda_arch_bin "80")
      else()
        set(cuda_arch_bin "80 86")
      endif()
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Hopper")
    set(cuda_arch_bin "90")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
    set(cuda_arch_bin ${paddle_known_gpu_archs})
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
    message(
      STATUS
        "WARNING: This is just a warning for publishing release.
      You are building GPU version without supporting different architectures.
      So the wheel package may fail on other GPU architectures.
      You can add -DCUDA_ARCH_NAME=All in cmake command
      to get a full wheel package to resolve this warning.
      While, this version will still work on local GPU architecture.")
    detect_installed_gpus(cuda_arch_bin)
  else() # "Manual"
    set(cuda_arch_bin ${CUDA_ARCH_BIN})
    # Honor the user-provided PTX targets; without this the CUDA_ARCH_PTX
    # cache option declared above would never be read.
    set(cuda_arch_ptx "${CUDA_ARCH_PTX}")
  endif()

  if(NEW_RELEASE_JIT)
    # JIT packages ship PTX only: move every binary arch to the PTX list. The
    # space keeps the last PTX entry and the first binary entry from fusing
    # into a single number.
    set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
    set(cuda_arch_bin "")
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")

  list(REMOVE_DUPLICATES cuda_arch_bin)
  list(REMOVE_DUPLICATES cuda_arch_ptx)

  set(nvcc_flags "")
  set(nvcc_archs_readable "")
  set(nvcc_archs_bin_list "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      string(APPEND nvcc_flags
             " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
      string(APPEND nvcc_archs_readable " sm_${arch}")
      string(APPEND nvcc_archs_bin_list " ${arch}")
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    string(APPEND nvcc_flags
           " -gencode arch=compute_${arch},code=compute_${arch}")
    string(APPEND nvcc_archs_readable " compute_${arch}")
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  # Split the space-separated binary archs into a list, then join with commas.
  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})

  # Hand the results back to the caller.
  set(${out_variable}
      ${nvcc_flags}
      PARENT_SCOPE)
  set(${out_variable}_readable
      ${nvcc_archs_readable}
      PARENT_SCOPE)
  set(${out_variable}_real_archs
      ${nvcc_real_archs}
      PARENT_SCOPE)
  set(${out_arch_bin}
      ${cuda_arch_bin}
      PARENT_SCOPE)
endfunction()

T
T8T9 已提交
261
message(STATUS "CUDA detected: ${CMAKE_CUDA_COMPILER_VERSION}")

# Replace the default architecture list with the one matching the CUDA
# compiler version. VERSION_LESS compares component-wise, whereas LESS would
# read the version string as a single decimal number. The expansion is quoted
# so that an empty version does not turn the condition into a syntax error.
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.0") # CUDA 10.x
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs10}")
elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.2") # CUDA 11.0/11.1
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs11}")
elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "12.0") # CUDA 11.2+
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86")
elseif("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "13.0") # CUDA 12.0+
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs12} 86")
endif()

# Every branch above adds the same three flags, so they are appended once here.
# Versions from 13.0 on match no branch and get neither a replacement
# architecture list nor these flags.
if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "13.0")
  string(APPEND CMAKE_CUDA_FLAGS " -D_MWAITXINTRIN_H_INCLUDED")
  string(APPEND CMAKE_CUDA_FLAGS " -D__STRICT_ANSI__")
  string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")
endif()

284
# Define TRT_PLUGIN_FP16_AVALIABLE for CUDA 10.0 and newer. VERSION_LESS is used
# because LESS would read the version string as a single decimal number; the
# expansion is quoted so an empty version cannot break the condition.
if(NOT "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "10.0")
  add_definitions("-DTRT_PLUGIN_FP16_AVALIABLE")
endif()

# Expose the CUDA version and toolkit location to the sources as string macros.
add_definitions("-DCUDA_VERSION_MAJOR=\"${CUDA_VERSION_MAJOR}\"")
add_definitions("-DCUDA_VERSION_MINOR=\"${CUDA_VERSION_MINOR}\"")
add_definitions("-DCUDA_TOOLKIT_ROOT_DIR=\"${CUDA_TOOLKIT_ROOT_DIR}\"")
T
T8T9 已提交
291

292
# Work out the -gencode flags for the selected architectures and append them
# to the CUDA compiler flags.
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA NVCC_ARCH_BIN)
string(APPEND CMAKE_CUDA_FLAGS " ${NVCC_FLAGS_EXTRA}")
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
296

297
# Keep the host compiler flags from being propagated to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags (such as -O3 -g -DNDEBUG) are set by cmake itself,
# so they are not set here.
# Build CUDA sources with the C++17 standard.
set(CMAKE_CUDA_STANDARD 17)

# (Note) On Windows, if /W[1-4] is simply deleted, /W1 is added by default and
# conflicts with -w, so /W[1-4] is replaced with /W0 instead.
if(WIN32)
  string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
endif()

string(
  APPEND
  CMAKE_CUDA_FLAGS
  # suppress CUDA warnings on Eigen (needed since CUDA 9)
  " -w"
  # --expt-relaxed-constexpr suppresses Eigen warnings
  " --expt-relaxed-constexpr"
  # --expt-extended-lambda enables the HOSTDEVICE annotation on lambdas
  " --expt-extended-lambda")
314

315
if(WIN32)
  # Options forwarded to the host compiler through -Xcompiler: disable
  # warnings 4244, 4267 and 4819, and pass /bigobj.
  string(APPEND CMAKE_CUDA_FLAGS
         " -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
  string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /bigobj")

  if(MSVC_STATIC_CRT)
    # With the static CRT requested, rewrite every -MD into -MT in each CUDA
    # flag variable that contains it.
    foreach(
      crt_flags_var
      CMAKE_CUDA_FLAGS
      CMAKE_CUDA_FLAGS_DEBUG
      CMAKE_CUDA_FLAGS_RELEASE
      CMAKE_CUDA_FLAGS_MINSIZEREL
      CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
      if(${crt_flags_var} MATCHES "-MD")
        string(REPLACE "-MD" "-MT" ${crt_flags_var} "${${crt_flags_var}}")
      endif()
    endforeach()
  endif()
endif()
329 330 331

# Mark these CUDA cache entries as advanced so GUIs hide them by default.
mark_as_advanced(
  CUDA_BUILD_CUBIN
  CUDA_BUILD_EMULATION
  CUDA_VERBOSE_BUILD
  CUDA_SDK_ROOT_DIR
  CUDA_SEPARABLE_COMPILATION)

# Load the thrust module (resolved through the module search path).
include(thrust)