# cuda.cmake
# Everything below configures CUDA; skip the file for builds without GPU support.
if(NOT WITH_GPU)
  return()
endif()

# Candidate GPU architectures, as space-separated compute capabilities.
# One preset is chosen per packaging strategy:
#   paddle_known_gpu_archs10 - candidates for CUDA 10.x
#   paddle_known_gpu_archs11 - candidates for CUDA 11.x
#   paddle_known_gpu_archs12 - candidates for CUDA 12.x
#   paddle_known_gpu_archs   - initial list, replaced later in this file once
#                              the CUDA compiler version has been matched
if(WITH_NV_JETSON)
  # NVIDIA Jetson boards
  add_definitions(-DWITH_NV_JETSON)
  set(paddle_known_gpu_archs10 "53 62 72")
  set(paddle_known_gpu_archs11 "53 62 72 87")
  set(paddle_known_gpu_archs12 "53 62 72 87 90")
  set(paddle_known_gpu_archs "53 62 72")
elseif(NEW_RELEASE_ALL)
  add_definitions(-DNEW_RELEASE_ALL)
  message("Using New Release Strategy - All Arches Packge")
  set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "50 60 61 70 75 80")
  set(paddle_known_gpu_archs12 "50 60 61 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
elseif(NEW_RELEASE_PYPI)
  add_definitions(-DNEW_RELEASE_PYPI)
  message("Using New Release Strategy - Cubin Packge")
  set(paddle_known_gpu_archs10 "")
  set(paddle_known_gpu_archs11 "61 70 75 80")
  set(paddle_known_gpu_archs12 "61 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
elseif(NEW_RELEASE_JIT)
  add_definitions(-DNEW_RELEASE_JIT)
  message("Using New Release Strategy - JIT Packge")
  set(paddle_known_gpu_archs10 "50 60 70 75")
  set(paddle_known_gpu_archs11 "50 60 70 75 80")
  set(paddle_known_gpu_archs12 "50 60 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 86 90")
else()
  # Default preset when no special packaging strategy is requested
  set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
  set(paddle_known_gpu_archs12 "52 60 61 70 75 80 90")
  set(paddle_known_gpu_archs "50 52 60 61 70 75 80 90")
endif()

######################################################################################
# Detect the compute capabilities of the GPUs installed on the build machine.
#
# Usage:
#   detect_installed_gpus(out_variable)
#
# A small CUDA program is written to ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu and
# executed through `nvcc --run`. It prints "<major>.<minor> " for every device whose
# properties can be queried. The last line of that output is stored in the internal
# cache entry CUDA_gpu_detect_output, so the probe is not repeated once it succeeded.
#
# Result:
#   ${out_variable} is set in the caller's scope to the detected capabilities, or to
#   ${paddle_known_gpu_archs} when detection fails.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    file(
      WRITE ${cufile}
      "#include \"stdio.h\"\n"
      "#include \"cuda.h\"\n"
      "#include \"cuda_runtime.h\"\n"
      "int main() {\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device) {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "-ccbin=${CMAKE_C_COMPILER}" "--run"
              "${cufile}"
      WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
      RESULT_VARIABLE nvcc_res
      OUTPUT_VARIABLE nvcc_out
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    # The probe can exit with 0 yet print nothing (no device property query
    # succeeded). list(GET ... -1) on an empty list is a hard error, so treat
    # empty output as a failed detection and fall through to the fallback below.
    if(nvcc_res EQUAL 0 AND NOT "${nvcc_out}" STREQUAL "")
      # only keep the last line of nvcc_out: escape literal semicolons, then
      # turn the newline-separated output into a CMake list
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
      list(GET nvcc_out -1 nvcc_out)
      # Rewrite capability 2.1 into the BIN(PTX) form "2.1(2.0)"
      string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
      # CACHE INTERNAL implies FORCE, so the entry is always overwritten
      set(CUDA_gpu_detect_output
          ${nvcc_out}
          CACHE INTERNAL
                "Returned GPU architetures from detect_installed_gpus tool")
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(
      STATUS
        "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable}
        ${paddle_known_gpu_archs}
        PARENT_SCOPE)
  else()
    set(${out_variable}
        ${CUDA_gpu_detect_output}
        PARENT_SCOPE)
  endif()
endfunction()

########################################################################
# Select the nvcc -gencode flags for the GPU architecture family named by the
# CUDA_ARCH_NAME cache variable.
#
# Usage:
#   select_nvcc_arch_flags(out_variable out_arch_bin)
#
# Inputs read from the enclosing scope / cache:
#   CUDA_ARCH_NAME               - one of the names in archs_names below (default "Auto")
#   CUDA_ARCH_BIN, CUDA_ARCH_PTX - binary / PTX architectures, used in "Manual" mode
#   paddle_known_gpu_archs       - architectures used for "All" and as "Manual" default
#   WITH_NV_JETSON, NEW_RELEASE_JIT, CMAKE_CUDA_COMPILER_VERSION
#
# Results, all set in the caller's scope:
#   ${out_variable}            - flag string " -gencode arch=...,code=..." per architecture
#   ${out_variable}_readable   - readable summary such as " sm_70 compute_70"
#   ${out_variable}_real_archs - comma-joined binary (sm_) architecture numbers
#   ${out_arch_bin}            - list of the binary architectures that were selected
function(select_nvcc_arch_flags out_variable out_arch_bin)
  # List of arch names
  set(archs_names
      "Maxwell"
      "Pascal"
      "Volta"
      "Turing"
      "Ampere"
      "Hopper"
      "All"
      "Manual")
  set(archs_name_default "Auto")
  list(APPEND archs_names "Auto")

  # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
  set(CUDA_ARCH_NAME
      ${archs_name_default}
      CACHE STRING "Select target NVIDIA GPU achitecture.")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value; an exact list lookup is used so that the user
  # supplied value is never interpreted as a regular expression
  list(FIND archs_names "${CUDA_ARCH_NAME}" arch_name_index)
  if(arch_name_index EQUAL -1)
    string(REPLACE ";" ", " archs_names "${archs_names}")
    message(
      FATAL_ERROR "Only ${archs_names} architectures names are supported.")
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
    set(CUDA_ARCH_BIN
        ${paddle_known_gpu_archs}
        CACHE
          STRING
          "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
    )
    set(CUDA_ARCH_PTX
        ""
        CACHE
          STRING
          "Specify 'virtual' PTX architectures to build PTX intermediate code for"
    )
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  # Start from an empty PTX list so a variable of the same name in the caller's
  # scope cannot leak into the result.
  set(cuda_arch_ptx "")

  if("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "53")
    else()
      set(cuda_arch_bin "50")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "62")
    else()
      set(cuda_arch_bin "60 61")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "72")
    else()
      set(cuda_arch_bin "70")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Turing")
    set(cuda_arch_bin "75")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "87")
    else()
      # VERSION_LESS compares component-wise; LESS would compare as decimals
      if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.1) # CUDA 11.0
        set(cuda_arch_bin "80")
      else()
        set(cuda_arch_bin "80 86")
      endif()
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Hopper")
    set(cuda_arch_bin "90")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
    set(cuda_arch_bin ${paddle_known_gpu_archs})
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
    message(
      STATUS
        "WARNING: This is just a warning for publishing release.
      You are building GPU version without supporting different architectures.
      So the wheel package may fail on other GPU architectures.
      You can add -DCUDA_ARCH_NAME=All in cmake command
      to get a full wheel package to resolve this warning.
      While, this version will still work on local GPU architecture.")
    detect_installed_gpus(cuda_arch_bin)
  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(cuda_arch_bin ${CUDA_ARCH_BIN})
    # Honour the user-specified PTX architectures declared above
    set(cuda_arch_ptx ${CUDA_ARCH_PTX})
  endif()

  if(NEW_RELEASE_JIT)
    # JIT packages ship PTX only: move every binary arch to the PTX list.
    # The separating space keeps adjacent architecture numbers from fusing.
    set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
    set(cuda_arch_bin "")
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")

  list(REMOVE_DUPLICATES cuda_arch_bin)
  list(REMOVE_DUPLICATES cuda_arch_ptx)

  set(nvcc_flags "")
  set(nvcc_archs_readable "")
  set(nvcc_archs_bin_list "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      string(APPEND nvcc_flags
             " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
      string(APPEND nvcc_archs_readable " sm_${arch}")
      string(APPEND nvcc_archs_bin_list " ${arch}")
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    string(APPEND nvcc_flags
           " -gencode arch=compute_${arch},code=compute_${arch}")
    string(APPEND nvcc_archs_readable " compute_${arch}")
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  # Turn the space-separated binary archs into a list, then join with commas
  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})
  set(${out_variable}
      ${nvcc_flags}
      PARENT_SCOPE)
  set(${out_variable}_readable
      ${nvcc_archs_readable}
      PARENT_SCOPE)
  set(${out_variable}_real_archs
      ${nvcc_real_archs}
      PARENT_SCOPE)
  set(${out_arch_bin}
      ${cuda_arch_bin}
      PARENT_SCOPE)
endfunction()

message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})

# The version drives every decision below; stop with a clear message when it is
# missing instead of tripping over a malformed if() expression.
if("${CMAKE_CUDA_COMPILER_VERSION}" STREQUAL "")
  message(
    FATAL_ERROR
      "CMAKE_CUDA_COMPILER_VERSION is empty; cannot select the known GPU architectures."
  )
endif()

# Pick the architecture list matching the CUDA toolkit. VERSION_LESS compares
# version components numerically, whereas LESS would treat "11.10" as the
# decimal 11.1.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0) # CUDA 10.x
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.2) # CUDA 11.0/11.1
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0) # CUDA 11.2+
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86")
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0) # CUDA 12.0+
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs12} 86")
endif()

# These flags are shared by all branches above. For CUDA 13.0 and newer no
# branch matches: paddle_known_gpu_archs keeps its earlier value and the flags
# are not added.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 13.0)
  string(APPEND CMAKE_CUDA_FLAGS " -D_MWAITXINTRIN_H_INCLUDED")
  string(APPEND CMAKE_CUDA_FLAGS " -D__STRICT_ANSI__")
  string(APPEND CMAKE_CUDA_FLAGS " -Wno-deprecated-gpu-targets")
endif()

if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 10.0)
  add_definitions("-DTRT_PLUGIN_FP16_AVALIABLE")
endif()

# Pass the CUDA version components and toolkit location to the compiler as
# quoted (string literal) preprocessor definitions.
add_definitions(
  "-DCUDA_VERSION_MAJOR=\"${CUDA_VERSION_MAJOR}\""
  "-DCUDA_VERSION_MINOR=\"${CUDA_VERSION_MINOR}\""
  "-DCUDA_TOOLKIT_ROOT_DIR=\"${CUDA_TOOLKIT_ROOT_DIR}\"")

# Compute the -gencode flags for the selected architectures and add them to
# the CUDA compile flags.
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA NVCC_ARCH_BIN)
string(APPEND CMAKE_CUDA_FLAGS " ${NVCC_FLAGS_EXTRA}")
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

# Host flag propagation is switched off (CUDA_PROPAGATE_HOST_FLAGS).
# Release/Debug flags such as -O3 -g -DNDEBUG are set by CMake itself,
# so they are not repeated here.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# Set C++14 support
set(CMAKE_CUDA_STANDARD 14)

# (Note) On Windows, deleting /W[1-4] makes /W1 get added by default, which
# conflicts with -w. So /W[1-4] is replaced with /W0 instead.
if(WIN32)
  string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
endif()

string(
  APPEND
  CMAKE_CUDA_FLAGS
  # -w: suppress CUDA warnings on Eigen (added for CUDA 9)
  " -w"
  # suppress Eigen warnings
  " --expt-relaxed-constexpr"
  # enable HOSTDEVICE annotation on lambdas
  " --expt-extended-lambda")

if(WIN32)
  # Extra options handed to the host compiler through -Xcompiler
  string(APPEND CMAKE_CUDA_FLAGS
         " -Xcompiler \"/wd4244 /wd4267 /wd4819 \"" " -Xcompiler /bigobj")

  if(MSVC_STATIC_CRT)
    # Switch every CUDA flag set from -MD to -MT
    foreach(flag_var
            CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
            CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
      if("${${flag_var}}" MATCHES "-MD")
        string(REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}")
      endif()
    endforeach()
  endif()
endif()

mark_as_advanced(
  CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR
  CUDA_SEPARABLE_COMPILATION)

include(thrust)