# cuda.cmake: GPU architecture selection and NVCC flag setup for CUDA builds.
# Nothing below applies unless the build is configured with GPU support.
if(NOT WITH_GPU)
  return()
endif()

# Candidate GPU architecture lists, written as space-separated compute
# capabilities with the dot removed (e.g. "75" for 7.5):
#   paddle_known_gpu_archs   - default list; used for CUDA_ARCH_NAME=All, as
#                              the default of CUDA_ARCH_BIN in Manual mode and
#                              as the fallback when GPU auto-detection fails
#   paddle_known_gpu_archs10 - replaces the default list for CUDA < 11.0
#   paddle_known_gpu_archs11 - replaces the default list for CUDA 11.x
# The release-strategy switches below also export a matching preprocessor
# definition so that sources can tell which strategy was used.
if(WITH_NV_JETSON)
  add_definitions(-DWITH_NV_JETSON)
  set(paddle_known_gpu_archs "53 62 72")
  set(paddle_known_gpu_archs10 "53 62 72")
  set(paddle_known_gpu_archs11 "53 62 72 87")
elseif(NEW_RELEASE_ALL)
  message("Using New Release Strategy - All Arches Packge")
  add_definitions(-DNEW_RELEASE_ALL)
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "50 60 61 70 75 80")
elseif(NEW_RELEASE_PYPI)
  message("Using New Release Strategy - Cubin Packge")
  add_definitions(-DNEW_RELEASE_PYPI)
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  # Intentionally empty: no architecture list is provided for CUDA 10.x here.
  set(paddle_known_gpu_archs10 "")
  set(paddle_known_gpu_archs11 "61 70 75 80")
elseif(NEW_RELEASE_JIT)
  message("Using New Release Strategy - JIT Packge")
  add_definitions(-DNEW_RELEASE_JIT)
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  set(paddle_known_gpu_archs10 "35 50 60 70 75")
  set(paddle_known_gpu_archs11 "35 50 60 70 75 80")
else()
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
  set(paddle_known_gpu_archs10 "50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
endif()

######################################################################################
# A function for automatic detection of GPUs installed  (if autodetection is enabled)
# Usage:
#   detect_installed_gpus(out_variable)
#
# Writes a small CUDA program into the build tree, compiles and runs it with
# "nvcc --run", and takes the last line of its output as the list of compute
# capabilities ("major.minor" entries separated by spaces). The result is
# stored in the INTERNAL cache entry CUDA_gpu_detect_output, so the probe is
# skipped on later runs while that entry holds a value.
#
# Arguments:
#   out_variable - name of the variable set in the caller's scope. It receives
#                  the detected capabilities, or ${paddle_known_gpu_archs}
#                  when detection fails or yields nothing.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    # The probe prints "<major>.<minor> " for every device it can query and
    # exits non-zero when no device is available.
    file(
      WRITE ${cufile}
      ""
      "#include \"stdio.h\"\n"
      "#include \"cuda.h\"\n"
      "#include \"cuda_runtime.h\"\n"
      "int main() {\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device) {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
      WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
      RESULT_VARIABLE nvcc_res
      OUTPUT_VARIABLE nvcc_out
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    if(nvcc_res EQUAL 0)
      # only keep the last line of nvcc_out
      # (escape literal ';' first so that splitting on newlines is exact)
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
      # list(GET ... -1) is a hard error on an empty list; an empty output is
      # treated as a failed detection instead of aborting the configure step.
      list(LENGTH nvcc_out nvcc_line_count)
      if(nvcc_line_count GREATER 0)
        list(GET nvcc_out -1 nvcc_out)
        # Compute capability 2.1 is requested as binary 2.1 with PTX 2.0.
        string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
        set(CUDA_gpu_detect_output
            "${nvcc_out}"
            CACHE INTERNAL
                  "Returned GPU architetures from detect_installed_gpus tool"
                  FORCE)
      endif()
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(
      STATUS
        "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable}
        ${paddle_known_gpu_archs}
        PARENT_SCOPE)
  else()
    set(${out_variable}
        ${CUDA_gpu_detect_output}
        PARENT_SCOPE)
  endif()
endfunction()

########################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
# Usage:
#   select_nvcc_arch_flags(out_variable out_arch_bin)
#
# Arguments (names of variables that are set in the caller's scope):
#   out_variable - receives the " -gencode ..." flag string for nvcc.
#                  Two companion variables are set as well:
#                    <out_variable>_readable   - e.g. " sm_75 compute_75"
#                    <out_variable>_real_archs - comma-separated binary archs
#   out_arch_bin - receives the list of selected binary architectures
#                  (dots removed; entries may use the BIN(PTX) form).
#
# Cache entries:
#   CUDA_ARCH_NAME - one of the names in archs_names below (default "Auto").
#   CUDA_ARCH_BIN / CUDA_ARCH_PTX - only defined while CUDA_ARCH_NAME is
#                  "Manual"; removed from the cache otherwise.
function(select_nvcc_arch_flags out_variable out_arch_bin)
  # List of arch names
  set(archs_names
      "Kepler"
      "Maxwell"
      "Pascal"
      "Volta"
      "Turing"
      "Ampere"
      "All"
      "Manual")
  set(archs_name_default "Auto")
  list(APPEND archs_names "Auto")

  # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
  set(CUDA_ARCH_NAME
      ${archs_name_default}
      CACHE STRING "Select target NVIDIA GPU achitecture.")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value
  if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
    string(REPLACE ";" ", " archs_names "${archs_names}")
    message(
      FATAL_ERROR "Only ${archs_names} architectures names are supported.")
  endif()

  # NOTE: comparisons below pass the variable *name* to if(), which
  # dereferences it exactly once. Expanding it unquoted (${CUDA_ARCH_NAME})
  # would let a value such as "All" be re-interpreted as a variable name.
  if(CUDA_ARCH_NAME STREQUAL "Manual")
    set(CUDA_ARCH_BIN
        ${paddle_known_gpu_archs}
        CACHE
          STRING
          "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
    )
    set(CUDA_ARCH_PTX
        ""
        CACHE
          STRING
          "Specify 'virtual' PTX architectures to build PTX intermediate code for"
    )
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  # Map the architecture name to the binary architectures to build.
  if(CUDA_ARCH_NAME STREQUAL "Kepler")
    set(cuda_arch_bin "30 35")
  elseif(CUDA_ARCH_NAME STREQUAL "Maxwell")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "53")
    else()
      set(cuda_arch_bin "50")
    endif()
  elseif(CUDA_ARCH_NAME STREQUAL "Pascal")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "62")
    else()
      set(cuda_arch_bin "60 61")
    endif()
  elseif(CUDA_ARCH_NAME STREQUAL "Volta")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "72")
    else()
      set(cuda_arch_bin "70")
    endif()
  elseif(CUDA_ARCH_NAME STREQUAL "Turing")
    set(cuda_arch_bin "75")
  elseif(CUDA_ARCH_NAME STREQUAL "Ampere")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "87")
    else()
      # VERSION_LESS compares component-wise; a numeric LESS would read a
      # dotted version such as 11.10 as the number 11.1.
      if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.1) # CUDA 11.0
        set(cuda_arch_bin "80")
      else()
        set(cuda_arch_bin "80 86")
      endif()
    endif()
  elseif(CUDA_ARCH_NAME STREQUAL "All")
    set(cuda_arch_bin ${paddle_known_gpu_archs})
  elseif(CUDA_ARCH_NAME STREQUAL "Auto")
    message(
      STATUS
        "WARNING: This is just a warning for publishing release.
      You are building GPU version without supporting different architectures.
      So the wheel package may fail on other GPU architectures.
      You can add -DCUDA_ARCH_NAME=All in cmake command
      to get a full wheel package to resolve this warning.
      While, this version will still work on local GPU architecture.")
    detect_installed_gpus(cuda_arch_bin)
  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(cuda_arch_bin ${CUDA_ARCH_BIN})
  endif()

  # NOTE(review): cuda_arch_ptx is never assigned from CUDA_ARCH_PTX in this
  # function, so the cache entry declared above has no effect here; the only
  # other source of a value is a cuda_arch_ptx variable visible from the
  # caller's scope. Confirm whether that is intended.
  if(NEW_RELEASE_JIT)
    # JIT release: ship PTX only, so move every selected arch to the PTX list.
    set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}")
    set(cuda_arch_bin "")
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")

  list(REMOVE_DUPLICATES cuda_arch_bin)
  list(REMOVE_DUPLICATES cuda_arch_ptx)

  set(nvcc_flags "")
  set(nvcc_archs_readable "")
  set(nvcc_archs_bin_list "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      string(APPEND nvcc_flags
             " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
      string(APPEND nvcc_archs_readable " sm_${arch}")
      string(APPEND nvcc_archs_bin_list " ${arch}")
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    string(APPEND nvcc_flags
           " -gencode arch=compute_${arch},code=compute_${arch}")
    string(APPEND nvcc_archs_readable " compute_${arch}")
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  # Turn the space-separated binary archs into a list, then join with commas.
  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})

  # Export the results to the caller.
  set(${out_variable}
      ${nvcc_flags}
      PARENT_SCOPE)
  set(${out_variable}_readable
      ${nvcc_archs_readable}
      PARENT_SCOPE)
  set(${out_variable}_real_archs
      ${nvcc_real_archs}
      PARENT_SCOPE)
  set(${out_arch_bin}
      ${cuda_arch_bin}
      PARENT_SCOPE)
endfunction()

message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})

# Pick the known-architecture list that matches the CUDA toolkit and add the
# flags shared by all pre-12.0 toolkits. For CUDA 12.0 and newer nothing is
# changed here.
# VERSION_LESS compares component-wise; a numeric LESS would treat the dotted
# version as a floating-point number (11.10 would read as 11.1) and would be
# a syntax error if the version variable expanded to nothing.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0) # CUDA 10.x
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.2) # CUDA 11.0/11.1
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0) # CUDA 11.2+
  # The CUDA 11 list is extended with sm_86 for these toolkits.
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
endif()

# Define TRT_PLUGIN_FP16_AVALIABLE for CUDA 10.0 and newer (the spelling is
# part of the macro name and is kept as-is).
# VERSION_LESS is used because the operand is a dotted version string.
if(NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 10.0)
  add_definitions("-DTRT_PLUGIN_FP16_AVALIABLE")
endif()

# Expose the CUDA version and toolkit location to the sources as string
# literals (the values are wrapped in escaped double quotes).
add_definitions("-DCUDA_VERSION_MAJOR=\"${CUDA_VERSION_MAJOR}\"")
add_definitions("-DCUDA_VERSION_MINOR=\"${CUDA_VERSION_MINOR}\"")
add_definitions("-DCUDA_TOOLKIT_ROOT_DIR=\"${CUDA_TOOLKIT_ROOT_DIR}\"")

# setting nvcc arch flags
# NVCC_FLAGS_EXTRA receives the -gencode flags, NVCC_ARCH_BIN the selected
# binary architectures; the flags are appended to the global CUDA flags.
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA NVCC_ARCH_BIN)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

# Do not forward the host compiler flags to nvcc.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
# Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
# So, don't set these flags here.
# Set C++14 support
set(CMAKE_CUDA_STANDARD 14)

# (Note) On Windows, if /W[1-4] is simply deleted, /W1 is added by default and
# conflicts with -w, so /W[1-4] is replaced with /W0 instead.
if(WIN32)
  string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
endif()
# in cuda9, suppress cuda warning on eigen
string(APPEND CMAKE_CUDA_FLAGS " -w")
# Set --expt-relaxed-constexpr to suppress Eigen warnings
string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")
# Set --expt-extended-lambda to enable HOSTDEVICE annotation on lambdas
string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda")

# Windows-only host compiler options, passed through nvcc with -Xcompiler.
if(WIN32)
  # Disable MSVC warnings C4244, C4267 and C4819 (/wd<number>).
  set(CMAKE_CUDA_FLAGS
      "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj")
  if(MSVC_STATIC_CRT)
    # Switch every CUDA flag set from the dynamic (-MD) to the static (-MT)
    # C runtime. ${flag_var} expands to the variable name, which if() then
    # dereferences for the MATCHES test.
    foreach(flag_var
            CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
            CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
      if(${flag_var} MATCHES "-MD")
        string(REGEX REPLACE "-MD" "-MT" ${flag_var} "${${flag_var}}")
      endif()
    endforeach()
  endif()
endif()

# Hide rarely used CUDA cache entries from the default CMake GUI view.
mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)

include(thrust)