cuda.cmake 10.8 KB
Newer Older
1
# Nothing in this file applies to builds without GPU support.
if(NOT WITH_GPU)
  return()
endif()

# Tables of GPU compute capabilities (space-separated, without dots) for each
# packaging mode:
#   paddle_known_gpu_archs   - default table
#   paddle_known_gpu_archs10 - table selected further down for CUDA 10.x
#   paddle_known_gpu_archs11 - table selected further down for CUDA 11.x
# Every non-default mode also exports a preprocessor definition of the same
# name as the option that enabled it.
if(WITH_NV_JETSON)
  # NVIDIA Jetson build.
  set(paddle_known_gpu_archs "53 62 72")
  set(paddle_known_gpu_archs10 "53 62 72")
  set(paddle_known_gpu_archs11 "53 62 72 87")
  add_definitions(-DWITH_NV_JETSON)
elseif(NEW_RELEASE_ALL)
  # Release package covering all architectures.
  message("Using New Release Strategy - All Arches Packge")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "50 60 61 70 75 80")
  add_definitions(-DNEW_RELEASE_ALL)
elseif(NEW_RELEASE_PYPI)
  # Release package for PyPI; note the CUDA 10.x table is deliberately empty.
  message("Using New Release Strategy - Cubin Packge")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  set(paddle_known_gpu_archs10 "")
  set(paddle_known_gpu_archs11 "60 61 70 75 80")
  add_definitions(-DNEW_RELEASE_PYPI)
elseif(NEW_RELEASE_JIT)
  # Release package relying on JIT compilation (see select_nvcc_arch_flags,
  # which turns the selected architectures into PTX-only targets in this mode).
  message("Using New Release Strategy - JIT Packge")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
  set(paddle_known_gpu_archs10 "35 50 60 70 75")
  set(paddle_known_gpu_archs11 "35 50 60 70 75 80")
  add_definitions(-DNEW_RELEASE_JIT)
else()
  # Regular build.
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
  set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
endif()
33 34 35 36 37 38 39 40 41

######################################################################################
# Detect the compute capabilities of the GPUs installed on the build machine
# (used when architecture autodetection is enabled).
#
# Usage:
#   detect_installed_gpus(out_variable)
#
# Result:
#   <out_variable> is set in the caller's scope to the detected capabilities as
#   a space-separated string of "<major>.<minor>" entries. When detection
#   fails it is set to ${paddle_known_gpu_archs} instead.
#
# A successful detection is stored in the CUDA_gpu_detect_output cache entry,
# so the probe below is skipped once that entry holds a value.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    # Probe program: prints "<major>.<minor> " for every device whose
    # properties can be queried, and exits non-zero when the device count
    # cannot be read or is zero.
    file(
      WRITE ${cufile}
      ""
      "#include \"stdio.h\"\n"
      "#include \"cuda.h\"\n"
      "#include \"cuda_runtime.h\"\n"
      "int main() {\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device) {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    # Build and execute the probe in a single nvcc invocation.
    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
      WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
      RESULT_VARIABLE nvcc_res
      OUTPUT_VARIABLE nvcc_out
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    # The probe can exit with 0 without printing anything (every
    # cudaGetDeviceProperties call failed). list(GET) on an empty list is a
    # hard error, so an empty output is treated like a failed detection and
    # handled by the fallback below.
    if(nvcc_res EQUAL 0 AND NOT "${nvcc_out}" STREQUAL "")
      # only keep the last line of nvcc_out
      # (literal ';' is escaped first so it cannot act as a list separator)
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")
      list(GET nvcc_out -1 nvcc_out)
      # Rewrite 2.1 in BIN(PTX) form; select_nvcc_arch_flags parses the
      # parenthesised part as the virtual architecture to compile against.
      string(REPLACE "2.1" "2.1(2.0)" nvcc_out "${nvcc_out}")
      set(CUDA_gpu_detect_output
          ${nvcc_out}
          CACHE INTERNAL
                "Returned GPU architetures from detect_installed_gpus tool"
                FORCE)
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(
      STATUS
        "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable}
        ${paddle_known_gpu_archs}
        PARENT_SCOPE)
  else()
    set(${out_variable}
        ${CUDA_gpu_detect_output}
        PARENT_SCOPE)
  endif()
endfunction()

########################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
# Usage:
#   select_nvcc_arch_flags(out_variable)
#
# Results (set in the caller's scope):
#   <out_variable>          - string of " -gencode arch=...,code=..." options
#   <out_variable>_readable - matching summary, e.g. " sm_70 compute_70"
#
# Inputs read from the enclosing scope / cache:
#   CUDA_ARCH_NAME               - one of the names listed below (default "Auto")
#   CUDA_ARCH_BIN, CUDA_ARCH_PTX - architecture lists, used in "Manual" mode
#   paddle_known_gpu_archs       - list used for "All" and as "Manual" default
#   WITH_NV_JETSON               - switches to the Jetson capability per family
#   NEW_RELEASE_JIT              - emit PTX only, no SASS binaries
#   CMAKE_CUDA_COMPILER_VERSION  - decides whether sm_86 is added for "Ampere"
function(select_nvcc_arch_flags out_variable)
  # List of arch names
  set(archs_names
      "Kepler"
      "Maxwell"
      "Pascal"
      "Volta"
      "Turing"
      "Ampere"
      "All"
      "Manual")
  set(archs_name_default "Auto")
  list(APPEND archs_names "Auto")

  # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui)
  set(CUDA_ARCH_NAME
      ${archs_name_default}
      CACHE STRING "Select target NVIDIA GPU achitecture.")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value
  if(NOT ";${archs_names};" MATCHES ";${CUDA_ARCH_NAME};")
    string(REPLACE ";" ", " archs_names "${archs_names}")
    message(
      FATAL_ERROR "Only ${archs_names} architectures names are supported.")
  endif()

  # The manual-mode cache entries only exist while "Manual" is selected.
  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
    set(CUDA_ARCH_BIN
        ${paddle_known_gpu_archs}
        CACHE
          STRING
          "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
    )
    set(CUDA_ARCH_PTX
        ""
        CACHE
          STRING
          "Specify 'virtual' PTX architectures to build PTX intermediate code for"
    )
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  # Start from a clean PTX list so a same-named variable in the caller's scope
  # cannot leak into the result.
  set(cuda_arch_ptx "")

  if("${CUDA_ARCH_NAME}" STREQUAL "Kepler")
    set(cuda_arch_bin "30 35")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "53")
    else()
      set(cuda_arch_bin "50")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "62")
    else()
      set(cuda_arch_bin "60 61")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "72")
    else()
      set(cuda_arch_bin "70")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Turing")
    set(cuda_arch_bin "75")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "87")
    else()
      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.1") # CUDA 11.0
        set(cuda_arch_bin "80")
      else() # CUDA 11.1+
        # No upper version bound: an upper-bounded elseif() here would leave
        # cuda_arch_bin unset for CUDA 12 and newer, producing no -gencode
        # flags at all.
        set(cuda_arch_bin "80 86")
      endif()
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
    set(cuda_arch_bin ${paddle_known_gpu_archs})
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
    message(
      STATUS
        "WARNING: This is just a warning for publishing release.
      You are building GPU version without supporting different architectures.
      So the wheel package may fail on other GPU architectures.
      You can add -DCUDA_ARCH_NAME=All in cmake command
      to get a full wheel package to resolve this warning.
      While, this version will still work on local GPU architecture.")
    detect_installed_gpus(cuda_arch_bin)
  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(cuda_arch_bin ${CUDA_ARCH_BIN})
    # Honour the user-provided virtual architectures as well; the cache entry
    # defaults to "", so builds that never set it are unaffected.
    set(cuda_arch_ptx "${CUDA_ARCH_PTX}")
  endif()

  if(NEW_RELEASE_JIT)
    # JIT packages ship PTX only: move every selected architecture to the PTX
    # list. The separating space keeps adjacent entries from fusing into one
    # number (e.g. "75" followed by "80" must not become "7580").
    set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
    set(cuda_arch_bin "")
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  # Parentheses are kept for BIN entries so the BIN(PTX) form survives.
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")

  list(REMOVE_DUPLICATES cuda_arch_bin)
  list(REMOVE_DUPLICATES cuda_arch_ptx)

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      string(APPEND nvcc_flags
             " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
      string(APPEND nvcc_archs_readable " sm_${arch}")
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    string(APPEND nvcc_flags
           " -gencode arch=compute_${arch},code=compute_${arch}")
    string(APPEND nvcc_archs_readable " compute_${arch}")
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable}
      ${nvcc_flags}
      PARENT_SCOPE)
  set(${out_variable}_readable
      ${nvcc_archs_readable}
      PARENT_SCOPE)
endfunction()

T
T8T9 已提交
241
message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})

# Choose the known-architecture table that matches the CUDA toolkit version.
# For CUDA 12.0 and newer the table chosen at the top of this file is kept.
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 11.2) # CUDA 11.0/11.1
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
elseif(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0) # CUDA 11.2+
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86")
endif()

# Every toolkit older than CUDA 12.0 receives the same three extra options.
if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0)
  string(APPEND CMAKE_CUDA_FLAGS
         " -D_MWAITXINTRIN_H_INCLUDED"
         " -D__STRICT_ANSI__"
         " -Wno-deprecated-gpu-targets")
endif()

259
# Defined for every CUDA toolkit that is not older than 10.0.
if(NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
  add_definitions("-DTRT_PLUGIN_FP16_AVALIABLE")
endif()

# Expose the toolkit version and install location to the sources as string
# literals (each value is wrapped in escaped double quotes).
add_definitions(
  "-DCUDA_VERSION_MAJOR=\"${CUDA_VERSION_MAJOR}\""
  "-DCUDA_VERSION_MINOR=\"${CUDA_VERSION_MINOR}\""
  "-DCUDA_TOOLKIT_ROOT_DIR=\"${CUDA_TOOLKIT_ROOT_DIR}\"")
T
T8T9 已提交
266

267 268
# Compute the -gencode options for the selected architectures, report them,
# and append them to the CUDA compiler flags.
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
string(APPEND CMAKE_CUDA_FLAGS " ${NVCC_FLAGS_EXTRA}")
271

272
# Compile CUDA sources as C++14.
set(CMAKE_CUDA_STANDARD 14)

# Release/Debug flags (such as -O3 -g -DNDEBUG) are set by CMake itself, so
# they are not set in this file; host compiler flag propagation is turned off.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
D
dzhwinter 已提交
277

278 279
# Note for Windows: if /W[1-4] were simply deleted, /W1 would be added by
# default and conflict with -w, so /W[1-4] is replaced with /W0 instead.
if(WIN32)
  string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
endif()

# Options appended for every platform, in this order:
#   -w                        suppress CUDA warnings on Eigen (needed since CUDA 9)
#   --expt-relaxed-constexpr  suppress Eigen warnings
#   --expt-extended-lambda    enable HOSTDEVICE annotation on lambdas
string(APPEND CMAKE_CUDA_FLAGS
       " -w"
       " --expt-relaxed-constexpr"
       " --expt-extended-lambda")
289

290
# Windows-only options forwarded to the host compiler through -Xcompiler.
if(WIN32)
  # Disable warnings 4244, 4267 and 4819, then enable /bigobj.
  string(APPEND CMAKE_CUDA_FLAGS
         " -Xcompiler \"/wd4244 /wd4267 /wd4819 \""
         " -Xcompiler /bigobj")

  # With the static CRT requested, rewrite every -MD occurrence to -MT in the
  # common and per-configuration CUDA flag variables that contain it.
  if(MSVC_STATIC_CRT)
    foreach(cuda_flags_var IN ITEMS
            CMAKE_CUDA_FLAGS
            CMAKE_CUDA_FLAGS_DEBUG
            CMAKE_CUDA_FLAGS_RELEASE
            CMAKE_CUDA_FLAGS_MINSIZEREL
            CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
      if(${cuda_flags_var} MATCHES "-MD")
        string(REPLACE "-MD" "-MT" ${cuda_flags_var} "${${cuda_flags_var}}")
      endif()
    endforeach()
  endif()
endif()
304 305 306

# Mark these CUDA cache entries as advanced.
mark_as_advanced(
  CUDA_BUILD_CUBIN
  CUDA_BUILD_EMULATION
  CUDA_VERBOSE_BUILD
  CUDA_SDK_ROOT_DIR
  CUDA_SEPARABLE_COMPILATION)

# Pull in the project's thrust module.
include(thrust)