# cuda.cmake: default GPU architecture lists and nvcc flag setup for GPU builds.
# Nothing below applies unless the build is configured with GPU support.
if(NOT WITH_GPU)
  return()
endif()

# Choose the default GPU architecture lists for the selected release strategy.
# Every strategy defines three space-separated lists: a generic one
# (paddle_known_gpu_archs) and one per CUDA major version (suffix 10 / 11).
# Further down in this file the generic list is replaced by the
# version-specific one once the CUDA compiler version has been inspected.
if(WITH_NV_JETSON)
  # Jetson builds use the same three architectures for every CUDA version.
  add_definitions(-DWITH_NV_JETSON)
  set(paddle_known_gpu_archs10 "53 62 72")
  set(paddle_known_gpu_archs11 "53 62 72")
  set(paddle_known_gpu_archs "53 62 72")
elseif(NEW_RELEASE_ALL)
  message("Using New Release Strategy - All Arches Packge")
  add_definitions(-DNEW_RELEASE_ALL)
  set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "50 60 61 70 75 80")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
elseif(NEW_RELEASE_PYPI)
  message("Using New Release Strategy - Cubin Packge")
  add_definitions(-DNEW_RELEASE_PYPI)
  # The CUDA 10 list is intentionally empty for this strategy.
  set(paddle_known_gpu_archs10 "")
  set(paddle_known_gpu_archs11 "60 61 70 75 80")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
elseif(NEW_RELEASE_JIT)
  message("Using New Release Strategy - JIT Packge")
  add_definitions(-DNEW_RELEASE_JIT)
  set(paddle_known_gpu_archs10 "35 50 60 70 75")
  set(paddle_known_gpu_archs11 "35 50 60 70 75 80")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80 86")
else()
  # No release strategy selected: regular developer build.
  set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
  set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
  set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
endif()

######################################################################################
# A function for automatic detection of GPUs installed  (if autodetection is enabled)
# Usage:
#   detect_installed_gpus(out_variable)
#
# Compiles and runs a tiny CUDA program with nvcc that prints the compute
# capability ("major.minor") of every visible device. The result is stored in
# the internal cache entry CUDA_gpu_detect_output, so the probe only runs while
# that entry is empty.
#
# Output (set in the caller's scope):
#   <out_variable> - the detected capabilities, separated by spaces, or
#                    ${paddle_known_gpu_archs} when detection failed.
function(detect_installed_gpus out_variable)
  if(NOT CUDA_gpu_detect_output)
    set(cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu)

    file(
      WRITE ${cufile}
      "#include \"stdio.h\"\n"
      "#include \"cuda.h\"\n"
      "#include \"cuda_runtime.h\"\n"
      "int main() {\n"
      "  int count = 0;\n"
      "  if (cudaSuccess != cudaGetDeviceCount(&count)) return -1;\n"
      "  if (count == 0) return -1;\n"
      "  for (int device = 0; device < count; ++device) {\n"
      "    cudaDeviceProp prop;\n"
      "    if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n"
      "      printf(\"%d.%d \", prop.major, prop.minor);\n"
      "  }\n"
      "  return 0;\n"
      "}\n")

    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${cufile}"
      WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
      RESULT_VARIABLE nvcc_res
      OUTPUT_VARIABLE nvcc_out
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    if(nvcc_res EQUAL 0)
      # Split the output into lines; literal semicolons are escaped first so
      # they cannot be mistaken for list separators.
      string(REGEX REPLACE ";" "\\\\;" nvcc_out "${nvcc_out}")
      string(REGEX REPLACE "\n" ";" nvcc_out "${nvcc_out}")

      # The probe may exit successfully without printing anything (every
      # cudaGetDeviceProperties call failed). list(GET) on an empty list is a
      # hard error, so only look at the output when there is some.
      list(LENGTH nvcc_out nvcc_line_count)
      if(nvcc_line_count GREATER 0)
        # only keep the last line of nvcc_out
        list(GET nvcc_out -1 nvcc_last_line)

        # Pull out the "major.minor" tokens and rewrite 2.1 as "2.1(2.0)",
        # the BIN(PTX) form understood by select_nvcc_arch_flags. The
        # comparison is done per token: a plain substring replace of "2.1"
        # would also corrupt capabilities such as "12.1".
        string(REGEX MATCHALL "[0-9]+\\.[0-9]+" nvcc_archs "${nvcc_last_line}")
        set(nvcc_archs_normalized "")
        foreach(nvcc_arch IN LISTS nvcc_archs)
          if("${nvcc_arch}" STREQUAL "2.1")
            list(APPEND nvcc_archs_normalized "2.1(2.0)")
          else()
            list(APPEND nvcc_archs_normalized "${nvcc_arch}")
          endif()
        endforeach()
        string(REPLACE ";" " " nvcc_archs_normalized
                       "${nvcc_archs_normalized}")

        set(CUDA_gpu_detect_output
            "${nvcc_archs_normalized}"
            CACHE INTERNAL
                  "Returned GPU architetures from detect_installed_gpus tool"
                  FORCE)
      endif()
    endif()
  endif()

  if(NOT CUDA_gpu_detect_output)
    message(
      STATUS
        "Automatic GPU detection failed. Building for all known architectures.")
    set(${out_variable}
        ${paddle_known_gpu_archs}
        PARENT_SCOPE)
  else()
    set(${out_variable}
        ${CUDA_gpu_detect_output}
        PARENT_SCOPE)
  endif()
endfunction()

########################################################################
# Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME
# Usage:
#   select_nvcc_arch_flags(out_variable)
#
# Reads the cache entry CUDA_ARCH_NAME (created here with default "Auto") and,
# for "Manual", the cache entries CUDA_ARCH_BIN / CUDA_ARCH_PTX.
#
# Outputs (set in the caller's scope):
#   <out_variable>          - " -gencode arch=...,code=..." flags for nvcc
#   <out_variable>_readable - the matching " sm_XX" / " compute_XX" names
function(select_nvcc_arch_flags out_variable)
  # List of arch names
  set(archs_names
      "Kepler"
      "Maxwell"
      "Pascal"
      "Volta"
      "Turing"
      "Ampere"
      "All"
      "Manual")
  set(archs_name_default "Auto")
  list(APPEND archs_names "Auto")

  # set CUDA_ARCH_NAME strings (so it is shown as a drop-down list in CMake-Gui)
  set(CUDA_ARCH_NAME
      ${archs_name_default}
      CACHE STRING "Select target NVIDIA GPU achitecture.")
  set_property(CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${archs_names})
  mark_as_advanced(CUDA_ARCH_NAME)

  # verify CUDA_ARCH_NAME value; an exact list lookup is used so that regex
  # metacharacters or embedded semicolons in the value cannot slip through.
  list(FIND archs_names "${CUDA_ARCH_NAME}" arch_name_index)
  if(arch_name_index EQUAL -1)
    string(REPLACE ";" ", " archs_names "${archs_names}")
    message(
      FATAL_ERROR "Only ${archs_names} architectures names are supported.")
  endif()

  if("${CUDA_ARCH_NAME}" STREQUAL "Manual")
    set(CUDA_ARCH_BIN
        ${paddle_known_gpu_archs}
        CACHE
          STRING
          "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported"
    )
    set(CUDA_ARCH_PTX
        ""
        CACHE
          STRING
          "Specify 'virtual' PTX architectures to build PTX intermediate code for"
    )
    mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX)
  else()
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  # Start from a known state so a same-named variable in the calling scope
  # cannot leak into the PTX list.
  set(cuda_arch_ptx "")

  if("${CUDA_ARCH_NAME}" STREQUAL "Kepler")
    set(cuda_arch_bin "30 35")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Maxwell")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "53")
    else()
      set(cuda_arch_bin "50")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Pascal")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "62")
    else()
      set(cuda_arch_bin "60 61")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Volta")
    if(WITH_NV_JETSON)
      set(cuda_arch_bin "72")
    else()
      set(cuda_arch_bin "70")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Turing")
    set(cuda_arch_bin "75")
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Ampere")
    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.1) # CUDA 11.0
      set(cuda_arch_bin "80")
    else() # CUDA 11.1 and newer
      # No upper version bound here: with one, CUDA 12+ would leave
      # cuda_arch_bin unset and produce no -gencode flags at all.
      set(cuda_arch_bin "80 86")
    endif()
  elseif("${CUDA_ARCH_NAME}" STREQUAL "All")
    set(cuda_arch_bin ${paddle_known_gpu_archs})
  elseif("${CUDA_ARCH_NAME}" STREQUAL "Auto")
    message(
      STATUS
        "WARNING: This is just a warning for publishing release.
      You are building GPU version without supporting different architectures.
      So the wheel package may fail on other GPU architectures.
      You can add -DCUDA_ARCH_NAME=All in cmake command
      to get a full wheel package to resolve this warning.
      While, this version will still work on local GPU architecture.")
    detect_installed_gpus(cuda_arch_bin)
  else() # (${CUDA_ARCH_NAME} STREQUAL "Manual")
    set(cuda_arch_bin ${CUDA_ARCH_BIN})
    # Honor the user-supplied PTX list that the cache entry above advertises.
    set(cuda_arch_ptx ${CUDA_ARCH_PTX})
  endif()

  if(NEW_RELEASE_JIT)
    # JIT packages ship PTX only: move every binary arch to the PTX list.
    # The separating space keeps the last PTX entry and the first binary
    # entry from being fused into one number.
    set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
    set(cuda_arch_bin "")
  endif()

  # remove dots and convert to lists
  string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX REPLACE "\\." "" cuda_arch_ptx "${cuda_arch_ptx}")
  string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
  string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")

  list(REMOVE_DUPLICATES cuda_arch_bin)
  list(REMOVE_DUPLICATES cuda_arch_ptx)

  set(nvcc_flags "")
  set(nvcc_archs_readable "")

  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
    if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN
      string(APPEND nvcc_flags
             " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
      string(APPEND nvcc_archs_readable " sm_${arch}")
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  foreach(arch ${cuda_arch_ptx})
    string(APPEND nvcc_flags
           " -gencode arch=compute_${arch},code=compute_${arch}")
    string(APPEND nvcc_archs_readable " compute_${arch}")
  endforeach()

  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
  set(${out_variable}
      "${nvcc_flags}"
      PARENT_SCOPE)
  set(${out_variable}_readable
      "${nvcc_archs_readable}"
      PARENT_SCOPE)
endfunction()

message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})

# Everything below dispatches on the CUDA compiler version; fail with a clear
# message instead of a malformed if() when it is not available.
if(NOT CMAKE_CUDA_COMPILER_VERSION)
  message(
    FATAL_ERROR
      "CMAKE_CUDA_COMPILER_VERSION is empty; the CUDA language must be enabled before including this file."
  )
endif()

# Replace the generic architecture list with the one matching the CUDA
# version. VERSION_LESS compares component-wise, so a three-part version such
# as "11.2.152" is handled correctly.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.0) # CUDA 10.x
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.2) # CUDA 11.0/11.1
  set(paddle_known_gpu_archs ${paddle_known_gpu_archs11})
elseif(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0) # CUDA 11.2+
  set(paddle_known_gpu_archs "${paddle_known_gpu_archs11} 86")
endif()

# The same three flags are added for every CUDA version below 12.0.
# For CUDA 12.0 and newer neither the architecture list nor the flags are
# touched here.
if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.0)
  string(APPEND CMAKE_CUDA_FLAGS " -D_MWAITXINTRIN_H_INCLUDED"
         " -D__STRICT_ANSI__" " -Wno-deprecated-gpu-targets")
endif()

# Define TRT_PLUGIN_FP16_AVALIABLE when the CUDA compiler is version 10.0 or
# newer. The version is quoted and compared with VERSION_LESS so that a
# multi-component version string is compared component-wise and an empty
# value cannot produce a malformed if().
if(NOT "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 10.0)
  add_definitions("-DTRT_PLUGIN_FP16_AVALIABLE")
endif()

# Expose the CUDA version and toolkit location to the sources as string
# literals.
add_definitions("-DCUDA_VERSION_MAJOR=\"${CUDA_VERSION_MAJOR}\"")
add_definitions("-DCUDA_VERSION_MINOR=\"${CUDA_VERSION_MINOR}\"")
add_definitions("-DCUDA_TOOLKIT_ROOT_DIR=\"${CUDA_TOOLKIT_ROOT_DIR}\"")

# Compute the -gencode flags for the requested architectures, add them to the
# CUDA compiler flags and report them.
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
string(APPEND CMAKE_CUDA_FLAGS " ${NVCC_FLAGS_EXTRA}")
message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")

# Set C++14 support
set(CMAKE_CUDA_STANDARD 14)
# Release/Debug flags are set by cmake (such as -O3 -g -DNDEBUG etc.),
# so they are not set here.
set(CUDA_PROPAGATE_HOST_FLAGS OFF)

# (Note) On Windows, if /W[1-4] is simply deleted, /W1 is added by default and
# conflicts with -w, so /W[1-4] is replaced with /W0 instead.
if(WIN32)
  string(REGEX REPLACE "/W[1-4]" " /W0 " CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS}")
endif()

string(
  APPEND
  CMAKE_CUDA_FLAGS
  # in cuda9, suppress cuda warning on eigen
  " -w"
  # --expt-relaxed-constexpr suppresses Eigen warnings
  " --expt-relaxed-constexpr"
  # --expt-extended-lambda enables the HOSTDEVICE annotation on lambdas
  " --expt-extended-lambda")

# Windows only: extra options forwarded to the host compiler via -Xcompiler.
if(WIN32)
  string(APPEND CMAKE_CUDA_FLAGS
         " -Xcompiler \"/wd4244 /wd4267 /wd4819 \"" " -Xcompiler /bigobj")

  # With the static MSVC runtime, switch every -MD in the CUDA flag variables
  # to -MT. Variables that do not contain -MD are left untouched.
  if(MSVC_STATIC_CRT)
    set(cuda_flag_vars
        CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
        CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
    foreach(cuda_flag_var IN LISTS cuda_flag_vars)
      if(${cuda_flag_var} MATCHES "-MD")
        string(REPLACE "-MD" "-MT" ${cuda_flag_var} "${${cuda_flag_var}}")
      endif()
    endforeach()
  endif()
endif()

# Hide these CUDA cache entries from the default CMake GUI view.
mark_as_advanced(
  CUDA_BUILD_CUBIN
  CUDA_BUILD_EMULATION
  CUDA_VERBOSE_BUILD
  CUDA_SDK_ROOT_DIR
  CUDA_SEPARABLE_COMPILATION)

include(thrust)