Add cuda compiled arch check (#49592)

c0d6ec63 · MarDino · GitHub · 5a1d081f · c0d6ec63 · c0d6ec63
Showing 3 changed files with 37 additions and 0 deletions

cmake/cuda.cmake cmake/cuda.cmake +8 -0

paddle/phi/backends/CMakeLists.txt paddle/phi/backends/CMakeLists.txt +5 -0

paddle/phi/backends/gpu/gpu_resources.cc paddle/phi/backends/gpu/gpu_resources.cc +24 -0

未找到文件。
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -207,6 +207,7 @@ function(select_nvcc_arch_flags out_variable)
  set(nvcc_flags "")
  set(nvcc_archs_readable "")
+  set(nvcc_archs_bin_list "")
  # Tell NVCC to add binaries for the specified GPUs
  foreach(arch ${cuda_arch_bin})
@@ -215,10 +216,12 @@ function(select_nvcc_arch_flags out_variable)
      string(APPEND nvcc_flags
             " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
+      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
      string(APPEND nvcc_archs_readable " sm_${arch}")
+      string(APPEND nvcc_archs_bin_list " ${arch}")
    endif()
  endforeach()
@@ -230,12 +233,17 @@ function(select_nvcc_arch_flags out_variable)
  endforeach()
  string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
+  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})
  set(${out_variable}
      ${nvcc_flags}
      PARENT_SCOPE)
  set(${out_variable}_readable
      ${nvcc_archs_readable}
      PARENT_SCOPE)
+  set(${out_variable}_real_archs
+      ${nvcc_real_archs}
+      PARENT_SCOPE)
 endfunction()
 message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})

--- a/paddle/phi/backends/CMakeLists.txt
+++ b/paddle/phi/backends/CMakeLists.txt
@@ -12,6 +12,11 @@ if(WITH_GPU OR WITH_ROCM)
       gpu/gpu_resources.cc)
  if(WITH_GPU)
    list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
+    set_source_files_properties(
+      gpu/gpu_resources.cc
+      PROPERTIES COMPILE_FLAGS
+                 "-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
  endif()
  if(WITH_ROCM)
    list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)

--- a/paddle/phi/backends/gpu/gpu_resources.cc
+++ b/paddle/phi/backends/gpu/gpu_resources.cc
@@ -14,6 +14,8 @@
 #include "paddle/phi/backends/gpu/gpu_resources.h"
+#include <set>
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/backends/gpu/gpu_decls.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
@@ -57,6 +59,28 @@ void InitGpuProperties(Place place,
  *driver_version = backends::gpu::GetGPUDriverVersion(place.GetDeviceId());
  *runtime_version = backends::gpu::GetGPURuntimeVersion(place.GetDeviceId());
+  const gpuDeviceProp& prop =
+      backends::gpu::GetDeviceProperties(place.GetDeviceId());
+#ifdef PADDLE_WITH_CUDA
+  static const std::set<int> compiled_archs{CUDA_REAL_ARCHS};
+  // Make sure the compiled CUDA arch is the same as the runtime CUDA arch.
+  if (compiled_archs.find(*compute_capability) == compiled_archs.cend() &&
+      compiled_archs.find(prop.major * 10) == compiled_archs.cend()) {
+    static std::atomic<bool> once_flag(false);
+    if (!once_flag.exchange(true)) {
+      std::string compile_arch_str = "";
+      for (const int32_t& arch : compiled_archs) {
+        compile_arch_str += std::to_string(arch) + " ";
+      }
+      LOG(WARNING) << "Paddle with runtime capability " << *compute_capability
+                   << " is not compatible with Paddle installation with arch: "
+                   << compile_arch_str
+                   << ". Please check compiled version of Paddle. ";
+    }
+  }
+#endif
  // TODO(wilber): glog may be replaced in the future?
  LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: "
                          << static_cast<int>(place.device)