From c0d6ec6335c8c83cbbfd397cac2735e7c00403ce Mon Sep 17 00:00:00 2001
From: ZZK <359521840@qq.com>
Date: Tue, 10 Jan 2023 10:13:49 +0800
Subject: [PATCH] Add cuda compiled arch check (#49592)

---
 cmake/cuda.cmake                         |  8 ++++++++
 paddle/phi/backends/CMakeLists.txt       |  5 +++++
 paddle/phi/backends/gpu/gpu_resources.cc | 24 ++++++++++++++++++++++++
 3 files changed, 37 insertions(+)
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index ab48a16529e..2f6b5d7759c 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -207,6 +207,7 @@ function(select_nvcc_arch_flags out_variable)
 
   set(nvcc_flags "")
   set(nvcc_archs_readable "")
+  set(nvcc_archs_bin_list "")
 
   # Tell NVCC to add binaries for the specified GPUs
   foreach(arch ${cuda_arch_bin})
@@ -215,10 +216,12 @@ function(select_nvcc_arch_flags out_variable)
       string(APPEND nvcc_flags
              " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
       string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
+      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
     else()
       # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
       string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
       string(APPEND nvcc_archs_readable " sm_${arch}")
+      string(APPEND nvcc_archs_bin_list " ${arch}")
     endif()
   endforeach()
 
@@ -230,12 +233,17 @@ function(select_nvcc_arch_flags out_variable)
   endforeach()
 
   string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
+  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})
   set(${out_variable}
       ${nvcc_flags}
       PARENT_SCOPE)
   set(${out_variable}_readable
       ${nvcc_archs_readable}
       PARENT_SCOPE)
+  set(${out_variable}_real_archs
+      ${nvcc_real_archs}
+      PARENT_SCOPE)
 endfunction()
 
 message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt
index 3ee28c31058..cfb55565a2e 100644
--- a/paddle/phi/backends/CMakeLists.txt
+++ b/paddle/phi/backends/CMakeLists.txt
@@ -12,6 +12,11 @@ if(WITH_GPU OR WITH_ROCM)
        gpu/gpu_resources.cc)
   if(WITH_GPU)
     list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
+    # Define CUDA_REAL_ARCHS for gpu_resources.cc from the configured arch list.
+    set_source_files_properties(
+      gpu/gpu_resources.cc
+      PROPERTIES COMPILE_FLAGS
+                 "-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
   endif()
   if(WITH_ROCM)
     list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)
diff --git a/paddle/phi/backends/gpu/gpu_resources.cc b/paddle/phi/backends/gpu/gpu_resources.cc
index 3aeb73e1b63..1b484b50336 100644
--- a/paddle/phi/backends/gpu/gpu_resources.cc
+++ b/paddle/phi/backends/gpu/gpu_resources.cc
@@ -14,6 +14,8 @@
 
 #include "paddle/phi/backends/gpu/gpu_resources.h"
 
+#include <set>
+
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/backends/gpu/gpu_decls.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
@@ -57,6 +59,28 @@ void InitGpuProperties(Place place,
   *driver_version = backends::gpu::GetGPUDriverVersion(place.GetDeviceId());
   *runtime_version = backends::gpu::GetGPURuntimeVersion(place.GetDeviceId());
 
+#ifdef PADDLE_WITH_CUDA
+  // Archs this binary was built for, injected at build time through the
+  // CUDA_REAL_ARCHS compile definition (a comma-separated integer list).
+  static const std::set<int> compiled_archs{CUDA_REAL_ARCHS};
+  // Queried inside the guard: only this CUDA-specific check reads `prop`.
+  const gpuDeviceProp& prop =
+      backends::gpu::GetDeviceProperties(place.GetDeviceId());
+  // Warn if neither the exact capability nor major * 10 was compiled in.
+  if (compiled_archs.count(*compute_capability) == 0 &&
+      compiled_archs.count(prop.major * 10) == 0) {
+    std::string archs_str;
+    for (const int arch : compiled_archs) {
+      archs_str += (archs_str.empty() ? "" : " ") + std::to_string(arch);
+    }
+    // LOG_FIRST_N emits this warning once, like the NOTE logged below.
+    LOG_FIRST_N(WARNING, 1)
+        << "Paddle with runtime capability " << *compute_capability
+        << " is not compatible with Paddle installation with arch: "
+        << archs_str << ". Please check compiled version of Paddle.";
+  }
+#endif
+
   // TODO(wilber): glog may be replaced in the future?
   LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: "
                           << static_cast<int>(place.device)
-- 
GitLab