From c0d6ec6335c8c83cbbfd397cac2735e7c00403ce Mon Sep 17 00:00:00 2001
From: ZZK <359521840@qq.com>
Date: Tue, 10 Jan 2023 10:13:49 +0800
Subject: [PATCH] Add cuda compiled arch check (#49592)

---
 cmake/cuda.cmake                         |  8 ++++++++
 paddle/phi/backends/CMakeLists.txt       |  5 +++++
 paddle/phi/backends/gpu/gpu_resources.cc | 24 ++++++++++++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index ab48a16529e..2f6b5d7759c 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -207,6 +207,7 @@ function(select_nvcc_arch_flags out_variable)
 
   set(nvcc_flags "")
   set(nvcc_archs_readable "")
+  set(nvcc_archs_bin_list "")
 
   # Tell NVCC to add binaries for the specified GPUs
   foreach(arch ${cuda_arch_bin})
@@ -215,10 +216,12 @@ function(select_nvcc_arch_flags out_variable)
       string(APPEND nvcc_flags
              " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
       string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
+      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
     else()
       # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
       string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
       string(APPEND nvcc_archs_readable " sm_${arch}")
+      string(APPEND nvcc_archs_bin_list " ${arch}")
     endif()
   endforeach()
 
@@ -230,12 +233,17 @@ function(select_nvcc_arch_flags out_variable)
   endforeach()
 
   string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
+  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})
   set(${out_variable}
       ${nvcc_flags}
       PARENT_SCOPE)
   set(${out_variable}_readable
       ${nvcc_archs_readable}
       PARENT_SCOPE)
+  set(${out_variable}_real_archs
+      ${nvcc_real_archs}
+      PARENT_SCOPE)
 endfunction()
 
 message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt
index 3ee28c31058..cfb55565a2e 100644
--- a/paddle/phi/backends/CMakeLists.txt
+++ b/paddle/phi/backends/CMakeLists.txt
@@ -12,6 +12,11 @@ if(WITH_GPU OR WITH_ROCM)
        gpu/gpu_resources.cc)
   if(WITH_GPU)
     list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
+    set_source_files_properties(
+      gpu/gpu_resources.cc
+      PROPERTIES COMPILE_FLAGS
+                 "-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
+
   endif()
   if(WITH_ROCM)
     list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)
diff --git a/paddle/phi/backends/gpu/gpu_resources.cc b/paddle/phi/backends/gpu/gpu_resources.cc
index 3aeb73e1b63..1b484b50336 100644
--- a/paddle/phi/backends/gpu/gpu_resources.cc
+++ b/paddle/phi/backends/gpu/gpu_resources.cc
@@ -14,6 +14,8 @@
 
 #include "paddle/phi/backends/gpu/gpu_resources.h"
 
+#include <set>
+
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/backends/gpu/gpu_decls.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
@@ -57,6 +59,28 @@ void InitGpuProperties(Place place,
   *driver_version = backends::gpu::GetGPUDriverVersion(place.GetDeviceId());
   *runtime_version = backends::gpu::GetGPURuntimeVersion(place.GetDeviceId());
 
+  const gpuDeviceProp& prop =
+      backends::gpu::GetDeviceProperties(place.GetDeviceId());
+
+#ifdef PADDLE_WITH_CUDA
+  static const std::set<int> compiled_archs{CUDA_REAL_ARCHS};
+  // Make sure compiled cuda arch is as same as runtime cuda arch.
+  if (compiled_archs.find(*compute_capability) == compiled_archs.cend() &&
+      compiled_archs.find(prop.major * 10) == compiled_archs.cend()) {
+    static std::atomic<bool> once_flag(false);
+    if (!once_flag.exchange(true)) {
+      std::string compile_arch_str = "";
+      for (const int32_t& arch : compiled_archs) {
+        compile_arch_str += std::to_string(arch) + " ";
+      }
+      LOG(WARNING) << "Paddle with runtime capability " << *compute_capability
+                   << " is not compatible with Paddle installation with arch: "
+                   << compile_arch_str
+                   << ". Please check compiled version of Paddle. ";
+    }
+  }
+#endif
+
   // TODO(wilber): glog may be replaced in the future?
   LOG_FIRST_N(WARNING, 1)
       << "Please NOTE: device: " << static_cast<int>(place.device)
-- 
GitLab