Unverified · Commit c0d6ec63 · authored by MarDino, committed by GitHub

Add cuda compiled arch check (#49592)

Parent: 5a1d081f
@@ -207,6 +207,7 @@ function(select_nvcc_arch_flags out_variable)
   set(nvcc_flags "")
   set(nvcc_archs_readable "")
+  set(nvcc_archs_bin_list "")
   # Tell NVCC to add binaries for the specified GPUs
   foreach(arch ${cuda_arch_bin})
@@ -215,10 +216,12 @@ function(select_nvcc_arch_flags out_variable)
       string(APPEND nvcc_flags
              " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
       string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
+      string(APPEND nvcc_archs_bin_list " ${CMAKE_MATCH_1}")
     else()
       # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
       string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
       string(APPEND nvcc_archs_readable " sm_${arch}")
+      string(APPEND nvcc_archs_bin_list " ${arch}")
     endif()
   endforeach()
@@ -230,12 +233,17 @@ function(select_nvcc_arch_flags out_variable)
   endforeach()
   string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
+  string(REGEX MATCHALL "[0-9()]+" nvcc_archs_bin_list "${nvcc_archs_bin_list}")
+  string(JOIN "," nvcc_real_archs ${nvcc_archs_bin_list})
   set(${out_variable}
       ${nvcc_flags}
       PARENT_SCOPE)
   set(${out_variable}_readable
       ${nvcc_archs_readable}
       PARENT_SCOPE)
+  set(${out_variable}_real_archs
+      ${nvcc_real_archs}
+      PARENT_SCOPE)
 endfunction()

 message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
......
@@ -12,6 +12,11 @@ if(WITH_GPU OR WITH_ROCM)
     gpu/gpu_resources.cc)
   if(WITH_GPU)
     list(APPEND BACKENDS_SRCS gpu/cuda/cuda_info.cc gpu/cuda/cuda_graph.cc)
+    set_source_files_properties(
+      gpu/gpu_resources.cc
+      PROPERTIES COMPILE_FLAGS
+                 "-DCUDA_REAL_ARCHS=\"${NVCC_FLAGS_EXTRA_real_archs}\"")
   endif()
   if(WITH_ROCM)
     list(APPEND BACKENDS_SRCS gpu/rocm/rocm_info.cc)
......
@@ -14,6 +14,8 @@
 #include "paddle/phi/backends/gpu/gpu_resources.h"

+#include <set>
+
 #include "paddle/phi/api/include/tensor.h"
 #include "paddle/phi/backends/gpu/gpu_decls.h"
 #include "paddle/phi/backends/gpu/gpu_info.h"
@@ -57,6 +59,28 @@ void InitGpuProperties(Place place,
   *driver_version = backends::gpu::GetGPUDriverVersion(place.GetDeviceId());
   *runtime_version = backends::gpu::GetGPURuntimeVersion(place.GetDeviceId());

+  const gpuDeviceProp& prop =
+      backends::gpu::GetDeviceProperties(place.GetDeviceId());
+#ifdef PADDLE_WITH_CUDA
+  static const std::set<int> compiled_archs{CUDA_REAL_ARCHS};
+  // Make sure compiled cuda arch is as same as runtime cuda arch.
+  if (compiled_archs.find(*compute_capability) == compiled_archs.cend() &&
+      compiled_archs.find(prop.major * 10) == compiled_archs.cend()) {
+    static std::atomic<bool> once_flag(false);
+    if (!once_flag.exchange(true)) {
+      std::string compile_arch_str = "";
+      for (const int32_t& arch : compiled_archs) {
+        compile_arch_str += std::to_string(arch) + " ";
+      }
+      LOG(WARNING) << "Paddle with runtime capability " << *compute_capability
+                   << " is not compatible with Paddle installation with arch: "
+                   << compile_arch_str
+                   << ". Please check compiled version of Paddle. ";
+    }
+  }
+#endif
+
   // TODO(wilber): glog may be replaced in the future?
   LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: "
                           << static_cast<int>(place.device)
......
Markdown is supported.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register.