diff --git a/paddle/fluid/platform/gpu_info.cc b/paddle/fluid/platform/gpu_info.cc
index add5cabd444bfe1cff259c1626835dbad2c8b63c..c8d312c61bdd7e6e420b69de417b2b8d69ab2b8a 100644
--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -32,6 +32,11 @@ constexpr static float fraction_reserve_gpu_memory = 0.05f;
 namespace paddle {
 namespace platform {

+/* CUDA "pro tip": cudaDeviceGetAttribute() is a much faster way to query a
+single device property than cudaGetDeviceProperties(). For details, see
+https://devblogs.nvidia.com/cuda-pro-tip-the-fast-way-to-query-device-properties/
+*/
+
 inline std::string CudaErrorWebsite() {
   return "Please see detail in https://docs.nvidia.com/cuda/cuda-runtime-api"
          "/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c217824"
@@ -75,14 +80,30 @@ int GetCUDADeviceCount() {

+// Returns the compute capability of GPU `id` encoded as major * 10 + minor
+// (e.g. 7.0 -> 70). Enforces that `id` is below the visible device count and
+// that both attribute queries succeed.
 int GetCUDAComputeCapability(int id) {
   PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
-  cudaDeviceProp device_prop;
-  auto error_code = cudaGetDeviceProperties(&device_prop, id);
-  PADDLE_ENFORCE(
-      error_code,
-      "cudaGetDeviceProperties failed in "
+  // Query each attribute on its own: cudaDeviceGetAttribute() avoids the cost
+  // of filling a whole cudaDeviceProp just to read two fields.
+  int major = 0;
+  auto major_error_code =
+      cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, id);
+  // Check right away so a failed query is reported before the next CUDA call.
+  PADDLE_ENFORCE_EQ(
+      major_error_code, cudaSuccess,
+      "cudaDevAttrComputeCapabilityMajor failed in "
       "paddle::platform::GetCUDAComputeCapability, error code : %d, %s",
-      error_code, CudaErrorWebsite());
-  return device_prop.major * 10 + device_prop.minor;
+      major_error_code, CudaErrorWebsite());
+
+  int minor = 0;
+  auto minor_error_code =
+      cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, id);
+  PADDLE_ENFORCE_EQ(
+      minor_error_code, cudaSuccess,
+      "cudaDevAttrComputeCapabilityMinor failed in "
+      "paddle::platform::GetCUDAComputeCapability, error code : %d, %s",
+      minor_error_code, CudaErrorWebsite());
+  return major * 10 + minor;
 }

 int GetCUDARuntimeVersion(int id) {