add gpu info func to get compute cap

1998d5af · Kexin Zhao · d400b419 · 1998d5af · 1998d5af · 1998d5af
4 changed files
--- a/paddle/fluid/operators/math/math_function_test.cc
+++ b/paddle/fluid/operators/math/math_function_test.cc
@@ -14,11 +14,20 @@
 #include "paddle/fluid/operators/math/math_function.h"
 #include "gtest/gtest.h"
+#include <iostream>
 TEST(math_function, gemm_notrans_cblas) {
  paddle::framework::Tensor input1;
  paddle::framework::Tensor input2;
  paddle::framework::Tensor input3;
+  // fp16 GEMM in cublas requires GPU compute capability >= 53, so return
+  // early when device 0 reports less than that.
+  // NOTE(review): this test is named for cblas rather than cublas -- confirm
+  // that a GPU capability guard is intended here at all.
+  if (paddle::platform::GetCUDAComputeCapability(0) < 53) {
+    std::cout << "Compute capability is "
+              << paddle::platform::GetCUDAComputeCapability(0) << std::endl;
+    return;
+  }
  int m = 2;
  int n = 3;
  int k = 3;

--- a/paddle/fluid/operators/math/math_function_test.cu
+++ b/paddle/fluid/operators/math/math_function_test.cu
@@ -24,15 +24,6 @@ void fill_fp16_data(paddle::platform::float16* in_ptr, size_t size,
  }
 }
-bool is_fp16_supported(int device_id) {
-  cudaDeviceProp device_prop;
-  cudaDeviceProperties(&device_prop, device_id);
-  PADDLE_ENFORCE_EQ(cudaGetLastError(), cudaSuccess);
-  int compute_capability = device_prop.major * 10 + device_prop.minor;
-  std::cout << "compute_capability is " << compute_capability << std::endl;
-  return compute_capability >= 53;
-}
 TEST(math_function, notrans_mul_trans_fp32) {
  using namespace paddle::framework;
  using namespace paddle::platform;
@@ -73,7 +64,10 @@ TEST(math_function, notrans_mul_trans_fp16) {
  using namespace paddle::framework;
  using namespace paddle::platform;
-  if (!is_fp16_supported(0)) {
+  // fp16 GEMM in cublas requires GPU compute capability >= 53, so skip this
+  // test (after reporting the capability) when device 0 reports less.
+  if (GetCUDAComputeCapability(0) < 53) {
+    std::cout << "Compute capability is " << GetCUDAComputeCapability(0)
+              << std::endl;
    return;
  }
@@ -154,7 +148,8 @@ TEST(math_function, trans_mul_notrans_fp16) {
  using namespace paddle::framework;
  using namespace paddle::platform;
-  if (!is_fp16_supported(0)) {
+  // fp16 GEMM in cublas requires GPU compute capability >= 53, so skip this
+  // test when device 0 reports a lower capability.
+  if (GetCUDAComputeCapability(0) < 53) {
    return;
  }
@@ -256,7 +251,8 @@ TEST(math_function, gemm_notrans_cublas_fp16) {
  using namespace paddle::framework;
  using namespace paddle::platform;
-  if (!is_fp16_supported(0)) {
+  // fp16 GEMM in cublas requires GPU compute capability >= 53, so skip this
+  // test when device 0 reports a lower capability.
+  if (GetCUDAComputeCapability(0) < 53) {
    return;
  }
@@ -367,7 +363,8 @@ TEST(math_function, gemm_trans_cublas_fp16) {
  using namespace paddle::framework;
  using namespace paddle::platform;
-  if (!is_fp16_supported(0)) {
+  // fp16 GEMM in cublas requires GPU compute capability >= 53, so skip this
+  // test when device 0 reports a lower capability.
+  if (GetCUDAComputeCapability(0) < 53) {
    return;
  }

--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -33,6 +33,15 @@ int GetCUDADeviceCount() {
  return count;
 }
+// Returns the compute capability of GPU `id` packed into a single integer
+// as major * 10 + minor (so a device reporting 5.3 yields 53).
+int GetCUDAComputeCapability(int id) {
+  // Enforce that the requested index is below the reported device count.
+  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
+  cudaDeviceProp prop;
+  PADDLE_ENFORCE(cudaGetDeviceProperties(&prop, id),
+                 "cudaGetDeviceProperties failed in "
+                 "paddle::platform::GetCUDAComputeCapability");
+  const int major_part = prop.major;
+  const int minor_part = prop.minor;
+  return major_part * 10 + minor_part;
+}
 int GetCUDAMultiProcessors(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int count;

--- a/paddle/fluid/platform/gpu_info.h
+++ b/paddle/fluid/platform/gpu_info.h
@@ -30,6 +30,9 @@ const std::string kEnvFractionGpuMemoryToUse =
 //! Get the total number of GPU devices in system.
 int GetCUDADeviceCount();
+//! Get the compute capability of the ith GPU, encoded as major * 10 + minor
+//! (for example, compute capability 5.3 is returned as 53).
+int GetCUDAComputeCapability(int i);
 //! Get the MultiProcessors of the ith GPU.
 int GetCUDAMultiProcessors(int i);