diff --git a/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp b/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp index d243924f4ae370927e8c8e1c5e5863b86c843c0c..33ab1728e91e1c7dc89f087dc3b5d67e81e0e23a 100644 --- a/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp +++ b/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp @@ -21,7 +21,7 @@ bool ConvBiasForwardImpl::AlgoMatmul8x8x32::is_available( const SizeArgs& args) const { if (args.z_layout->ndim > 0) return false; - if (cuda::current_device_prop().major < 6) + if (!is_compute_capability_required(6, 1)) return false; auto dst_layout = *args.dst_layout; diff --git a/dnn/src/cuda/matrix_mul/cublas.cpp b/dnn/src/cuda/matrix_mul/cublas.cpp index 17a9cb65f248c21f2d562473e8d84ee5587ec1f2..2863de0ff18ba07d821093990e12f1c75d843fec 100644 --- a/dnn/src/cuda/matrix_mul/cublas.cpp +++ b/dnn/src/cuda/matrix_mul/cublas.cpp @@ -42,7 +42,7 @@ bool MatrixMulForwardImpl::AlgoCuBlas::is_available( */ return args.layout_a.stride[0] % 4 == 0 && args.layout_b.stride[0] % 4 == 0 && - current_device_prop().major > 5; + is_compute_capability_required(6, 1); } return false; } diff --git a/dnn/test/cuda/benchmark.cpp b/dnn/test/cuda/benchmark.cpp index 66318ed3208e148954168af02917ae691f857c59..6ba3687b98f1b956a58e8eefefed819ab9455489 100644 --- a/dnn/test/cuda/benchmark.cpp +++ b/dnn/test/cuda/benchmark.cpp @@ -24,7 +24,7 @@ namespace test { TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32) { - if (cuda::current_device_prop().major < 6) { + if (!cuda::is_compute_capability_required(6, 1)) { printf("Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device" "doesn't support\n"); return; diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp index 12bfe06981934d1be9e087d3e8488f1434aae1a8..1460c196339a0577dab922cfadbd1124ae0350e7 100644 --- a/dnn/test/cuda/conv_bias.cpp +++ b/dnn/test/cuda/conv_bias.cpp @@ -325,7 +325,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_SMALL) { } TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_8x8x32) { - require_compute_capability(6, 0); + require_compute_capability(6, 1); Checker checker(handle_cuda()); checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker( ConvBiasForward::algo_name( @@ -472,7 +472,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL) { } TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) { - require_compute_capability(6, 0); + require_compute_capability(6, 1); Checker checker(handle_cuda()); checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker( ConvBiasForward::algo_name( @@ -517,7 +517,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) { } TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_NCHW4) { - require_compute_capability(6, 0); + require_compute_capability(6, 1); Checker checker(handle_cuda()); checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker( ConvBiasForward::algo_name( diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp index 4930530374b5101d954eb48ddd6fa7087baebf32..b95cfda43c5c8ac74df543c2a24ebe442db753a3 100644 --- a/dnn/test/cuda/convolution.cpp +++ b/dnn/test/cuda/convolution.cpp @@ -30,7 +30,7 @@ namespace test { TEST_F(CUDA, CONVOLUTION_8X8X32) { - if (cuda::current_device_prop().major < 6) { + if (!cuda::is_compute_capability_required(6, 1)) { printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device" "doesn't support\n"); return; @@ -112,7 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD) } TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) { - if (cuda::current_device_prop().major < 6) + if (!cuda::is_compute_capability_required(6, 1)) return; using namespace convolution; Checker checker(handle_cuda()); diff --git a/dnn/test/cuda/convolution3d.cpp b/dnn/test/cuda/convolution3d.cpp index 3a00d5f5b6003409d077f7b0d8cd99855d03f4c4..4412dc58b21f1a7b463936c8adec1fcca106eb31 100644 --- a/dnn/test/cuda/convolution3d.cpp +++ b/dnn/test/cuda/convolution3d.cpp @@ -24,7 +24,7 @@ namespace test { #if 0 TEST_F(CUDA, CONVOLUTION3D_8X8X32) { - if (cuda::current_device_prop().major < 6) { + if (!cuda::is_compute_capability_required(6, 1)) { printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device" "doesn't support\n"); return; diff --git a/dnn/test/cuda/group_conv.cpp b/dnn/test/cuda/group_conv.cpp index e5396fe4f98053bbae2b9174c567372cd9f1d254..a4e3c01918d769006b8c072a4b4d3d3cabff1bc6 100644 --- a/dnn/test/cuda/group_conv.cpp +++ b/dnn/test/cuda/group_conv.cpp @@ -23,7 +23,7 @@ namespace test { TEST_F(CUDA, GROUP_CONV_FORWARD) { - bool is_int_available = (cuda::current_device_prop().major >= 6); + bool is_int_available = cuda::is_compute_capability_required(6, 1); auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH, size_t FW, size_t OC, size_t /* OH */, size_t /* OW */, diff --git a/dnn/test/cuda/group_conv3d.cpp b/dnn/test/cuda/group_conv3d.cpp index a26554b44d8148698081b2ee567eea55add84890..3127adcc1d19624b691912b838d1dc3b6a851e9f 100644 --- a/dnn/test/cuda/group_conv3d.cpp +++ b/dnn/test/cuda/group_conv3d.cpp @@ -21,7 +21,7 @@ namespace megdnn { namespace test { TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD) { - bool is_int_available = (cuda::current_device_prop().major >= 6); + bool is_int_available = cuda::is_compute_capability_required(6, 1); static_cast(is_int_available); auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW, size_t FD, size_t FH, size_t FW, size_t OC, size_t PD, diff --git a/dnn/test/cuda/matrix_mul.cpp b/dnn/test/cuda/matrix_mul.cpp index b0a621181f0ab87e69220143ecc0f152c7043299..6909a866949ee7dd2f6431eb63db0047cfd4b88a 100644 --- a/dnn/test/cuda/matrix_mul.cpp +++ b/dnn/test/cuda/matrix_mul.cpp @@ -193,8 +193,15 @@ TEST_F(CUDA, MATRIX_MUL) Checker checker(handle_cuda()); using Param = MatrixMul::Param; size_t m = 12, n = 16, k = 20; - for (DType dtype: std::array{ - {dtype::Float32(), dtype::Float16(), dtype::Int32()}}) { + + bool is_int_available = cuda::is_compute_capability_required(6, 1); + std::vector dtype_array; + dtype_array.push_back(dtype::Float32()); + dtype_array.push_back(dtype::Float16()); + if (is_int_available) + dtype_array.push_back(dtype::Int32()); + + for (DType dtype : dtype_array) { for (unsigned mask = 0; mask < 4; ++mask) { Param param; param.transposeA = mask & 1;