From f5833a5294e5b3319b75b81c0c181f9829596bbe Mon Sep 17 00:00:00 2001
From: Megvii Engine Team
Date: Wed, 15 Apr 2020 15:19:34 +0800
Subject: [PATCH] fix(dnn/cuda): fix cublas matmul on sm60

GitOrigin-RevId: 3fc0c30a23f1dfe35d6629595b2cb1a8c2f379c5
---
 dnn/src/cuda/conv_bias/matmul_8x8x32.cpp |  2 +-
 dnn/src/cuda/matrix_mul/cublas.cpp       |  2 +-
 dnn/test/cuda/benchmark.cpp              |  2 +-
 dnn/test/cuda/conv_bias.cpp              |  6 +++---
 dnn/test/cuda/convolution.cpp            |  4 ++--
 dnn/test/cuda/convolution3d.cpp          |  2 +-
 dnn/test/cuda/group_conv.cpp             |  2 +-
 dnn/test/cuda/group_conv3d.cpp           |  2 +-
 dnn/test/cuda/matrix_mul.cpp             | 11 +++++++++--
 9 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp b/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp
index d243924f4..33ab1728e 100644
--- a/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp
+++ b/dnn/src/cuda/conv_bias/matmul_8x8x32.cpp
@@ -21,7 +21,7 @@ bool ConvBiasForwardImpl::AlgoMatmul8x8x32::is_available(
         const SizeArgs& args) const {
     if (args.z_layout->ndim > 0)
         return false;
-    if (cuda::current_device_prop().major < 6)
+    if (!is_compute_capability_required(6, 1))
         return false;
 
     auto dst_layout = *args.dst_layout;
diff --git a/dnn/src/cuda/matrix_mul/cublas.cpp b/dnn/src/cuda/matrix_mul/cublas.cpp
index 17a9cb65f..2863de0ff 100644
--- a/dnn/src/cuda/matrix_mul/cublas.cpp
+++ b/dnn/src/cuda/matrix_mul/cublas.cpp
@@ -42,7 +42,7 @@ bool MatrixMulForwardImpl::AlgoCuBlas::is_available(
          */
        return args.layout_a.stride[0] % 4 == 0 &&
               args.layout_b.stride[0] % 4 == 0 &&
-              current_device_prop().major > 5;
+              is_compute_capability_required(6, 1);
    }
    return false;
 }
diff --git a/dnn/test/cuda/benchmark.cpp b/dnn/test/cuda/benchmark.cpp
index 66318ed32..6ba3687b9 100644
--- a/dnn/test/cuda/benchmark.cpp
+++ b/dnn/test/cuda/benchmark.cpp
@@ -24,7 +24,7 @@ namespace test {
 
 TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32)
 {
-    if (cuda::current_device_prop().major < 6) {
+    if (!cuda::is_compute_capability_required(6, 1)) {
         printf("Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp
index 12bfe0698..1460c1963 100644
--- a/dnn/test/cuda/conv_bias.cpp
+++ b/dnn/test/cuda/conv_bias.cpp
@@ -325,7 +325,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_SMALL) {
 }
 
 TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_8x8x32) {
-    require_compute_capability(6, 0);
+    require_compute_capability(6, 1);
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(
             ConvBiasForward::algo_name(
@@ -472,7 +472,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL) {
 }
 
 TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) {
-    require_compute_capability(6, 0);
+    require_compute_capability(6, 1);
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(
             ConvBiasForward::algo_name(
@@ -517,7 +517,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) {
 }
 
 TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_NCHW4) {
-    require_compute_capability(6, 0);
+    require_compute_capability(6, 1);
     Checker<ConvBiasForward> checker(handle_cuda());
     checker.set_before_exec_callback(conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(
             ConvBiasForward::algo_name(
diff --git a/dnn/test/cuda/convolution.cpp b/dnn/test/cuda/convolution.cpp
index 493053037..b95cfda43 100644
--- a/dnn/test/cuda/convolution.cpp
+++ b/dnn/test/cuda/convolution.cpp
@@ -30,7 +30,7 @@ namespace test {
 
 TEST_F(CUDA, CONVOLUTION_8X8X32)
 {
-    if (cuda::current_device_prop().major < 6) {
+    if (!cuda::is_compute_capability_required(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
@@ -112,7 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
 }
 
 TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) {
-    if (cuda::current_device_prop().major < 6)
+    if (!cuda::is_compute_capability_required(6, 1))
         return;
     using namespace convolution;
     Checker<ConvolutionForward> checker(handle_cuda());
diff --git a/dnn/test/cuda/convolution3d.cpp b/dnn/test/cuda/convolution3d.cpp
index 3a00d5f5b..4412dc58b 100644
--- a/dnn/test/cuda/convolution3d.cpp
+++ b/dnn/test/cuda/convolution3d.cpp
@@ -24,7 +24,7 @@ namespace test {
 
 #if 0
 TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
-    if (cuda::current_device_prop().major < 6) {
+    if (!cuda::is_compute_capability_required(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
diff --git a/dnn/test/cuda/group_conv.cpp b/dnn/test/cuda/group_conv.cpp
index e5396fe4f..a4e3c0191 100644
--- a/dnn/test/cuda/group_conv.cpp
+++ b/dnn/test/cuda/group_conv.cpp
@@ -23,7 +23,7 @@ namespace test {
 
 TEST_F(CUDA, GROUP_CONV_FORWARD)
 {
-    bool is_int_available = (cuda::current_device_prop().major >= 6);
+    bool is_int_available = cuda::is_compute_capability_required(6, 1);
     auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH,
                    size_t FW, size_t OC, size_t /* OH */, size_t /* OW */,
diff --git a/dnn/test/cuda/group_conv3d.cpp b/dnn/test/cuda/group_conv3d.cpp
index a26554b44..3127adcc1 100644
--- a/dnn/test/cuda/group_conv3d.cpp
+++ b/dnn/test/cuda/group_conv3d.cpp
@@ -21,7 +21,7 @@ namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD) {
-    bool is_int_available = (cuda::current_device_prop().major >= 6);
+    bool is_int_available = cuda::is_compute_capability_required(6, 1);
     static_cast<void>(is_int_available);
     auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                    size_t FD, size_t FH, size_t FW, size_t OC, size_t PD,
diff --git a/dnn/test/cuda/matrix_mul.cpp b/dnn/test/cuda/matrix_mul.cpp
index b0a621181..6909a8669 100644
--- a/dnn/test/cuda/matrix_mul.cpp
+++ b/dnn/test/cuda/matrix_mul.cpp
@@ -193,8 +193,15 @@ TEST_F(CUDA, MATRIX_MUL)
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     size_t m = 12, n = 16, k = 20;
-    for (DType dtype: std::array<DType, 3>{
-            {dtype::Float32(), dtype::Float16(), dtype::Int32()}}) {
+
+    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    std::vector<DType> dtype_array;
+    dtype_array.push_back(dtype::Float32());
+    dtype_array.push_back(dtype::Float16());
+    if (is_int_available)
+        dtype_array.push_back(dtype::Int32());
+
+    for (DType dtype : dtype_array) {
         for (unsigned mask = 0; mask < 4; ++mask) {
             Param param;
             param.transposeA = mask & 1;
-- 
GitLab