From 10af44abbaf037cc6c7c06586aed110772e53125 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Thu, 14 Oct 2021 16:59:22 +0800 Subject: [PATCH] fix(dnn/cuda): fix cudnn conv impl for nchw4_nchw hybrid layout the conv_bias algo *_IMPLICIT_GEMM in cudnn less than 8.0.0 is disabled due to the incorrect result for int8x4->f32 configs GitOrigin-RevId: 7cc52d0a85c5ba345af52c61534e7e82f42cc088 --- dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp | 10 ++++++---- dnn/test/cuda/conv_bias.cpp | 10 +++------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp index 6d86a49e5..417839ff8 100644 --- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp +++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp @@ -73,10 +73,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available( return false; } - //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal - //! memory access errors, so we have to disable this kernel here. - if (param.format == param::ConvBias::Format::NCHW4_NCHW || - param.format == param::ConvBias::Format::NCHW4_NCHW32 || +#if CUDNN_MAJOR < 8 + if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM && + param.format == param::ConvBias::Format::NCHW4_NCHW) + return false; +#endif + if (param.format == param::ConvBias::Format::NCHW4_NCHW32 || param.format == param::ConvBias::Format::NCHW32_NCHW4) return false; if (param.format == param::ConvBias::Format::NCHW && diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp index 5d6ca313e..693041a6d 100644 --- a/dnn/test/cuda/conv_bias.cpp +++ b/dnn/test/cuda/conv_bias.cpp @@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) { checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}}); } -//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal -//! memory access errors, so we have to disable this test here. -#if 0 TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) { require_compute_capability(6, 1); using namespace conv_bias; @@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) { auto run = [&](const TensorShapeArray& shapes) { opr->param() = param; TensorLayout dst_layout; - opr->deduce_layout({shapes[0], dtype::Float32()}, - {shapes[1], dtype::Float32()}, {}, {}, dst_layout); + opr->deduce_layout( + {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()}, {}, {}, + dst_layout); checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}}); }; @@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) { } #endif -#endif - TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) { Checker checker(handle_cuda()); std::vector args = get_chanwise_args(); -- GitLab