From 10af44abbaf037cc6c7c06586aed110772e53125 Mon Sep 17 00:00:00 2001
From: Megvii Engine Team <megengine@megvii.com>
Date: Thu, 14 Oct 2021 16:59:22 +0800
Subject: [PATCH] fix(dnn/cuda): fix cudnn conv impl for nchw4_nchw hybrid
 layout

The conv_bias algo *_IMPLICIT_GEMM is disabled for cuDNN versions earlier than 8.0.0 because it produces incorrect results for int8x4->f32 configs.

GitOrigin-RevId: 7cc52d0a85c5ba345af52c61534e7e82f42cc088
---
 dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp | 10 ++++++----
 dnn/test/cuda/conv_bias.cpp                           | 10 +++-------
 2 files changed, 9 insertions(+), 11 deletions(-)
diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
index 6d86a49e5..417839ff8 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
@@ -73,10 +73,12 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
         return false;
     }
 
-    //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-    //! memory access errors, so we have to disable this kernel here.
-    if (param.format == param::ConvBias::Format::NCHW4_NCHW ||
-        param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
+#if CUDNN_MAJOR < 8
+    if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM &&
+        param.format == param::ConvBias::Format::NCHW4_NCHW)
+        return false;
+#endif
+    if (param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
         param.format == param::ConvBias::Format::NCHW32_NCHW4)
         return false;
     if (param.format == param::ConvBias::Format::NCHW &&
diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp
index 5d6ca313e..693041a6d 100644
--- a/dnn/test/cuda/conv_bias.cpp
+++ b/dnn/test/cuda/conv_bias.cpp
@@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
     checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}});
 }
 
-//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-//! memory access errors, so we have to disable this test here.
-#if 0
 TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     require_compute_capability(6, 1);
     using namespace conv_bias;
@@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     auto run = [&](const TensorShapeArray& shapes) {
         opr->param() = param;
         TensorLayout dst_layout;
-        opr->deduce_layout({shapes[0], dtype::Float32()},
-                           {shapes[1], dtype::Float32()}, {}, {}, dst_layout);
+        opr->deduce_layout(
+                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()}, {}, {},
+                dst_layout);
         checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}});
     };
 
@@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
 }
 #endif
 
-#endif
-
 TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) {
     Checker<ConvBiasForward> checker(handle_cuda());
     std::vector<TestArg> args = get_chanwise_args();
-- 
GitLab