diff --git a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
index 6d86a49e54f111d4290e201d1218d880ad85de79..417839ff8eac888742190986c61cbe1678529f97 100644
--- a/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
+++ b/dnn/src/cuda/conv_bias/cudnn_conv_bias_activation.cpp
@@ -73,10 +73,15 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
         return false;
     }
 
-    //! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-    //! memory access errors, so we have to disable this kernel here.
-    if (param.format == param::ConvBias::Format::NCHW4_NCHW ||
-        param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
+#if CUDNN_MAJOR < 8
+    //! NOTE(review): the cudnn conv kernel for the NCHW4_NCHW format used to
+    //! be disabled here for causing illegal memory access errors; presumably
+    //! only IMPLICIT_GEMM with cudnn < 8 is still affected -- TODO confirm.
+    if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM &&
+        param.format == param::ConvBias::Format::NCHW4_NCHW)
+        return false;
+#endif
+    if (param.format == param::ConvBias::Format::NCHW4_NCHW32 ||
         param.format == param::ConvBias::Format::NCHW32_NCHW4)
         return false;
     if (param.format == param::ConvBias::Format::NCHW &&
diff --git a/dnn/test/cuda/conv_bias.cpp b/dnn/test/cuda/conv_bias.cpp
index 5d6ca313eb5bf5aa05f7e26c8eb4cc6af1d863f5..693041a6d8f68385d4e9c8d34fc320c7bf735d86 100644
--- a/dnn/test/cuda/conv_bias.cpp
+++ b/dnn/test/cuda/conv_bias.cpp
@@ -571,9 +571,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
     checker.exec({{1, 4, 2, 2, 4}, {16, 4, 3, 3, 4}, {1, 4, 1, 1, 4}, {}, {}});
 }
 
-//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
-//! memory access errors, so we have to disable this test here.
-#if 0
 TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     require_compute_capability(6, 1);
     using namespace conv_bias;
@@ -600,8 +597,9 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
     auto run = [&](const TensorShapeArray& shapes) {
         opr->param() = param;
         TensorLayout dst_layout;
-        opr->deduce_layout({shapes[0], dtype::Float32()},
-                           {shapes[1], dtype::Float32()}, {}, {}, dst_layout);
+        opr->deduce_layout(
+                {shapes[0], dtype::Float32()}, {shapes[1], dtype::Float32()}, {}, {},
+                dst_layout);
         checker.execs({shapes[0], shapes[1], shapes[2], dst_layout, {}});
     };
 
@@ -631,8 +629,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4_NCHW) {
 }
 #endif
 
-#endif
-
 TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE) {
     Checker<ConvBiasForward> checker(handle_cuda());
     std::vector<TestArg> args = get_chanwise_args();