Commit 93894402 authored by Megvii Engine Team

fix(mgb/dnn): fix cudnn8 convbias

GitOrigin-RevId: 0fdbfd258ce2c83e2cf846c13edff090ce94b0ef
Parent 5427a67c
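The substance of this fix is the availability guard added below to the cuDNN conv-bias algorithm: on cuDNN 8.0/8.1 (CUDNN_MAJOR == 8, CUDNN_MINOR < 2), CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM is reported as unavailable for quantized int8 NCHW4 convolutions whose total output channel count (group * ocpg) exceeds 256, so algorithm selection falls back to other candidates. A minimal standalone sketch of the same version-gated predicate follows; it is an illustration only, and the function name, parameter names, and hard-coded version macros are assumptions, not part of the MegEngine sources.

// Illustrative sketch only: mirrors the shape of the guard added in this
// commit, outside the real ConvBiasForwardImpl class. All names are made up.
#include <cstdio>

// The real check reads CUDNN_MAJOR/CUDNN_MINOR from <cudnn.h>; hard-coded
// here so the sketch builds without a CUDA toolchain installed.
#ifndef CUDNN_MAJOR
#define CUDNN_MAJOR 8
#define CUDNN_MINOR 1
#endif

// Returns false when IMPLICIT_PRECOMP_GEMM should be skipped: cuDNN 8.0/8.1,
// NCHW4 layout, int8 data, and more than 256 output channels in total.
bool precomp_gemm_available(bool is_nchw4, bool is_int8, int group, int ocpg) {
#if (CUDNN_MAJOR == 8 && CUDNN_MINOR < 2)
    if (is_nchw4 && is_int8 && group * ocpg > 256) {
        return false;
    }
#endif
    return true;
}

int main() {
    // 260 output channels in a quantized NCHW4 conv: rejected on cuDNN 8.0/8.1.
    std::printf("%d\n", precomp_gemm_available(true, true, 1, 260));
    // 64 output channels: still allowed.
    std::printf("%d\n", precomp_gemm_available(true, true, 1, 64));
    return 0;
}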
@@ -164,15 +164,14 @@ std::string ConvBiasForwardImpl::AlgoBase::SizeArgs::to_string() const {
megdnn_throw("invalid conv bias nonlinear mode");
}
return ssprintf(
"src=%s, filter=%u{%u,%u,%u,%u}, bias=%s, z=%s, dst=%s, "
"src=%s, filter=%s, bias=%s, z=%s, dst=%s, "
"pad=%ux%u, stride=%ux%u, dilate=%ux%u, xcorr=%d, dtype=%s,%s, "
"nonlinear_mode=%s",
- src_layout->to_string().c_str(), fm.group, fm.ocpg, fm.icpg,
- fm.spatial[0], fm.spatial[1], bias_layout->to_string().c_str(),
- z_layout->to_string().c_str(), dst_layout->to_string().c_str(),
- fm.padding[0], fm.padding[1], fm.stride[0], fm.stride[1],
- fm.dilation[0], fm.dilation[1], !fm.should_flip,
- src_layout->dtype.name(), dst_layout->dtype.name(),
+ src_layout->to_string().c_str(), filter_layout->to_string().c_str(),
+ bias_layout->to_string().c_str(), z_layout->to_string().c_str(),
+ dst_layout->to_string().c_str(), fm.padding[0], fm.padding[1],
+ fm.stride[0], fm.stride[1], fm.dilation[0], fm.dilation[1],
+ !fm.should_flip, src_layout->dtype.name(), dst_layout->dtype.name(),
nonlinear_mode_str.c_str());
}
@@ -35,6 +35,17 @@ bool ConvBiasForwardImpl::AlgoCUDNNConvBiasActivation::is_available(
return false;
}
auto&& param = args.opr->param();
+ #if (CUDNN_MAJOR == 8 && CUDNN_MINOR < 2)
+ if (m_cudnn_enum == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM &&
+ param.format == param::ConvBias::Format::NCHW4 &&
+ args.filter_meta.group * args.filter_meta.ocpg > 256 &&
+ args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8 &&
+ args.filter_layout->dtype.enumv() == DTypeEnum::QuantizedS8) {
+ return false;
+ }
+ #endif
//! FIXME: conv kernel of cudnn for NCHW4_NCHW tensor format causes illegal
//! memory access errors, so we have to disable this kernel here.
if (param.format == param::ConvBias::Format::NCHW4_NCHW ||
@@ -97,8 +97,9 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
auto conv_bias_algo = cb(algo_perf[i].algo);
if (conv_bias_algo->is_available_attribute(
args, positive_attr, negative_attr,
- workspace_limit_in_bytes))
+ workspace_limit_in_bytes)) {
return conv_bias_algo;
+ }
}
#else
cudnnConvolutionFwdAlgo_t algo;
@@ -523,6 +523,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_NCHW4) {
};
run({{1, 4, 4, 4, 4}, {4, 4, 3, 3, 4}, {1, 1, 1, 1, 4}});
+ run({{1, 4, 4, 4, 4}, {260, 4, 3, 3, 4}, {1, 65, 1, 1, 4}});
run({{20, 1, 24, 24, 4}, {24, 1, 2, 2, 4}, {1, 6, 1, 1, 4}});
run({{20, 2, 24, 24, 4}, {24, 2, 3, 3, 4}, {1, 6, 1, 1, 4}});