diff --git a/dnn/test/cuda/batch_conv_bias.cpp b/dnn/test/cuda/batch_conv_bias.cpp
index 0250f26dfd7e60e5f55cb36c454aa097e2d33e0c..51c4f6dec6df49318f7d35cfb916ee8800abfc44 100644
--- a/dnn/test/cuda/batch_conv_bias.cpp
+++ b/dnn/test/cuda/batch_conv_bias.cpp
@@ -241,10 +241,14 @@ void benchmark_target_algo(Handle* handle, const std::vector<BenchArgs>& args,
     "v" V(CUDNN_MAJOR) "." V(CUDNN_MINOR) "." V(CUDNN_PATCHLEVEL)
     benchmarker_cudnn.set_before_exec_callback(
             conv_bias::ConvBiasAlgoChecker<ConvBiasForward>(
-                    "CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_"
-                    "GEMM" CUDNN_VERSION_STRING));
-    benchmarker_matmul.set_before_exec_callback(
-            AlgoChecker<BatchedMatrixMul>("BRUTE_FORCE-CUBLAS"));
+                    ConvBiasForward::algo_name<ConvBias::DefaultParam>(
+                            "CUDNN:ConvBiasActivation:"
+                            "CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_"
+                            "GEMM" CUDNN_VERSION_STRING,
+                            {})
+                            .c_str()));
+    benchmarker_matmul.set_before_exec_callback(AlgoChecker<BatchedMatrixMul>(
+            ExecutionPolicyAlgoName{"BRUTE_FORCE", {{"CUBLAS", {}}}}));
 
     benchmarker.set_dtype(0, src_dtype)
             .set_dtype(1, filter_dtype)
diff --git a/dnn/test/cuda/benchmark.cpp b/dnn/test/cuda/benchmark.cpp
index f54d804fb51ef35afd953144fd71b0db6d0428a2..f84d6d784df7a0fdc0e9047c887854547b1190da 100644
--- a/dnn/test/cuda/benchmark.cpp
+++ b/dnn/test/cuda/benchmark.cpp
@@ -41,10 +41,12 @@ TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32)
         auto time_in_ms_float = benchmarker.set_param(param_float)
             .set_dtype(0, dtype::Float32())
             .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
             .execs({src_float, filter_float, {}});
         auto time_in_ms_int = benchmarker.set_param(param_int)
             .set_dtype(0, dtype::Int8())
             .set_dtype(1, dtype::Int8())
+            .set_dtype(2, dtype::Int32())
             .execs({src_int, filter_int, {}});
         std::cout << "1x1: N=" << N << " OC=" << OC << " IC=" << IC
             << " H=" << H << " W=" << W
@@ -67,10 +69,12 @@ TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32)
         auto time_in_ms_float = benchmarker.set_param(param_float)
             .set_dtype(0, dtype::Float32())
             .set_dtype(1, dtype::Float32())
+            .set_dtype(2, dtype::Float32())
             .execs({src_float, filter_float, {}});
         auto time_in_ms_int = benchmarker.set_param(param_int)
             .set_dtype(0, dtype::Int8())
             .set_dtype(1, dtype::Int8())
+            .set_dtype(2, dtype::Int32())
             .execs({src_int, filter_int, {}});
         std::cout << "chanwise: N=" << N << " C=" << C
             << " H=" << H << " W=" << W << " F=" << F