diff --git a/dnn/src/cuda/conv_bias/opr_impl.cpp b/dnn/src/cuda/conv_bias/opr_impl.cpp index 0a606e2ba6b1f9e365fc5780564f2616cdd46789..821f4e1c3d593237d61b0d592d76d0d11d20fd21 100644 --- a/dnn/src/cuda/conv_bias/opr_impl.cpp +++ b/dnn/src/cuda/conv_bias/opr_impl.cpp @@ -148,9 +148,9 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic( //! choose for large kernel cases size_t fh = args.filter_meta.spatial[0], fw = args.filter_meta.spatial[1]; size_t hi = src[2], wi = src[3]; - const bool prefer_dnn_lk_implbmm = - hi <= 2 * fh && wi <= 2 * fw && wi < 32 && hi <= 32; - const bool prefer_direct_lk = fh > 10 && fw > 10; + const bool prefer_dnn_lk_implbmm = hi <= 2 * fh && wi <= 2 * fw; + //! filter size > 9, choose large kernel cases + const bool prefer_direct_lk = fh > 9 && fw > 9; //! avoid bad case in cudnn, check dnn chanwise impl first if (is_chanwise) { if (prefer_dnn_lk_implbmm) { diff --git a/dnn/src/cuda/convolution/opr_impl.cpp b/dnn/src/cuda/convolution/opr_impl.cpp index 5a0109cfe6037ab1ae3267f7d9a4a024af12192c..5ef7404f8b4e1dd08c21ee3bfd3cdc12e4ef6fc1 100644 --- a/dnn/src/cuda/convolution/opr_impl.cpp +++ b/dnn/src/cuda/convolution/opr_impl.cpp @@ -119,10 +119,10 @@ ConvolutionBackwardDataImpl::Algorithm* ConvolutionBackwardDataImpl:: size_t fh = args.filter_meta.spatial[0], fw = args.filter_meta.spatial[1]; size_t ho = diff[2], wo = diff[3]; const bool prefer_dnn_lk_implbmm = args.filter_meta.format == Param::Format::NCHW && - ho <= 2 * fh && wo <= 2 * fw && ho < 32 && - wo < 32; + ho <= 2 * fh && wo <= 2 * fw; + //! filter size > 9, choose large kernel cases const bool prefer_direct_lk = - args.filter_meta.format == Param::Format::NCHW && fh > 10 && fw > 10; + args.filter_meta.format == Param::Format::NCHW && fh > 9 && fw > 9; if (prefer_dnn_lk_implbmm) { #if CUDA_VERSION >= 10020 if (sm_algo_pack.implbmm_nchw_hmma[0].is_available_attribute(