diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
index f87a46ba5e26f7636080b305359b3cb7a585921a..1cbc654603d1e6b4b95281e74a8a4c8ef7151b80 100644
--- a/modules/dnn/include/opencv2/dnn/all_layers.hpp
+++ b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -259,6 +259,7 @@ CV__DNN_INLINE_NS_BEGIN
         bool fusedActivation = false;
         bool fusedAdd = false;
         bool isConv2D = false; // Should be deleted after fastconv branch support Conv1D and Conv3D.
+        bool useWinograd = false; // Flag whether to use Winograd to speed up 3x3 convolution.
     };

     class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
@@ -270,6 +271,7 @@ CV__DNN_INLINE_NS_BEGIN
         // quantization type flag. The perChannel default is true, that means it contains the parameters
         // of per-Channel quantization. Otherwise, that means this layer contains per-Tensor quantized parameters.
         bool per_channel;
+        bool useWinograd = true; // Flag whether to use Winograd to speed up 3x3 convolution.

         static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
     };
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
index 78c18c15b79113b19000cf2ba32ec9bcebeb043a..29d6cfa4f3feeb156c67c3db9a1e796d022aae1c 100644
--- a/modules/dnn/include/opencv2/dnn/dnn.hpp
+++ b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -837,6 +837,12 @@ CV__DNN_INLINE_NS_BEGIN
          */
         CV_WRAP void enableFusion(bool fusion);

+        /** @brief Enables or disables the Winograd compute branch. The Winograd compute branch can speed up
+         * 3x3 Convolution at a small loss of accuracy.
+         * @param useWinograd true to enable the Winograd compute branch. The default is true.
+         */
+        CV_WRAP void enableWinograd(bool useWinograd);
+
         /** @brief Returns overall time for inference and timings (in ticks) for layers.
          *
          * Indexes in returned vector correspond to layers ids.
          * Some layers can be fused with others,
diff --git a/modules/dnn/src/int8layers/convolution_layer.cpp b/modules/dnn/src/int8layers/convolution_layer.cpp
index 320a18e5ab065593c26e326128f749a2d05d8ddc..728ef24d9179eb4de6e8f8f34b06d353394ae5fe 100644
--- a/modules/dnn/src/int8layers/convolution_layer.cpp
+++ b/modules/dnn/src/int8layers/convolution_layer.cpp
@@ -41,7 +41,7 @@ public:
     BaseConvolutionLayerInt8Impl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads, useWinograd);

         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
diff --git a/modules/dnn/src/int8layers/layers_common.hpp b/modules/dnn/src/int8layers/layers_common.hpp
index cb185a9edaa4b08146e2055f1b65614a59edccfc..5fdafbeab8308c21fbb58ac1c0bcc18d07c1d1d5 100644
--- a/modules/dnn/src/int8layers/layers_common.hpp
+++ b/modules/dnn/src/int8layers/layers_common.hpp
@@ -23,7 +23,7 @@ namespace dnn
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
-                                cv::String &padMode, std::vector<size_t>& adjust_pads);
+                                cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);

 void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
diff --git a/modules/dnn/src/layers/convolution_layer.cpp b/modules/dnn/src/layers/convolution_layer.cpp
index 678a052c7cc36e90cba2869d7b0c329dcd5c2b96..bc1acd0f72b48b98ef4ffb3c95eca94824e0cd26 100644
--- a/modules/dnn/src/layers/convolution_layer.cpp
+++ b/modules/dnn/src/layers/convolution_layer.cpp
@@ -89,7 +89,8 @@ public:
     BaseConvolutionLayerImpl(const LayerParams &params)
     {
         setParamsFrom(params);
-        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+        getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations,
+                                   padMode, adjust_pads, useWinograd);

         numOutput = params.get<int>("num_output");
         int ngroups = params.get<int>("group", 1);
@@ -2112,7 +2113,7 @@ public:
             int dilation_w = dilations.back();

             fastConv2dImpl = initFastConv2d(ngroups, K, C, Hk, Wk, stride_w, stride_h, dilation_w,
-                                            dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0]);
+                                            dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0], useWinograd);
         }

         if (fastConv2dImpl)
diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
index d62b6f230c71e8e95aad836733f800fcfdc6a7db..8c829eaf81df908b43bf716b7eabc9b81d78699c 100644
--- a/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
+++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.cpp
@@ -23,7 +23,8 @@ Ptr<FastConv2d> initFastConv2d(
     const std::vector<size_t>& pads_begin,
     const std::vector<size_t>& pads_end,
     InputArray _weightsMat,
-    float* srcBias)
+    float* srcBias,
+    bool useWinograd)
 {
     Ptr<FastConv2d> conv = makePtr<FastConv2d>();
@@ -48,11 +49,11 @@ Ptr<FastConv2d> initFastConv2d(
     const size_t wstep = weightsMat.step1();

 #if CV_NEON // For now, winograd is ARM platform only.
-    if (ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
+    if (useWinograd && ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
         dilation_x == 1 && dilation_y ==1 && K >= 16 && C >= 16)
-        conv->ifWinograd63 = true;
+        conv->useWinograd63 = true;
 #else
-    conv->ifWinograd63 = false;
+    conv->useWinograd63 = false;
 #endif

     float *srcWeights = (float *)weightsMat.data;
@@ -115,7 +116,7 @@ Ptr<FastConv2d> initFastConv2d(
     }});

     // Prepare Weight for Winograd F(6x6, 3x3)
-    if (conv->ifWinograd63)
+    if (conv->useWinograd63)
     {
         initWinograd63(conv, weightsMat, K, C);
     }
@@ -191,10 +192,7 @@ void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>
     }

 #if CV_NEON
-    if (conv->ifWinograd63
-        && inputShape[2] > 12 && inputShape[3] > 12
-        && inputShape[2] < 120 && inputShape[3] < 120
-        )
+    if (conv->useWinograd63 && inputShape[2] > 12 && inputShape[3] > 12)
     {
         if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
             return;
diff --git a/modules/dnn/src/layers/fast_convolution/fast_convolution.hpp b/modules/dnn/src/layers/fast_convolution/fast_convolution.hpp
index ba85077f708f86662d5a3aa63ef61b23f8c26e03..671cb707d1e900ed8f4b1f0fb42b0bf42333e822 100644
--- a/modules/dnn/src/layers/fast_convolution/fast_convolution.hpp
+++ b/modules/dnn/src/layers/fast_convolution/fast_convolution.hpp
@@ -44,7 +44,7 @@ struct FastConv2d
     std::vector<float> weightsBuf;       // For generic Conv 2D
     std::vector<float> weightsWino63Buf; // For Winograd F(6x6, 3x3).
     std::vector<float> biasBuf;
-    bool ifWinograd63 = false;
+    bool useWinograd63 = false;
     bool useAVX2 = checkHardwareSupport(CPU_AVX2);
     bool useNEON = checkHardwareSupport(CPU_NEON);
 };
@@ -58,7 +58,7 @@ Ptr<FastConv2d> initFastConv2d(
     const std::vector<size_t>& pads_begin,
     const std::vector<size_t>& pads_end,
     InputArray weightsMat,
-    float* srcBias);
+    float* srcBias, bool useWinograd);

 // It contains different computing branches, like winograd, 1x1 conv.
 void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks,
diff --git a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp
index fb668d63c689197a3aa1efbbc92dadad67040fcd..7475397901a38bbb8da2217f56f45d103df5af2d 100644
--- a/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp
+++ b/modules/dnn/src/layers/fast_convolution/winograd_3x3s1_f63.cpp
@@ -1689,7 +1689,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output

 void initWinograd63(Ptr<FastConv2d>& conv, InputArray _weightsMat, int K, int C)
 {
-    conv->ifWinograd63 = false;
+    conv->useWinograd63 = false;
 }

 int runWinograd63(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
diff --git a/modules/dnn/src/layers/layers_common.cpp b/modules/dnn/src/layers/layers_common.cpp
index 445a89ff985ffc0e9874abacb768ddf994006491..b128872817d4a40b69903f4bcadb545f86daccf5 100644
--- a/modules/dnn/src/layers/layers_common.cpp
+++ b/modules/dnn/src/layers/layers_common.cpp
@@ -187,12 +187,14 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel,

 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides,
-                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
+                                std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads,
+                                bool& useWinograd)
 {
     util::getKernelSize(params, kernel);
     util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
     util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
     util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
+    useWinograd = params.get<bool>("use_winograd", true);

     for (int i = 0; i < dilations.size(); i++)
         CV_Assert(dilations[i] > 0);
diff --git a/modules/dnn/src/layers/layers_common.hpp b/modules/dnn/src/layers/layers_common.hpp
index 85f442c78e8ff3859a67c995d3c39cdfe20d6c34..4510f6b1067fd5ff5ebe478fc34b01385c7d9d0d 100644
--- a/modules/dnn/src/layers/layers_common.hpp
+++ b/modules/dnn/src/layers/layers_common.hpp
@@ -61,7 +61,7 @@ namespace dnn
 void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
                                 std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
-                                cv::String &padMode, std::vector<size_t>& adjust_pads);
+                                cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);

 void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
                             std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
diff --git a/modules/dnn/src/net.cpp b/modules/dnn/src/net.cpp
index b3cf811a94e36921ea19d2aa81638dc95c50286e..3b200a108e26187392619d99098ebc6c9a0e5585 100644
--- a/modules/dnn/src/net.cpp
+++ b/modules/dnn/src/net.cpp
@@ -395,6 +395,13 @@ void Net::enableFusion(bool fusion)
     return impl->enableFusion(fusion);
 }

+void Net::enableWinograd(bool useWinograd)
+{
+    CV_TRACE_FUNCTION();
+    CV_Assert(impl);
+    return impl->enableWinograd(useWinograd);
+}
+
 void Net::setHalideScheduler(const String& scheduler)
 {
     CV_TRACE_FUNCTION();
diff --git a/modules/dnn/src/net_impl.cpp b/modules/dnn/src/net_impl.cpp
index 3839cba329e37cb89cd014e4597bfa67d6eff74f..5411051484d051d44ed0a531f7c4f50c73284186 100644
--- a/modules/dnn/src/net_impl.cpp
+++ b/modules/dnn/src/net_impl.cpp
@@ -55,6 +55,7 @@ Net::Impl::Impl()
     preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
     preferableTarget = DNN_TARGET_CPU;
     hasDynamicShapes = false;
+    useWinograd = true;
 }

@@ -2038,6 +2039,37 @@ void Net::Impl::getMemoryConsumption(
     }
 }

+void Net::Impl::enableWinograd(bool useWinograd_)
+{
+    if (useWinograd != useWinograd_)
+    {
+        useWinograd = useWinograd_;
+
+        for (MapIdToLayerData::const_iterator it = layers.begin(); it
+             != layers.end(); it++)
+        {
+            int lid = it->first;
+            LayerData &ld = layers[lid];
+            Ptr<Layer>& currLayer = ld.layerInstance;
+
+            if (ld.type == "Convolution")
+            {
+                ld.params.set("use_winograd", useWinograd_);
+                Ptr<ConvolutionLayer> convLayer = ld.layerInstance.dynamicCast<ConvolutionLayer>();
+                if (!convLayer.empty())
+                    convLayer->useWinograd = useWinograd_;
+            }
+
+            if (ld.type == "ConvolutionInt8")
+            {
+                Ptr<ConvolutionLayerInt8> convLayer = currLayer.dynamicCast<ConvolutionLayerInt8>();
+                ld.params.set("use_winograd", useWinograd_);
+                if (!convLayer.empty())
+                    convLayer->useWinograd = useWinograd_;
+            }
+        }
+    }
+}
+
 // TODO drop?
 void Net::Impl::getLayerTypes(std::vector<String>& layersTypes) const
diff --git a/modules/dnn/src/net_impl.hpp b/modules/dnn/src/net_impl.hpp
index 290ce50c134e87f6de98598a5faa09376964437f..08ac1932ca23db3882187d27c73e9ecddc3c2149 100644
--- a/modules/dnn/src/net_impl.hpp
+++ b/modules/dnn/src/net_impl.hpp
@@ -64,6 +64,7 @@ struct Net::Impl : public detail::NetImplBase
     bool netWasQuantized;
     bool fusion;
     bool isAsync; // FIXIT: drop
+    bool useWinograd;

     std::vector<int64> layersTimings;
@@ -211,6 +212,7 @@ struct Net::Impl : public detail::NetImplBase
     void enableFusion(bool fusion_);
     virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+    void enableWinograd(bool useWinograd_);

     void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);
diff --git a/modules/dnn/src/net_quantization.cpp b/modules/dnn/src/net_quantization.cpp
index 0add2d2d79764e3d4294ea57c59fbebee0811fb0..803a240770635d85199ba2f85348be9bba33fe04 100644
--- a/modules/dnn/src/net_quantization.cpp
+++ b/modules/dnn/src/net_quantization.cpp
@@ -51,6 +51,7 @@ Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype,
     setPreferableBackend(net, DNN_BACKEND_OPENCV);
     setPreferableTarget(DNN_TARGET_CPU);
     enableFusion(false);
+    enableWinograd(false);

     if (calibData.isMat())
     {
diff --git a/modules/dnn/test/test_torch_importer.cpp b/modules/dnn/test/test_torch_importer.cpp
index bd9572748bcb3f97f5f32be90b3fb6ed3d6818f1..260e95537d002d5ec01fcfb5c1856bc1e08421e9
100644
--- a/modules/dnn/test/test_torch_importer.cpp
+++ b/modules/dnn/test/test_torch_importer.cpp
@@ -476,6 +476,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
         ASSERT_TRUE(!net.empty());
     }

+    net.enableWinograd(false);
     net.setPreferableBackend(backend);
     net.setPreferableTarget(target);