Unverified commit ec265417, authored by Alexander Smorkalov, committed by GitHub

Merge pull request #22577 from zihaomu:Disable_winograd_branch_in_tryquantize

DNN: add enableWinograd API for Net
@@ -259,6 +259,7 @@ CV__DNN_INLINE_NS_BEGIN
bool fusedActivation = false;
bool fusedAdd = false;
bool isConv2D = false; // Should be deleted after fastconv branch support Conv1D and Conv3D.
+ bool useWinograd = false; // Flag whether to use Winograd to speed up 3x3 convolution.
};
class CV_EXPORTS ConvolutionLayerInt8 : public BaseConvolutionLayer
@@ -270,6 +271,7 @@ CV__DNN_INLINE_NS_BEGIN
// quantization type flag. The perChannel default is true, that means it contains the parameters
// of per-Channel quantization. Otherwise, that means this layer contains per-Tensor quantized parameters.
bool per_channel;
+ bool useWinograd = true; // Flag whether to use Winograd to speed up 3x3 convolution.
static Ptr<BaseConvolutionLayer> create(const LayerParams& params);
};
......
@@ -837,6 +837,12 @@ CV__DNN_INLINE_NS_BEGIN
*/
CV_WRAP void enableFusion(bool fusion);
+    /** @brief Enables or disables the Winograd compute branch. The Winograd compute branch can speed up
+     *  3x3 Convolution at a small loss of accuracy.
+     *  @param useWinograd true to enable the Winograd compute branch. The default is true.
+     */
+    CV_WRAP void enableWinograd(bool useWinograd);
/** @brief Returns overall time for inference and timings (in ticks) for layers.
*
* Indexes in returned vector correspond to layers ids. Some layers can be fused with others,
......
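For reference, here is a minimal usage sketch of the new API. The model path, input shape, and zero-filled blob are placeholders for illustration, not part of this PR:

```cpp
#include <opencv2/dnn.hpp>

int main()
{
    // Placeholder model; any network with 3x3 convolutions applies.
    cv::dnn::Net net = cv::dnn::readNet("model.onnx");
    net.enableWinograd(false); // force the regular convolution branch

    int sz[] = {1, 3, 224, 224};
    cv::Mat blob(4, sz, CV_32F, cv::Scalar(0)); // dummy NCHW input
    net.setInput(blob);
    cv::Mat out = net.forward();
    return 0;
}
```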
@@ -41,7 +41,7 @@ public:
BaseConvolutionLayerInt8Impl(const LayerParams &params)
{
setParamsFrom(params);
- getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+ getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads, useWinograd);
numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);
......
@@ -23,7 +23,7 @@ namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
- cv::String &padMode, std::vector<size_t>& adjust_pads);
+ cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
......
@@ -89,7 +89,8 @@ public:
BaseConvolutionLayerImpl(const LayerParams &params)
{
setParamsFrom(params);
- getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations, padMode, adjust_pads);
+ getConvolutionKernelParams(params, kernel_size, pads_begin, pads_end, strides, dilations,
+                            padMode, adjust_pads, useWinograd);
numOutput = params.get<int>("num_output");
int ngroups = params.get<int>("group", 1);
@@ -2112,7 +2113,7 @@ public:
int dilation_w = dilations.back();
fastConv2dImpl = initFastConv2d(ngroups, K, C, Hk, Wk, stride_w, stride_h, dilation_w,
- dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0]);
+ dilation_h, pads_begin, pads_end, weightsMat, &biasvec[0], useWinograd);
}
if (fastConv2dImpl)
......
@@ -23,7 +23,8 @@ Ptr<FastConv2d> initFastConv2d(
const std::vector<size_t>& pads_begin,
const std::vector<size_t>& pads_end,
InputArray _weightsMat,
- float* srcBias)
+ float* srcBias,
+ bool useWinograd)
{
Ptr<FastConv2d> conv = makePtr<FastConv2d>();
@@ -48,11 +49,11 @@
const size_t wstep = weightsMat.step1();
#if CV_NEON // For now, winograd is ARM platform only.
- if (ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
+ if (useWinograd && ngroups == 1 && Hk ==3 && Wk == 3 && stride_x == 1 && stride_y == 1 &&
dilation_x == 1 && dilation_y ==1 && K >= 16 && C >= 16)
- conv->ifWinograd63 = true;
+ conv->useWinograd63 = true;
#else
- conv->ifWinograd63 = false;
+ conv->useWinograd63 = false;
#endif
float *srcWeights = (float *)weightsMat.data;
@@ -115,7 +116,7 @@
}});
// Prepare Weight for Winograd F(6x6, 3x3)
- if (conv->ifWinograd63)
+ if (conv->useWinograd63)
{
initWinograd63(conv, weightsMat, K, C);
}
@@ -191,10 +192,7 @@ void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>
}
#if CV_NEON
- if (conv->ifWinograd63
-     && inputShape[2] > 12 && inputShape[3] > 12
-     && inputShape[2] < 120 && inputShape[3] < 120
-     )
+ if (conv->useWinograd63 && inputShape[2] > 12 && inputShape[3] > 12)
{
if (runWinograd63(input, fusedAddMat, output, conv, ntasks, minval, maxval, activ, ifMinMaxAct))
return;
......
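Read together, the hunks above gate the Winograd F(6x6, 3x3) branch on the new flag plus shape constraints, and the PR also drops the previous `< 120` upper bound on the spatial dimensions. The combined condition can be restated as the following predicate; this is a condensed sketch for illustration, not code from the patch:

```cpp
// Sketch: when the Winograd F(6x6, 3x3) convolution branch is taken.
// Mirrors the conditions visible in initFastConv2d and runFastConv2d above.
static bool winograd63Eligible(bool useWinograd, int ngroups, int Hk, int Wk,
                               int stride_x, int stride_y,
                               int dilation_x, int dilation_y,
                               int K, int C, int inH, int inW)
{
#if CV_NEON // For now, Winograd is ARM-only in this code path.
    return useWinograd &&
           ngroups == 1 && Hk == 3 && Wk == 3 &&   // ungrouped 3x3 kernels only
           stride_x == 1 && stride_y == 1 &&
           dilation_x == 1 && dilation_y == 1 &&
           K >= 16 && C >= 16 &&                   // enough channels to amortize the transforms
           inH > 12 && inW > 12;                   // large enough input; upper bound removed by this PR
#else
    return false;
#endif
}
```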
@@ -44,7 +44,7 @@ struct FastConv2d
std::vector<float> weightsBuf; // For generic Conv 2D
std::vector<float> weightsWino63Buf; // For Winograd F(6x6, 3x3).
std::vector<float> biasBuf;
- bool ifWinograd63 = false;
+ bool useWinograd63 = false;
bool useAVX2 = checkHardwareSupport(CPU_AVX2);
bool useNEON = checkHardwareSupport(CPU_NEON);
};
@@ -58,7 +58,7 @@ Ptr<FastConv2d> initFastConv2d(
const std::vector<size_t>& pads_begin,
const std::vector<size_t>& pads_end,
InputArray weightsMat,
- float* srcBias);
+ float* srcBias, bool useWinograd);
// It contains different computing branches, like winograd, 1x1 conv.
void runFastConv2d(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks,
......
@@ -1689,7 +1689,7 @@ int runWinograd63(InputArray _input, InputArray _fusedAddMat, OutputArray _output,
void initWinograd63(Ptr<FastConv2d>& conv, InputArray _weightsMat, int K, int C)
{
- conv->ifWinograd63 = false;
+ conv->useWinograd63 = false;
}
int runWinograd63(InputArray _input, OutputArray _output, const Ptr<FastConv2d>& conv, int ntasks, float minval, float maxval, ActivationLayer* activ, bool ifMinMaxAct)
......
@@ -187,12 +187,14 @@ void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel,
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides,
- std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads)
+ std::vector<size_t>& dilations, cv::String &padMode, std::vector<size_t>& adjust_pads,
+ bool& useWinograd)
{
util::getKernelSize(params, kernel);
util::getStrideAndPadding(params, pads_begin, pads_end, strides, padMode, kernel.size());
util::getParameter(params, "dilation", "dilation", dilations, true, std::vector<size_t>(kernel.size(), 1));
util::getParameter(params, "adj", "adj", adjust_pads, true, std::vector<size_t>(kernel.size(), 0));
+ useWinograd = params.get<bool>("use_winograd", true);
for (int i = 0; i < dilations.size(); i++)
CV_Assert(dilations[i] > 0);
......
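Because getConvolutionKernelParams() now reads a `use_winograd` key that defaults to true, the flag can also be controlled per layer through LayerParams. A sketch of a hand-built convolution layer, assuming weights are attached later as usual:

```cpp
#include <opencv2/dnn.hpp>

cv::dnn::LayerParams lp;
lp.set("kernel_size", 3);
lp.set("num_output", 64);
lp.set("use_winograd", false); // opt this one convolution out of the Winograd branch

// ConvolutionLayer::create() goes through BaseConvolutionLayerImpl, whose
// constructor calls getConvolutionKernelParams() and picks up the flag.
cv::Ptr<cv::dnn::BaseConvolutionLayer> conv = cv::dnn::ConvolutionLayer::create(lp);
```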
@@ -61,7 +61,7 @@ namespace dnn
{
void getConvolutionKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<size_t>& pads_begin,
std::vector<size_t>& pads_end, std::vector<size_t>& strides, std::vector<size_t>& dilations,
- cv::String &padMode, std::vector<size_t>& adjust_pads);
+ cv::String &padMode, std::vector<size_t>& adjust_pads, bool& useWinograd);
void getPoolingKernelParams(const LayerParams &params, std::vector<size_t>& kernel, std::vector<bool>& globalPooling,
std::vector<size_t>& pads_begin, std::vector<size_t>& pads_end, std::vector<size_t>& strides, cv::String &padMode);
......
@@ -395,6 +395,13 @@ void Net::enableFusion(bool fusion)
return impl->enableFusion(fusion);
}
+ void Net::enableWinograd(bool useWinograd)
+ {
+     CV_TRACE_FUNCTION();
+     CV_Assert(impl);
+     return impl->enableWinograd(useWinograd);
+ }
void Net::setHalideScheduler(const String& scheduler)
{
CV_TRACE_FUNCTION();
......
@@ -55,6 +55,7 @@ Net::Impl::Impl()
preferableBackend = (Backend)getParam_DNN_BACKEND_DEFAULT();
preferableTarget = DNN_TARGET_CPU;
hasDynamicShapes = false;
+ useWinograd = true;
}
@@ -2038,6 +2039,37 @@
}
}
+ void Net::Impl::enableWinograd(bool useWinograd_)
+ {
+     if (useWinograd != useWinograd_)
+     {
+         useWinograd = useWinograd_;
+         for (MapIdToLayerData::const_iterator it = layers.begin(); it != layers.end(); it++)
+         {
+             int lid = it->first;
+             LayerData &ld = layers[lid];
+             Ptr<Layer>& currLayer = ld.layerInstance;
+             if (ld.type == "Convolution")
+             {
+                 ld.params.set("use_winograd", useWinograd_);
+                 Ptr<ConvolutionLayer> convLayer = ld.layerInstance.dynamicCast<ConvolutionLayer>();
+                 if (!convLayer.empty())
+                     convLayer->useWinograd = useWinograd_;
+             }
+             if (ld.type == "ConvolutionInt8")
+             {
+                 Ptr<ConvolutionLayerInt8> convLayer = currLayer.dynamicCast<ConvolutionLayerInt8>();
+                 ld.params.set("use_winograd", useWinograd_);
+                 if (!convLayer.empty())
+                     convLayer->useWinograd = useWinograd_;
+             }
+         }
+     }
+ }
// TODO drop?
void Net::Impl::getLayerTypes(std::vector<String>& layersTypes) const
......
@@ -64,6 +64,7 @@ struct Net::Impl : public detail::NetImplBase
bool netWasQuantized;
bool fusion;
bool isAsync; // FIXIT: drop
+ bool useWinograd;
std::vector<int64> layersTimings;
@@ -211,6 +212,7 @@ struct Net::Impl : public detail::NetImplBase
void enableFusion(bool fusion_);
virtual void fuseLayers(const std::vector<LayerPin>& blobsToKeep_);
+ void enableWinograd(bool useWinograd_);
void allocateLayers(const std::vector<LayerPin>& blobsToKeep_);
......
@@ -51,6 +51,7 @@ Net Net::Impl::quantize(Net& net, InputArrayOfArrays calibData, int inputsDtype,
setPreferableBackend(net, DNN_BACKEND_OPENCV);
setPreferableTarget(DNN_TARGET_CPU);
enableFusion(false);
+ enableWinograd(false);
if (calibData.isMat())
{
......
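This hunk is what the branch name (Disable_winograd_branch_in_tryquantize) refers to: calibration now runs with Winograd disabled, so the collected activation ranges come from the regular convolution branch. A usage sketch, with the model path and the single zero calibration blob as placeholders:

```cpp
#include <opencv2/dnn.hpp>
#include <vector>

int main()
{
    cv::dnn::Net net = cv::dnn::readNet("model.onnx"); // placeholder model
    int sz[] = {1, 3, 224, 224};
    std::vector<cv::Mat> calib = {cv::Mat(4, sz, CV_32F, cv::Scalar(0))};

    // quantize() now calls enableWinograd(false) internally before calibration.
    cv::dnn::Net qnet = net.quantize(calib, CV_32F, CV_8S);
    return 0;
}
```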
@@ -476,6 +476,7 @@ TEST_P(Test_Torch_nets, ENet_accuracy)
ASSERT_TRUE(!net.empty());
}
+ net.enableWinograd(false);
net.setPreferableBackend(backend);
net.setPreferableTarget(target);
......