diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index 363c76f0d2b223d7e6c41e9fe7abae6e5386619c..c71ec626e67ccf212bf57efcb1859bd0a5aad32e 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -241,6 +241,33 @@ static inline IppDataType ippiGetDataType(int depth) depth == CV_64F ? ipp64f : (IppDataType)-1; }
+// IPP temporary buffer helper.
+// Owns a block obtained from ippMalloc() and hands it back to ippFree() when
+// the object is destroyed, so early returns in the caller cannot leak it.
+template<typename T>
+class IppAutoBuffer
+{
+public:
+    IppAutoBuffer() : m_pBuffer(NULL) {}
+    IppAutoBuffer(int size) : m_pBuffer(NULL) { Alloc(size); }
+    ~IppAutoBuffer() { Release(); }
+    // Allocates 'size' bytes with ippMalloc(). A buffer that is already held is
+    // freed first, so repeated calls do not leak. The result is not checked here.
+    T* Alloc(int size) { Release(); m_pBuffer = (T*)ippMalloc(size); return m_pBuffer; }
+    // Frees the buffer and clears the pointer, so that a later Release() or the
+    // destructor cannot pass the same block to ippFree() a second time.
+    void Release() { if(m_pBuffer) { ippFree(m_pBuffer); m_pBuffer = NULL; } }
+    inline operator T* () { return m_pBuffer; }
+    inline operator const T* () const { return m_pBuffer; }
+private:
+    // Disable copy operations: declared but intentionally left undefined,
+    // because a copy would make two objects free the same buffer.
+    IppAutoBuffer(const IppAutoBuffer &);
+    IppAutoBuffer& operator =(const IppAutoBuffer &);
+
+    T* m_pBuffer;
+};
+
 #else #define IPP_VERSION_X100 0 #endif diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index cb639462f9e42a9bab1d3b22f00f898812769a69..99f6bf95effd14d5dd0f018ba87b5deafcb3d63b 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -3131,7 +3131,7 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len) static double dotProd_16s(const short* src1, const short* src2, int len) { -#if (ARITHM_USE_IPP == 1) +#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0 CV_IPP_CHECK() { double r = 0; diff --git a/modules/core/src/system.cpp b/modules/core/src/system.cpp index 81d555d81e4aa792ec605ded7ca3b0df579ca8f4..8c0072aed2a43c6d8264dc9c386eb51007d96fa8 100644 --- a/modules/core/src/system.cpp +++ b/modules/core/src/system.cpp @@ -1318,6 +1318,12 @@ public: ippFeatures = ippCPUID_SSE; else if(env == "sse2") ippFeatures = ippCPUID_SSE2; + else if(env == "sse3") + ippFeatures 
= ippCPUID_SSE3; + else if(env == "ssse3") + ippFeatures = ippCPUID_SSSE3; + else if(env == "sse41") + ippFeatures = ippCPUID_SSE41; else if(env == "sse42") ippFeatures = ippCPUID_SSE42; else if(env == "avx") diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp index 587c50ba35068de46529252acb840e83b52e6795..5eec45f9f379a18724d45c53a8e10fa85f030ab1 100644 --- a/modules/imgproc/src/filter.cpp +++ b/modules/imgproc/src/filter.cpp @@ -4579,7 +4579,11 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth, int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ktype = kernel.type(), kdepth = CV_MAT_DEPTH(ktype); bool isolated = (borderType & BORDER_ISOLATED) != 0; +#if IPP_VERSION_X100 >= 900 + Point ippAnchor((kernel.cols-1)/2, (kernel.rows-1)/2); +#else Point ippAnchor(kernel.cols >> 1, kernel.rows >> 1); +#endif int borderTypeNI = borderType & ~BORDER_ISOLATED; IppiBorderType ippBorderType = ippiGetBorderType(borderTypeNI); @@ -4610,24 +4614,64 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth, if ((status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize)) >= 0) { - IppiFilterBorderSpec * spec = (IppiFilterBorderSpec *)ippMalloc(specSize); - Ipp8u * buffer = ippsMalloc_8u(bufsize); + IppAutoBuffer spec(specSize); + IppAutoBuffer buffer(bufsize); Ipp32f borderValue[4] = { 0, 0, 0, 0 }; - Mat reversedKernel; - flip(kernel, reversedKernel, -1); - - if ((kdepth == CV_32F && (status = ippiFilterBorderInit_32f((const Ipp32f *)reversedKernel.data, kernelSize, - dataType, cn, ippRndFinancial, spec)) >= 0 ) || - (kdepth == CV_16S && (status = ippiFilterBorderInit_16s((const Ipp16s *)reversedKernel.data, - kernelSize, 0, dataType, cn, ippRndFinancial, spec)) >= 0)) + if(kdepth == CV_32F) { - status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize, - ippBorderType, borderValue, spec, buffer); + Ipp32f *pKerBuffer = 
(Ipp32f*)kernel.data; + IppAutoBuffer kerTmp; + int kerStep = sizeof(Ipp32f)*kernelSize.width; +#if IPP_VERSION_X100 >= 900 + if(kernel.step != kerStep) + { + kerTmp.Alloc(kerStep*kernelSize.height); + if(ippiCopy_32f_C1R((Ipp32f*)kernel.data, (int)kernel.step, kerTmp, kerStep, kernelSize) < 0) + return false; + pKerBuffer = kerTmp; + } +#else + kerTmp.Alloc(kerStep*kernelSize.height); + Mat kerFlip(Size(kernelSize.width, kernelSize.height), CV_32FC1, kerTmp, kerStep); + flip(kernel, kerFlip, -1); + pKerBuffer = kerTmp; +#endif + + if((status = ippiFilterBorderInit_32f(pKerBuffer, kernelSize, + dataType, cn, ippRndFinancial, spec)) >= 0 ) + { + status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize, + ippBorderType, borderValue, spec, buffer); + } } + else if(kdepth == CV_16S) + { + Ipp16s *pKerBuffer = (Ipp16s*)kernel.data; + IppAutoBuffer kerTmp; + int kerStep = sizeof(Ipp16s)*kernelSize.width; +#if IPP_VERSION_X100 >= 900 + if(kernel.step != kerStep) + { + kerTmp.Alloc(kerStep*kernelSize.height); + if(ippiCopy_16s_C1R((Ipp16s*)kernel.data, (int)kernel.step, kerTmp, kerStep, kernelSize) < 0) + return false; + pKerBuffer = kerTmp; + } +#else + kerTmp.Alloc(kerStep*kernelSize.height); + Mat kerFlip(Size(kernelSize.width, kernelSize.height), CV_16SC1, kerTmp, kerStep); + flip(kernel, kerFlip, -1); + pKerBuffer = kerTmp; +#endif - ippsFree(buffer); - ippsFree(spec); + if((status = ippiFilterBorderInit_16s(pKerBuffer, kernelSize, + 0, dataType, cn, ippRndFinancial, spec)) >= 0) + { + status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize, + ippBorderType, borderValue, spec, buffer); + } + } } if (status >= 0) diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 1ec1d25bf63bfc0fbfd2b58c467483a2541dbc5a..be55e356153345d93fcecda9ef28b6a09215737a 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1231,17 +1231,18 @@ static bool ipp_MorphReplicate(int op, const Mat 
&src, Mat &dst, const Mat &kern } else { +#if IPP_VERSION_X100 != 900 // Problems with accuracy in 9.0.0 #if IPP_VERSION_X100 >= 900 - if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supporeted since IPP 9.0.0 + if (((kernelSize.width - 1) / 2 != anchor.x) || ((kernelSize.height - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supporeted since IPP 9.0.0 return false; - #define IPP_MORPH_CASE(cvtype, flavor, data_type) \ + #define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \ case cvtype: \ {\ if (op == MORPH_ERODE)\ {\ int bufSize = 0;\ - if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\ + if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\ return false;\ AutoBuffer buf(bufSize + 64);\ uchar* buffer = alignPtr((uchar*)buf, 32);\ @@ -1250,7 +1251,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern else\ {\ int bufSize = 0;\ - if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\ + if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\ return false;\ AutoBuffer buf(bufSize + 64);\ uchar* buffer = alignPtr((uchar*)buf, 32);\ @@ -1261,7 +1262,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern #else IppiPoint point = {anchor.x, anchor.y}; - #define IPP_MORPH_CASE(cvtype, flavor, data_type) \ + #define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \ case cvtype: \ {\ int bufSize = 0;\ @@ -1279,17 +1280,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern CV_SUPPRESS_DEPRECATED_START switch (type) { - IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u); - IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u); - IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u); - IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f); - IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f); - IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f); 
+ IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u, 1); + IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u, 3); + IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u, 4); + IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f, 1); + IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f, 3); + IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f, 4); default: ; } CV_SUPPRESS_DEPRECATED_END #undef IPP_MORPH_CASE +#endif } #else CV_UNUSED(op); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(kernel); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(rectKernel); diff --git a/modules/imgproc/src/smooth.cpp b/modules/imgproc/src/smooth.cpp index 19b0bf2d23c3ed60bb36a39a9ed9d9325b00b150..9fa92037c11b5ce74d893fa6ba0291339af19a59 100644 --- a/modules/imgproc/src/smooth.cpp +++ b/modules/imgproc/src/smooth.cpp @@ -1695,32 +1695,33 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, if (ippiFilterGaussianGetBufferSize(roiSize, (Ipp32u)ksize.width, dataType, cn, &specSize, &bufferSize) >= 0) { - IppFilterGaussianSpec * pSpec = (IppFilterGaussianSpec *)ippMalloc(specSize); - Ipp8u * pBuffer = (Ipp8u*)ippMalloc(bufferSize); + IppAutoBuffer spec(specSize); + IppAutoBuffer buffer(bufferSize); - if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, 1, pSpec, pBuffer) >= 0) + if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, cn, spec, buffer) >= 0) { #define IPP_FILTER_GAUSS_C1(ippfavor) \ { \ - typedef Ipp##ippfavor ippType; \ - ippType borderValues = 0; \ - status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr(), (int)src.step, \ - dst.ptr(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \ + Ipp##ippfavor borderValues = 0; \ + status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr(), (int)src.step, \ + dst.ptr(), (int)dst.step, roiSize, borderValues, spec, buffer); \ } #define IPP_FILTER_GAUSS_CN(ippfavor, ippcn) \ { \ - typedef Ipp##ippfavor ippType; \ - ippType borderValues[] = { 0, 0, 0 }; \ - status = 
ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr(), (int)src.step, \ - dst.ptr(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \ + Ipp##ippfavor borderValues[] = { 0, 0, 0 }; \ + status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr(), (int)src.step, \ + dst.ptr(), (int)dst.step, roiSize, borderValues, spec, buffer); \ } IppStatus status = ippStsErr; #if !HAVE_ICV +#if IPP_VERSION_X100 > 901 // Buffer overflow in IPP if (type == CV_8UC1) IPP_FILTER_GAUSS_C1(8u) - else if (type == CV_8UC3) + else +#endif + if (type == CV_8UC3) IPP_FILTER_GAUSS_CN(8u, 3) else if (type == CV_16UC1) IPP_FILTER_GAUSS_C1(16u) @@ -1737,11 +1738,6 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize, if (type == CV_32FC1) IPP_FILTER_GAUSS_C1(32f) - if (pSpec) - ippFree(pSpec); - if (pBuffer) - ippFree(pBuffer); - if(status >= 0) return true; diff --git a/modules/imgproc/src/sumpixels.cpp b/modules/imgproc/src/sumpixels.cpp index 8a667d291217ac9d445d8ee3b8a5eb56ac1295dc..7771d2cab4d8fb75c94adb1ba627c83de7fd38fb 100755 --- a/modules/imgproc/src/sumpixels.cpp +++ b/modules/imgproc/src/sumpixels.cpp @@ -425,7 +425,7 @@ namespace cv { static bool ipp_integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth) { -#if !defined(HAVE_IPP_ICV_ONLY) // Disabled on ICV due invalid results +#if !defined(HAVE_IPP_ICV_ONLY) && (IPP_VERSION_X100 != 900) // Disabled on ICV due invalid results int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); if( sdepth <= 0 ) sdepth = depth == CV_8U ? CV_32S : CV_64F;