提交 89eee6ca 编写于 作者: P Pavel Vlasov

Fixes for IPP integration:

dotProd_16s - disabled for IPP 9.0.0;
filter2D - fixed kernel preparation;
morphology - conditions fix and disabled FilterMin and FilterMax for IPP 9.0.0;
GaussianBlur - disabled for CV_8UC1 due to buffer overflow;
integral - disabled for IPP 9.0.0;

IppAutoBuffer class was added;
上级 441eeef3
......@@ -241,6 +241,26 @@ static inline IppDataType ippiGetDataType(int depth)
depth == CV_64F ? ipp64f : (IppDataType)-1;
}
// IPP temporary buffer helper.
//
// Owns at most one memory block obtained from ippMalloc() and hands it back
// to ippFree() when the object goes out of scope, so callers do not have to
// free scratch buffers on every return path. The object converts implicitly
// to T* / const T* so it can be passed directly where a raw pointer is
// expected.
template<typename T>
class IppAutoBuffer
{
public:
    // Creates an empty holder that owns no memory.
    IppAutoBuffer() : m_pBuffer(NULL) {}

    // Creates a holder and immediately allocates `size` bytes.
    IppAutoBuffer(int size) : m_pBuffer(NULL) { Alloc(size); }

    // Frees the owned block, if any.
    ~IppAutoBuffer() { Release(); }

    // Allocates `size` bytes (the value is forwarded unchanged to ippMalloc)
    // and returns the new block; the result of ippMalloc is not checked here.
    // Any block already owned is released first so that calling Alloc()
    // more than once does not leak the earlier allocation.
    T* Alloc(int size)
    {
        Release();
        m_pBuffer = (T*)ippMalloc(size);
        return m_pBuffer;
    }

    // Frees the owned block. The pointer is reset afterwards so that a later
    // Release() or the destructor cannot pass the same block to ippFree again.
    void Release()
    {
        if(m_pBuffer)
        {
            ippFree(m_pBuffer);
            m_pBuffer = NULL;
        }
    }

    // Implicit access to the owned block (NULL when nothing is allocated).
    inline operator T* () { return m_pBuffer; }
    inline operator const T* () const { return m_pBuffer; }

private:
    // Disable copy operations: two holders must never own the same block.
    // Declared but intentionally left undefined, so accidental use fails at
    // compile time (outside the class) or link time (inside it).
    IppAutoBuffer(const IppAutoBuffer &);
    IppAutoBuffer& operator =(const IppAutoBuffer &);

    T* m_pBuffer; // Owned block, or NULL.
};
#else
#define IPP_VERSION_X100 0
#endif
......
......@@ -3131,7 +3131,7 @@ static double dotProd_16u(const ushort* src1, const ushort* src2, int len)
static double dotProd_16s(const short* src1, const short* src2, int len)
{
#if (ARITHM_USE_IPP == 1)
#if (ARITHM_USE_IPP == 1) && (IPP_VERSION_X100 != 900) // bug in IPP 9.0.0
CV_IPP_CHECK()
{
double r = 0;
......
......@@ -1318,6 +1318,12 @@ public:
ippFeatures = ippCPUID_SSE;
else if(env == "sse2")
ippFeatures = ippCPUID_SSE2;
else if(env == "sse3")
ippFeatures = ippCPUID_SSE3;
else if(env == "ssse3")
ippFeatures = ippCPUID_SSSE3;
else if(env == "sse41")
ippFeatures = ippCPUID_SSE41;
else if(env == "sse42")
ippFeatures = ippCPUID_SSE42;
else if(env == "avx")
......
......@@ -4579,7 +4579,7 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth,
int stype = src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
ktype = kernel.type(), kdepth = CV_MAT_DEPTH(ktype);
bool isolated = (borderType & BORDER_ISOLATED) != 0;
Point ippAnchor(kernel.cols >> 1, kernel.rows >> 1);
Point ippAnchor((kernel.cols-1)/2, (kernel.rows-1)/2);
int borderTypeNI = borderType & ~BORDER_ISOLATED;
IppiBorderType ippBorderType = ippiGetBorderType(borderTypeNI);
......@@ -4610,24 +4610,48 @@ static bool ipp_filter2D( InputArray _src, OutputArray _dst, int ddepth,
if ((status = ippiFilterBorderGetSize(kernelSize, dstRoiSize, dataType, kernelType, cn, &specSize, &bufsize)) >= 0)
{
IppiFilterBorderSpec * spec = (IppiFilterBorderSpec *)ippMalloc(specSize);
Ipp8u * buffer = ippsMalloc_8u(bufsize);
IppAutoBuffer<IppiFilterBorderSpec> spec(specSize);
IppAutoBuffer<Ipp8u> buffer(bufsize);
Ipp32f borderValue[4] = { 0, 0, 0, 0 };
Mat reversedKernel;
flip(kernel, reversedKernel, -1);
if ((kdepth == CV_32F && (status = ippiFilterBorderInit_32f((const Ipp32f *)reversedKernel.data, kernelSize,
dataType, cn, ippRndFinancial, spec)) >= 0 ) ||
(kdepth == CV_16S && (status = ippiFilterBorderInit_16s((const Ipp16s *)reversedKernel.data,
kernelSize, 0, dataType, cn, ippRndFinancial, spec)) >= 0))
if(kdepth == CV_32F)
{
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
ippBorderType, borderValue, spec, buffer);
Ipp32f *pKerBuffer = (Ipp32f*)kernel.data;
IppAutoBuffer<Ipp32f> kerTmp;
if(kernel.step != kernel.cols)
{
kerTmp.Alloc(sizeof(Ipp32f)*kernelSize.width*kernelSize.height);
if(ippiCopy_32f_C1R((Ipp32f*)kernel.data, (int)kernel.step, kerTmp, kernelSize.width*sizeof(Ipp32f), kernelSize) < 0)
return false;
pKerBuffer = kerTmp;
}
if((status = ippiFilterBorderInit_32f(pKerBuffer, kernelSize,
dataType, cn, ippRndFinancial, spec)) >= 0 )
{
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
ippBorderType, borderValue, spec, buffer);
}
}
else if(kdepth == CV_16S)
{
Ipp16s *pKerBuffer = (Ipp16s*)kernel.data;
IppAutoBuffer<Ipp16s> kerTmp;
if(kernel.step != kernel.cols)
{
kerTmp.Alloc(sizeof(Ipp16s)*kernelSize.width*kernelSize.height);
if(ippiCopy_16s_C1R((Ipp16s*)kernel.data, (int)kernel.step, kerTmp, kernelSize.width*sizeof(Ipp16s), kernelSize) < 0)
return false;
pKerBuffer = kerTmp;
}
ippsFree(buffer);
ippsFree(spec);
if((status = ippiFilterBorderInit_16s(pKerBuffer, kernelSize,
0, dataType, cn, ippRndFinancial, spec)) >= 0)
{
status = ippFunc(src.data, (int)src.step, dst.data, (int)dst.step, dstRoiSize,
ippBorderType, borderValue, spec, buffer);
}
}
}
if (status >= 0)
......
......@@ -1231,17 +1231,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
}
else
{
#if IPP_VERSION_X100 != 900 // Problems with accuracy in 9.0.0
#if IPP_VERSION_X100 >= 900
if (((kernel.cols - 1) / 2 != anchor.x) || ((kernel.rows - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supporeted since IPP 9.0.0
if (((kernelSize.width - 1) / 2 != anchor.x) || ((kernelSize.height - 1) / 2 != anchor.y)) // Arbitrary anchor is no longer supporeted since IPP 9.0.0
return false;
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
#define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
case cvtype: \
{\
if (op == MORPH_ERODE)\
{\
int bufSize = 0;\
if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\
if (0 > ippiFilterMinBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
return false;\
AutoBuffer<uchar> buf(bufSize + 64);\
uchar* buffer = alignPtr((uchar*)buf, 32);\
......@@ -1250,7 +1251,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
else\
{\
int bufSize = 0;\
if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, 1, &bufSize))\
if (0 > ippiFilterMaxBorderGetBufferSize(roiSize, kernelSize, ipp##data_type, cn, &bufSize))\
return false;\
AutoBuffer<uchar> buf(bufSize + 64);\
uchar* buffer = alignPtr((uchar*)buf, 32);\
......@@ -1261,7 +1262,7 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
#else
IppiPoint point = {anchor.x, anchor.y};
#define IPP_MORPH_CASE(cvtype, flavor, data_type) \
#define IPP_MORPH_CASE(cvtype, flavor, data_type, cn) \
case cvtype: \
{\
int bufSize = 0;\
......@@ -1279,17 +1280,18 @@ static bool ipp_MorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kern
CV_SUPPRESS_DEPRECATED_START
switch (type)
{
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f);
IPP_MORPH_CASE(CV_8UC1, 8u_C1R, 8u, 1);
IPP_MORPH_CASE(CV_8UC3, 8u_C3R, 8u, 3);
IPP_MORPH_CASE(CV_8UC4, 8u_C4R, 8u, 4);
IPP_MORPH_CASE(CV_32FC1, 32f_C1R, 32f, 1);
IPP_MORPH_CASE(CV_32FC3, 32f_C3R, 32f, 3);
IPP_MORPH_CASE(CV_32FC4, 32f_C4R, 32f, 4);
default:
;
}
CV_SUPPRESS_DEPRECATED_END
#undef IPP_MORPH_CASE
#endif
}
#else
CV_UNUSED(op); CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(kernel); CV_UNUSED(ksize); CV_UNUSED(anchor); CV_UNUSED(rectKernel);
......
......@@ -1695,32 +1695,33 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
if (ippiFilterGaussianGetBufferSize(roiSize, (Ipp32u)ksize.width, dataType, cn, &specSize, &bufferSize) >= 0)
{
IppFilterGaussianSpec * pSpec = (IppFilterGaussianSpec *)ippMalloc(specSize);
Ipp8u * pBuffer = (Ipp8u*)ippMalloc(bufferSize);
IppAutoBuffer<IppFilterGaussianSpec> spec(specSize);
IppAutoBuffer<Ipp8u> buffer(bufferSize);
if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, 1, pSpec, pBuffer) >= 0)
if (ippiFilterGaussianInit(roiSize, (Ipp32u)ksize.width, (Ipp32f)sigma1, ippBorder, dataType, cn, spec, buffer) >= 0)
{
#define IPP_FILTER_GAUSS_C1(ippfavor) \
{ \
typedef Ipp##ippfavor ippType; \
ippType borderValues = 0; \
status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr<ippType>(), (int)src.step, \
dst.ptr<ippType>(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \
Ipp##ippfavor borderValues = 0; \
status = ippiFilterGaussianBorder_##ippfavor##_C1R(src.ptr<Ipp##ippfavor>(), (int)src.step, \
dst.ptr<Ipp##ippfavor>(), (int)dst.step, roiSize, borderValues, spec, buffer); \
}
#define IPP_FILTER_GAUSS_CN(ippfavor, ippcn) \
{ \
typedef Ipp##ippfavor ippType; \
ippType borderValues[] = { 0, 0, 0 }; \
status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr<ippType>(), (int)src.step, \
dst.ptr<ippType>(), (int)dst.step, roiSize, borderValues, pSpec, pBuffer); \
Ipp##ippfavor borderValues[] = { 0, 0, 0 }; \
status = ippiFilterGaussianBorder_##ippfavor##_C##ippcn##R(src.ptr<Ipp##ippfavor>(), (int)src.step, \
dst.ptr<Ipp##ippfavor>(), (int)dst.step, roiSize, borderValues, spec, buffer); \
}
IppStatus status = ippStsErr;
#if !HAVE_ICV
#if IPP_VERSION_X100 > 901 // Buffer overflow in IPP
if (type == CV_8UC1)
IPP_FILTER_GAUSS_C1(8u)
else if (type == CV_8UC3)
else
#endif
if (type == CV_8UC3)
IPP_FILTER_GAUSS_CN(8u, 3)
else if (type == CV_16UC1)
IPP_FILTER_GAUSS_C1(16u)
......@@ -1737,11 +1738,6 @@ static bool ipp_GaussianBlur( InputArray _src, OutputArray _dst, Size ksize,
if (type == CV_32FC1)
IPP_FILTER_GAUSS_C1(32f)
if (pSpec)
ippFree(pSpec);
if (pBuffer)
ippFree(pBuffer);
if(status >= 0)
return true;
......
......@@ -425,7 +425,7 @@ namespace cv
{
static bool ipp_integral(InputArray _src, OutputArray _sum, OutputArray _sqsum, OutputArray _tilted, int sdepth, int sqdepth)
{
#if !defined(HAVE_IPP_ICV_ONLY) // Disabled on ICV due invalid results
#if !defined(HAVE_IPP_ICV_ONLY) && (IPP_VERSION_X100 != 900) // Disabled on ICV due invalid results
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
if( sdepth <= 0 )
sdepth = depth == CV_8U ? CV_32S : CV_64F;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册