提交 07446ec9，作者：Alexander Alekhin

Merge pull request #2847 from ilya-lavrenov:tapi_pow

...@@ -2114,15 +2114,27 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, ...@@ -2114,15 +2114,27 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
rowsPerWI = d.isIntel() ? 4 : 1; rowsPerWI = d.isIntel() ? 4 : 1;
bool doubleSupport = d.doubleFPConfig() > 0; bool doubleSupport = d.doubleFPConfig() > 0;
_dst.createSameSize(_src, type);
if (is_ipower && (ipower == 0 || ipower == 1))
{
if (ipower == 0)
_dst.setTo(Scalar::all(1));
else if (ipower == 1)
_src.copyTo(_dst);
return true;
}
if (depth == CV_64F && !doubleSupport) if (depth == CV_64F && !doubleSupport)
return false; return false;
bool issqrt = std::abs(power - 0.5) < DBL_EPSILON; bool issqrt = std::abs(power - 0.5) < DBL_EPSILON, nonnegative = power >= 0;
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; const char * const op = issqrt ? "OP_SQRT" : is_ipower ? nonnegative ? "OP_POWN" : "OP_ROOTN" : nonnegative ? "OP_POWR" : "OP_POW";
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D dstT=%s -D rowsPerWI=%d -D %s -D UNARY_OP%s", ocl::typeToStr(depth), format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
rowsPerWI, op, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); ocl::typeToStr(depth), depth, rowsPerWI, op,
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty()) if (k.empty())
return false; return false;
...@@ -2153,11 +2165,12 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, ...@@ -2153,11 +2165,12 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
void pow( InputArray _src, double power, OutputArray _dst ) void pow( InputArray _src, double power, OutputArray _dst )
{ {
bool is_ipower = false, same = false;
int type = _src.type(), depth = CV_MAT_DEPTH(type), int type = _src.type(), depth = CV_MAT_DEPTH(type),
cn = CV_MAT_CN(type), ipower = cvRound(power); cn = CV_MAT_CN(type), ipower = cvRound(power);
bool is_ipower = fabs(ipower - power) < DBL_EPSILON, same = false,
useOpenCL = _dst.isUMat() && _src.dims() <= 2;
if( fabs(ipower - power) < DBL_EPSILON ) if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F))
{ {
if( ipower < 0 ) if( ipower < 0 )
{ {
...@@ -2179,7 +2192,8 @@ void pow( InputArray _src, double power, OutputArray _dst ) ...@@ -2179,7 +2192,8 @@ void pow( InputArray _src, double power, OutputArray _dst )
return; return;
case 2: case 2:
#if defined(HAVE_IPP) #if defined(HAVE_IPP)
if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) || (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) )) if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) ||
(_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
{ {
Mat src = _src.getMat(); Mat src = _src.getMat();
_dst.create( src.dims, src.size, type ); _dst.create( src.dims, src.size, type );
...@@ -2207,14 +2221,12 @@ void pow( InputArray _src, double power, OutputArray _dst ) ...@@ -2207,14 +2221,12 @@ void pow( InputArray _src, double power, OutputArray _dst )
else else
multiply(_src, _src, _dst); multiply(_src, _src, _dst);
return; return;
default:
is_ipower = true;
} }
} }
else else
CV_Assert( depth == CV_32F || depth == CV_64F ); CV_Assert( depth == CV_32F || depth == CV_64F );
CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2, CV_OCL_RUN(useOpenCL,
ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower)) ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower))
Mat src, dst; Mat src, dst;
......
...@@ -266,6 +266,16 @@ ...@@ -266,6 +266,16 @@
#elif defined OP_POW #elif defined OP_POW
#define PROCESS_ELEM storedst(pow(srcelem1, srcelem2)) #define PROCESS_ELEM storedst(pow(srcelem1, srcelem2))
#elif defined OP_ROOTN
#define PROCESS_ELEM storedst(rootn(srcelem1, srcelem2))
#elif defined OP_POWR
#if depth == 5
#define PROCESS_ELEM storedst(native_powr(srcelem1, srcelem2))
#else
#define PROCESS_ELEM storedst(powr(srcelem1, srcelem2))
#endif
#elif defined OP_POWN #elif defined OP_POWN
#undef workT #undef workT
#define workT int #define workT int
...@@ -374,7 +384,7 @@ ...@@ -374,7 +384,7 @@
#if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \ #if defined OP_AND || defined OP_OR || defined OP_XOR || defined OP_ADD || defined OP_SAT_ADD || \
defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \ defined OP_SUB || defined OP_SAT_SUB || defined OP_RSUB || defined OP_SAT_RSUB || \
defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \ defined OP_ABSDIFF || defined OP_CMP || defined OP_MIN || defined OP_MAX || defined OP_POW || \
defined OP_MUL || defined OP_DIV || defined OP_POWN defined OP_MUL || defined OP_DIV || defined OP_POWN || defined OP_POWR || defined OP_ROOTN
#undef EXTRA_PARAMS #undef EXTRA_PARAMS
#define EXTRA_PARAMS , workST srcelem2_ #define EXTRA_PARAMS , workST srcelem2_
#undef srcelem2 #undef srcelem2
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请注册