提交 eb8b4c58 编写于 作者: I Ilya Lavrenov

fixed bug in cv::ocl::predictOptimalVectorWidth

上级 b73490f8
...@@ -1299,7 +1299,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ...@@ -1299,7 +1299,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1); int type1 = _src1.type(), depth1 = CV_MAT_DEPTH(type1), cn = CV_MAT_CN(type1);
bool haveMask = !_mask.empty(); bool haveMask = !_mask.empty();
if( ((haveMask || haveScalar) && cn > 4) ) if ( (haveMask || haveScalar) && cn > 4 )
return false; return false;
int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype)); int dtype = _dst.type(), ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32S, CV_MAT_DEPTH(wtype));
...@@ -1320,14 +1320,11 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ...@@ -1320,14 +1320,11 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
"-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d", "-D convertToWT2=%s -D convertToDT=%s%s -D cn=%d",
(haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"), (haveMask ? "MASK_" : ""), (haveScalar ? "UNARY_OP" : "BINARY_OP"),
oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)), oclop2str[oclop], ocl::typeToStr(CV_MAKETYPE(depth1, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth1, 1)), ocl::typeToStr(depth1), ocl::typeToStr(CV_MAKETYPE(depth2, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth2, kercn)), ocl::typeToStr(depth2), ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(depth2, 1)), ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(ddepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(ddepth, 1)),
ocl::typeToStr(CV_MAKETYPE(wdepth, kercn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)), ocl::typeToStr(CV_MAKETYPE(wdepth, scalarcn)),
ocl::typeToStr(CV_MAKETYPE(wdepth, 1)), wdepth, ocl::typeToStr(wdepth), wdepth,
ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]), ocl::convertTypeStr(depth1, wdepth, kercn, cvtstr[0]),
ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]), ocl::convertTypeStr(depth2, wdepth, kercn, cvtstr[1]),
ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]), ocl::convertTypeStr(wdepth, ddepth, kercn, cvtstr[2]),
...@@ -1347,7 +1344,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ...@@ -1347,7 +1344,7 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
} }
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts); ocl::Kernel k("KF", ocl::core::arithm_oclsrc, opts);
if( k.empty() ) if (k.empty())
return false; return false;
UMat src1 = _src1.getUMat(), src2; UMat src1 = _src1.getUMat(), src2;
...@@ -1388,12 +1385,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, ...@@ -1388,12 +1385,12 @@ static bool ocl_arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
if( !haveMask ) if( !haveMask )
{ {
if(n == 0) if (n == 0)
k.args(src1arg, src2arg, dstarg); k.args(src1arg, src2arg, dstarg);
else if(n == 1) else if (n == 1)
k.args(src1arg, src2arg, dstarg, k.args(src1arg, src2arg, dstarg,
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz)); ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz));
else if(n == 3) else if (n == 3)
k.args(src1arg, src2arg, dstarg, k.args(src1arg, src2arg, dstarg,
ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz), ocl::KernelArg(0, 0, 0, 0, usrdata_p, usrdata_esz),
ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz), ocl::KernelArg(0, 0, 0, 0, usrdata_p + usrdata_esz, usrdata_esz),
......
...@@ -2041,7 +2041,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst, ...@@ -2041,7 +2041,7 @@ static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW"; const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
ocl::Kernel k("KF", ocl::core::arithm_oclsrc, ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), format("-D dstT=%s -D %s -D UNARY_OP%s", ocl::typeToStr(depth),
op, doubleSupport ? " -D DOUBLE_SUPPORT" : "")); op, doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty()) if (k.empty())
return false; return false;
...@@ -2081,7 +2081,7 @@ void pow( InputArray _src, double power, OutputArray _dst ) ...@@ -2081,7 +2081,7 @@ void pow( InputArray _src, double power, OutputArray _dst )
{ {
if( ipower < 0 ) if( ipower < 0 )
{ {
divide( 1., _src, _dst ); divide( Scalar::all(1), _src, _dst );
if( ipower == -1 ) if( ipower == -1 )
return; return;
ipower = -ipower; ipower = -ipower;
...@@ -2115,10 +2115,7 @@ void pow( InputArray _src, double power, OutputArray _dst ) ...@@ -2115,10 +2115,7 @@ void pow( InputArray _src, double power, OutputArray _dst )
Mat src, dst; Mat src, dst;
if (same) if (same)
{ src = dst = _dst.getMat();
dst = _dst.getMat();
src = dst;
}
else else
{ {
src = _src.getMat(); src = _src.getMat();
......
...@@ -4347,7 +4347,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, ...@@ -4347,7 +4347,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
InputArray src4, InputArray src5, InputArray src6, InputArray src4, InputArray src5, InputArray src6,
InputArray src7, InputArray src8, InputArray src9) InputArray src7, InputArray src8, InputArray src9)
{ {
int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), esz = CV_ELEM_SIZE(depth);
Size ssize = src1.size(); Size ssize = src1.size();
const ocl::Device & d = ocl::Device::getDefault(); const ocl::Device & d = ocl::Device::getDefault();
...@@ -4371,7 +4371,8 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, ...@@ -4371,7 +4371,8 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
PROCESS_SRC(src9); PROCESS_SRC(src9);
size_t size = offsets.size(); size_t size = offsets.size();
std::vector<int> dividers(size, width); int wsz = width * esz;
std::vector<int> dividers(size, wsz);
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0) while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
...@@ -4379,7 +4380,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, ...@@ -4379,7 +4380,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
// default strategy // default strategy
for (size_t i = 0; i < size; ++i) for (size_t i = 0; i < size; ++i)
if (dividers[i] != width) if (dividers[i] != wsz)
{ {
width = 1; width = 1;
break; break;
......
...@@ -839,7 +839,7 @@ OCL_TEST_P(Pow, Mat) ...@@ -839,7 +839,7 @@ OCL_TEST_P(Pow, Mat)
OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi)); OCL_OFF(cv::pow(src1_roi, pows[k], dst1_roi));
OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi)); OCL_ON(cv::pow(usrc1_roi, pows[k], udst1_roi));
OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-6); OCL_EXPECT_MATS_NEAR_RELATIVE(dst1, 1e-5);
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册