From e4608229f0de65aad3917b8eef958803f3a12d98 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Thu, 27 Feb 2014 14:49:36 +0400 Subject: [PATCH] ocl: copyMakeBorder 3-channel --- modules/core/src/copy.cpp | 18 +++++++--- modules/core/src/opencl/copymakeborder.cl | 37 +++++++++++++++----- modules/imgproc/perf/opencl/perf_imgproc.cpp | 2 +- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/modules/core/src/copy.cpp b/modules/core/src/copy.cpp index 2e72d0d33f..e3e959c950 100644 --- a/modules/core/src/copy.cpp +++ b/modules/core/src/copy.cpp @@ -782,18 +782,26 @@ namespace cv { static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, const Scalar& value ) { - int type = _src.type(), cn = CV_MAT_CN(type); + int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); bool isolated = (borderType & BORDER_ISOLATED) != 0; borderType &= ~cv::BORDER_ISOLATED; if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || borderType == BORDER_REFLECT || borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) || - cn == 3 || cn > 4) + cn > 4) return false; const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101" }; - ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, - format("-D T=%s -D %s", ocl::memopTypeToStr(type), borderMap[borderType])); + int scalarcn = cn == 3 ? 4 : cn; + int sctype = CV_MAKETYPE(depth, scalarcn); + String buildOptions = format( + "-D T=%s -D %s " + "-D T1=%s -D cn=%d -D ST=%s", + ocl::memopTypeToStr(type), borderMap[borderType], + ocl::memopTypeToStr(depth), cn, ocl::memopTypeToStr(sctype) + ); + + ocl::Kernel k("copyMakeBorder", ocl::core::copymakeborder_oclsrc, buildOptions); if (k.empty()) return false; @@ -825,7 +833,7 @@ static bool ocl_copyMakeBorder( InputArray _src, OutputArray _dst, int top, int } k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), - top, left, ocl::KernelArg::Constant(Mat(1, 1, type, value))); + top, left, ocl::KernelArg::Constant(Mat(1, 1, sctype, value))); size_t globalsize[2] = { dst.cols, dst.rows }; return k.run(2, globalsize, NULL, false); diff --git a/modules/core/src/opencl/copymakeborder.cl b/modules/core/src/opencl/copymakeborder.cl index dbb00b931c..55239ced9a 100644 --- a/modules/core/src/opencl/copymakeborder.cl +++ b/modules/core/src/opencl/copymakeborder.cl @@ -42,6 +42,18 @@ #endif #endif +#if cn != 3 +#define loadpix(addr) *(__global const T*)(addr) +#define storepix(val, addr) *(__global T*)(addr) = val +#define TSIZE ((int)sizeof(T)) +#define convertScalar(a) (a) +#else +#define loadpix(addr) vload3(0, (__global const T1*)(addr)) +#define storepix(val, addr) vstore3(val, 0, (__global T1*)(addr)) +#define TSIZE ((int)sizeof(T1)*3) +#define convertScalar(a) (T)(a.x, a.y, a.z) +#endif + #ifdef BORDER_CONSTANT #define EXTRAPOLATE(x, y, v) v = scalar; #elif defined BORDER_REPLICATE @@ -49,7 +61,7 @@ { \ x = clamp(x, 0, src_cols - 1); \ y = clamp(y, 0, src_rows - 1); \ - v = *(__global const T *)(srcptr + mad24(y, src_step, mad24(x, (int)sizeof(T), src_offset))); \ + v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \ } #elif defined BORDER_WRAP #define EXTRAPOLATE(x, y, v) \ @@ -63,7 +75,7 @@ y -= ((y - src_rows + 1) / src_rows) * src_rows; \ if( y >= src_rows ) \ y %= src_rows; \ - v = *(__global const T *)(srcptr + mad24(y, src_step, mad24(x, (int)sizeof(T), src_offset))); \ + v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \ } #elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101) #ifdef BORDER_REFLECT @@ -97,7 +109,7 @@ y = src_rows - 1 - (y - src_rows) - delta; \ } \ while (y >= src_rows || y < 0); \ - v = *(__global const T *)(srcptr + mad24(y, src_step, mad24(x, (int)sizeof(T), src_offset))); \ + v = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))); \ } #else #error No extrapolation method @@ -107,26 +119,33 @@ __kernel void copyMakeBorder(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols, __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols, - int top, int left, T scalar) + int top, int left, ST nVal) { int x = get_global_id(0); int y = get_global_id(1); +#ifdef BORDER_CONSTANT + T scalar = convertScalar(nVal); +#endif + if (x < dst_cols && y < dst_rows) { int src_x = x - left; int src_y = y - top; - int dst_index = mad24(y, dst_step, mad24(x, (int)sizeof(T), dst_offset)); + int dst_index = mad24(y, dst_step, mad24(x, (int)TSIZE, dst_offset)); __global T * dst = (__global T *)(dstptr + dst_index); + T v; if (NEED_EXTRAPOLATION(src_x, src_y)) - EXTRAPOLATE(src_x, src_y, dst[0]) + { + EXTRAPOLATE(src_x, src_y, v) + } else { - int src_index = mad24(src_y, src_step, mad24(src_x, (int)sizeof(T), src_offset)); - __global const T * src = (__global const T *)(srcptr + src_index); - dst[0] = src[0]; + int src_index = mad24(src_y, src_step, mad24(src_x, TSIZE, src_offset)); + v = loadpix(srcptr + src_index); } + storepix(v, dst); } } diff --git a/modules/imgproc/perf/opencl/perf_imgproc.cpp b/modules/imgproc/perf/opencl/perf_imgproc.cpp index ae6112e0d2..0d63e940ef 100644 --- a/modules/imgproc/perf/opencl/perf_imgproc.cpp +++ b/modules/imgproc/perf/opencl/perf_imgproc.cpp @@ -103,7 +103,7 @@ typedef tuple CopyMakeBorderParamType; typedef TestBaseWithParam CopyMakeBorderFixture; OCL_PERF_TEST_P(CopyMakeBorderFixture, CopyMakeBorder, - ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, Border::all())) + ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES_134, Border::all())) { const CopyMakeBorderParamType params = GetParam(); const Size srcSize = get<0>(params); -- GitLab