From 6fa41c5a6438e1391c6005ef0aa8154ce40a94a0 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Thu, 30 Jan 2014 00:12:59 +0400
Subject: [PATCH] some experiments

---
 modules/imgproc/src/filter.cpp             | 83 ++++++++++++++++++++--
 modules/imgproc/src/opencl/filterSepCol.cl |  4 +-
 modules/imgproc/src/opencl/filterSepRow.cl | 14 +++--
 3 files changed, 87 insertions(+), 14 deletions(-)

diff --git a/modules/imgproc/src/filter.cpp b/modules/imgproc/src/filter.cpp
index 8c11c62dba..6b767329dc 100644
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@@ -3313,6 +3313,77 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
     return kernel.run(2, globalsize, localsize, true);
 }
 
+/**
+ * Serializes a single-row kernel into a comma-separated list of literals
+ * for use in an OpenCL "-D" build option.
+ *
+ * T must be the C++ element type matching k.depth(); k must hold at least
+ * one column. CV_32F values get an 'f' suffix and always carry a decimal
+ * point, because "1f" is not a valid floating-point literal.
+ */
+template <typename T>
+static std::string kerToStr(const Mat & k)
+{
+    CV_Assert(k.cols > 0);
+
+    const int last = k.cols - 1, depth = k.depth();
+    const T * const data = reinterpret_cast<const T *>(k.data);
+
+    std::ostringstream stream;
+    stream.precision(10);
+
+    if (depth <= CV_8S)
+    {
+        // 8-bit values are cast to int so they print as numbers, not characters
+        for (int i = 0; i < last; ++i)
+            stream << (int)data[i] << ",";
+        stream << (int)data[last];
+    }
+    else if (depth == CV_32F)
+    {
+        // showpoint forces a decimal point, so whole values print as "1.000000000f"
+        stream.setf(std::ios_base::showpoint);
+        for (int i = 0; i < last; ++i)
+            stream << data[i] << "f,";
+        stream << data[last] << "f";
+    }
+    else
+    {
+        for (int i = 0; i < last; ++i)
+            stream << data[i] << ",";
+        stream << data[last];   // final coefficient, no trailing comma
+    }
+
+    return stream.str();
+}
+
+/**
+ * Builds the " -D COEFF=..." OpenCL build option holding the coefficients of
+ * _kernel, flattened to one row and converted to ddepth first when the
+ * depths differ. A negative ddepth keeps the kernel's own depth.
+ */
+static String kernelToStr(InputArray _kernel, int ddepth = -1)
+{
+    Mat kernel = _kernel.getMat().reshape(1, 1);
+
+    const int depth = kernel.depth();
+    if (ddepth < 0)
+        ddepth = depth;
+
+    if (ddepth != depth)
+        kernel.convertTo(kernel, ddepth);
+
+    typedef std::string (*func_t)(const Mat &);
+    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
+                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
+    CV_Assert(ddepth < (int)(sizeof(funcs) / sizeof(funcs[0])));
+    // index by ddepth: after convertTo() that is the element type actually stored
+    const func_t func = funcs[ddepth];
+    CV_Assert(func != 0);
+
+    return cv::format(" -D COEFF=%s", func(kernel).c_str());
+}
+
 static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
 {
     int type = src.type();
@@ -3378,6 +3449,7 @@ static bool 
ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, btype, extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION", isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED"); + build_options += kernelToStr(kernelX, CV_32F); Size srcWholeSize; Point srcOffset; src.locateROI(srcWholeSize, srcOffset); @@ -3390,7 +3441,8 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, strKernel << "_D" << sdepth; ocl::Kernel kernelRow; - if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options)) + if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, + build_options)) return false; int idxArg = 0; @@ -3409,7 +3461,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, idxArg = kernelRow.set(idxArg, buf.cols); idxArg = kernelRow.set(idxArg, buf.rows); idxArg = kernelRow.set(idxArg, radiusY); - idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ))); +// idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ))); return kernelRow.run(2, globalsize, localsize, sync); } @@ -3479,6 +3531,8 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b } } + build_options += kernelToStr(kernelY, CV_32F); + ocl::Kernel kernelCol; if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options)) return false; @@ -3494,7 +3548,7 @@ static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, b idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize())); idxArg = kernelCol.set(idxArg, dst.cols); idxArg = kernelCol.set(idxArg, dst.rows); - idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ))); +// idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ))); return kernelCol.run(2, globalsize, localsize, sync); } @@ -3508,7 
+3562,7 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth, int type = _src.type(); if ( !( (CV_8UC1 == type || CV_8UC4 == type || CV_32FC1 == type || CV_32FC4 == type) && - (ddepth == CV_32F || ddepth == CV_8U) ) ) + (ddepth == CV_32F || ddepth == CV_8U || ddepth < 0) ) ) return false; int cn = CV_MAT_CN(type); diff --git a/modules/imgproc/src/opencl/filterSepCol.cl b/modules/imgproc/src/opencl/filterSepCol.cl index e99fa6ee03..721eb90097 100644 --- a/modules/imgproc/src/opencl/filterSepCol.cl +++ b/modules/imgproc/src/opencl/filterSepCol.cl @@ -60,6 +60,7 @@ Niko The info above maybe obsolete. ***********************************************************************************/ +__constant float mat_kernel[] = { COEFF }; __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter (__global const GENTYPE_SRC * restrict src, @@ -70,8 +71,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter const int dst_offset_in_pixel, const int dst_step_in_pixel, const int dst_cols, - const int dst_rows, - __constant float * mat_kernel) + const int dst_rows) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/imgproc/src/opencl/filterSepRow.cl b/modules/imgproc/src/opencl/filterSepRow.cl index dfbf300999..efb082e3e4 100644 --- a/modules/imgproc/src/opencl/filterSepRow.cl +++ b/modules/imgproc/src/opencl/filterSepRow.cl @@ -144,6 +144,8 @@ Niko The info above maybe obsolete. 
***********************************************************************************/ +__constant float mat_kernel[] = { COEFF }; + __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0 (__global uchar * restrict src, int src_step_in_pixel, @@ -153,8 +155,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0)<<2; int y = get_global_id(1); @@ -297,8 +298,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float4 * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); @@ -391,8 +391,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); @@ -484,8 +483,7 @@ __kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_ __global float4 * dst, int dst_step_in_pixel, int dst_cols, int dst_rows, - int radiusy, - __constant float * mat_kernel) + int radiusy) { int x = get_global_id(0); int y = get_global_id(1); -- GitLab