From 6c8b6bd0c79a8ec2d13cf190eb249011de105605 Mon Sep 17 00:00:00 2001 From: Alexander Karsakov Date: Thu, 17 Jul 2014 12:31:41 +0400 Subject: [PATCH] Added packing to CCS format --- modules/core/src/dxt.cpp | 117 +++++++---------------------- modules/core/src/opencl/fft.cl | 48 ++++++++++-- modules/core/test/ocl/test_dft.cpp | 18 +++-- 3 files changed, 82 insertions(+), 101 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index a3df694364..69ec2c9efe 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2083,20 +2083,19 @@ struct OCL_FftPlan int dft_size; int flags; - - OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags) + bool status; + OCL_FftPlan(int _size, int _flags): dft_size(_size), flags(_flags), status(true) { int min_radix = INT_MAX; std::vector radixes, blocks; ocl_getRadixes(dft_size, radixes, blocks, min_radix); thread_count = (dft_size + min_radix-1) / min_radix; - printf("cols: %d - ", dft_size); - for (int i=0; i ocl::Device::getDefault().maxWorkGroupSize()) { - printf("%d ", radixes[i]); + status = false; + return; } - printf("min radix - %d\n", min_radix); // generate string with radix calls String radix_processing; @@ -2142,6 +2141,9 @@ struct OCL_FftPlan bool enqueueTransform(InputArray _src, OutputArray _dst, int dft_size, int flags, bool rows = true) const { + if (!status) + return false; + UMat src = _src.getUMat(); UMat dst = _dst.getUMat(); @@ -2162,11 +2164,14 @@ struct OCL_FftPlan kernel_name = "fft_multi_radix_cols"; } + bool is1d = (flags & DFT_ROWS) != 0 || dft_size == 1; String options = buildOptions; if (src.channels() == 1) options += " -D REAL_INPUT"; if (dst.channels() == 1) options += " -D CCS_OUTPUT"; + if ((is1d && src.channels() == 1) || (rows && (flags & DFT_REAL_OUTPUT))) + options += " -D NO_CONJUGATE"; ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); if (k.empty()) @@ -2219,61 +2224,16 @@ protected: std::vector planStorage; }; -static bool ocl_packToCCS(InputArray _src, OutputArray _dst, int flags) -{ - UMat src = _src.getUMat(); - _dst.create(src.size(), CV_32F); - UMat dst = _dst.getUMat(); - - src = src.reshape(1); - if ((flags & DFT_ROWS) == 0 && src.rows > 1) - { - // pack to CCS by rows - if (dst.cols > 2) - src.colRange(2, dst.cols + (dst.cols % 2)).copyTo(dst.colRange(1, dst.cols-1 + (dst.cols % 2))); - - Mat dst_mat = dst.getMat(ACCESS_WRITE); - Mat buffer_mat = src.getMat(ACCESS_READ); - - dst_mat.at(0,0) = buffer_mat.at(0,0); - dst_mat.at(dst_mat.rows-1,0) = buffer_mat.at(src.rows/2,0); - for (int i=1; i(i,0) = buffer_mat.at((i+1)/2,0); - dst_mat.at(i+1,0) = buffer_mat.at((i+1)/2,1); - } - - if (dst_mat.cols % 2 == 0) - { - dst_mat.at(0,dst_mat.cols-1) = buffer_mat.at(0,src.cols/2); - dst_mat.at(dst_mat.rows-1,dst_mat.cols-1) = buffer_mat.at(src.rows/2,src.cols/2); - - for (int i=1; i(i,dst_mat.cols-1) = buffer_mat.at((i+1)/2,src.cols/2); - dst_mat.at(i+1,dst_mat.cols-1) = buffer_mat.at((i+1)/2,src.cols/2+1); - } - } - } - else - { - // pack to CCS each row - src.colRange(0,1).copyTo(dst.colRange(0,1)); - src.colRange(2, (dst.cols+1)).copyTo(dst.colRange(1, dst.cols)); - } - return true; -} - static bool ocl_dft_C2C_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags) { const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), flags); return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, true); } -static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int flags) +static bool ocl_dft_C2C_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags) { const OCL_FftPlan* plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), flags); - return plan->enqueueTransform(_src, _dst, _src.cols(), flags, false); + return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, false); } static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) @@ -2315,6 +2275,8 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro // Forward Complex to CCS not supported if (complex_input && real_output && !inv) { + flags ^= DFT_REAL_OUTPUT; + flags |= DFT_COMPLEX_OUTPUT; real_output = 0; complex_output = 1; } @@ -2344,23 +2306,21 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro if (complex_output) { - if (real_input && is1d && !inv) - output.create(src.size(), CV_32FC2); - else - { - _dst.create(src.size(), CV_32FC2); - output = _dst.getUMat(); - } - } else + _dst.create(src.size(), CV_32FC2); + output = _dst.getUMat(); + } + else { - // CCS if (is1d) { _dst.create(src.size(), CV_32FC1); output = _dst.getUMat(); } else + { + _dst.create(src.size(), CV_32FC1); output.create(src.size(), CV_32FC2); + } } if (!ocl_dft_C2C_rows(input, output, nonzero_rows, flags)) @@ -2368,32 +2328,13 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_ro if (!is1d) { - if (!ocl_dft_C2C_cols(output, output, flags)) + int nonzero_cols = real_input && real_output ? output.cols/2 + 1 : output.cols; + if (!ocl_dft_C2C_cols(output, _dst, nonzero_cols, flags)) return false; - } - - if (complex_output) + } else { - if (real_input && is1d && !inv) - _dst.assign(output.colRange(0, output.cols/2+1)); - else - _dst.assign(output); + _dst.assign(output); } - else - { - if (!inv) - { - if (!is1d) - ocl_packToCCS(output, _dst, flags); - else - _dst.assign(output); - } - else - { - // copy real part to dst - } - } - //printf("OCL!\n"); return true; } @@ -2435,7 +2376,6 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; int factors[34]; bool inplace_transform = false; - bool is1d = (flags & DFT_ROWS) != 0 || src.rows == 1; #ifdef USE_IPP_DFT AutoBuffer ippbuf; int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1; @@ -2444,10 +2384,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) - if (!is1d) - _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); - else - _dst.create( Size(src.cols/2+1, src.rows), CV_MAKETYPE(depth, 2) ); + _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) _dst.create( src.size(), depth ); else diff --git a/modules/core/src/opencl/fft.cl b/modules/core/src/opencl/fft.cl index 7803cdbc21..a778d59f22 100644 --- a/modules/core/src/opencl/fft.cl +++ b/modules/core/src/opencl/fft.cl @@ -331,10 +331,17 @@ __kernel void fft_multi_radix_rows(__global const uchar* src_ptr, int src_step, RADIX_PROCESS; #ifndef CCS_OUTPUT - __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, mad24(x, (int)(sizeof(float)*2), dst_offset))); +#ifdef NO_CONJUGATE + // copy result without complex conjugate + const int cols = dst_cols/2 + 1; +#else + const int cols = dst_cols; +#endif + + __global float2* dst = (__global float2*)(dst_ptr + mad24(y, dst_step, dst_offset)); #pragma unroll - for (int i=0; i(df) << std::endl; double eps = src.size().area() * 1e-4; EXPECT_MAT_NEAR(dst, udst, eps); @@ -181,9 +189,9 @@ OCL_TEST_P(MulSpectrums, Mat) OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool())); -OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(6, 1), cv::Size(5, 8), cv::Size(30, 20), +OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(6, 4), cv::Size(5, 8), cv::Size(6, 6), cv::Size(512, 1), cv::Size(1280, 768)), - Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R/*, (OCL_FFT_TYPE) C2R*/), + Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R), Bool(), // DFT_ROWS Bool() // inplace ) -- GitLab