diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt index 6c6022c72c5108525985d66800256f1f1cfc444f..44e9dc0f3bf3a932e7ccef8ab805ba42f3edabc6 100644 --- a/modules/superres/CMakeLists.txt +++ b/modules/superres/CMakeLists.txt @@ -4,4 +4,4 @@ endif() set(the_description "Super Resolution") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef) -ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui) +ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui opencv_ocl) diff --git a/modules/superres/include/opencv2/superres/optical_flow.hpp b/modules/superres/include/opencv2/superres/optical_flow.hpp index 7a0ed833f41b1c86e84b1869b10b5c7f7be8fc8e..13ea9905c47190ce50743f4d1dad8355a708a7cf 100644 --- a/modules/superres/include/opencv2/superres/optical_flow.hpp +++ b/modules/superres/include/opencv2/superres/optical_flow.hpp @@ -63,10 +63,12 @@ namespace cv CV_EXPORTS Ptr createOptFlow_DualTVL1(); CV_EXPORTS Ptr createOptFlow_DualTVL1_GPU(); + CV_EXPORTS Ptr createOptFlow_DualTVL1_OCL(); CV_EXPORTS Ptr createOptFlow_Brox_GPU(); CV_EXPORTS Ptr createOptFlow_PyrLK_GPU(); + CV_EXPORTS Ptr createOptFlow_PyrLK_OCL(); } } diff --git a/modules/superres/include/opencv2/superres/superres.hpp b/modules/superres/include/opencv2/superres/superres.hpp index 1245c122a6b47cab30a99d9df9505ce6573f8e95..8daeb5ba0ee34ba01ad5d6a0f2492a903dfe9baf 100644 --- a/modules/superres/include/opencv2/superres/superres.hpp +++ b/modules/superres/include/opencv2/superres/superres.hpp @@ -92,6 +92,7 @@ namespace cv // Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. CV_EXPORTS Ptr createSuperResolution_BTVL1(); CV_EXPORTS Ptr createSuperResolution_BTVL1_GPU(); + CV_EXPORTS Ptr createSuperResolution_BTVL1_OCL(); } } diff --git a/modules/superres/perf/perf_superres_ocl.cpp b/modules/superres/perf/perf_superres_ocl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e75a8775d3c42f5a86f61da74bf2972cc5f7dfd8 --- /dev/null +++ b/modules/superres/perf/perf_superres_ocl.cpp @@ -0,0 +1,146 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "perf_precomp.hpp" + +#ifdef HAVE_OPENCL + +#include "opencv2/ocl/ocl.hpp" +using namespace std; +using namespace testing; +using namespace perf; +using namespace cv; +using namespace cv::superres; + +namespace +{ + class OneFrameSource_OCL : public FrameSource + { + public: + explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {} + + void nextFrame(OutputArray frame) + { + ocl::getOclMatRef(frame) = frame_; + } + void reset() + { + } + + private: + ocl::oclMat frame_; + }; + + + class ZeroOpticalFlowOCL : public DenseOpticalFlowExt + { + public: + void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2) + { + ocl::oclMat& frame0_ = ocl::getOclMatRef(frame0); + ocl::oclMat& flow1_ = ocl::getOclMatRef(flow1); + ocl::oclMat& flow2_ = ocl::getOclMatRef(flow2); + + cv::Size size = frame0_.size(); + + if(!flow2.needed()) + { + flow1_.create(size, CV_32FC2); + flow1_.setTo(Scalar::all(0)); + } + else + { + flow1_.create(size, CV_32FC1); + flow2_.create(size, CV_32FC1); + + flow1_.setTo(Scalar::all(0)); + flow2_.setTo(Scalar::all(0)); + } + } + + void collectGarbage() + { + } + }; +} + +PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL, + Combine(Values(szSmall64, szSmall128), + Values(MatType(CV_8UC1), MatType(CV_8UC3)))) +{ + std::vectorinfo; + cv::ocl::getDevice(info); + + declare.time(5 * 60); + + const Size size = get<0>(GetParam()); + const int type = get<1>(GetParam()); + + Mat frame(size, type); + declare.in(frame, WARMUP_RNG); + + ocl::oclMat frame_ocl; + frame_ocl.upload(frame); + + + const int scale = 2; + const int iterations = 50; + const int temporalAreaRadius = 1; + Ptr opticalFlowOcl(new ZeroOpticalFlowOCL); + + Ptr superRes_ocl = createSuperResolution_BTVL1_OCL(); + + superRes_ocl->set("scale", scale); + superRes_ocl->set("iterations", iterations); + superRes_ocl->set("temporalAreaRadius", temporalAreaRadius); + superRes_ocl->set("opticalFlow", opticalFlowOcl); + + superRes_ocl->setInput(new OneFrameSource_OCL(frame_ocl)); + + ocl::oclMat dst_ocl; + superRes_ocl->nextFrame(dst_ocl); + + TEST_CYCLE_N(10) superRes_ocl->nextFrame(dst_ocl); + frame_ocl.release(); + CPU_SANITY_CHECK(dst_ocl); +} +#endif diff --git a/modules/superres/src/btv_l1_ocl.cpp b/modules/superres/src/btv_l1_ocl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5f9e32675e9326a3175473730accc1e9d026ba86 --- /dev/null +++ b/modules/superres/src/btv_l1_ocl.cpp @@ -0,0 +1,748 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma, jin@multicorewareinc.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +// S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution. +// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow. + +#include "precomp.hpp" + +#if !defined(HAVE_OPENCL) || !defined(HAVE_OPENCV_OCL) + +cv::Ptr cv::superres::createSuperResolution_BTVL1_OCL() +{ + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); + return Ptr(); +} + +#else + +using namespace std; +using namespace cv; +using namespace cv::ocl; +using namespace cv::superres; +using namespace cv::superres::detail; + +namespace cv +{ + namespace ocl + { + extern const char* superres_btvl1; + + float* btvWeights_ = NULL; + size_t btvWeights_size = 0; + } +} + +namespace btv_l1_device_ocl +{ + void buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY, + const oclMat& backwardMotionX, const oclMat& bacwardMotionY, + oclMat& forwardMapX, oclMat& forwardMapY, + oclMat& backwardMapX, oclMat& backwardMapY); + + void upscale(const oclMat& src, oclMat& dst, int scale); + + float diffSign(float a, float b); + + Point3f diffSign(Point3f a, Point3f b); + + void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst); + + void calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize); +} + +void btv_l1_device_ocl::buildMotionMaps(const oclMat& forwardMotionX, const oclMat& forwardMotionY, + const oclMat& backwardMotionX, const oclMat& backwardMotionY, + oclMat& forwardMapX, oclMat& forwardMapY, + oclMat& backwardMapX, oclMat& backwardMapY) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {forwardMapX.cols, forwardMapX.rows, 1}; + + int forwardMotionX_step = (int)(forwardMotionX.step/forwardMotionX.elemSize()); + int forwardMotionY_step = (int)(forwardMotionY.step/forwardMotionY.elemSize()); + int backwardMotionX_step = (int)(backwardMotionX.step/backwardMotionX.elemSize()); + int backwardMotionY_step = (int)(backwardMotionY.step/backwardMotionY.elemSize()); + int forwardMapX_step = (int)(forwardMapX.step/forwardMapX.elemSize()); + int forwardMapY_step = (int)(forwardMapY.step/forwardMapY.elemSize()); + int backwardMapX_step = (int)(backwardMapX.step/backwardMapX.elemSize()); + int backwardMapY_step = (int)(backwardMapY.step/backwardMapY.elemSize()); + + String kernel_name = "buildMotionMapsKernel"; + vector< pair > args; + + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMotionY.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMotionY.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&forwardMapY.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapX.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&backwardMapY.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY.cols)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMotionY_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMotionY_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&forwardMapY_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapX_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&backwardMapY_step)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); +} + +void btv_l1_device_ocl::upscale(const oclMat& src, oclMat& dst, int scale) +{ + Context* clCxt = Context::getContext(); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {src.cols, src.rows, 1}; + + int src_step = (int)(src.step/src.elemSize()); + int dst_step = (int)(dst.step/dst.elemSize()); + + String kernel_name = "upscaleKernel"; + vector< pair > args; + + int cn = src.oclchannels(); + + args.push_back(make_pair(sizeof(cl_mem), (void*)&src.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst.data)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void*)&scale)); + args.push_back(make_pair(sizeof(cl_int), (void*)&cn)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); + +} + +float btv_l1_device_ocl::diffSign(float a, float b) +{ + return a > b ? 1.0f : a < b ? -1.0f : 0.0f; +} + +Point3f btv_l1_device_ocl::diffSign(Point3f a, Point3f b) +{ + return Point3f( + a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f, + a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f, + a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f + ); +} + +void btv_l1_device_ocl::diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst) +{ + Context* clCxt = Context::getContext(); + + oclMat src1_ = src1.reshape(1); + oclMat src2_ = src2.reshape(1); + oclMat dst_ = dst.reshape(1); + + int src1_step = (int)(src1_.step/src1_.elemSize()); + int src2_step = (int)(src2_.step/src2_.elemSize()); + int dst_step = (int)(dst_.step/dst_.elemSize()); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {src1_.cols, src1_.rows, 1}; + + String kernel_name = "diffSignKernel"; + vector< pair > args; + + args.push_back(make_pair(sizeof(cl_mem), (void*)&src1_.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&src2_.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data)); + + args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src1_.cols)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src1_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src2_step)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); +} + +void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, int ksize) +{ + Context* clCxt = Context::getContext(); + + oclMat src_ = src.reshape(1); + oclMat dst_ = dst.reshape(1); + + size_t local_thread[] = {32, 8, 1}; + size_t global_thread[] = {src.cols, src.rows, 1}; + + int src_step = (int)(src_.step/src_.elemSize()); + int dst_step = (int)(dst_.step/dst_.elemSize()); + + String kernel_name = "calcBtvRegularizationKernel"; + vector< pair > args; + + int cn = src.oclchannels(); + + cl_mem c_btvRegWeights; + size_t count = btvWeights_size * sizeof(float); + c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count); + int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL); + CV_Assert(cl_safe_check == CL_SUCCESS); + + args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&dst_.data)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&dst_step)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.rows)); + args.push_back(make_pair(sizeof(cl_int), (void*)&src.cols)); + args.push_back(make_pair(sizeof(cl_int), (void*)&ksize)); + args.push_back(make_pair(sizeof(cl_int), (void*)&cn)); + args.push_back(make_pair(sizeof(cl_mem), (void*)&c_btvRegWeights)); + + openCLExecuteKernel(clCxt, &superres_btvl1, kernel_name, global_thread, local_thread, args, -1, -1); + cl_safe_check = clReleaseMemObject(c_btvRegWeights); + CV_Assert(cl_safe_check == CL_SUCCESS); +} + +namespace +{ + void calcRelativeMotions(const vector >& forwardMotions, const vector >& backwardMotions, + vector >& relForwardMotions, vector >& relBackwardMotions, + int baseIdx, Size size) + { + const int count = static_cast(forwardMotions.size()); + + relForwardMotions.resize(count); + relForwardMotions[baseIdx].first.create(size, CV_32FC1); + relForwardMotions[baseIdx].first.setTo(Scalar::all(0)); + relForwardMotions[baseIdx].second.create(size, CV_32FC1); + relForwardMotions[baseIdx].second.setTo(Scalar::all(0)); + + relBackwardMotions.resize(count); + relBackwardMotions[baseIdx].first.create(size, CV_32FC1); + relBackwardMotions[baseIdx].first.setTo(Scalar::all(0)); + relBackwardMotions[baseIdx].second.create(size, CV_32FC1); + relBackwardMotions[baseIdx].second.setTo(Scalar::all(0)); + + for (int i = baseIdx - 1; i >= 0; --i) + { + ocl::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first); + ocl::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second); + + ocl::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first); + ocl::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second); + } + + for (int i = baseIdx + 1; i < count; ++i) + { + ocl::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first); + ocl::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second); + + ocl::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first); + ocl::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second); + } + } + + void upscaleMotions(const vector >& lowResMotions, vector >& highResMotions, int scale) + { + highResMotions.resize(lowResMotions.size()); + + for (size_t i = 0; i < lowResMotions.size(); ++i) + { + ocl::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_LINEAR); + ocl::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_LINEAR); + + ocl::multiply(scale, highResMotions[i].first, highResMotions[i].first); + ocl::multiply(scale, highResMotions[i].second, highResMotions[i].second); + } + } + + void buildMotionMaps(const pair& forwardMotion, const pair& backwardMotion, + pair& forwardMap, pair& backwardMap) + { + forwardMap.first.create(forwardMotion.first.size(), CV_32FC1); + forwardMap.second.create(forwardMotion.first.size(), CV_32FC1); + + backwardMap.first.create(forwardMotion.first.size(), CV_32FC1); + backwardMap.second.create(forwardMotion.first.size(), CV_32FC1); + + btv_l1_device_ocl::buildMotionMaps(forwardMotion.first, forwardMotion.second, + backwardMotion.first, backwardMotion.second, + forwardMap.first, forwardMap.second, + backwardMap.first, backwardMap.second); + } + + void upscale(const oclMat& src, oclMat& dst, int scale) + { + CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); + + dst.create(src.rows * scale, src.cols * scale, src.type()); + dst.setTo(Scalar::all(0)); + + btv_l1_device_ocl::upscale(src, dst, scale); + } + + void diffSign(const oclMat& src1, const oclMat& src2, oclMat& dst) + { + dst.create(src1.size(), src1.type()); + + btv_l1_device_ocl::diffSign(src1, src2, dst); + } + + void calcBtvWeights(int btvKernelSize, double alpha, vector& btvWeights) + { + const size_t size = btvKernelSize * btvKernelSize; + + btvWeights.resize(size); + + const int ksize = (btvKernelSize - 1) / 2; + const float alpha_f = static_cast(alpha); + + for (int m = 0, ind = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++ind) + btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l)); + } + + btvWeights_ = &btvWeights[0]; + btvWeights_size = size; + } + + void calcBtvRegularization(const oclMat& src, oclMat& dst, int btvKernelSize) + { + dst.create(src.size(), src.type()); + dst.setTo(Scalar::all(0)); + + const int ksize = (btvKernelSize - 1) / 2; + + btv_l1_device_ocl::calcBtvRegularization(src, dst, ksize); + } + + class BTVL1_OCL_Base + { + public: + BTVL1_OCL_Base(); + + void process(const vector& src, oclMat& dst, + const vector >& forwardMotions, const vector >& backwardMotions, + int baseIdx); + + void collectGarbage(); + + protected: + int scale_; + int iterations_; + double lambda_; + double tau_; + double alpha_; + int btvKernelSize_; + int blurKernelSize_; + double blurSigma_; + Ptr opticalFlow_; + + private: + vector > filters_; + int curBlurKernelSize_; + double curBlurSigma_; + int curSrcType_; + + vector btvWeights_; + int curBtvKernelSize_; + double curAlpha_; + + vector > lowResForwardMotions_; + vector > lowResBackwardMotions_; + + vector > highResForwardMotions_; + vector > highResBackwardMotions_; + + vector > forwardMaps_; + vector > backwardMaps_; + + oclMat highRes_; + + vector diffTerms_; + vector a_, b_, c_; + oclMat regTerm_; + }; + + BTVL1_OCL_Base::BTVL1_OCL_Base() + { + scale_ = 4; + iterations_ = 180; + lambda_ = 0.03; + tau_ = 1.3; + alpha_ = 0.7; + btvKernelSize_ = 7; + blurKernelSize_ = 5; + blurSigma_ = 0.0; + opticalFlow_ = createOptFlow_DualTVL1_OCL(); + + curBlurKernelSize_ = -1; + curBlurSigma_ = -1.0; + curSrcType_ = -1; + + curBtvKernelSize_ = -1; + curAlpha_ = -1.0; + } + + void BTVL1_OCL_Base::process(const vector& src, oclMat& dst, + const vector >& forwardMotions, const vector >& backwardMotions, + int baseIdx) + { + CV_Assert( scale_ > 1 ); + CV_Assert( iterations_ > 0 ); + CV_Assert( tau_ > 0.0 ); + CV_Assert( alpha_ > 0.0 ); + CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 ); + CV_Assert( blurKernelSize_ > 0 ); + CV_Assert( blurSigma_ >= 0.0 ); + + // update blur filter and btv weights + + if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_) + { + filters_.resize(src.size()); + for (size_t i = 0; i < src.size(); ++i) + filters_[i] = cv::ocl::createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_); + curBlurKernelSize_ = blurKernelSize_; + curBlurSigma_ = blurSigma_; + curSrcType_ = src[0].type(); + } + + if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_) + { + calcBtvWeights(btvKernelSize_, alpha_, btvWeights_); + curBtvKernelSize_ = btvKernelSize_; + curAlpha_ = alpha_; + } + + // calc motions between input frames + + calcRelativeMotions(forwardMotions, backwardMotions, + lowResForwardMotions_, lowResBackwardMotions_, + baseIdx, src[0].size()); + + upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_); + upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_); + + forwardMaps_.resize(highResForwardMotions_.size()); + backwardMaps_.resize(highResForwardMotions_.size()); + for (size_t i = 0; i < highResForwardMotions_.size(); ++i) + { + buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]); + } + // initial estimation + + const Size lowResSize = src[0].size(); + const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_); + + ocl::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_LINEAR); + + // iterations + + diffTerms_.resize(src.size()); + a_.resize(src.size()); + b_.resize(src.size()); + c_.resize(src.size()); + + for (int i = 0; i < iterations_; ++i) + { + for (size_t k = 0; k < src.size(); ++k) + { + diffTerms_[k].create(highRes_.size(), highRes_.type()); + a_[k].create(highRes_.size(), highRes_.type()); + b_[k].create(highRes_.size(), highRes_.type()); + c_[k].create(lowResSize, highRes_.type()); + + // a = M * Ih + ocl::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar()); + // b = HM * Ih + filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1)); + // c = DHF * Ih + ocl::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST); + + diffSign(src[k], c_[k], c_[k]); + + // a = Dt * diff + upscale(c_[k], a_[k], scale_); + // b = HtDt * diff + filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1)); + // diffTerm = MtHtDt * diff + ocl::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_CONSTANT, Scalar()); + } + + if (lambda_ > 0) + { + calcBtvRegularization(highRes_, regTerm_, btvKernelSize_); + ocl::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_); + } + + for (size_t k = 0; k < src.size(); ++k) + { + ocl::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_); + } + } + + Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_); + highRes_(inner).copyTo(dst); + } + + void BTVL1_OCL_Base::collectGarbage() + { + filters_.clear(); + + lowResForwardMotions_.clear(); + lowResBackwardMotions_.clear(); + + highResForwardMotions_.clear(); + highResBackwardMotions_.clear(); + + forwardMaps_.clear(); + backwardMaps_.clear(); + + highRes_.release(); + + diffTerms_.clear(); + a_.clear(); + b_.clear(); + c_.clear(); + regTerm_.release(); + } + + //////////////////////////////////////////////////////////// + + class BTVL1_OCL : public SuperResolution, private BTVL1_OCL_Base + { + public: + AlgorithmInfo* info() const; + + BTVL1_OCL(); + + void collectGarbage(); + + protected: + void initImpl(Ptr& frameSource); + void processImpl(Ptr& frameSource, OutputArray output); + + private: + int temporalAreaRadius_; + + void readNextFrame(Ptr& frameSource); + void processFrame(int idx); + + oclMat curFrame_; + oclMat prevFrame_; + + vector frames_; + vector > forwardMotions_; + vector > backwardMotions_; + vector outputs_; + + int storePos_; + int procPos_; + int outPos_; + + vector srcFrames_; + vector > srcForwardMotions_; + vector > srcBackwardMotions_; + oclMat finalOutput_; + }; + + CV_INIT_ALGORITHM(BTVL1_OCL, "SuperResolution.BTVL1_OCL", + obj.info()->addParam(obj, "scale", obj.scale_, false, 0, 0, "Scale factor."); + obj.info()->addParam(obj, "iterations", obj.iterations_, false, 0, 0, "Iteration count."); + obj.info()->addParam(obj, "tau", obj.tau_, false, 0, 0, "Asymptotic value of steepest descent method."); + obj.info()->addParam(obj, "lambda", obj.lambda_, false, 0, 0, "Weight parameter to balance data term and smoothness term."); + obj.info()->addParam(obj, "alpha", obj.alpha_, false, 0, 0, "Parameter of spacial distribution in Bilateral-TV."); + obj.info()->addParam(obj, "btvKernelSize", obj.btvKernelSize_, false, 0, 0, "Kernel size of Bilateral-TV filter."); + obj.info()->addParam(obj, "blurKernelSize", obj.blurKernelSize_, false, 0, 0, "Gaussian blur kernel size."); + obj.info()->addParam(obj, "blurSigma", obj.blurSigma_, false, 0, 0, "Gaussian blur sigma."); + obj.info()->addParam(obj, "temporalAreaRadius", obj.temporalAreaRadius_, false, 0, 0, "Radius of the temporal search area."); + obj.info()->addParam(obj, "opticalFlow", obj.opticalFlow_, false, 0, 0, "Dense optical flow algorithm.")); + + BTVL1_OCL::BTVL1_OCL() + { + temporalAreaRadius_ = 4; + } + + void BTVL1_OCL::collectGarbage() + { + curFrame_.release(); + prevFrame_.release(); + + frames_.clear(); + forwardMotions_.clear(); + backwardMotions_.clear(); + outputs_.clear(); + + srcFrames_.clear(); + srcForwardMotions_.clear(); + srcBackwardMotions_.clear(); + finalOutput_.release(); + + SuperResolution::collectGarbage(); + BTVL1_OCL_Base::collectGarbage(); + } + + void BTVL1_OCL::initImpl(Ptr& frameSource) + { + const int cacheSize = 2 * temporalAreaRadius_ + 1; + + frames_.resize(cacheSize); + forwardMotions_.resize(cacheSize); + backwardMotions_.resize(cacheSize); + outputs_.resize(cacheSize); + + storePos_ = -1; + + for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t) + readNextFrame(frameSource); + + for (int i = 0; i <= temporalAreaRadius_; ++i) + processFrame(i); + + procPos_ = temporalAreaRadius_; + outPos_ = -1; + } + + void BTVL1_OCL::processImpl(Ptr& frameSource, OutputArray _output) + { + if (outPos_ >= storePos_) + { + if(_output.kind() == _InputArray::OCL_MAT) + { + getOclMatRef(_output).release(); + } + else + { + _output.release(); + } + return; + } + + readNextFrame(frameSource); + + if (procPos_ < storePos_) + { + ++procPos_; + processFrame(procPos_); + } + + ++outPos_; + const oclMat& curOutput = at(outPos_, outputs_); + + if (_output.kind() == _InputArray::OCL_MAT) + curOutput.convertTo(getOclMatRef(_output), CV_8U); + else + { + curOutput.convertTo(finalOutput_, CV_8U); + arrCopy(finalOutput_, _output); + } + } + + void BTVL1_OCL::readNextFrame(Ptr& frameSource) + { + curFrame_.release(); + frameSource->nextFrame(curFrame_); + + if (curFrame_.empty()) + return; + + ++storePos_; + curFrame_.convertTo(at(storePos_, frames_), CV_32F); + + if (storePos_ > 0) + { + pair& forwardMotion = at(storePos_ - 1, forwardMotions_); + pair& backwardMotion = at(storePos_, backwardMotions_); + + opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second); + opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second); + } + + curFrame_.copyTo(prevFrame_); + } + + void BTVL1_OCL::processFrame(int idx) + { + const int startIdx = max(idx - temporalAreaRadius_, 0); + const int procIdx = idx; + const int endIdx = min(startIdx + 2 * temporalAreaRadius_, storePos_); + + const int count = endIdx - startIdx + 1; + + srcFrames_.resize(count); + srcForwardMotions_.resize(count); + srcBackwardMotions_.resize(count); + + int baseIdx = -1; + + for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k) + { + if (i == procIdx) + baseIdx = k; + + srcFrames_[k] = at(i, frames_); + + if (i < endIdx) + srcForwardMotions_[k] = at(i, forwardMotions_); + if (i > startIdx) + srcBackwardMotions_[k] = at(i, backwardMotions_); + } + + process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx); + } +} + +Ptr cv::superres::createSuperResolution_BTVL1_OCL() +{ + return new BTVL1_OCL; +} +#endif \ No newline at end of file diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp index 052141616d7499daf645de5e1767014b73c68e3d..20e45d9518a8a3ab8030a86e7611c4d3536b01c2 100644 --- a/modules/superres/src/frame_source.cpp +++ b/modules/superres/src/frame_source.cpp @@ -119,11 +119,23 @@ namespace { vc_ >> _frame.getMatRef(); } - else + else if(_frame.kind() == _InputArray::GPU_MAT) { vc_ >> frame_; arrCopy(frame_, _frame); } + else if(_frame.kind() == _InputArray::OCL_MAT) + { + vc_ >> frame_; + if(!frame_.empty()) + { + arrCopy(frame_, _frame); + } + } + else + { + //should never get here + } } class VideoFrameSource : public CaptureFrameSource diff --git a/modules/superres/src/input_array_utility.cpp b/modules/superres/src/input_array_utility.cpp index 5a6682526ab44a13240d09e524284204907bec6f..075cf95144bb3c87ed23061fb64a7bbc2e381b9d 100644 --- a/modules/superres/src/input_array_utility.cpp +++ b/modules/superres/src/input_array_utility.cpp @@ -125,30 +125,59 @@ namespace { src.getGpuMat().copyTo(dst.getGpuMatRef()); } +#ifdef HAVE_OPENCV_OCL + void ocl2mat(InputArray src, OutputArray dst) + { + dst.getMatRef() = (Mat)ocl::getOclMatRef(src); + } + void mat2ocl(InputArray src, OutputArray dst) + { + Mat m = src.getMat(); + ocl::getOclMatRef(dst) = (ocl::oclMat)m; + } + void ocl2ocl(InputArray src, OutputArray dst) + { + ocl::getOclMatRef(src).copyTo(ocl::getOclMatRef(dst)); + } +#else + void ocl2mat(InputArray, OutputArray) + { + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");; + } + void mat2ocl(InputArray, OutputArray) + { + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");; + } + void ocl2ocl(InputArray, OutputArray) + { + CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); + } +#endif } void cv::superres::arrCopy(InputArray src, OutputArray dst) { typedef void (*func_t)(InputArray src, OutputArray dst); - static const func_t funcs[10][10] = + static const func_t funcs[11][11] = { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu}, - {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr}, - {0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr}, - {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu} + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl}, + {0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 }, + {0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, 0 }, + {0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu, 0 }, + {0, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, 0, 0, 0, ocl2ocl} }; const int src_kind = src.kind() >> _InputArray::KIND_SHIFT; const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT; - CV_DbgAssert( src_kind >= 0 && src_kind < 10 ); - CV_DbgAssert( dst_kind >= 0 && dst_kind < 10 ); + CV_DbgAssert( src_kind >= 0 && src_kind < 11 ); + CV_DbgAssert( dst_kind >= 0 && dst_kind < 11 ); const func_t func = funcs[src_kind][dst_kind]; CV_DbgAssert( func != 0 ); @@ -190,7 +219,6 @@ namespace break; } } - void convertToDepth(InputArray src, OutputArray dst, int depth) { CV_Assert( src.depth() <= CV_64F ); @@ -271,3 +299,70 @@ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, Gp convertToDepth(buf0, buf1, depth); return buf1; } +#ifdef HAVE_OPENCV_OCL +namespace +{ + // TODO(pengx17): remove these overloaded functions until IntputArray fully supports oclMat + void convertToCn(const ocl::oclMat& src, ocl::oclMat& dst, int cn) + { + CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); + CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + + static const int codes[5][5] = + { + {-1, -1, -1, -1, -1}, + {-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA}, + {-1, -1, -1, -1, -1}, + {-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA}, + {-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1}, + }; + + const int code = codes[src.channels()][cn]; + CV_DbgAssert( code >= 0 ); + + ocl::cvtColor(src, dst, code, cn); + } + void convertToDepth(const ocl::oclMat& src, ocl::oclMat& dst, int depth) + { + CV_Assert( src.depth() <= CV_64F ); + CV_Assert( depth == CV_8U || depth == CV_32F ); + + static const double maxVals[] = + { + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max(), + 1.0, + 1.0, + }; + const double scale = maxVals[depth] / maxVals[src.depth()]; + src.convertTo(dst, depth, scale); + } +} +ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1) +{ + if (src.type() == type) + return src; + + const int depth = CV_MAT_DEPTH(type); + const int cn = CV_MAT_CN(type); + + if (src.depth() == depth) + { + convertToCn(src, buf0, cn); + return buf0; + } + + if (src.channels() == cn) + { + convertToDepth(src, buf1, depth); + return buf1; + } + + convertToCn(src, buf0, cn); + convertToDepth(buf0, buf1, depth); + return buf1; +} +#endif diff --git a/modules/superres/src/input_array_utility.hpp b/modules/superres/src/input_array_utility.hpp index 975783dc6f908186aff29fd6110c43612680c05e..9fa63da53e174d662f0c05d1100fd464a2a63abd 100644 --- a/modules/superres/src/input_array_utility.hpp +++ b/modules/superres/src/input_array_utility.hpp @@ -45,6 +45,9 @@ #include "opencv2/core/core.hpp" #include "opencv2/core/gpumat.hpp" +#ifdef HAVE_OPENCV_OCL +#include "opencv2/ocl/ocl.hpp" +#endif namespace cv { @@ -57,6 +60,10 @@ namespace cv CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1); CV_EXPORTS gpu::GpuMat convertToType(const gpu::GpuMat& src, int type, gpu::GpuMat& buf0, gpu::GpuMat& buf1); + +#ifdef HAVE_OPENCV_OCL + CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1); +#endif } } diff --git a/modules/superres/src/opencl/superres_btvl1.cl b/modules/superres/src/opencl/superres_btvl1.cl new file mode 100644 index 0000000000000000000000000000000000000000..0efa1709c89597edc6556779969cc7a4ab2895f8 --- /dev/null +++ b/modules/superres/src/opencl/superres_btvl1.cl @@ -0,0 +1,261 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jin Ma jin@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +__kernel void buildMotionMapsKernel(__global float* forwardMotionX, + __global float* forwardMotionY, + __global float* backwardMotionX, + __global float* backwardMotionY, + __global float* forwardMapX, + __global float* forwardMapY, + __global float* backwardMapX, + __global float* backwardMapY, + int forwardMotionX_row, + int forwardMotionX_col, + int forwardMotionX_step, + int forwardMotionY_step, + int backwardMotionX_step, + int backwardMotionY_step, + int forwardMapX_step, + int forwardMapY_step, + int backwardMapX_step, + int backwardMapY_step + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < forwardMotionX_col && y < forwardMotionX_row) + { + float fx = forwardMotionX[y * forwardMotionX_step + x]; + float fy = forwardMotionY[y * forwardMotionY_step + x]; + + float bx = backwardMotionX[y * backwardMotionX_step + x]; + float by = backwardMotionY[y * backwardMotionY_step + x]; + + forwardMapX[y * forwardMapX_step + x] = x + bx; + forwardMapY[y * forwardMapY_step + x] = y + by; + + backwardMapX[y * backwardMapX_step + x] = x + fx; + backwardMapY[y * backwardMapY_step + x] = y + fy; + } +} + +__kernel void upscaleKernel(__global float* src, + __global float* dst, + int src_step, + int dst_step, + int src_row, + int src_col, + int scale, + int channels + ) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < src_col && y < src_row) + { + if(channels == 1) + { + dst[y * scale * dst_step + x * scale] = src[y * src_step + x]; + }else if(channels == 3) + { + dst[y * channels * scale * dst_step + 3 * x * scale + 0] = src[y * channels * src_step + 3 * x + 0]; + dst[y * channels * scale * dst_step + 3 * x * scale + 1] = src[y * channels * src_step + 3 * x + 1]; + dst[y * channels * scale * dst_step + 3 * x * scale + 2] = src[y * channels * src_step + 3 * x + 2]; + }else + { + dst[y * channels * scale * dst_step + 4 * x * scale + 0] = src[y * channels * src_step + 4 * x + 0]; + dst[y * channels * scale * dst_step + 4 * x * scale + 1] = src[y * channels * src_step + 4 * x + 1]; + dst[y * channels * scale * dst_step + 4 * x * scale + 2] = src[y * channels * src_step + 4 * x + 2]; + dst[y * channels * scale * dst_step + 4 * x * scale + 3] = src[y * channels * src_step + 4 * x + 3]; + } + } +} + + +float diffSign(float a, float b) +{ + return a > b ? 1.0f : a < b ? -1.0f : 0.0f; +} + +float3 diffSign3(float3 a, float3 b) +{ + float3 pos; + pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f; + pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f; + pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f; + return pos; +} + +float4 diffSign4(float4 a, float4 b) +{ + float4 pos; + pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f; + pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f; + pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f; + pos.w = 0.0f; + return pos; +} + +__kernel void diffSignKernel(__global float* src1, + __global float* src2, + __global float* dst, + int src1_row, + int src1_col, + int dst_step, + int src1_step, + int src2_step) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if(x < src1_col && y < src1_row) + { + dst[y * dst_step + x] = diffSign(src1[y * src1_step + x], src2[y * src2_step + x]); + } + barrier(CLK_LOCAL_MEM_FENCE); +} + +__kernel void calcBtvRegularizationKernel(__global float* src, + __global float* dst, + int src_step, + int dst_step, + int src_row, + int src_col, + int ksize, + int channels, + __global float* c_btvRegWeights + ) +{ + int x = get_global_id(0) + ksize; + int y = get_global_id(1) + ksize; + + if ((y < src_row - ksize) && (x < src_col - ksize)) + { + if(channels == 1) + { + const float srcVal = src[y * src_step + x]; + float dstVal = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) + dstVal = dstVal + c_btvRegWeights[count] * (diffSign(srcVal, src[(y + m) * src_step + (x + l)]) - diffSign(src[(y - m) * src_step + (x - l)], srcVal)); + } + dst[y * dst_step + x] = dstVal; + }else if(channels == 3) + { + float3 srcVal; + srcVal.x = src[y * src_step + 3 * x + 0]; + srcVal.y = src[y * src_step + 3 * x + 1]; + srcVal.z = src[y * src_step + 3 * x + 2]; + + float3 dstVal; + dstVal.x = 0.0f; + dstVal.y = 0.0f; + dstVal.z = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) + { + float3 src1; + src1.x = src[(y + m) * src_step + 3 * (x + l) + 0]; + src1.y = src[(y + m) * src_step + 3 * (x + l) + 1]; + src1.z = src[(y + m) * src_step + 3 * (x + l) + 2]; + + float3 src2; + src2.x = src[(y - m) * src_step + 3 * (x - l) + 0]; + src2.y = src[(y - m) * src_step + 3 * (x - l) + 1]; + src2.z = src[(y - m) * src_step + 3 * (x - l) + 2]; + + dstVal = dstVal + c_btvRegWeights[count] * (diffSign3(srcVal, src1) - diffSign3(src2, srcVal)); + } + } + dst[y * dst_step + 3 * x + 0] = dstVal.x; + dst[y * dst_step + 3 * x + 1] = dstVal.y; + dst[y * dst_step + 3 * x + 2] = dstVal.z; + }else + { + float4 srcVal; + srcVal.x = src[y * src_step + 4 * x + 0];//r type =float + srcVal.y = src[y * src_step + 4 * x + 1];//g + srcVal.z = src[y * src_step + 4 * x + 2];//b + srcVal.w = src[y * src_step + 4 * x + 3];//a + + float4 dstVal; + dstVal.x = 0.0f; + dstVal.y = 0.0f; + dstVal.z = 0.0f; + dstVal.w = 0.0f; + + for (int m = 0, count = 0; m <= ksize; ++m) + { + for (int l = ksize; l + m >= 0; --l, ++count) + { + float4 src1; + src1.x = src[(y + m) * src_step + 4 * (x + l) + 0]; + src1.y = src[(y + m) * src_step + 4 * (x + l) + 1]; + src1.z = src[(y + m) * src_step + 4 * (x + l) + 2]; + src1.w = src[(y + m) * src_step + 4 * (x + l) + 3]; + + float4 src2; + src2.x = src[(y - m) * src_step + 4 * (x - l) + 0]; + src2.y = src[(y - m) * src_step + 4 * (x - l) + 1]; + src2.z = src[(y - m) * src_step + 4 * (x - l) + 2]; + src2.w = src[(y - m) * src_step + 4 * (x - l) + 3]; + + dstVal = dstVal + c_btvRegWeights[count] * (diffSign4(srcVal, src1) - diffSign4(src2, srcVal)); + + } + } + dst[y * dst_step + 4 * x + 0] = dstVal.x; + dst[y * dst_step + 4 * x + 1] = dstVal.y; + dst[y * dst_step + 4 * x + 2] = dstVal.z; + dst[y * dst_step + 4 * x + 3] = dstVal.w; + } + } +} \ No newline at end of file diff --git a/modules/superres/src/optical_flow.cpp b/modules/superres/src/optical_flow.cpp index 12642175d29057fd28dcd2b9c52922802f523a23..6947d190173d08d4df57172069270291c03122bb 100644 --- a/modules/superres/src/optical_flow.cpp +++ b/modules/superres/src/optical_flow.cpp @@ -719,3 +719,195 @@ Ptr cv::superres::createOptFlow_DualTVL1_GPU() } #endif // HAVE_OPENCV_GPU +#ifdef HAVE_OPENCV_OCL + +namespace +{ + class oclOpticalFlow : public DenseOpticalFlowExt + { + public: + explicit oclOpticalFlow(int work_type); + + void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2); + void collectGarbage(); + + protected: + virtual void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) = 0; + + private: + int work_type_; + cv::ocl::oclMat buf_[6]; + cv::ocl::oclMat u_, v_, flow_; + }; + + oclOpticalFlow::oclOpticalFlow(int work_type) : work_type_(work_type) + { + } + + void oclOpticalFlow::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2) + { + ocl::oclMat& _frame0 = ocl::getOclMatRef(frame0); + ocl::oclMat& _frame1 = ocl::getOclMatRef(frame1); + ocl::oclMat& _flow1 = ocl::getOclMatRef(flow1); + ocl::oclMat& _flow2 = ocl::getOclMatRef(flow2); + + CV_Assert( _frame1.type() == _frame0.type() ); + CV_Assert( _frame1.size() == _frame0.size() ); + + cv::ocl::oclMat input0_ = convertToType(_frame0, work_type_, buf_[2], buf_[3]); + cv::ocl::oclMat input1_ = convertToType(_frame1, work_type_, buf_[4], buf_[5]); + + impl(input0_, input1_, u_, v_);//go to tvl1 algorithm + + u_.copyTo(_flow1); + v_.copyTo(_flow2); + } + + void oclOpticalFlow::collectGarbage() + { + for (int i = 0; i < 6; ++i) + buf_[i].release(); + u_.release(); + v_.release(); + flow_.release(); + } +} +/////////////////////////////////////////////////////////////////// +// PyrLK_OCL + +namespace +{ + class PyrLK_OCL : public oclOpticalFlow + { + public: + AlgorithmInfo* info() const; + + PyrLK_OCL(); + + void collectGarbage(); + + protected: + void impl(const ocl::oclMat& input0, const ocl::oclMat& input1, ocl::oclMat& dst1, ocl::oclMat& dst2); + + private: + int winSize_; + int maxLevel_; + int iterations_; + + ocl::PyrLKOpticalFlow alg_; + }; + + CV_INIT_ALGORITHM(PyrLK_OCL, "DenseOpticalFlowExt.PyrLK_OCL", + obj.info()->addParam(obj, "winSize", obj.winSize_); + obj.info()->addParam(obj, "maxLevel", obj.maxLevel_); + obj.info()->addParam(obj, "iterations", obj.iterations_)); + + PyrLK_OCL::PyrLK_OCL() : oclOpticalFlow(CV_8UC1) + { + winSize_ = alg_.winSize.width; + maxLevel_ = alg_.maxLevel; + iterations_ = alg_.iters; + } + + void PyrLK_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) + { + alg_.winSize.width = winSize_; + alg_.winSize.height = winSize_; + alg_.maxLevel = maxLevel_; + alg_.iters = iterations_; + + alg_.dense(input0, input1, dst1, dst2); + } + + void PyrLK_OCL::collectGarbage() + { + alg_.releaseMemory(); + oclOpticalFlow::collectGarbage(); + } +} + +Ptr cv::superres::createOptFlow_PyrLK_OCL() +{ + return new PyrLK_OCL; +} + +/////////////////////////////////////////////////////////////////// +// DualTVL1_OCL + +namespace +{ + class DualTVL1_OCL : public oclOpticalFlow + { + public: + AlgorithmInfo* info() const; + + DualTVL1_OCL(); + + void collectGarbage(); + + protected: + void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2); + + private: + double tau_; + double lambda_; + double theta_; + int nscales_; + int warps_; + double epsilon_; + int iterations_; + bool useInitialFlow_; + + ocl::OpticalFlowDual_TVL1_OCL alg_; + }; + + CV_INIT_ALGORITHM(DualTVL1_OCL, "DenseOpticalFlowExt.DualTVL1_OCL", + obj.info()->addParam(obj, "tau", obj.tau_); + obj.info()->addParam(obj, "lambda", obj.lambda_); + obj.info()->addParam(obj, "theta", obj.theta_); + obj.info()->addParam(obj, "nscales", obj.nscales_); + obj.info()->addParam(obj, "warps", obj.warps_); + obj.info()->addParam(obj, "epsilon", obj.epsilon_); + obj.info()->addParam(obj, "iterations", obj.iterations_); + obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_)); + + DualTVL1_OCL::DualTVL1_OCL() : oclOpticalFlow(CV_8UC1) + { + tau_ = alg_.tau; + lambda_ = alg_.lambda; + theta_ = alg_.theta; + nscales_ = alg_.nscales; + warps_ = alg_.warps; + epsilon_ = alg_.epsilon; + iterations_ = alg_.iterations; + useInitialFlow_ = alg_.useInitialFlow; + } + + void DualTVL1_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) + { + alg_.tau = tau_; + alg_.lambda = lambda_; + alg_.theta = theta_; + alg_.nscales = nscales_; + alg_.warps = warps_; + alg_.epsilon = epsilon_; + alg_.iterations = iterations_; + alg_.useInitialFlow = useInitialFlow_; + + alg_(input0, input1, dst1, dst2); + + } + + void DualTVL1_OCL::collectGarbage() + { + alg_.collectGarbage(); + oclOpticalFlow::collectGarbage(); + } +} + +Ptr cv::superres::createOptFlow_DualTVL1_OCL() +{ + return new DualTVL1_OCL; +} + +#endif \ No newline at end of file diff --git a/modules/superres/src/precomp.hpp b/modules/superres/src/precomp.hpp index 82b591b3c2f207f0bd117f4429a091007e918358..51e6c336fc0d87a6d041d01533a47e773133bd38 100644 --- a/modules/superres/src/precomp.hpp +++ b/modules/superres/src/precomp.hpp @@ -65,6 +65,10 @@ #endif #endif +#ifdef HAVE_OPENCV_OCL + #include "opencv2/ocl/private/util.hpp" +#endif + #ifdef HAVE_OPENCV_HIGHGUI #include "opencv2/highgui/highgui.hpp" #endif diff --git a/modules/superres/test/test_superres.cpp b/modules/superres/test/test_superres.cpp index b4a546c6212698f81aa9f80b8d71cbf30bffda8b..9aa9a44bfbe5831f95105adc93c6c959b0a5bd63 100644 --- a/modules/superres/test/test_superres.cpp +++ b/modules/superres/test/test_superres.cpp @@ -274,5 +274,12 @@ TEST_F(SuperResolution, BTVL1_GPU) { RunTest(cv::superres::createSuperResolution_BTVL1_GPU()); } - #endif +#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL) +TEST_F(SuperResolution, BTVL1_OCL) +{ + std::vector infos; + cv::ocl::getDevice(infos); + RunTest(cv::superres::createSuperResolution_BTVL1_OCL()); +} +#endif