Commit 886c009d authored by Roman Donchenko, committed by OpenCV Buildbot

Merge pull request #1049 from pengx17:2.4_superres_ocl
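This merge adds OpenCL backends to the superres module: opencv_ocl becomes an optional build dependency, the new factory functions createSuperResolution_BTVL1_OCL, createOptFlow_DualTVL1_OCL and createOptFlow_PyrLK_OCL are exported, and OCL kernels plus performance/accuracy tests are added. A minimal usage sketch follows, modelled on the perf test added below; the input file name is a placeholder and an OpenCL-capable device is assumed.

```cpp
#include <vector>
#include "opencv2/ocl/ocl.hpp"
#include "opencv2/superres/superres.hpp"
#include "opencv2/superres/optical_flow.hpp"

using namespace cv;
using namespace cv::superres;

int main()
{
    // Select an OpenCL device, as the new perf/accuracy tests do.
    std::vector<ocl::Info> infos;
    ocl::getDevice(infos);

    // Create the OCL-backed BTV-L1 super-resolution algorithm added in this PR.
    Ptr<SuperResolution> sr = createSuperResolution_BTVL1_OCL();
    sr->set("scale", 2);
    sr->set("iterations", 50);
    sr->set("temporalAreaRadius", 1);
    sr->set("opticalFlow", createOptFlow_DualTVL1_OCL());

    // Feed frames from a video file ("input.avi" is a placeholder);
    // CaptureFrameSource learns to fill an oclMat in this very commit.
    sr->setInput(createFrameSource_Video("input.avi"));

    ocl::oclMat result;
    sr->nextFrame(result); // result holds the first super-resolved frame
    return 0;
}
```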

......@@ -4,4 +4,4 @@ endif()
set(the_description "Super Resolution")
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 -Wundef)
ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui)
ocv_define_module(superres opencv_imgproc opencv_video OPTIONAL opencv_gpu opencv_highgui opencv_ocl)
......@@ -63,10 +63,12 @@ namespace cv
CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1();
CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1_GPU();
CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_DualTVL1_OCL();
CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_Brox_GPU();
CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_PyrLK_GPU();
CV_EXPORTS Ptr<DenseOpticalFlowExt> createOptFlow_PyrLK_OCL();
}
}
......
......@@ -92,6 +92,7 @@ namespace cv
// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.
CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1();
CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1_GPU();
CV_EXPORTS Ptr<SuperResolution> createSuperResolution_BTVL1_OCL();
}
}
......
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#ifdef HAVE_OPENCL
#include "opencv2/ocl/ocl.hpp"
using namespace std;
using namespace testing;
using namespace perf;
using namespace cv;
using namespace cv::superres;
namespace
{
class OneFrameSource_OCL : public FrameSource
{
public:
explicit OneFrameSource_OCL(const ocl::oclMat& frame) : frame_(frame) {}
void nextFrame(OutputArray frame)
{
ocl::getOclMatRef(frame) = frame_;
}
void reset()
{
}
private:
ocl::oclMat frame_;
};
class ZeroOpticalFlowOCL : public DenseOpticalFlowExt
{
public:
void calc(InputArray frame0, InputArray, OutputArray flow1, OutputArray flow2)
{
ocl::oclMat& frame0_ = ocl::getOclMatRef(frame0);
ocl::oclMat& flow1_ = ocl::getOclMatRef(flow1);
ocl::oclMat& flow2_ = ocl::getOclMatRef(flow2);
cv::Size size = frame0_.size();
if(!flow2.needed())
{
flow1_.create(size, CV_32FC2);
flow1_.setTo(Scalar::all(0));
}
else
{
flow1_.create(size, CV_32FC1);
flow2_.create(size, CV_32FC1);
flow1_.setTo(Scalar::all(0));
flow2_.setTo(Scalar::all(0));
}
}
void collectGarbage()
{
}
};
}
PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL,
Combine(Values(szSmall64, szSmall128),
Values(MatType(CV_8UC1), MatType(CV_8UC3))))
{
std::vector<cv::ocl::Info> info;
cv::ocl::getDevice(info);
declare.time(5 * 60);
const Size size = get<0>(GetParam());
const int type = get<1>(GetParam());
Mat frame(size, type);
declare.in(frame, WARMUP_RNG);
ocl::oclMat frame_ocl;
frame_ocl.upload(frame);
const int scale = 2;
const int iterations = 50;
const int temporalAreaRadius = 1;
Ptr<DenseOpticalFlowExt> opticalFlowOcl(new ZeroOpticalFlowOCL);
Ptr<SuperResolution> superRes_ocl = createSuperResolution_BTVL1_OCL();
superRes_ocl->set("scale", scale);
superRes_ocl->set("iterations", iterations);
superRes_ocl->set("temporalAreaRadius", temporalAreaRadius);
superRes_ocl->set("opticalFlow", opticalFlowOcl);
superRes_ocl->setInput(new OneFrameSource_OCL(frame_ocl));
ocl::oclMat dst_ocl;
superRes_ocl->nextFrame(dst_ocl);
TEST_CYCLE_N(10) superRes_ocl->nextFrame(dst_ocl);
frame_ocl.release();
CPU_SANITY_CHECK(dst_ocl);
}
#endif
This diff is collapsed.
......@@ -119,11 +119,23 @@ namespace
{
vc_ >> _frame.getMatRef();
}
else
else if(_frame.kind() == _InputArray::GPU_MAT)
{
vc_ >> frame_;
arrCopy(frame_, _frame);
}
else if(_frame.kind() == _InputArray::OCL_MAT)
{
vc_ >> frame_;
if(!frame_.empty())
{
arrCopy(frame_, _frame);
}
}
else
{
//should never get here
}
}
class VideoFrameSource : public CaptureFrameSource
......
......@@ -125,30 +125,59 @@ namespace
{
src.getGpuMat().copyTo(dst.getGpuMatRef());
}
#ifdef HAVE_OPENCV_OCL
void ocl2mat(InputArray src, OutputArray dst)
{
dst.getMatRef() = (Mat)ocl::getOclMatRef(src);
}
void mat2ocl(InputArray src, OutputArray dst)
{
Mat m = src.getMat();
ocl::getOclMatRef(dst) = (ocl::oclMat)m;
}
void ocl2ocl(InputArray src, OutputArray dst)
{
ocl::getOclMatRef(src).copyTo(ocl::getOclMatRef(dst));
}
#else
void ocl2mat(InputArray, OutputArray)
{
CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");;
}
void mat2ocl(InputArray, OutputArray)
{
CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");;
}
void ocl2ocl(InputArray, OutputArray)
{
CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform");
}
#endif
}
void cv::superres::arrCopy(InputArray src, OutputArray dst)
{
typedef void (*func_t)(InputArray src, OutputArray dst);
static const func_t funcs[10][10] =
static const func_t funcs[11][11] =
{
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu},
{0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr},
{0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr},
{0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu}
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl},
{0, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, mat2mat, arr2buf, arr2tex, mat2gpu, mat2ocl},
{0, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, buf2arr, 0 },
{0, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, tex2arr, 0 },
{0, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, gpu2mat, arr2buf, arr2tex, gpu2gpu, 0 },
{0, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, ocl2mat, 0, 0, 0, ocl2ocl}
};
const int src_kind = src.kind() >> _InputArray::KIND_SHIFT;
const int dst_kind = dst.kind() >> _InputArray::KIND_SHIFT;
CV_DbgAssert( src_kind >= 0 && src_kind < 10 );
CV_DbgAssert( dst_kind >= 0 && dst_kind < 10 );
CV_DbgAssert( src_kind >= 0 && src_kind < 11 );
CV_DbgAssert( dst_kind >= 0 && dst_kind < 11 );
const func_t func = funcs[src_kind][dst_kind];
CV_DbgAssert( func != 0 );
......@@ -190,7 +219,6 @@ namespace
break;
}
}
void convertToDepth(InputArray src, OutputArray dst, int depth)
{
CV_Assert( src.depth() <= CV_64F );
......@@ -271,3 +299,70 @@ GpuMat cv::superres::convertToType(const GpuMat& src, int type, GpuMat& buf0, Gp
convertToDepth(buf0, buf1, depth);
return buf1;
}
#ifdef HAVE_OPENCV_OCL
namespace
{
// TODO(pengx17): remove these overloaded functions once InputArray fully supports oclMat
void convertToCn(const ocl::oclMat& src, ocl::oclMat& dst, int cn)
{
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
CV_Assert( cn == 1 || cn == 3 || cn == 4 );
static const int codes[5][5] =
{
{-1, -1, -1, -1, -1},
{-1, -1, -1, COLOR_GRAY2BGR, COLOR_GRAY2BGRA},
{-1, -1, -1, -1, -1},
{-1, COLOR_BGR2GRAY, -1, -1, COLOR_BGR2BGRA},
{-1, COLOR_BGRA2GRAY, -1, COLOR_BGRA2BGR, -1},
};
const int code = codes[src.channels()][cn];
CV_DbgAssert( code >= 0 );
ocl::cvtColor(src, dst, code, cn);
}
void convertToDepth(const ocl::oclMat& src, ocl::oclMat& dst, int depth)
{
CV_Assert( src.depth() <= CV_64F );
CV_Assert( depth == CV_8U || depth == CV_32F );
static const double maxVals[] =
{
std::numeric_limits<uchar>::max(),
std::numeric_limits<schar>::max(),
std::numeric_limits<ushort>::max(),
std::numeric_limits<short>::max(),
std::numeric_limits<int>::max(),
1.0,
1.0,
};
const double scale = maxVals[depth] / maxVals[src.depth()];
src.convertTo(dst, depth, scale);
}
}
ocl::oclMat cv::superres::convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1)
{
if (src.type() == type)
return src;
const int depth = CV_MAT_DEPTH(type);
const int cn = CV_MAT_CN(type);
if (src.depth() == depth)
{
convertToCn(src, buf0, cn);
return buf0;
}
if (src.channels() == cn)
{
convertToDepth(src, buf1, depth);
return buf1;
}
convertToCn(src, buf0, cn);
convertToDepth(buf0, buf1, depth);
return buf1;
}
#endif
......@@ -45,6 +45,9 @@
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#ifdef HAVE_OPENCV_OCL
#include "opencv2/ocl/ocl.hpp"
#endif
namespace cv
{
......@@ -57,6 +60,10 @@ namespace cv
CV_EXPORTS Mat convertToType(const Mat& src, int type, Mat& buf0, Mat& buf1);
CV_EXPORTS gpu::GpuMat convertToType(const gpu::GpuMat& src, int type, gpu::GpuMat& buf0, gpu::GpuMat& buf1);
#ifdef HAVE_OPENCV_OCL
CV_EXPORTS ocl::oclMat convertToType(const ocl::oclMat& src, int type, ocl::oclMat& buf0, ocl::oclMat& buf1);
#endif
}
}
......
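The new oclMat overload of convertToType declared above mirrors the existing Mat/GpuMat overloads: channels are converted first, then depth, using two caller-supplied scratch buffers. A minimal sketch of a call site (the include path for the internal header is an assumption and the input data is arbitrary):

```cpp
#include "opencv2/ocl/ocl.hpp"
#include "input_array_utility.hpp" // assumed path of the internal superres header declaring convertToType

static void toWorkType()
{
    cv::ocl::oclMat src(480, 640, CV_8UC3), buf0, buf1;

    // convertToCn runs first if the channel count differs, then convertToDepth;
    // the returned oclMat aliases src, buf0, or buf1 depending on what was needed.
    cv::ocl::oclMat work = cv::superres::convertToType(src, CV_32FC1, buf0, buf1);
    (void)work; // work is CV_32FC1, scaled into [0, 1] when converting from CV_8U
}
```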
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Jin Ma jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
__kernel void buildMotionMapsKernel(__global float* forwardMotionX,
__global float* forwardMotionY,
__global float* backwardMotionX,
__global float* backwardMotionY,
__global float* forwardMapX,
__global float* forwardMapY,
__global float* backwardMapX,
__global float* backwardMapY,
int forwardMotionX_row,
int forwardMotionX_col,
int forwardMotionX_step,
int forwardMotionY_step,
int backwardMotionX_step,
int backwardMotionY_step,
int forwardMapX_step,
int forwardMapY_step,
int backwardMapX_step,
int backwardMapY_step
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < forwardMotionX_col && y < forwardMotionX_row)
{
float fx = forwardMotionX[y * forwardMotionX_step + x];
float fy = forwardMotionY[y * forwardMotionY_step + x];
float bx = backwardMotionX[y * backwardMotionX_step + x];
float by = backwardMotionY[y * backwardMotionY_step + x];
forwardMapX[y * forwardMapX_step + x] = x + bx;
forwardMapY[y * forwardMapY_step + x] = y + by;
backwardMapX[y * backwardMapX_step + x] = x + fx;
backwardMapY[y * backwardMapY_step + x] = y + fy;
}
}
__kernel void upscaleKernel(__global float* src,
__global float* dst,
int src_step,
int dst_step,
int src_row,
int src_col,
int scale,
int channels
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src_col && y < src_row)
{
if(channels == 1)
{
dst[y * scale * dst_step + x * scale] = src[y * src_step + x];
}else if(channels == 3)
{
dst[y * channels * scale * dst_step + 3 * x * scale + 0] = src[y * channels * src_step + 3 * x + 0];
dst[y * channels * scale * dst_step + 3 * x * scale + 1] = src[y * channels * src_step + 3 * x + 1];
dst[y * channels * scale * dst_step + 3 * x * scale + 2] = src[y * channels * src_step + 3 * x + 2];
}else
{
dst[y * channels * scale * dst_step + 4 * x * scale + 0] = src[y * channels * src_step + 4 * x + 0];
dst[y * channels * scale * dst_step + 4 * x * scale + 1] = src[y * channels * src_step + 4 * x + 1];
dst[y * channels * scale * dst_step + 4 * x * scale + 2] = src[y * channels * src_step + 4 * x + 2];
dst[y * channels * scale * dst_step + 4 * x * scale + 3] = src[y * channels * src_step + 4 * x + 3];
}
}
}
float diffSign(float a, float b)
{
return a > b ? 1.0f : a < b ? -1.0f : 0.0f;
}
float3 diffSign3(float3 a, float3 b)
{
float3 pos;
pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f;
pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f;
pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f;
return pos;
}
float4 diffSign4(float4 a, float4 b)
{
float4 pos;
pos.x = a.x > b.x ? 1.0f : a.x < b.x ? -1.0f : 0.0f;
pos.y = a.y > b.y ? 1.0f : a.y < b.y ? -1.0f : 0.0f;
pos.z = a.z > b.z ? 1.0f : a.z < b.z ? -1.0f : 0.0f;
pos.w = 0.0f;
return pos;
}
__kernel void diffSignKernel(__global float* src1,
__global float* src2,
__global float* dst,
int src1_row,
int src1_col,
int dst_step,
int src1_step,
int src2_step)
{
int x = get_global_id(0);
int y = get_global_id(1);
if(x < src1_col && y < src1_row)
{
dst[y * dst_step + x] = diffSign(src1[y * src1_step + x], src2[y * src2_step + x]);
}
barrier(CLK_LOCAL_MEM_FENCE);
}
__kernel void calcBtvRegularizationKernel(__global float* src,
__global float* dst,
int src_step,
int dst_step,
int src_row,
int src_col,
int ksize,
int channels,
__global float* c_btvRegWeights
)
{
int x = get_global_id(0) + ksize;
int y = get_global_id(1) + ksize;
if ((y < src_row - ksize) && (x < src_col - ksize))
{
if(channels == 1)
{
const float srcVal = src[y * src_step + x];
float dstVal = 0.0f;
for (int m = 0, count = 0; m <= ksize; ++m)
{
for (int l = ksize; l + m >= 0; --l, ++count)
dstVal = dstVal + c_btvRegWeights[count] * (diffSign(srcVal, src[(y + m) * src_step + (x + l)]) - diffSign(src[(y - m) * src_step + (x - l)], srcVal));
}
dst[y * dst_step + x] = dstVal;
}else if(channels == 3)
{
float3 srcVal;
srcVal.x = src[y * src_step + 3 * x + 0];
srcVal.y = src[y * src_step + 3 * x + 1];
srcVal.z = src[y * src_step + 3 * x + 2];
float3 dstVal;
dstVal.x = 0.0f;
dstVal.y = 0.0f;
dstVal.z = 0.0f;
for (int m = 0, count = 0; m <= ksize; ++m)
{
for (int l = ksize; l + m >= 0; --l, ++count)
{
float3 src1;
src1.x = src[(y + m) * src_step + 3 * (x + l) + 0];
src1.y = src[(y + m) * src_step + 3 * (x + l) + 1];
src1.z = src[(y + m) * src_step + 3 * (x + l) + 2];
float3 src2;
src2.x = src[(y - m) * src_step + 3 * (x - l) + 0];
src2.y = src[(y - m) * src_step + 3 * (x - l) + 1];
src2.z = src[(y - m) * src_step + 3 * (x - l) + 2];
dstVal = dstVal + c_btvRegWeights[count] * (diffSign3(srcVal, src1) - diffSign3(src2, srcVal));
}
}
dst[y * dst_step + 3 * x + 0] = dstVal.x;
dst[y * dst_step + 3 * x + 1] = dstVal.y;
dst[y * dst_step + 3 * x + 2] = dstVal.z;
}else
{
float4 srcVal;
srcVal.x = src[y * src_step + 4 * x + 0]; // r (float)
srcVal.y = src[y * src_step + 4 * x + 1]; // g
srcVal.z = src[y * src_step + 4 * x + 2]; // b
srcVal.w = src[y * src_step + 4 * x + 3]; // a
float4 dstVal;
dstVal.x = 0.0f;
dstVal.y = 0.0f;
dstVal.z = 0.0f;
dstVal.w = 0.0f;
for (int m = 0, count = 0; m <= ksize; ++m)
{
for (int l = ksize; l + m >= 0; --l, ++count)
{
float4 src1;
src1.x = src[(y + m) * src_step + 4 * (x + l) + 0];
src1.y = src[(y + m) * src_step + 4 * (x + l) + 1];
src1.z = src[(y + m) * src_step + 4 * (x + l) + 2];
src1.w = src[(y + m) * src_step + 4 * (x + l) + 3];
float4 src2;
src2.x = src[(y - m) * src_step + 4 * (x - l) + 0];
src2.y = src[(y - m) * src_step + 4 * (x - l) + 1];
src2.z = src[(y - m) * src_step + 4 * (x - l) + 2];
src2.w = src[(y - m) * src_step + 4 * (x - l) + 3];
dstVal = dstVal + c_btvRegWeights[count] * (diffSign4(srcVal, src1) - diffSign4(src2, srcVal));
}
}
dst[y * dst_step + 4 * x + 0] = dstVal.x;
dst[y * dst_step + 4 * x + 1] = dstVal.y;
dst[y * dst_step + 4 * x + 2] = dstVal.z;
dst[y * dst_step + 4 * x + 3] = dstVal.w;
}
}
}
\ No newline at end of file
......@@ -719,3 +719,195 @@ Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_GPU()
}
#endif // HAVE_OPENCV_GPU
#ifdef HAVE_OPENCV_OCL
namespace
{
class oclOpticalFlow : public DenseOpticalFlowExt
{
public:
explicit oclOpticalFlow(int work_type);
void calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2);
void collectGarbage();
protected:
virtual void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2) = 0;
private:
int work_type_;
cv::ocl::oclMat buf_[6];
cv::ocl::oclMat u_, v_, flow_;
};
oclOpticalFlow::oclOpticalFlow(int work_type) : work_type_(work_type)
{
}
void oclOpticalFlow::calc(InputArray frame0, InputArray frame1, OutputArray flow1, OutputArray flow2)
{
ocl::oclMat& _frame0 = ocl::getOclMatRef(frame0);
ocl::oclMat& _frame1 = ocl::getOclMatRef(frame1);
ocl::oclMat& _flow1 = ocl::getOclMatRef(flow1);
ocl::oclMat& _flow2 = ocl::getOclMatRef(flow2);
CV_Assert( _frame1.type() == _frame0.type() );
CV_Assert( _frame1.size() == _frame0.size() );
cv::ocl::oclMat input0_ = convertToType(_frame0, work_type_, buf_[2], buf_[3]);
cv::ocl::oclMat input1_ = convertToType(_frame1, work_type_, buf_[4], buf_[5]);
impl(input0_, input1_, u_, v_); // dispatch to the concrete OCL flow implementation (TV-L1 or PyrLK)
u_.copyTo(_flow1);
v_.copyTo(_flow2);
}
void oclOpticalFlow::collectGarbage()
{
for (int i = 0; i < 6; ++i)
buf_[i].release();
u_.release();
v_.release();
flow_.release();
}
}
///////////////////////////////////////////////////////////////////
// PyrLK_OCL
namespace
{
class PyrLK_OCL : public oclOpticalFlow
{
public:
AlgorithmInfo* info() const;
PyrLK_OCL();
void collectGarbage();
protected:
void impl(const ocl::oclMat& input0, const ocl::oclMat& input1, ocl::oclMat& dst1, ocl::oclMat& dst2);
private:
int winSize_;
int maxLevel_;
int iterations_;
ocl::PyrLKOpticalFlow alg_;
};
CV_INIT_ALGORITHM(PyrLK_OCL, "DenseOpticalFlowExt.PyrLK_OCL",
obj.info()->addParam(obj, "winSize", obj.winSize_);
obj.info()->addParam(obj, "maxLevel", obj.maxLevel_);
obj.info()->addParam(obj, "iterations", obj.iterations_));
PyrLK_OCL::PyrLK_OCL() : oclOpticalFlow(CV_8UC1)
{
winSize_ = alg_.winSize.width;
maxLevel_ = alg_.maxLevel;
iterations_ = alg_.iters;
}
void PyrLK_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
{
alg_.winSize.width = winSize_;
alg_.winSize.height = winSize_;
alg_.maxLevel = maxLevel_;
alg_.iters = iterations_;
alg_.dense(input0, input1, dst1, dst2);
}
void PyrLK_OCL::collectGarbage()
{
alg_.releaseMemory();
oclOpticalFlow::collectGarbage();
}
}
Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_PyrLK_OCL()
{
return new PyrLK_OCL;
}
///////////////////////////////////////////////////////////////////
// DualTVL1_OCL
namespace
{
class DualTVL1_OCL : public oclOpticalFlow
{
public:
AlgorithmInfo* info() const;
DualTVL1_OCL();
void collectGarbage();
protected:
void impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2);
private:
double tau_;
double lambda_;
double theta_;
int nscales_;
int warps_;
double epsilon_;
int iterations_;
bool useInitialFlow_;
ocl::OpticalFlowDual_TVL1_OCL alg_;
};
CV_INIT_ALGORITHM(DualTVL1_OCL, "DenseOpticalFlowExt.DualTVL1_OCL",
obj.info()->addParam(obj, "tau", obj.tau_);
obj.info()->addParam(obj, "lambda", obj.lambda_);
obj.info()->addParam(obj, "theta", obj.theta_);
obj.info()->addParam(obj, "nscales", obj.nscales_);
obj.info()->addParam(obj, "warps", obj.warps_);
obj.info()->addParam(obj, "epsilon", obj.epsilon_);
obj.info()->addParam(obj, "iterations", obj.iterations_);
obj.info()->addParam(obj, "useInitialFlow", obj.useInitialFlow_));
DualTVL1_OCL::DualTVL1_OCL() : oclOpticalFlow(CV_8UC1)
{
tau_ = alg_.tau;
lambda_ = alg_.lambda;
theta_ = alg_.theta;
nscales_ = alg_.nscales;
warps_ = alg_.warps;
epsilon_ = alg_.epsilon;
iterations_ = alg_.iterations;
useInitialFlow_ = alg_.useInitialFlow;
}
void DualTVL1_OCL::impl(const cv::ocl::oclMat& input0, const cv::ocl::oclMat& input1, cv::ocl::oclMat& dst1, cv::ocl::oclMat& dst2)
{
alg_.tau = tau_;
alg_.lambda = lambda_;
alg_.theta = theta_;
alg_.nscales = nscales_;
alg_.warps = warps_;
alg_.epsilon = epsilon_;
alg_.iterations = iterations_;
alg_.useInitialFlow = useInitialFlow_;
alg_(input0, input1, dst1, dst2);
}
void DualTVL1_OCL::collectGarbage()
{
alg_.collectGarbage();
oclOpticalFlow::collectGarbage();
}
}
Ptr<DenseOpticalFlowExt> cv::superres::createOptFlow_DualTVL1_OCL()
{
return new DualTVL1_OCL;
}
#endif
\ No newline at end of file
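Both new flow wrappers are registered through CV_INIT_ALGORITHM, so their parameters are reachable via the generic Algorithm get/set interface, just like the existing GPU variants. A short sketch; the parameter names come from the registrations above, and the values are illustrative:

```cpp
#include "opencv2/superres/superres.hpp"
#include "opencv2/superres/optical_flow.hpp"

void configureOclFlow()
{
    cv::Ptr<cv::superres::DenseOpticalFlowExt> flow =
            cv::superres::createOptFlow_DualTVL1_OCL();
    flow->set("warps", 3);        // registered above via CV_INIT_ALGORITHM
    flow->set("iterations", 200);

    cv::Ptr<cv::superres::SuperResolution> sr =
            cv::superres::createSuperResolution_BTVL1_OCL();
    sr->set("opticalFlow", flow); // plug the OCL flow into the OCL BTV-L1 pipeline
}
```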
......@@ -65,6 +65,10 @@
#endif
#endif
#ifdef HAVE_OPENCV_OCL
#include "opencv2/ocl/private/util.hpp"
#endif
#ifdef HAVE_OPENCV_HIGHGUI
#include "opencv2/highgui/highgui.hpp"
#endif
......
......@@ -274,5 +274,12 @@ TEST_F(SuperResolution, BTVL1_GPU)
{
RunTest(cv::superres::createSuperResolution_BTVL1_GPU());
}
#endif
#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL)
TEST_F(SuperResolution, BTVL1_OCL)
{
std::vector<cv::ocl::Info> infos;
cv::ocl::getDevice(infos);
RunTest(cv::superres::createSuperResolution_BTVL1_OCL());
}
#endif