Merge pull request #1740 from ilya-lavrenov:ocl_corners

a3fa7a24 · Andrey Pavlenko · OpenCV Buildbot · 95767676 · e7e7e04d · a3fa7a24
7 changed files
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -48,22 +48,22 @@
 #define T_MEAN_VAR float
 #define CONVERT_TYPE convert_uchar_sat
 #define F_ZERO (0.0f)
-float cvt(uchar val)
+inline float cvt(uchar val)
 {
    return val;
 }

-float sqr(float val)
+inline float sqr(float val)
 {
    return val * val;
 }

-float sum(float val)
+inline float sum(float val)
 {
    return val;
 }

-float clamp1(float var, float learningRate, float diff, float minVar)
+static float clamp1(float var, float learningRate, float diff, float minVar)
 {
    return fmax(var + learningRate * (diff * diff - var), minVar);
 }
@@ -72,7 +72,7 @@ float clamp1(float var, float learningRate, float diff, float minVar)
 #define T_MEAN_VAR float4
 #define CONVERT_TYPE convert_uchar4_sat
 #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
-float4 cvt(const uchar4 val)
+inline float4 cvt(const uchar4 val)
 {
    float4 result;
    result.x = val.x;
@@ -83,17 +83,17 @@ float4 cvt(const uchar4 val)
    return result;
 }

-float sqr(const float4 val)
+inline float sqr(const float4 val)
 {
    return val.x * val.x + val.y * val.y + val.z * val.z;
 }

-float sum(const float4 val)
+inline float sum(const float4 val)
 {
    return (val.x + val.y + val.z);
 }

-float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
 {
    float4 result;
    result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
@@ -116,14 +116,14 @@ typedef struct
    uchar c_shadowVal;
 }con_srtuct_t;

-void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
 {
    float val = ptr[(k * rows + y) * ptr_step + x];
    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
 }

-void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
 {
    float4 val = ptr[(k * rows + y) * ptr_step + x];
    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
@@ -412,7 +412,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __glob

            if (_weight < -prune)
            {
-                _weight = 0.0;
+                _weight = 0.0f;
                nmodes--;
            }


--- a/modules/ocl/src/opencl/haarobjectdetect.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect.cl
@@ -292,7 +292,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
    for(int scalei = 0; scalei <loopcount; scalei++)
    {
        int4 scaleinfo1= info[scalei];
-        int width = (scaleinfo1.x & 0xffff0000) >> 16;
        int height = scaleinfo1.x & 0xffff;
        int grpnumperline =(scaleinfo1.y & 0xffff0000) >> 16;
        int totalgrp = scaleinfo1.y & 0xffff;

--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -136,8 +136,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
    {
        int4 scaleinfo1;
        scaleinfo1 = info[scalei];
-        int width = (scaleinfo1.x & 0xffff0000) >> 16;
-        int height = scaleinfo1.x & 0xffff;
        int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
        int totalgrp = scaleinfo1.y & 0xffff;
        float factor = as_float(scaleinfo1.w);

--- a/modules/ocl/src/opencl/imgproc_calcHarris.cl
+++ b/modules/ocl/src/opencl/imgproc_calcHarris.cl
@@ -125,10 +125,12 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
        int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
        float dx_s = dx_con ? Dx[indexDx] : 0.0f;
        dx_data[i] = dx_s;
+
        bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
        int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dx_con ? Dy[indexDy] : 0.0f;
+        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
        dy_data[i] = dy_s;
+
        data[0][i] = dx_data[i] * dx_data[i];
        data[1][i] = dx_data[i] * dy_data[i];
        data[2][i] = dy_data[i] * dy_data[i];

--- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
+++ b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
@@ -124,10 +124,12 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
        int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
        float dx_s = dx_con ? Dx[indexDx] : 0.0f;
        dx_data[i] = dx_s;
+
        bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
        int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dx_con ? Dy[indexDy] : 0.0f;
+        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
        dy_data[i] = dy_s;
+
        data[0][i] = dx_data[i] * dx_data[i];
        data[1][i] = dx_data[i] * dy_data[i];
        data[2][i] = dy_data[i] * dy_data[i];

--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -69,23 +69,16 @@ __global float* dx, __global float* dy, int dx_step)

 }

-float bicubicCoeff(float x_)
+static float bicubicCoeff(float x_)
 {

    float x = fabs(x_);
    if (x <= 1.0f)
-    {
        return x * x * (1.5f * x - 2.5f) + 1.0f;
-    }
    else if (x < 2.0f)
-    {
        return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
-    }
    else
-    {
        return 0.0f;
-    }
-
 }

 __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
@@ -170,12 +163,10 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c

 }

-float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
+static float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
 {
    int i0 = clamp(x, 0, cols - 1);
    int j0 = clamp(y, 0, rows - 1);
-    int i1 = clamp(x + 1, 0, cols - 1);
-    int j1 = clamp(y + 1, 0, rows - 1);

    return image[j0 * elemCntPerRow + i0];
 }
@@ -303,7 +294,7 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,

 }

-float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
+static float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
 {

    if (x > 0 && y > 0)
@@ -407,5 +398,4 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
            error[y * I1wx_step + x] = n1 + n2;
        }
    }
-
 }
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -212,11 +212,19 @@ struct CornerTestBase :
        Mat image = readImageType("gpu/stereobm/aloe-L.png", type);
        ASSERT_FALSE(image.empty());

+        bool isFP = CV_MAT_DEPTH(type) >= CV_32F;
+        float val = 255.0f;
+        if (isFP)
+        {
+            image.convertTo(image, -1, 1.0 / 255);
+            val /= 255.0f;
+        }
+
        Size roiSize = image.size();
        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);

        Size wholeSize = Size(roiSize.width + srcBorder.lef + srcBorder.rig, roiSize.height + srcBorder.top + srcBorder.bot);
-        src = randomMat(wholeSize, type, -255, 255, false);
+        src = randomMat(wholeSize, type, -val, val, false);
        src_roi = src(Rect(srcBorder.lef, srcBorder.top, roiSize.width, roiSize.height));
        image.copyTo(src_roi);

@@ -527,7 +535,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
                            Bool()));

 INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
-                            Values((MatType)CV_8UC1), // TODO does not work properly with CV_32FC1
+                            Values((MatType)CV_8UC1, CV_32FC1),
                            Values(3, 5),
                            Values( (int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT_101),
                            Bool()));