added connectivityMask calculation function

0bf10c9a · marina.kolpakova · a9764dd9 · 0bf10c9a · 0bf10c9a · 0bf10c9a
4 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -917,8 +917,11 @@ CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTra
                         GpuMat& labels,
                         GpuMat& buf, Stream& stream = Stream::Null());

+//! computes the mask for generalized flood fill components labeling.
+CV_EXPORTS void connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
+
 //! performs connected componnents labeling.
-CV_EXPORTS void labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi, Stream& stream = Stream::Null());
+CV_EXPORTS void labelComponents(const GpuMat& mask, GpuMat& components, Stream& stream = Stream::Null());

 ////////////////////////////////// Histograms //////////////////////////////////


--- a/modules/gpu/src/cuda/ccomponetns.cu
+++ b/modules/gpu/src/cuda/ccomponetns.cu
@@ -65,32 +65,108 @@ namespace cv { namespace gpu { namespace device
            TILE_ROWS = CTA_SIZE_Y * TPB_Y
        };

+        template<typename T> struct IntervalsTraits
+        {
+            typedef T elem_type;
+        };
+
+        template<> struct IntervalsTraits<unsigned char>
+        {
+            typedef int dist_type;
+            enum {ch = 1};
+        };
+
+        template<> struct IntervalsTraits<uchar3>
+        {
+            typedef int3 dist_type;
+            enum {ch = 3};
+        };
+
+        template<> struct IntervalsTraits<uchar4>
+        {
+            typedef int4 dist_type; // four-component distance type, matching ch == 4 (as for ushort4)
+            enum {ch = 4};
+        };
+
+        template<> struct IntervalsTraits<unsigned short>
+        {
+            typedef int dist_type;
+            enum {ch = 1};
+        };
+
+        template<> struct IntervalsTraits<ushort3>
+        {
+            typedef int3 dist_type;
+            enum {ch = 3};
+        };
+
+        template<> struct IntervalsTraits<ushort4>
+        {
+            typedef int4 dist_type;
+            enum {ch = 4};
+        };
+
+        template<> struct IntervalsTraits<float>
+        {
+            typedef float dist_type;
+            enum {ch = 1};
+        };
+
+        template<> struct IntervalsTraits<int>
+        {
+            typedef int dist_type;
+            enum {ch = 1};
+        };
+
        typedef unsigned char component;
        enum Edges { UP = 1, DOWN = 2, LEFT = 4, RIGHT = 8, EMPTY = 0xF0 };

-        template<typename T>
-        struct InInterval
+        template<typename T, int CH> struct InInterval {};
+
+        template<typename T> struct InInterval<T, 1>
        {
-            __host__ __device__ __forceinline__ InInterval(const T& _lo, const T& _hi) : lo(-_lo), hi(_hi) {};
+            __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi) : lo(-_lo.x), hi(_hi.x) {};
            T lo, hi;

-            __device__ __forceinline__ bool operator() (const T& a, const T& b) const
+            template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
            {
                T d = a - b;
                return lo <= d && d <= hi;
            }
+        };
+
+        template<typename T> struct InInterval<T, 3>
+        {
+            __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi){}; // NOTE(review): _lo/_hi are ignored, so lo and hi are never initialized
+            T lo, hi;
+
+            template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
+            {
+                return true; // placeholder: no comparison is done, every pair of 3-channel values is reported as connected
+            }
+        };
+
+        template<typename T> struct InInterval<T, 4>
+        {
+            __host__ __device__ __forceinline__ InInterval(const float4& _lo, const float4& _hi){}; // NOTE(review): _lo/_hi are ignored, so lo and hi are never initialized
+            T lo, hi;

+            template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
+            {
+                return true; // placeholder: no comparison is done, every pair of 4-channel values is reported as connected
+            }
         };

-        template<typename F>
-        __global__ void computeConnectivity(const DevMem2D image, DevMem2D components, F connected)
+
+        template<typename T, typename F>
+        __global__ void computeConnectivity(const DevMem2D_<T> image, DevMem2D components, F connected)
        {
            int x = threadIdx.x + blockIdx.x * blockDim.x;
            int y = threadIdx.y + blockIdx.y * blockDim.y;

            if (x >= image.cols || y >= image.rows) return;

-            int intensity = image(y, x);
+            T intensity = image(y, x);
            component c = 0;

            if ( x > 0 && connected(intensity, image(y, x - 1)))
@@ -108,18 +184,31 @@ namespace cv { namespace gpu { namespace device
            components(y, x) = c;
        }

-        void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi, cudaStream_t stream)
+        template< typename T>
+        void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream)
        {
            dim3 block(CTA_SIZE_X, CTA_SIZE_Y);
            dim3 grid(divUp(image.cols, block.x), divUp(image.rows, block.y));
-            InInterval<int> inInt(lo, hi);
-            computeConnectivity<InInterval<int> ><<<grid, block, 0, stream>>>(image, edges, inInt);
+
+            typedef InInterval<typename IntervalsTraits<T>::dist_type, IntervalsTraits<T>::ch> Int_t;
+
+            Int_t inInt(lo, hi);
+            computeConnectivity<T, Int_t><<<grid, block, 0, stream>>>(static_cast<const DevMem2D_<T> >(image), edges, inInt);

            cudaSafeCall( cudaGetLastError() );
            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }

+        template void computeEdges<uchar>  (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<uchar3> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<uchar4> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<ushort> (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<ushort3>(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<ushort4>(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<int>    (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+        template void computeEdges<float>  (const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+
        __global__ void lableTiles(const DevMem2D edges, DevMem2Di comps)
        {
            int x = threadIdx.x + blockIdx.x * TILE_COLS;

--- a/modules/gpu/src/graphcuts.cpp
+++ b/modules/gpu/src/graphcuts.cpp
@@ -47,7 +47,8 @@
 void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
 void cv::gpu::graphcut(GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }

-void cv::gpu::labelComponents(const GpuMat&, GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); }
+void cv::gpu::connectivityMask(const GpuMat&, GpuMat&, const cv::Scalar&, const cv::Scalar&, Stream&) { throw_nogpu(); }
+void cv::gpu::labelComponents(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } // parameters unnamed: unused in this stub, same as the stubs above

 #else /* !defined (HAVE_CUDA) */

@@ -56,30 +57,66 @@ namespace cv { namespace gpu { namespace device
    namespace ccl
    {
        void labelComponents(const DevMem2D& edges, DevMem2Di comps, cudaStream_t stream);
-        void computeEdges(const DevMem2D& image, DevMem2D edges, const int lo, const int hi, cudaStream_t stream);
+
+        template<typename T>
+        void computeEdges(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
    }
 }}}

-void cv::gpu::labelComponents(const GpuMat& image, GpuMat& mask, GpuMat& components, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
+
+float4 scalarToCudaType(const cv::Scalar& in) // copies the four components of `in` into a float4 (x, y, z, w)
+{
+    float4 res;
+    res.x = static_cast<float>(in[0]); res.y = static_cast<float>(in[1]); res.z = static_cast<float>(in[2]); res.w = static_cast<float>(in[3]); // explicit narrowing to float
+    return res;
+}
+
+
+void cv::gpu::connectivityMask(const GpuMat& image, GpuMat& mask, const cv::Scalar& lo, const cv::Scalar& hi, Stream& s)
 {
    CV_Assert(!image.empty());

-    int type = image.type();
-    CV_Assert(type == CV_8UC1);
+    int ch = image.channels();
+    CV_Assert(ch <= 4);
+
+    int depth = image.depth();
+
+    typedef void (*func_t)(const DevMem2D& image, DevMem2D edges, const float4& lo, const float4& hi, cudaStream_t stream);
+
+    static const func_t suppotLookup[8][4] =
+    {   // rows: image depth; columns: 1, 2, 3, 4 channels; 0 marks an unsupported combination
+        { device::ccl::computeEdges<uchar>,  0,  device::ccl::computeEdges<uchar3>,  device::ccl::computeEdges<uchar4>  },// CV_8U
+        { 0,                                 0,  0,                                  0                                  },// CV_8S
+        { device::ccl::computeEdges<ushort>, 0,  device::ccl::computeEdges<ushort3>, device::ccl::computeEdges<ushort4> },// CV_16U
+        { 0,                                 0,  0,                                  0                                  },// CV_16S
+        { device::ccl::computeEdges<int>,    0,  0,                                  0                                  },// CV_32S
+        { device::ccl::computeEdges<float>,  0,  0,                                  0                                  },// CV_32F
+        { 0,                                 0,  0,                                  0                                  },// CV_64F
+        { 0,                                 0,  0,                                  0                                  } // CV_USRTYPE1
+    };
+
+    func_t f = suppotLookup[depth][ch - 1];
+    CV_Assert(f);

    if (image.size() != mask.size() || mask.type() != CV_8UC1)
        mask.create(image.size(), CV_8UC1);

-    if (image.size() != components.size() || components.type() != CV_32SC1)
-        components.create(image.size(), CV_32SC1);
-
    cudaStream_t stream = StreamAccessor::getStream(s);
+    float4 culo = scalarToCudaType(lo), cuhi = scalarToCudaType(hi);
+    f(image, mask, culo, cuhi, stream);
+}

-    device::ccl::computeEdges(image, mask, lo[0], hi[0], stream);
+void cv::gpu::labelComponents(const GpuMat& mask, GpuMat& components, Stream& s)
+{
+    CV_Assert(!mask.empty() && mask.type() == CV_8UC1); // type() includes the channel count, so compare against the full type
+
+    if (mask.size() != components.size() || components.type() != CV_32SC1)
+        components.create(mask.size(), CV_32SC1); // (re)allocate the output when its size or type does not match
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
     device::ccl::labelComponents(mask, components, stream);
 }

-
 namespace
 {
    typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppState, Npp8u* pDeviceMem);

--- a/modules/gpu/test/test_labeling.cpp
+++ b/modules/gpu/test/test_labeling.cpp
@@ -70,7 +70,9 @@ TEST_P(Labeling, ConnectedComponents)
    cv::gpu::GpuMat components;
    components.create(image.rows, image.cols, CV_32SC1);

-    cv::gpu::labelComponents(cv::gpu::GpuMat(image), mask, components, cv::Scalar::all(0), cv::Scalar::all(2));
+    cv::gpu::connectivityMask(cv::gpu::GpuMat(image), mask, cv::Scalar::all(0), cv::Scalar::all(2));
+
+    cv::gpu::labelComponents(mask, components);

    // std::cout << cv::Mat(components) << std::endl;
    // cv::imshow("test", image);