added buffered version of gpu::integral function and updated performance test...

Added a buffered version of the gpu::integral function and updated the performance test (it is still too slow)

added buffered version of gpu::integral function and updated performance test...
Added a buffered version of the gpu::integral function and updated the performance test (it is still too slow)
7a29d96c · Alexey Spizhevoy · 1748f65f · 7a29d96c · 7a29d96c · 7a29d96c
3 changed files
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -650,6 +650,9 @@ namespace cv
        //! supports only CV_8UC1 source type
        CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum);

+        //! buffered version of integral(src, sum): uses the caller-supplied buffer as scratch memory (CV_8UC1 source only)
+        CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer);
+
        //! computes the integral image and integral for the squared image
        //! sum will have CV_32S type, sqsum - CV32F type
        //! supports only CV_8UC1 source type

--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -61,6 +61,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
 void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
 void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
 void cv::gpu::integral(const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }
@@ -545,6 +546,12 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
 // integral

 void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
+{
+    GpuMat buffer;
+    integralBuffered(src, sum, buffer);
+}
+
+void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
 {
    CV_Assert(src.type() == CV_8UC1);

@@ -555,10 +562,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
    roiSize.height = src.rows;

    NppSt32u bufSize;
-
    nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) );
-
-    GpuMat buffer(1, bufSize, CV_8UC1);
+    ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);

    nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step, 
        sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) );

--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -170,24 +170,26 @@ TEST(cornerHarris)
 TEST(integral)
 {
    Mat src, sum;
-    gpu::GpuMat d_src, d_sum;
+    gpu::GpuMat d_src, d_sum, d_buf;

-    for (int size = 1000; size <= 8000; size *= 2)
+    int size = 4000;
+
+    gen(src, size, size, CV_8U, 0, 256);
+    sum.create(size + 1, size + 1, CV_32S);
+
+    d_src = src;
+    d_sum.create(size + 1, size + 1, CV_32S);
+
+    for (int i = 0; i < 5; ++i)
    {
        SUBTEST << "size " << size << ", 8U";

-        gen(src, size, size, CV_8U, 0, 256);
-        sum.create(size + 1, size + 1, CV_32S);
-
        CPU_ON;
        integral(src, sum);
        CPU_OFF;

-        d_src = src;
-        d_sum.create(size + 1, size + 1, CV_32S);
-
        GPU_ON;
-        gpu::integral(d_src, d_sum);
+        gpu::integralBuffered(d_src, d_sum, d_buf);
        GPU_OFF;
    }
 }