From d23a4f50bd317b105dd80b1c6bd2fd599c80ab9e Mon Sep 17 00:00:00 2001
From: "marina.kolpakova" <marina.kolpakova@itseez.com>
Date: Wed, 28 Nov 2012 21:32:35 +0400
Subject: [PATCH] add resize

---
 modules/gpu/src/cuda/icf-sc.cu  | 35 +++++++++++++++++++++++++++++++++
 modules/gpu/src/softcascade.cpp | 12 ++++++++---
 2 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/modules/gpu/src/cuda/icf-sc.cu b/modules/gpu/src/cuda/icf-sc.cu
index 1b9c02eb2b..58d6883e89 100644
--- a/modules/gpu/src/cuda/icf-sc.cu
+++ b/modules/gpu/src/cuda/icf-sc.cu
@@ -50,6 +50,41 @@
 namespace cv { namespace gpu { namespace device {
 namespace icf {
 
+    // Device helper: truncated integer mean of the FACTOR x FACTOR uchar tile whose top-left is ptr; tile rows are pitch elements apart.
+    template <int FACTOR>
+    __device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x)
+    {
+        int sum = 0; // int accumulator: FACTOR * FACTOR uchar values do not fit in a uchar
+#pragma unroll
+        for(int row = 0; row < FACTOR; ++row)
+        {
+            const uchar* line = ptr + row * pitch; // start of the current tile row
+#pragma unroll
+            for(int col = 0; col < FACTOR; ++col) sum += line[col];
+        }
+        return static_cast<uchar>(sum / (FACTOR * FACTOR)); // y and x are accepted but never read here
+    }
+
+    // Kernel: thread (x, y) stores in shrank[y * outPitch + x] the mean of the FACTOR x FACTOR tile of hogluv at (FACTOR * x, FACTOR * y); rows x cols is the output size.
+    template<int FACTOR>
+    __global__ void shrink(const uchar* __restrict__ hogluv, const int inPitch,
+                                 uchar* __restrict__ shrank, const int outPitch, const int rows, const int cols)
+    {
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        if (y >= rows || x >= cols) return; // the grid is rounded up, so threads past the output edge must not touch memory
+        shrank[y * outPitch + x] = shrink<FACTOR>(hogluv + (FACTOR * y) * inPitch + (FACTOR * x), inPitch, y, x);
+    }
+
+    // Host launcher: box-filters channels into shrunk and waits for completion. NOTE(review): factor is fixed at 4 whatever the channels/shrunk size ratio - confirm callers.
+    void shrink(const cv::gpu::PtrStepSzb& channels, cv::gpu::PtrStepSzb shrunk)
+    {
+        const dim3 block(32, 8), grid((shrunk.cols + block.x - 1) / block.x, (shrunk.rows + block.y - 1) / block.y); // ceil-div: edge pixels get a thread too
+        shrink<4><<<grid, block>>>((uchar*)channels.ptr(), channels.step, (uchar*)shrunk.ptr(), shrunk.step, shrunk.rows, shrunk.cols);
+        cudaSafeCall(cudaGetLastError()); // a kernel launch returns no status itself; query launch-configuration errors explicitly
+        cudaSafeCall(cudaDeviceSynchronize());
+    }
+
     __device__ __forceinline__ void luv(const float& b, const float& g, const float& r, uchar& __l, uchar& __u, uchar& __v)
     {
         // rgb -> XYZ
diff --git a/modules/gpu/src/softcascade.cpp b/modules/gpu/src/softcascade.cpp
index 2c7d42b9e3..6d29a1d94a 100644
--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -96,6 +96,8 @@ namespace icf {
 
     void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv);
     void gray2hog(const PtrStepSzb& gray, PtrStepSzb mag, const int bins);
+
+    void shrink(const cv::gpu::PtrStepSzb& channels, cv::gpu::PtrStepSzb shrunk);
 }
 
 namespace imgproc {
@@ -669,13 +671,15 @@ struct SeparablePreprocessor : public cv::gpu::SCascade::Preprocessor
 {
     SeparablePreprocessor(const int s, const int b) : cv::gpu::SCascade::Preprocessor(), shrinkage(s), bins(b) {}
 
-    virtual void apply(InputArray _frame, OutputArray _channels, Stream& s = Stream::Null())
+    virtual void apply(InputArray _frame, OutputArray _shrunk, Stream& s = Stream::Null())
     {
         const GpuMat frame = _frame.getGpuMat();
         cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);
 
-        _channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
-        GpuMat channels = _channels.getGpuMat();
+        _shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1);
+        GpuMat shrunk = _shrunk.getGpuMat();
+
+        channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
         setZero(channels, s);
 
         cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
@@ -683,6 +687,7 @@ struct SeparablePreprocessor : public cv::gpu::SCascade::Preprocessor
 
         cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));
         cv::gpu::device::icf::bgr2Luv(bgr, luv);
+        cv::gpu::device::icf::shrink(channels, shrunk);
     }
 
 private:
@@ -691,6 +696,7 @@ private:
 
     GpuMat bgr;
     GpuMat gray;
+    GpuMat channels;
 };
 
 }
-- 
GitLab