add resize

d23a4f50 · marina.kolpakova · 8ef19e76 · d23a4f50 · d23a4f50
隐藏空白更改
内联并排

Showing with 44 additions and 3 deletions

modules/gpu/src/cuda/icf-sc.cu modules/gpu/src/cuda/icf-sc.cu +35 -0

modules/gpu/src/softcascade.cpp modules/gpu/src/softcascade.cpp +9 -3

未找到文件。
--- a/modules/gpu/src/cuda/icf-sc.cu
+++ b/modules/gpu/src/cuda/icf-sc.cu
@@ -50,6 +50,41 @@
 namespace cv { namespace gpu { namespace device {
 namespace icf {

+    // Box-averages one FACTOR x FACTOR block of bytes.
+    //   ptr   - top-left element of the block to average
+    //   pitch - distance between consecutive block rows, in uchar elements
+    //   y, x  - accepted for interface symmetry with the caller; not read by the body
+    // The block sum is accumulated in an int and divided with integer (truncating)
+    // division by FACTOR * FACTOR before being narrowed to uchar.
+    template <int FACTOR>
+    __device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x)
+    {
+        int sum = 0;
+
+#pragma unroll
+        for (int row = 0; row < FACTOR; ++row)
+        {
+            // Start of the current block row.
+            const uchar* line = ptr + row * pitch;
+#pragma unroll
+            for (int col = 0; col < FACTOR; ++col)
+                sum += line[col];
+        }
+
+        const int area = FACTOR * FACTOR;
+        return static_cast<uchar>(sum / area);
+    }
+
+    // Downscales an 8-bit plane by FACTOR in both dimensions with a box average.
+    // Each thread produces one output pixel (x, y) of `shrank` from the FACTOR x FACTOR
+    // block of `hogluv` whose top-left element is at (FACTOR * x, FACTOR * y).
+    //   hogluv / inPitch  - source plane and its row pitch, in uchar elements
+    //   shrank / outPitch - destination plane and its row pitch, in uchar elements
+    //   outRows / outCols - destination size; threads mapped outside it exit early, so
+    //                       the launch grid may be rounded up to whole blocks
+    // Precondition: the source holds at least FACTOR * outRows rows and
+    // FACTOR * outCols columns, otherwise the block reads run past the source.
+    template<int FACTOR>
+    __global__ void shrink(const uchar* __restrict__ hogluv, const int inPitch,
+                                 uchar* __restrict__ shrank, const int outPitch,
+                           const int outRows, const int outCols)
+    {
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+
+        // The grid is rounded up on the host side; skip the overhanging threads.
+        if (y >= outRows || x >= outCols)
+            return;
+
+        const uchar* ptr = hogluv + (FACTOR * y) * inPitch + (FACTOR * x);
+
+        shrank[y * outPitch + x] = shrink<FACTOR>(ptr, inPitch, y, x);
+    }
+
+    // Host entry point: fills `shrunk` with `channels` downscaled by a factor of 4.
+    // Launches one thread per pixel of `shrunk` in 32x8 blocks and waits for completion.
+    // NOTE(review): the factor is fixed to 4 here, while the caller in softcascade.cpp
+    // sizes `shrunk` from its own `shrinkage` value - confirm the two always agree.
+    void shrink(const cv::gpu::PtrStepSzb& channels, cv::gpu::PtrStepSzb shrunk)
+    {
+        // An empty destination needs no work, and a zero-sized grid is an invalid launch.
+        if (shrunk.cols <= 0 || shrunk.rows <= 0)
+            return;
+
+        const dim3 block(32, 8);
+        // Ceil-divide so sizes that are not multiples of the block are fully covered;
+        // plain division would drop the right/bottom remainder of the output.
+        const dim3 grid((shrunk.cols + block.x - 1) / block.x, (shrunk.rows + block.y - 1) / block.y);
+
+        shrink<4><<<grid, block>>>((uchar*)channels.ptr(), channels.step, (uchar*)shrunk.ptr(), shrunk.step,
+                                   shrunk.rows, shrunk.cols);
+        // Launch-configuration errors are only reported through the last-error state.
+        cudaSafeCall(cudaGetLastError());
+        cudaSafeCall(cudaDeviceSynchronize());
+    }
+
    __device__ __forceinline__ void luv(const float& b, const float& g, const float& r, uchar& __l, uchar& __u, uchar& __v)
    {
        // rgb -> XYZ

--- a/modules/gpu/src/softcascade.cpp
+++ b/modules/gpu/src/softcascade.cpp
@@ -96,6 +96,8 @@ namespace icf {

    void bgr2Luv(const PtrStepSzb& bgr, PtrStepSzb luv);
    void gray2hog(const PtrStepSzb& gray, PtrStepSzb mag, const int bins);
+
+    void shrink(const cv::gpu::PtrStepSzb& channels, cv::gpu::PtrStepSzb shrunk);
 }

 namespace imgproc {
@@ -669,13 +671,15 @@ struct SeparablePreprocessor : public cv::gpu::SCascade::Preprocessor
 {
    SeparablePreprocessor(const int s, const int b) : cv::gpu::SCascade::Preprocessor(), shrinkage(s), bins(b) {}

-    virtual void apply(InputArray _frame, OutputArray _channels, Stream& s = Stream::Null())
+    virtual void apply(InputArray _frame, OutputArray _shrunk, Stream& s = Stream::Null())
    {
        const GpuMat frame = _frame.getGpuMat();
        cv::gpu::GaussianBlur(frame, bgr, cv::Size(3, 3), -1.0);

-        _channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
-        GpuMat channels = _channels.getGpuMat();
+        _shrunk.create(frame.rows * (4 + bins) / shrinkage, frame.cols / shrinkage, CV_8UC1);
+        GpuMat shrunk = _shrunk.getGpuMat();
+
+        channels.create(frame.rows * (4 + bins), frame.cols, CV_8UC1);
        setZero(channels, s);

        cv::gpu::cvtColor(bgr, gray, CV_BGR2GRAY);
@@ -683,6 +687,7 @@ struct SeparablePreprocessor : public cv::gpu::SCascade::Preprocessor

        cv::gpu::GpuMat luv(channels, cv::Rect(0, bgr.rows * (bins + 1), bgr.cols, bgr.rows * 3));
        cv::gpu::device::icf::bgr2Luv(bgr, luv);
+        cv::gpu::device::icf::shrink(channels, shrunk);
    }

 private:
@@ -691,6 +696,7 @@ private:

    GpuMat bgr;
    GpuMat gray;
+    GpuMat channels;
 };

 }