From 0e43976259d5f6e7340745c119a0fac1029fa2ac Mon Sep 17 00:00:00 2001 From: Anatoly Baksheev Date: Wed, 24 Nov 2010 09:43:17 +0000 Subject: [PATCH] 1) more convenient naming for samples gpu 2) added mask support to device 'transform' function 3) sample hog gpu: waitKey(1) -> waitKey(3), in other case image is not displayed. --- modules/gpu/src/cuda/mathfunc.cu | 2 +- modules/gpu/src/cuda/transform.hpp | 40 ++++++++++++++++++++++-------- samples/gpu/CMakeLists.txt | 4 +-- samples/gpu/hog.cpp | 2 +- 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index 3f3fd71eb5..d69a32a064 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -217,7 +217,7 @@ namespace cv { namespace gpu { namespace mathfunc template struct NotEqual { - __device__ uchar operator()(const T1& src1, const T2& src2, int, int) + __device__ uchar operator()(const T1& src1, const T2& src2) { return static_cast(static_cast(src1 != src2) * 255); } diff --git a/modules/gpu/src/cuda/transform.hpp b/modules/gpu/src/cuda/transform.hpp index 544567449a..af516b3d92 100644 --- a/modules/gpu/src/cuda/transform.hpp +++ b/modules/gpu/src/cuda/transform.hpp @@ -47,31 +47,49 @@ namespace cv { namespace gpu { namespace device { - template - static __global__ void transform(const DevMem2D_ src, PtrStep_ dst, UnOp op) + //! Mask accessor + template struct MaskReader_ + { + PtrStep_ mask; + explicit MaskReader_(PtrStep_ mask): mask(mask) {} + + __device__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; } + }; + + //! Stub mask accessor + struct NoMask + { + __device__ bool operator()(int y, int x) const { return true; } + }; + + //! Transform kernels + + template + static __global__ void transform(const DevMem2D_ src, PtrStep_ dst, const Mask mask, UnOp op) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x < src.cols && y < src.rows) + if (x < src.cols && y < src.rows && mask(y, x)) { T src_data = src.ptr(y)[x]; - dst.ptr(y)[x] = op(src_data, x, y); + dst.ptr(y)[x] = op(src_data); } } - template - static __global__ void transform(const DevMem2D_ src1, const PtrStep_ src2, PtrStep_ dst, BinOp op) + + template + static __global__ void transform(const DevMem2D_ src1, const PtrStep_ src2, PtrStep_ dst, const Mask mask, BinOp op) { const int x = blockDim.x * blockIdx.x + threadIdx.x; const int y = blockDim.y * blockIdx.y + threadIdx.y; - if (x < src1.cols && y < src1.rows) + if (x < src1.cols && y < src1.rows && mask(y, x)) { T1 src1_data = src1.ptr(y)[x]; T2 src2_data = src2.ptr(y)[x]; - dst.ptr(y)[x] = op(src1_data, src2_data, x, y); + dst.ptr(y)[x] = op(src1_data, src2_data); } - } + } }}} namespace cv @@ -87,7 +105,7 @@ namespace cv grid.x = divUp(src.cols, threads.x); grid.y = divUp(src.rows, threads.y); - device::transform<<>>(src, dst, op); + device::transform<<>>(src, dst, device::NoMask(), op); if (stream == 0) cudaSafeCall( cudaThreadSynchronize() ); @@ -101,7 +119,7 @@ namespace cv grid.x = divUp(src1.cols, threads.x); grid.y = divUp(src1.rows, threads.y); - device::transform<<>>(src1, src2, dst, op); + device::transform<<>>(src1, src2, dst, device::NoMask(), op); if (stream == 0) cudaSafeCall( cudaThreadSynchronize() ); diff --git a/samples/gpu/CMakeLists.txt b/samples/gpu/CMakeLists.txt index c658e8c7da..041c0cf27a 100644 --- a/samples/gpu/CMakeLists.txt +++ b/samples/gpu/CMakeLists.txt @@ -24,11 +24,11 @@ if (BUILD_EXAMPLES) # Define executable targets # --------------------------------------------- MACRO(MY_DEFINE_EXAMPLE name srcs) - set(the_target "example_${name}") + set(the_target "example_gpu_${name}") add_executable(${the_target} ${srcs}) set_target_properties(${the_target} PROPERTIES OUTPUT_NAME "${name}" - PROJECT_LABEL "(EXAMPLE) ${name}") + PROJECT_LABEL "(EXAMPLE_GPU) ${name}") add_dependencies(${the_target} opencv_core opencv_flann opencv_imgproc opencv_highgui opencv_ml opencv_video opencv_objdetect opencv_features2d opencv_calib3d opencv_legacy opencv_contrib opencv_gpu) diff --git a/samples/gpu/hog.cpp b/samples/gpu/hog.cpp index ed8823ec7d..34e7c58d8e 100644 --- a/samples/gpu/hog.cpp +++ b/samples/gpu/hog.cpp @@ -283,7 +283,7 @@ void App::RunOpencvGui() // Show results putText(img_to_show, GetPerformanceSummary(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar(0, 0, 255), 2); imshow("opencv_gpu_hog", img_to_show); - HandleKey((char)waitKey(1)); + HandleKey((char)waitKey(3)); if (settings.src_is_video) { -- GitLab