From 79b1eec3d38cc8c1335e6b7e0fd557d85ee62946 Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Fri, 10 Dec 2010 14:02:41 +0000 Subject: [PATCH] minor refactoring of gpu module --- modules/gpu/src/cuda/mathfunc.cu | 60 +++++++++++++++++--------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/modules/gpu/src/cuda/mathfunc.cu b/modules/gpu/src/cuda/mathfunc.cu index 3d9ceda5bd..b06bef0586 100644 --- a/modules/gpu/src/cuda/mathfunc.cu +++ b/modules/gpu/src/cuda/mathfunc.cu @@ -1510,33 +1510,6 @@ namespace cv { namespace gpu { namespace mathfunc } - template - T sum_caller(const DevMem2D_ src, PtrStep buf) - { - dim3 threads, grid; - estimate_thread_cfg(src.cols, src.rows, threads, grid); - set_kernel_consts(src.cols, src.rows, threads, grid); - - T* buf_ = (T*)buf.ptr(0); - - sum_kernel<<>>(src, buf_); - cudaSafeCall(cudaThreadSynchronize()); - - T sum; - cudaSafeCall(cudaMemcpy(&sum, buf_, sizeof(T), cudaMemcpyDeviceToHost)); - - return sum; - } - - template unsigned char sum_caller(const DevMem2D_, PtrStep); - template char sum_caller(const DevMem2D_, PtrStep); - template unsigned short sum_caller(const DevMem2D_, PtrStep); - template short sum_caller(const DevMem2D_, PtrStep); - template int sum_caller(const DevMem2D_, PtrStep); - template float sum_caller(const DevMem2D_, PtrStep); - template double sum_caller(const DevMem2D_, PtrStep); - - template __global__ void sum_pass2_kernel(T* result, int size) { @@ -1550,10 +1523,14 @@ namespace cv { namespace gpu { namespace mathfunc result[0] = smem[0]; } + } // namespace sum + template T sum_multipass_caller(const DevMem2D_ src, PtrStep buf) { + using namespace sum; + dim3 threads, grid; estimate_thread_cfg(src.cols, src.rows, threads, grid); set_kernel_consts(src.cols, src.rows, threads, grid); @@ -1578,5 +1555,32 @@ namespace cv { namespace gpu { namespace mathfunc template int sum_multipass_caller(const DevMem2D_, PtrStep); template float sum_multipass_caller(const DevMem2D_, PtrStep); - } // namespace sum + + template + T sum_caller(const DevMem2D_ src, PtrStep buf) + { + using namespace sum; + + dim3 threads, grid; + estimate_thread_cfg(src.cols, src.rows, threads, grid); + set_kernel_consts(src.cols, src.rows, threads, grid); + + T* buf_ = (T*)buf.ptr(0); + + sum_kernel<<>>(src, buf_); + cudaSafeCall(cudaThreadSynchronize()); + + T sum; + cudaSafeCall(cudaMemcpy(&sum, buf_, sizeof(T), cudaMemcpyDeviceToHost)); + + return sum; + } + + template unsigned char sum_caller(const DevMem2D_, PtrStep); + template char sum_caller(const DevMem2D_, PtrStep); + template unsigned short sum_caller(const DevMem2D_, PtrStep); + template short sum_caller(const DevMem2D_, PtrStep); + template int sum_caller(const DevMem2D_, PtrStep); + template float sum_caller(const DevMem2D_, PtrStep); + template double sum_caller(const DevMem2D_, PtrStep); }}} -- GitLab