提交 79b1eec3 编写于 作者: A Alexey Spizhevoy

minor refactoring of gpu module

上级 9f80317f
......@@ -1510,33 +1510,6 @@ namespace cv { namespace gpu { namespace mathfunc
}
// Host-side wrapper: launches sum_kernel over `src` using `buf` as device
// scratch space, then copies a single T from the start of `buf` back to the
// host and returns it.
//
//   src - device 2D matrix view to process (src.cols x src.rows).
//   buf - device buffer; row 0 is reinterpreted as T* and handed to the kernel.
//         NOTE(review): presumably sum_kernel leaves the final total in
//         buf[0]; confirm against the kernel implementation.
//
// Returns the first T element of `buf` after the kernel has finished.
template <typename T>
T sum_caller(const DevMem2D_<T> src, PtrStep buf)
{
    // Choose the launch configuration and publish it to the kernel constants.
    dim3 threads, grid;
    estimate_thread_cfg(src.cols, src.rows, threads, grid);
    set_kernel_consts(src.cols, src.rows, threads, grid);

    T* buf_ = (T*)buf.ptr(0);

    sum_kernel<T, threads_x * threads_y><<<grid, threads>>>(src, buf_);
    // A kernel launch does not return a status; launch-configuration errors
    // are only reported through cudaGetLastError(). Check it right away so a
    // failed launch is reported here instead of yielding stale data from buf.
    cudaSafeCall(cudaGetLastError());
    // Wait for the kernel to finish; this also surfaces execution errors.
    // NOTE(review): cudaThreadSynchronize is deprecated in newer CUDA
    // toolkits in favour of cudaDeviceSynchronize; kept for compatibility
    // with the toolkit this file targets.
    cudaSafeCall(cudaThreadSynchronize());

    T sum;
    cudaSafeCall(cudaMemcpy(&sum, buf_, sizeof(T), cudaMemcpyDeviceToHost));
    return sum;
}
// Explicit instantiations of sum_caller for each element type listed below,
// so the template definition is compiled into this translation unit.
template unsigned char sum_caller<unsigned char>(const DevMem2D_<unsigned char>, PtrStep);
template char sum_caller<char>(const DevMem2D_<char>, PtrStep);
template unsigned short sum_caller<unsigned short>(const DevMem2D_<unsigned short>, PtrStep);
template short sum_caller<short>(const DevMem2D_<short>, PtrStep);
template int sum_caller<int>(const DevMem2D_<int>, PtrStep);
template float sum_caller<float>(const DevMem2D_<float>, PtrStep);
template double sum_caller<double>(const DevMem2D_<double>, PtrStep);
template <typename T, int nthreads>
__global__ void sum_pass2_kernel(T* result, int size)
{
......@@ -1550,10 +1523,14 @@ namespace cv { namespace gpu { namespace mathfunc
result[0] = smem[0];
}
} // namespace sum
template <typename T>
T sum_multipass_caller(const DevMem2D_<T> src, PtrStep buf)
{
using namespace sum;
dim3 threads, grid;
estimate_thread_cfg(src.cols, src.rows, threads, grid);
set_kernel_consts(src.cols, src.rows, threads, grid);
......@@ -1578,5 +1555,32 @@ namespace cv { namespace gpu { namespace mathfunc
// Explicit instantiations of sum_multipass_caller (int and float shown here;
// NOTE(review): the list appears to continue above this excerpt).
template int sum_multipass_caller<int>(const DevMem2D_<int>, PtrStep);
template float sum_multipass_caller<float>(const DevMem2D_<float>, PtrStep);
} // namespace sum
// Host-side wrapper: launches sum_kernel over `src` using `buf` as device
// scratch space, then copies a single T from the start of `buf` back to the
// host and returns it.
//
//   src - device 2D matrix view to process (src.cols x src.rows).
//   buf - device buffer; row 0 is reinterpreted as T* and handed to the kernel.
//         NOTE(review): presumably sum_kernel leaves the final total in
//         buf[0]; confirm against the kernel implementation.
//
// Returns the first T element of `buf` after the kernel has finished.
template <typename T>
T sum_caller(const DevMem2D_<T> src, PtrStep buf)
{
    // Helpers and the kernel are looked up in the `sum` namespace.
    using namespace sum;

    // Choose the launch configuration and publish it to the kernel constants.
    dim3 threads, grid;
    estimate_thread_cfg(src.cols, src.rows, threads, grid);
    set_kernel_consts(src.cols, src.rows, threads, grid);

    T* buf_ = (T*)buf.ptr(0);

    sum_kernel<T, threads_x * threads_y><<<grid, threads>>>(src, buf_);
    // A kernel launch does not return a status; launch-configuration errors
    // are only reported through cudaGetLastError(). Check it right away so a
    // failed launch is reported here instead of yielding stale data from buf.
    cudaSafeCall(cudaGetLastError());
    // Wait for the kernel to finish; this also surfaces execution errors.
    // NOTE(review): cudaThreadSynchronize is deprecated in newer CUDA
    // toolkits in favour of cudaDeviceSynchronize; kept for compatibility
    // with the toolkit this file targets.
    cudaSafeCall(cudaThreadSynchronize());

    // The local variable intentionally keeps the name `sum`; inside this
    // scope it hides the namespace of the same name.
    T sum;
    cudaSafeCall(cudaMemcpy(&sum, buf_, sizeof(T), cudaMemcpyDeviceToHost));
    return sum;
}
// Explicit instantiations of sum_caller for each element type listed below,
// so the template definition is compiled into this translation unit.
template unsigned char sum_caller<unsigned char>(const DevMem2D_<unsigned char>, PtrStep);
template char sum_caller<char>(const DevMem2D_<char>, PtrStep);
template unsigned short sum_caller<unsigned short>(const DevMem2D_<unsigned short>, PtrStep);
template short sum_caller<short>(const DevMem2D_<short>, PtrStep);
template int sum_caller<int>(const DevMem2D_<int>, PtrStep);
template float sum_caller<float>(const DevMem2D_<float>, PtrStep);
template double sum_caller<double>(const DevMem2D_<double>, PtrStep);
}}}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册