提交 1eedc6c4 编写于 作者: V Vladislav Vinogradov

refactored Linear Filter

上级 fcfcd4cb
......@@ -96,6 +96,34 @@ inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stre
f->apply(src, dst, stream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Linear Filter
//! non-separable linear 2D filter
CV_EXPORTS Ptr<Filter> createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1),
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
__OPENCV_GPUFILTERS_DEPR_BEFORE__ void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel,
Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT,
Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__;
inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, Point anchor, int borderType, Stream& stream)
{
Ptr<gpu::Filter> f = gpu::createLinearFilter(src.type(), ddepth, kernel, anchor, borderType);
f->apply(src, dst, stream);
}
//! applies Laplacian operator to the image
//! supports only ksize = 1 and ksize = 3
CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
......@@ -194,13 +222,7 @@ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, co
CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
const Point& anchor = Point(-1,-1), int iterations = 1);
//! returns 2D filter with the specified kernel
//! supports CV_8U, CV_16U and CV_32F one and four channel image
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
//! returns the non-separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
//! returns the primitive row filter with the specified kernel.
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
......@@ -269,9 +291,6 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat&
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
//! applies non-separable 2D linear filter to the image
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
//! applies separable 2D linear filter to the image
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
......@@ -297,10 +316,6 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
//! applies Laplacian operator to the image
//! supports only ksize = 1 and ksize = 3
CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
}} // namespace cv { namespace gpu {
#undef __OPENCV_GPUFILTERS_DEPR_BEFORE__
......
......@@ -86,6 +86,51 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
}
}
//////////////////////////////////////////////////////////////////////
// Filter2D
PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
declare.in(src, WARMUP_RNG);
cv::Mat kernel(ksize, ksize, CV_32FC1);
declare.in(kernel, WARMUP_RNG);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
cv::Ptr<cv::gpu::Filter> filter2D = cv::gpu::createLinearFilter(d_src.type(), -1, kernel);
TEST_CYCLE() filter2D->apply(d_src, dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Sobel
......@@ -330,39 +375,3 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Filter2D
PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
{
declare.time(20.0);
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
const int ksize = GET_PARAM(2);
cv::Mat src(size, type);
declare.in(src, WARMUP_RNG);
cv::Mat kernel(ksize, ksize, CV_32FC1);
declare.in(kernel, WARMUP_RNG);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
CPU_SANITY_CHECK(dst);
}
}
......@@ -48,111 +48,104 @@
namespace cv { namespace gpu { namespace cudev
{
namespace imgproc
template <class SrcPtr, typename D>
__global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst,
const float* __restrict__ kernel,
const int kWidth, const int kHeight,
const int anchorX, const int anchorY)
{
#define FILTER2D_MAX_KERNEL_SIZE 16
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
__constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
template <class SrcT, typename D>
__global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
{
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= dst.cols || y >= dst.rows)
return;
if (x >= dst.cols || y >= dst.rows)
return;
sum_t res = VecTraits<sum_t>::all(0);
int kInd = 0;
sum_t res = VecTraits<sum_t>::all(0);
int kInd = 0;
for (int i = 0; i < kHeight; ++i)
{
for (int j = 0; j < kWidth; ++j)
res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++];
}
dst(y, x) = saturate_cast<D>(res);
for (int i = 0; i < kHeight; ++i)
{
for (int j = 0; j < kWidth; ++j)
res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++];
}
template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
dst(y, x) = saturate_cast<D>(res);
}
#define IMPLEMENT_FILTER2D_TEX_READER(type) \
texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_filter2D_ ## type ## _reader \
template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
#define IMPLEMENT_FILTER2D_TEX_READER(type) \
texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_filter2D_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
const int xoff; \
const int yoff; \
tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
typedef type elem_type; \
typedef int index_type; \
const int xoff; \
const int yoff; \
tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
} \
}; \
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
} \
}; \
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
{ \
static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel, \
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
{ \
static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
{ \
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(16, 16); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_filter2D_ ## type , srcWhole); \
tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
cudaSafeCall( cudaGetLastError() ); \
if (stream == 0) \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
IMPLEMENT_FILTER2D_TEX_READER(uchar);
IMPLEMENT_FILTER2D_TEX_READER(uchar4);
IMPLEMENT_FILTER2D_TEX_READER(ushort);
IMPLEMENT_FILTER2D_TEX_READER(ushort4);
IMPLEMENT_FILTER2D_TEX_READER(float);
IMPLEMENT_FILTER2D_TEX_READER(float4);
#undef IMPLEMENT_FILTER2D_TEX_READER
template <typename T, typename D>
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream)
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
dim3 block(16, 16); \
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_filter2D_ ## type , srcWhole); \
tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \
cudaSafeCall( cudaGetLastError() ); \
if (stream == 0) \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
IMPLEMENT_FILTER2D_TEX_READER(uchar);
IMPLEMENT_FILTER2D_TEX_READER(uchar4);
IMPLEMENT_FILTER2D_TEX_READER(ushort);
IMPLEMENT_FILTER2D_TEX_READER(ushort4);
IMPLEMENT_FILTER2D_TEX_READER(float);
IMPLEMENT_FILTER2D_TEX_READER(float4);
#undef IMPLEMENT_FILTER2D_TEX_READER
template <typename T, typename D>
void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
int kWidth, int kHeight, int anchorX, int anchorY,
int borderMode, const float* borderValue, cudaStream_t stream)
{
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel,
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
static const func_t funcs[] =
{
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
static const func_t funcs[] =
{
Filter2DCaller<T, D, BrdConstant>::call,
Filter2DCaller<T, D, BrdReplicate>::call,
Filter2DCaller<T, D, BrdReflect>::call,
Filter2DCaller<T, D, BrdWrap>::call,
Filter2DCaller<T, D, BrdReflect101>::call
};
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
}
template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
Filter2DCaller<T, D, BrdConstant>::call,
Filter2DCaller<T, D, BrdReplicate>::call,
Filter2DCaller<T, D, BrdReflect>::call,
Filter2DCaller<T, D, BrdWrap>::call,
Filter2DCaller<T, D, BrdReflect101>::call
};
funcs[borderMode]((PtrStepSz<T>) srcWhole, ofsX, ofsY, (PtrStepSz<D>) dst, kernel,
kWidth, kHeight, anchorX, anchorY, borderValue, stream);
}
template void filter2D<uchar , uchar >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D<uchar4 , uchar4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D<ushort , ushort >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D<float , float >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
template void filter2D<float4 , float4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
}}}
#endif // CUDA_DISABLER
......@@ -49,6 +49,8 @@ using namespace cv::gpu;
Ptr<Filter> cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
Ptr<Filter> cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
......@@ -57,8 +59,6 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) {
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr<BaseFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr<BaseRowFilter_GPU>(0); }
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr<BaseColumnFilter_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
......@@ -76,7 +76,6 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_
void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); }
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); }
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); }
void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_no_cuda(); }
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_no_cuda(); }
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_no_cuda(); }
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_no_cuda(); }
......@@ -188,6 +187,138 @@ Ptr<Filter> cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point
return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Linear Filter
namespace cv { namespace gpu { namespace cudev
{
template <typename T, typename D>
void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
int kWidth, int kHeight, int anchorX, int anchorY,
int borderMode, const float* borderValue, cudaStream_t stream);
}}}
namespace
{
class LinearFilter : public Filter
{
public:
LinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal);
void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
private:
typedef void (*filter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
int kWidth, int kHeight, int anchorX, int anchorY,
int borderMode, const float* borderValue, cudaStream_t stream);
GpuMat kernel_;
Point anchor_;
int type_;
filter2D_t func_;
int borderMode_;
Scalar_<float> borderVal_;
};
LinearFilter::LinearFilter(int srcType, int dstType, InputArray _kernel, Point anchor, int borderMode, Scalar borderVal) :
anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal)
{
const int sdepth = CV_MAT_DEPTH(srcType);
const int scn = CV_MAT_CN(srcType);
Mat kernel = _kernel.getMat();
CV_Assert( sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F );
CV_Assert( scn == 1 || scn == 4 );
CV_Assert( dstType == srcType );
CV_Assert( kernel.channels() == 1 );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
Mat kernel32F;
kernel.convertTo(kernel32F, CV_32F);
kernel_ = gpu::createContinuous(kernel.size(), CV_32FC1);
kernel_.upload(kernel32F);
normalizeAnchor(anchor_, kernel.size());
switch (srcType)
{
case CV_8UC1:
func_ = cudev::filter2D<uchar, uchar>;
break;
case CV_8UC4:
func_ = cudev::filter2D<uchar4, uchar4>;
break;
case CV_16UC1:
func_ = cudev::filter2D<ushort, ushort>;
break;
case CV_16UC4:
func_ = cudev::filter2D<ushort4, ushort4>;
break;
case CV_32FC1:
func_ = cudev::filter2D<float, float>;
break;
case CV_32FC4:
func_ = cudev::filter2D<float4, float4>;
break;
}
}
void LinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == type_ );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
Point ofs;
Size wholeSize;
src.locateROI(wholeSize, ofs);
GpuMat srcWhole(wholeSize, src.type(), src.datastart);
func_(srcWhole, ofs.x, ofs.y, dst, kernel_.ptr<float>(),
kernel_.cols, kernel_.rows, anchor_.x, anchor_.y,
borderMode_, borderVal_.val, StreamAccessor::getStream(_stream));
}
}
Ptr<Filter> cv::gpu::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal)
{
if (dstType < 0)
dstType = srcType;
return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal);
}
void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream)
{
CV_Assert(ksize == 1 || ksize == 3);
static const int K[2][9] =
{
{0, 1, 0, 1, -4, 1, 0, 1, 0},
{2, 0, 2, 0, -8, 0, 2, 0, 2}
};
Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);
if (scale != 1)
kernel *= scale;
Ptr<gpu::Filter> f = gpu::createLinearFilter(src.type(), ddepth, kernel, Point(-1,-1), borderType);
f->apply(src, dst, stream);
}
......@@ -702,172 +833,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Linear Filter
namespace cv { namespace gpu { namespace cudev
{
namespace imgproc
{
template <typename T, typename D>
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream);
}
}}}
namespace
{
typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
struct NPPLinearFilter : public BaseFilter_GPU
{
NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) :
BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
NppiSize oKernelSize;
oKernelSize.height = ksize.height;
oKernelSize.width = ksize.width;
NppiPoint oAnchor;
oAnchor.x = anchor.x;
oAnchor.y = anchor.y;
cudaStream_t stream = StreamAccessor::getStream(s);
NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
GpuMat kernel;
Npp32s nDivisor;
nppFilter2D_t func;
};
typedef void (*gpuFilter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
int borderMode, const float* borderValue, cudaStream_t stream);
struct GpuFilter2D : public BaseFilter_GPU
{
GpuFilter2D(Size ksize_, Point anchor_, gpuFilter2D_t func_, const GpuMat& kernel_, int brd_type_) :
BaseFilter_GPU(ksize_, anchor_), func(func_), kernel(kernel_), brd_type(brd_type_)
{
}
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
{
using namespace cv::gpu::cudev::imgproc;
Point ofs;
Size wholeSize;
src.locateROI(wholeSize, ofs);
GpuMat srcWhole(wholeSize, src.type(), src.datastart);
static const Scalar_<float> zero = Scalar_<float>::all(0.0f);
func(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), brd_type, zero.val, StreamAccessor::getStream(stream));
}
gpuFilter2D_t func;
GpuMat kernel;
int brd_type;
};
}
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int brd_type)
{
using namespace cv::gpu::cudev::imgproc;
int sdepth = CV_MAT_DEPTH(srcType);
int scn = CV_MAT_CN(srcType);
CV_Assert(sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F);
CV_Assert(scn == 1 || scn == 4);
CV_Assert(dstType == srcType);
CV_Assert(brd_type == BORDER_REFLECT101 || brd_type == BORDER_REPLICATE || brd_type == BORDER_CONSTANT || brd_type == BORDER_REFLECT || brd_type == BORDER_WRAP);
Size ksize = kernel.size();
#if 0
if ((srcType == CV_8UC1 || srcType == CV_8UC4) && brd_type == BORDER_CONSTANT)
{
static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
GpuMat gpu_krnl;
int nDivisor;
normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
normalizeAnchor(anchor, ksize);
return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
}
#endif
CV_Assert(ksize.width * ksize.height <= 16 * 16);
GpuMat gpu_krnl;
normalizeKernel(kernel, gpu_krnl, CV_32F);
normalizeAnchor(anchor, ksize);
gpuFilter2D_t func = 0;
switch (srcType)
{
case CV_8UC1:
func = filter2D_gpu<uchar, uchar>;
break;
case CV_8UC4:
func = filter2D_gpu<uchar4, uchar4>;
break;
case CV_16UC1:
func = filter2D_gpu<ushort, ushort>;
break;
case CV_16UC4:
func = filter2D_gpu<ushort4, ushort4>;
break;
case CV_32FC1:
func = filter2D_gpu<float, float>;
break;
case CV_32FC4:
func = filter2D_gpu<float4, float4>;
break;
}
return Ptr<BaseFilter_GPU>(new GpuFilter2D(ksize, anchor, func, gpu_krnl, brd_type));
}
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType)
{
Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, anchor, borderType);
return createFilter2D_GPU(linearFilter, srcType, dstType);
}
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, int borderType, Stream& stream)
{
if (ddepth < 0)
ddepth = src.depth();
int dst_type = CV_MAKE_TYPE(ddepth, src.channels());
Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst_type, kernel, anchor, borderType);
dst.create(src.size(), dst_type);
f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Separable Linear Filter
......@@ -1208,22 +1173,6 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream);
}
void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream)
{
CV_Assert(ksize == 1 || ksize == 3);
static const int K[2][9] =
{
{0, 1, 0, 1, -4, 1, 0, 1, 0},
{2, 0, 2, 0, -8, 0, 2, 0, 2}
};
Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);
if (scale != 1)
kernel *= scale;
filter2D(src, dst, ddepth, kernel, Point(-1,-1), borderType, stream);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// Gaussian Filter
......
......@@ -118,6 +118,121 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine(
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D
PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
cv::Size ksize;
cv::Point anchor;
int borderType;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
ksize = GET_PARAM(3);
anchor = GET_PARAM(4);
borderType = GET_PARAM(5);
useRoi = GET_PARAM(6);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(Filter2D, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
cv::Ptr<cv::gpu::Filter> filter2D = cv::gpu::createLinearFilter(src.type(), -1, kernel, anchor, borderType);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
filter2D->apply(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType);
EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
}
INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))),
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))),
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////
// Laplacian
PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
cv::Size ksize;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
ksize = GET_PARAM(3);
useRoi = GET_PARAM(4);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(Laplacian, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width);
cv::Mat dst_gold;
cv::Laplacian(src, dst_gold, -1, ksize.width);
EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
}
INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))),
WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////
// Sobel
......@@ -332,49 +447,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine(
BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////
// Laplacian
PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
cv::Size ksize;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
ksize = GET_PARAM(3);
useRoi = GET_PARAM(4);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(Laplacian, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width);
cv::Mat dst_gold;
cv::Laplacian(src, dst_gold, -1, ksize.width);
EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
}
INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))),
WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////
// Erode
......@@ -527,56 +599,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, MorphEx, testing::Combine(
testing::Values(Iterations(1), Iterations(2), Iterations(3)),
WHOLE_SUBMAT));
/////////////////////////////////////////////////////////////////////////////////////////////////
// Filter2D
PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
cv::Size ksize;
cv::Point anchor;
int borderType;
bool useRoi;
cv::Mat img;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
ksize = GET_PARAM(3);
anchor = GET_PARAM(4);
borderType = GET_PARAM(5);
useRoi = GET_PARAM(6);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(Filter2D, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::filter2D(loadMat(src, useRoi), dst, -1, kernel, anchor, borderType);
cv::Mat dst_gold;
cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType);
EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
}
INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))),
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))),
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
WHOLE_SUBMAT));
#endif // HAVE_CUDA
......@@ -961,10 +961,11 @@ TEST(filter2D)
gpu::GpuMat d_src(src);
gpu::GpuMat d_dst;
gpu::filter2D(d_src, d_dst, -1, kernel);
Ptr<gpu::Filter> filter2D = gpu::createLinearFilter(d_src.type(), -1, kernel);
filter2D->apply(d_src, d_dst);
GPU_ON;
gpu::filter2D(d_src, d_dst, -1, kernel);
filter2D->apply(d_src, d_dst);
GPU_OFF;
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册