未验证 提交 698b2bf7 编写于 作者: Y Yosshi999 提交者: GitHub

Merge pull request #18167 from Yosshi999:bit-exact-gaussian

Bit exact gaussian blur for 16bit unsigned int

* bit-exact gaussian kernel for CV_16U

* SIMD optimization

* template GaussianBlurFixedPoint

* remove template specialization

* simd support for h3N121 uint16

* test for u16 gaussian blur

* remove unnecessary comments

* fix return type of raw()

* add typedef of native internal type in fixedpoint

* update return type of raw()
上级 1d425600
......@@ -14,13 +14,14 @@ namespace {
class fixedpoint64
{
private:
static const int fixedShift = 32;
int64_t val;
fixedpoint64(int64_t _val) : val(_val) {}
static CV_ALWAYS_INLINE uint64_t fixedround(const uint64_t& _val) { return (_val + ((1LL << fixedShift) >> 1)); }
public:
static const int fixedShift = 32;
typedef fixedpoint64 WT;
typedef int64_t raw_t;
CV_ALWAYS_INLINE fixedpoint64() { val = 0; }
CV_ALWAYS_INLINE fixedpoint64(const fixedpoint64& v) { val = v.val; }
CV_ALWAYS_INLINE fixedpoint64(const int8_t& _val) { val = ((int64_t)_val) << fixedShift; }
......@@ -97,13 +98,14 @@ public:
class ufixedpoint64
{
private:
static const int fixedShift = 32;
uint64_t val;
ufixedpoint64(uint64_t _val) : val(_val) {}
static CV_ALWAYS_INLINE uint64_t fixedround(const uint64_t& _val) { return (_val + ((1LL << fixedShift) >> 1)); }
public:
static const int fixedShift = 32;
typedef ufixedpoint64 WT;
typedef uint64_t raw_t;
CV_ALWAYS_INLINE ufixedpoint64() { val = 0; }
CV_ALWAYS_INLINE ufixedpoint64(const ufixedpoint64& v) { val = v.val; }
CV_ALWAYS_INLINE ufixedpoint64(const uint8_t& _val) { val = ((uint64_t)_val) << fixedShift; }
......@@ -157,6 +159,9 @@ public:
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE ufixedpoint64 zero() { return ufixedpoint64(); }
static CV_ALWAYS_INLINE ufixedpoint64 one() { return ufixedpoint64((uint64_t)(1ULL << fixedShift)); }
static CV_ALWAYS_INLINE ufixedpoint64 fromRaw(uint64_t v) { return ufixedpoint64(v); }
CV_ALWAYS_INLINE uint64_t raw() { return val; }
CV_ALWAYS_INLINE uint32_t cvFloor() const { return cv::saturate_cast<uint32_t>(val >> fixedShift); }
friend class ufixedpoint32;
};
......@@ -164,13 +169,14 @@ public:
class fixedpoint32
{
private:
static const int fixedShift = 16;
int32_t val;
fixedpoint32(int32_t _val) : val(_val) {}
static CV_ALWAYS_INLINE uint32_t fixedround(const uint32_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
public:
static const int fixedShift = 16;
typedef fixedpoint64 WT;
typedef int32_t raw_t;
CV_ALWAYS_INLINE fixedpoint32() { val = 0; }
CV_ALWAYS_INLINE fixedpoint32(const fixedpoint32& v) { val = v.val; }
CV_ALWAYS_INLINE fixedpoint32(const int8_t& _val) { val = ((int32_t)_val) << fixedShift; }
......@@ -218,13 +224,14 @@ public:
class ufixedpoint32
{
private:
static const int fixedShift = 16;
uint32_t val;
ufixedpoint32(uint32_t _val) : val(_val) {}
static CV_ALWAYS_INLINE uint32_t fixedround(const uint32_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
public:
static const int fixedShift = 16;
typedef ufixedpoint64 WT;
typedef uint32_t raw_t;
CV_ALWAYS_INLINE ufixedpoint32() { val = 0; }
CV_ALWAYS_INLINE ufixedpoint32(const ufixedpoint32& v) { val = v.val; }
CV_ALWAYS_INLINE ufixedpoint32(const uint8_t& _val) { val = ((uint32_t)_val) << fixedShift; }
......@@ -262,19 +269,23 @@ public:
CV_ALWAYS_INLINE bool isZero() { return val == 0; }
static CV_ALWAYS_INLINE ufixedpoint32 zero() { return ufixedpoint32(); }
static CV_ALWAYS_INLINE ufixedpoint32 one() { return ufixedpoint32((1U << fixedShift)); }
static CV_ALWAYS_INLINE ufixedpoint32 fromRaw(uint32_t v) { return ufixedpoint32(v); }
CV_ALWAYS_INLINE uint32_t raw() { return val; }
friend class ufixedpoint16;
};
class fixedpoint16
{
private:
static const int fixedShift = 8;
int16_t val;
fixedpoint16(int16_t _val) : val(_val) {}
static CV_ALWAYS_INLINE uint16_t fixedround(const uint16_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
public:
static const int fixedShift = 8;
typedef fixedpoint32 WT;
typedef int16_t raw_t;
CV_ALWAYS_INLINE fixedpoint16() { val = 0; }
CV_ALWAYS_INLINE fixedpoint16(const fixedpoint16& v) { val = v.val; }
CV_ALWAYS_INLINE fixedpoint16(const int8_t& _val) { val = ((int16_t)_val) << fixedShift; }
......@@ -315,13 +326,14 @@ public:
class ufixedpoint16
{
private:
static const int fixedShift = 8;
uint16_t val;
ufixedpoint16(uint16_t _val) : val(_val) {}
static CV_ALWAYS_INLINE uint16_t fixedround(const uint16_t& _val) { return (_val + ((1 << fixedShift) >> 1)); }
public:
static const int fixedShift = 8;
typedef ufixedpoint32 WT;
typedef uint16_t raw_t;
CV_ALWAYS_INLINE ufixedpoint16() { val = 0; }
CV_ALWAYS_INLINE ufixedpoint16(const ufixedpoint16& v) { val = v.val; }
CV_ALWAYS_INLINE ufixedpoint16(const uint8_t& _val) { val = ((uint16_t)_val) << fixedShift; }
......@@ -358,7 +370,7 @@ public:
static CV_ALWAYS_INLINE ufixedpoint16 one() { return ufixedpoint16((uint16_t)(1 << fixedShift)); }
static CV_ALWAYS_INLINE ufixedpoint16 fromRaw(uint16_t v) { return ufixedpoint16(v); }
CV_ALWAYS_INLINE ufixedpoint16 raw() { return val; }
CV_ALWAYS_INLINE uint16_t raw() { return val; }
};
}
......
......@@ -258,23 +258,20 @@ softdouble getGaussianKernelFixedPoint_ED(CV_OUT std::vector<int64_t>& result, c
}
static void getGaussianKernel(int n, double sigma, int ktype, Mat& res) { res = getGaussianKernel(n, sigma, ktype); }
template <typename T> static void getGaussianKernel(int n, double sigma, int, std::vector<T>& res);
//{ res = getFixedpointGaussianKernel<T>(n, sigma); }
template<> void getGaussianKernel<ufixedpoint16>(int n, double sigma, int, std::vector<ufixedpoint16>& res)
template <typename FT> static void getGaussianKernel(int n, double sigma, int, std::vector<FT>& res)
{
std::vector<softdouble> res_sd;
softdouble s0 = getGaussianKernelBitExact(res_sd, n, sigma);
CV_UNUSED(s0);
std::vector<int64_t> fixed_256;
softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, 8);
softdouble approx_err = getGaussianKernelFixedPoint_ED(fixed_256, res_sd, FT::fixedShift);
CV_UNUSED(approx_err);
res.resize(n);
for (int i = 0; i < n; i++)
{
res[i] = ufixedpoint16::fromRaw((uint16_t)fixed_256[i]);
res[i] = FT::fromRaw((typename FT::raw_t)fixed_256[i]);
//printf("%03d: %d\n", i, res[i].raw());
}
}
......@@ -688,6 +685,43 @@ void GaussianBlur(InputArray _src, OutputArray _dst, Size ksize,
return;
}
}
if(sdepth == CV_16U && ((borderType & BORDER_ISOLATED) || !_src.isSubmatrix()))
{
CV_LOG_INFO(NULL, "GaussianBlur: running bit-exact version...");
std::vector<ufixedpoint32> fkx, fky;
createGaussianKernels(fkx, fky, type, ksize, sigma1, sigma2);
static bool param_check_gaussian_blur_bitexact_kernels = utils::getConfigurationParameterBool("OPENCV_GAUSSIANBLUR_CHECK_BITEXACT_KERNELS", false);
if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fkx))
{
CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fx kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2));
}
else if (param_check_gaussian_blur_bitexact_kernels && !validateGaussianBlurKernel(fky))
{
CV_LOG_INFO(NULL, "GaussianBlur: bit-exact fy kernel can't be applied: ksize=" << ksize << " sigma=" << Size2d(sigma1, sigma2));
}
else
{
// TODO: implement ocl_sepFilter2D_BitExact -- how to deal with bdepth?
// CV_OCL_RUN(useOpenCL,
// ocl_sepFilter2D_BitExact(_src, _dst, sdepth,
// ksize,
// (const uint32_t*)&fkx[0], (const uint32_t*)&fky[0],
// Point(-1, -1), 0, borderType,
// 16/*shift_bits*/)
// );
Mat src = _src.getMat();
Mat dst = _dst.getMat();
if (src.data == dst.data)
src = src.clone();
CV_CPU_DISPATCH(GaussianBlurFixedPoint, (src, dst, (const uint32_t*)&fkx[0], (int)fkx.size(), (const uint32_t*)&fky[0], (int)fky.size(), borderType),
CV_CPU_DISPATCH_MODES_ALL);
return;
}
}
#ifdef HAVE_OPENCL
if (useOpenCL)
......
......@@ -54,9 +54,10 @@
namespace cv {
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst,
const uint16_t/*ufixedpoint16*/* fkx, int fkx_size,
const uint16_t/*ufixedpoint16*/* fky, int fky_size,
template <typename RFT>
void GaussianBlurFixedPoint(const Mat& src, Mat& dst,
const RFT* fkx, int fkx_size,
const RFT* fky, int fky_size,
int borderType);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
......@@ -192,8 +193,9 @@ void hlineSmooth3N<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufi
}
}
}
template <typename ET, typename FT>
void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, int borderType)
template <typename ET, typename FT, typename VFT>
void hlineSmooth3N121Impl(const ET* src, int cn, const FT*, int, FT* dst, int len, int borderType)
{
if (len == 1)
{
......@@ -217,7 +219,13 @@ void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, i
}
src += cn; dst += cn;
for (int i = cn; i < (len - 1)*cn; i++, src++, dst++)
int i = cn, lencn = (len - 1)*cn;
#if CV_SIMD
const int VECSZ = VFT::nlanes;
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((typename FT::raw_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn) + (vx_load_expand(src) << 1)) << (FT::fixedShift-2));
#endif
for (; i < lencn; i++, src++, dst++)
*dst = (FT(src[-cn])>>2) + (FT(src[cn])>>2) + (FT(src[0])>>1);
// Point that fall right from border
......@@ -231,51 +239,19 @@ void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, i
}
}
}
template <typename ET, typename FT>
void hlineSmooth3N121(const ET* src, int cn, const FT*, int, FT* dst, int len, int borderType);
template <>
void hlineSmooth3N121<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixedpoint16*, int, ufixedpoint16* dst, int len, int borderType)
void hlineSmooth3N121<uint8_t, ufixedpoint16>(const uint8_t* src, int cn, const ufixedpoint16* _m, int _n, ufixedpoint16* dst, int len, int borderType)
{
if (len == 1)
{
if (borderType != BORDER_CONSTANT)
for (int k = 0; k < cn; k++)
dst[k] = ufixedpoint16(src[k]);
else
for (int k = 0; k < cn; k++)
dst[k] = ufixedpoint16(src[k]) >> 1;
}
else
{
// Point that fall left from border
for (int k = 0; k < cn; k++)
dst[k] = (ufixedpoint16(src[k])>>1) + (ufixedpoint16(src[cn + k])>>2);
if (borderType != BORDER_CONSTANT)// If BORDER_CONSTANT out of border values are equal to zero and could be skipped
{
int src_idx = borderInterpolate(-1, len, borderType);
for (int k = 0; k < cn; k++)
dst[k] = dst[k] + (ufixedpoint16(src[src_idx*cn + k])>>2);
}
src += cn; dst += cn;
int i = cn, lencn = (len - 1)*cn;
#if CV_SIMD
const int VECSZ = v_uint16::nlanes;
for (; i <= lencn - VECSZ; i += VECSZ, src += VECSZ, dst += VECSZ)
v_store((uint16_t*)dst, (vx_load_expand(src - cn) + vx_load_expand(src + cn) + (vx_load_expand(src) << 1)) << 6);
#endif
for (; i < lencn; i++, src++, dst++)
*((uint16_t*)dst) = (uint16_t(src[-cn]) + uint16_t(src[cn]) + (uint16_t(src[0]) << 1)) << 6;
// Point that fall right from border
for (int k = 0; k < cn; k++)
dst[k] = (ufixedpoint16(src[k - cn])>>2) + (ufixedpoint16(src[k])>>1);
if (borderType != BORDER_CONSTANT)// If BORDER_CONSTANT out of border values are equal to zero and could be skipped
{
int src_idx = (borderInterpolate(len, len, borderType) - (len - 1))*cn;
for (int k = 0; k < cn; k++)
dst[k] = dst[k] + (ufixedpoint16(src[src_idx + k])>>2);
}
}
hlineSmooth3N121Impl<uint8_t, ufixedpoint16, v_uint16>(src, cn, _m, _n, dst, len, borderType);
}
// 16-bit specialization of the horizontal 1-2-1 smoothing pass:
// forwards to the shared implementation, selecting v_uint32 as the SIMD
// vector type matching the ufixedpoint32 (Q16.16) accumulator.
template <>
void hlineSmooth3N121<uint16_t, ufixedpoint32>(const uint16_t* src, int cn, const ufixedpoint32* _m, int _n, ufixedpoint32* dst, int len, int borderType)
{
hlineSmooth3N121Impl<uint16_t, ufixedpoint32, v_uint32>(src, cn, _m, _n, dst, len, borderType);
}
template <typename ET, typename FT>
void hlineSmooth3Naba(const ET* src, int cn, const FT* m, int, FT* dst, int len, int borderType)
{
......@@ -1376,6 +1352,28 @@ void vlineSmooth3N121<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src,
for (; i < len; i++)
dst[i] = (((uint32_t)(((uint16_t*)(src[0]))[i]) + (uint32_t)(((uint16_t*)(src[2]))[i]) + ((uint32_t)(((uint16_t*)(src[1]))[i]) << 1)) + (1 << 9)) >> 10;
}
// Vertical 1-2-1 smoothing pass for CV_16U: combines three rows of Q16.16
// fixed-point intermediates (src[0..2]) into the final uint16 row.
// dst = (r0 + 2*r1 + r2 + round) >> 18, where 18 = 16 fractional bits
// (ufixedpoint32::fixedShift) + 2 bits for the kernel sum (4).
// The second and third parameters (kernel pointer / length) are unused:
// the 1-2-1 weights are hard-coded.
template <>
void vlineSmooth3N121<uint16_t, ufixedpoint32>(const ufixedpoint32* const * src, const ufixedpoint32*, int, uint16_t* dst, int len)
{
int i = 0;
#if CV_SIMD
const int VECSZ = v_uint32::nlanes;
// Process 2*VECSZ pixels per iteration; each 32-bit lane is widened to
// 64 bits before accumulation so the weighted sum cannot overflow prior
// to the rounding shift, then results are narrowed back to uint16.
for (; i <= len - 2*VECSZ; i += 2*VECSZ)
{
v_uint64 v_src00, v_src01, v_src02, v_src03, v_src10, v_src11, v_src12, v_src13, v_src20, v_src21, v_src22, v_src23;
v_expand(vx_load((uint32_t*)(src[0]) + i), v_src00, v_src01);
v_expand(vx_load((uint32_t*)(src[0]) + i + VECSZ), v_src02, v_src03);
v_expand(vx_load((uint32_t*)(src[1]) + i), v_src10, v_src11);
v_expand(vx_load((uint32_t*)(src[1]) + i + VECSZ), v_src12, v_src13);
v_expand(vx_load((uint32_t*)(src[2]) + i), v_src20, v_src21);
v_expand(vx_load((uint32_t*)(src[2]) + i + VECSZ), v_src22, v_src23);
// v_rshr_pack<18> performs the rounding right-shift and narrowing.
v_store(dst + i, v_pack(v_rshr_pack<18>(v_src00 + v_src20 + (v_src10 + v_src10), v_src01 + v_src21 + (v_src11 + v_src11)),
v_rshr_pack<18>(v_src02 + v_src22 + (v_src12 + v_src12), v_src03 + v_src23 + (v_src13 + v_src13))));
}
#endif
// Scalar tail (and non-SIMD builds): same formula in 64-bit arithmetic.
for (; i < len; i++)
dst[i] = (((uint64_t)((uint32_t*)(src[0]))[i]) + (uint64_t)(((uint32_t*)(src[2]))[i]) + ((uint64_t(((uint32_t*)(src[1]))[i]) << 1)) + (1 << 17)) >> 18;
}
template <typename ET, typename FT>
void vlineSmooth5N(const FT* const * src, const FT* m, int, ET* dst, int len)
{
......@@ -1525,6 +1523,39 @@ void vlineSmooth5N14641<uint8_t, ufixedpoint16>(const ufixedpoint16* const * src
(((uint32_t)(((uint16_t*)(src[1]))[i]) + (uint32_t)(((uint16_t*)(src[3]))[i])) << 2) +
(uint32_t)(((uint16_t*)(src[0]))[i]) + (uint32_t)(((uint16_t*)(src[4]))[i]) + (1 << 11)) >> 12;
}
// Vertical 1-4-6-4-1 (size-5 Gaussian) pass for CV_16U: combines five rows
// of Q16.16 fixed-point intermediates into the final uint16 row.
// dst = (r0 + 4*r1 + 6*r2 + 4*r3 + r4 + round) >> 20, where 20 =
// 16 fractional bits (ufixedpoint32::fixedShift) + 4 bits for the kernel
// sum (16). Kernel pointer/length parameters are unused: weights are
// hard-coded.
template <>
void vlineSmooth5N14641<uint16_t, ufixedpoint32>(const ufixedpoint32* const * src, const ufixedpoint32*, int, uint16_t* dst, int len)
{
int i = 0;
#if CV_SIMD
const int VECSZ = v_uint32::nlanes;
// Widen each 32-bit lane to 64 bits before accumulating so the weighted
// sum cannot overflow; 6*x is formed as (x<<2) + (x<<1).
for (; i <= len - 2*VECSZ; i += 2*VECSZ)
{
v_uint64 v_src00, v_src10, v_src20, v_src30, v_src40;
v_uint64 v_src01, v_src11, v_src21, v_src31, v_src41;
v_uint64 v_src02, v_src12, v_src22, v_src32, v_src42;
v_uint64 v_src03, v_src13, v_src23, v_src33, v_src43;
v_expand(vx_load((uint32_t*)(src[0]) + i), v_src00, v_src01);
v_expand(vx_load((uint32_t*)(src[0]) + i + VECSZ), v_src02, v_src03);
v_expand(vx_load((uint32_t*)(src[1]) + i), v_src10, v_src11);
v_expand(vx_load((uint32_t*)(src[1]) + i + VECSZ), v_src12, v_src13);
v_expand(vx_load((uint32_t*)(src[2]) + i), v_src20, v_src21);
v_expand(vx_load((uint32_t*)(src[2]) + i + VECSZ), v_src22, v_src23);
v_expand(vx_load((uint32_t*)(src[3]) + i), v_src30, v_src31);
v_expand(vx_load((uint32_t*)(src[3]) + i + VECSZ), v_src32, v_src33);
v_expand(vx_load((uint32_t*)(src[4]) + i), v_src40, v_src41);
v_expand(vx_load((uint32_t*)(src[4]) + i + VECSZ), v_src42, v_src43);
// v_rshr_pack<20> performs the rounding right-shift and narrowing.
v_store(dst + i, v_pack(v_rshr_pack<20>((v_src20 << 2) + (v_src20 << 1) + ((v_src10 + v_src30) << 2) + v_src00 + v_src40,
(v_src21 << 2) + (v_src21 << 1) + ((v_src11 + v_src31) << 2) + v_src01 + v_src41),
v_rshr_pack<20>((v_src22 << 2) + (v_src22 << 1) + ((v_src12 + v_src32) << 2) + v_src02 + v_src42,
(v_src23 << 2) + (v_src23 << 1) + ((v_src13 + v_src33) << 2) + v_src03 + v_src43)));
}
#endif
// Scalar tail (and non-SIMD builds): same formula in 64-bit arithmetic.
for (; i < len; i++)
dst[i] = ((uint64_t)(((uint32_t*)(src[2]))[i]) * 6 +
(((uint64_t)(((uint32_t*)(src[1]))[i]) + (uint64_t)(((uint32_t*)(src[3]))[i])) << 2) +
(uint64_t)(((uint32_t*)(src[0]))[i]) + (uint64_t)(((uint32_t*)(src[4]))[i]) + (1 << 19)) >> 20;
}
template <typename ET, typename FT>
void vlineSmooth(const FT* const * src, const FT* m, int n, ET* dst, int len)
{
......@@ -2029,25 +2060,42 @@ private:
} // namespace anon
void GaussianBlurFixedPoint(const Mat& src, /*const*/ Mat& dst,
const uint16_t/*ufixedpoint16*/* fkx, int fkx_size,
const uint16_t/*ufixedpoint16*/* fky, int fky_size,
int borderType)
template <typename RFT, typename ET, typename FT>
void GaussianBlurFixedPointImpl(const Mat& src, /*const*/ Mat& dst,
const RFT* fkx, int fkx_size,
const RFT* fky, int fky_size,
int borderType)
{
CV_INSTRUMENT_REGION();
CV_Assert(src.depth() == CV_8U && ((borderType & BORDER_ISOLATED) || !src.isSubmatrix()));
fixedSmoothInvoker<uint8_t, ufixedpoint16> invoker(
src.ptr<uint8_t>(), src.step1(),
dst.ptr<uint8_t>(), dst.step1(), dst.cols, dst.rows, dst.channels(),
(const ufixedpoint16*)fkx, fkx_size, (const ufixedpoint16*)fky, fky_size,
CV_Assert(src.depth() == DataType<ET>::depth && ((borderType & BORDER_ISOLATED) || !src.isSubmatrix()));
fixedSmoothInvoker<ET, FT> invoker(
src.ptr<ET>(), src.step1(),
dst.ptr<ET>(), dst.step1(), dst.cols, dst.rows, dst.channels(),
(const FT*)fkx, fkx_size, (const FT*)fky, fky_size,
borderType & ~BORDER_ISOLATED);
{
// TODO AVX guard (external call)
parallel_for_(Range(0, dst.rows), invoker, std::max(1, std::min(getNumThreads(), getNumberOfCPUs())));
}
}
// 8-bit image path: raw uint16_t kernel coefficients are treated as
// ufixedpoint16 (Q8.8) by the shared implementation.
template <>
void GaussianBlurFixedPoint<uint16_t>(const Mat& src, /*const*/ Mat& dst,
const uint16_t/*ufixedpoint16*/* fkx, int fkx_size,
const uint16_t/*ufixedpoint16*/* fky, int fky_size,
int borderType)
{
GaussianBlurFixedPointImpl<uint16_t, uint8_t, ufixedpoint16>(src, dst, fkx, fkx_size, fky, fky_size, borderType);
}
// 16-bit image path: raw uint32_t kernel coefficients are treated as
// ufixedpoint32 (Q16.16) by the shared implementation.
template <>
void GaussianBlurFixedPoint<uint32_t>(const Mat& src, /*const*/ Mat& dst,
const uint32_t/*ufixedpoint32*/* fkx, int fkx_size,
const uint32_t/*ufixedpoint32*/* fky, int fky_size,
int borderType)
{
GaussianBlurFixedPointImpl<uint32_t, uint16_t, ufixedpoint32>(src, dst, fkx, fkx_size, fky, fky_size, borderType);
}
#endif
CV_CPU_OPTIMIZATION_NAMESPACE_END
} // namespace
......@@ -7,13 +7,15 @@
namespace opencv_test { namespace {
static const int fixedShiftU8 = 8;
static const int64_t fixedOne = (1L << fixedShiftU8);
int64_t v[][9] = {
{ fixedOne }, // size 1, sigma 0
{ fixedOne >> 2, fixedOne >> 1, fixedOne >> 2 }, // size 3, sigma 0
{ fixedOne >> 4, fixedOne >> 2, 6 * (fixedOne >> 4), fixedOne >> 2, fixedOne >> 4 }, // size 5, sigma 0
{ fixedOne >> 5, 7 * (fixedOne >> 6), 7 * (fixedOne >> 5), 9 * (fixedOne >> 5), 7 * (fixedOne >> 5), 7 * (fixedOne >> 6), fixedOne >> 5 }, // size 7, sigma 0
static const int64_t fixedOneU8 = (1L << fixedShiftU8);
static const int fixedShiftU16 = 16;
static const int64_t fixedOneU16 = (1L << fixedShiftU16);
int64_t vU8[][9] = {
{ fixedOneU8 }, // size 1, sigma 0
{ fixedOneU8 >> 2, fixedOneU8 >> 1, fixedOneU8 >> 2 }, // size 3, sigma 0
{ fixedOneU8 >> 4, fixedOneU8 >> 2, 6 * (fixedOneU8 >> 4), fixedOneU8 >> 2, fixedOneU8 >> 4 }, // size 5, sigma 0
{ fixedOneU8 >> 5, 7 * (fixedOneU8 >> 6), 7 * (fixedOneU8 >> 5), 9 * (fixedOneU8 >> 5), 7 * (fixedOneU8 >> 5), 7 * (fixedOneU8 >> 6), fixedOneU8 >> 5 }, // size 7, sigma 0
{ 4, 13, 30, 51, 60, 51, 30, 13, 4 }, // size 9, sigma 0
#if 1
#define CV_TEST_INACCURATE_GAUSSIAN_BLUR
......@@ -24,6 +26,14 @@ namespace opencv_test { namespace {
#endif
};
// Reference Gaussian kernels in Q16 fixed point (fixedShiftU16 fractional
// bits) for the 16-bit tests. Rows correspond to kernel sizes 1/3/5/7/9
// with sigma 0; the size-9 row is the corresponding vU8 row scaled by 2^8.
int64_t vU16[][9] = {
{ fixedOneU16 }, // size 1, sigma 0
{ fixedOneU16 >> 2, fixedOneU16 >> 1, fixedOneU16 >> 2 }, // size 3, sigma 0
{ fixedOneU16 >> 4, fixedOneU16 >> 2, 6 * (fixedOneU16 >> 4), fixedOneU16 >> 2, fixedOneU16 >> 4 }, // size 5, sigma 0
{ fixedOneU16 >> 5, 7 * (fixedOneU16 >> 6), 7 * (fixedOneU16 >> 5), 9 * (fixedOneU16 >> 5), 7 * (fixedOneU16 >> 5), 7 * (fixedOneU16 >> 6), fixedOneU16 >> 5 }, // size 7, sigma 0
{ 4<<8, 13<<8, 30<<8, 51<<8, 60<<8, 51<<8, 30<<8, 13<<8, 4<<8 } // size 9, sigma 0
};
template <typename T, int fixedShift>
T eval(Mat src, vector<int64_t> kernelx, vector<int64_t> kernely)
{
......@@ -39,8 +49,6 @@ namespace opencv_test { namespace {
return saturate_cast<T>((val + fixedRound) >> (fixedShift * 2));
}
TEST(GaussianBlur_Bitexact, Linear8U)
{
struct testmode
{
int type;
......@@ -50,34 +58,6 @@ TEST(GaussianBlur_Bitexact, Linear8U)
double sigma_y;
vector<int64_t> kernel_x;
vector<int64_t> kernel_y;
} modes[] = {
{ CV_8UC1, Size( 1, 1), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 2, 2), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 3, 1), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 1, 3), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 3, 3), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 3, 3), Size(5, 5), 0, 0, vector<int64_t>(v[2], v[2]+5), vector<int64_t>(v[2], v[2]+5) },
{ CV_8UC1, Size( 3, 3), Size(7, 7), 0, 0, vector<int64_t>(v[3], v[3]+7), vector<int64_t>(v[3], v[3]+7) },
{ CV_8UC1, Size( 5, 5), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector<int64_t>(v[2], v[2]+5), vector<int64_t>(v[2], v[2]+5) },
{ CV_8UC1, Size( 3, 5), Size(5, 5), 0, 0, vector<int64_t>(v[2], v[2]+5), vector<int64_t>(v[2], v[2]+5) },
{ CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector<int64_t>(v[2], v[2]+5), vector<int64_t>(v[2], v[2]+5) },
{ CV_8UC1, Size( 5, 5), Size(7, 7), 0, 0, vector<int64_t>(v[3], v[3]+7), vector<int64_t>(v[3], v[3]+7) },
{ CV_8UC1, Size( 7, 7), Size(7, 7), 0, 0, vector<int64_t>(v[3], v[3]+7), vector<int64_t>(v[3], v[3]+7) },
{ CV_8UC1, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC2, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC3, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC4, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(v[1], v[1]+3), vector<int64_t>(v[1], v[1]+3) },
{ CV_8UC1, Size( 256, 128), Size(5, 5), 0, 0, vector<int64_t>(v[2], v[2]+5), vector<int64_t>(v[2], v[2]+5) },
{ CV_8UC1, Size( 256, 128), Size(7, 7), 0, 0, vector<int64_t>(v[3], v[3]+7), vector<int64_t>(v[3], v[3]+7) },
{ CV_8UC1, Size( 256, 128), Size(9, 9), 0, 0, vector<int64_t>(v[4], v[4]+9), vector<int64_t>(v[4], v[4]+9) },
#ifdef CV_TEST_INACCURATE_GAUSSIAN_BLUR
{ CV_8UC1, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
{ CV_8UC2, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
{ CV_8UC3, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
{ CV_8UC4, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(v[5], v[5]+3), vector<int64_t>(v[6], v[6]+3) },
{ CV_8UC1, Size( 256, 128), Size(5, 5), 0.375, 0.75, vector<int64_t>(v[7], v[7]+5), vector<int64_t>(v[8], v[8]+5) }
#endif
};
int bordermodes[] = {
......@@ -93,11 +73,12 @@ TEST(GaussianBlur_Bitexact, Linear8U)
// BORDER_REFLECT_101
};
for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind)
template <int fixedShift>
void checkMode(const testmode& mode)
{
int type = modes[modeind].type, depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int dcols = modes[modeind].sz.width, drows = modes[modeind].sz.height;
Size kernel = modes[modeind].kernel;
int type = mode.type, depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
int dcols = mode.sz.width, drows = mode.sz.height;
Size kernel = mode.kernel;
int rows = drows + 20, cols = dcols + 20;
Mat src(rows, cols, type), refdst(drows, dcols, type), dst;
......@@ -142,25 +123,93 @@ TEST(GaussianBlur_Bitexact, Linear8U)
for (int i = 0; i < dcols; i++)
{
if (depth == CV_8U)
dst_chan.at<uint8_t>(j, i) = eval<uint8_t, fixedShiftU8>(src_chan(Rect(i,j,kernel.width,kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y);
dst_chan.at<uint8_t>(j, i) = eval<uint8_t, fixedShift>(src_chan(Rect(i,j,kernel.width,kernel.height)), mode.kernel_x, mode.kernel_y);
else if (depth == CV_16U)
dst_chan.at<uint16_t>(j, i) = eval<uint16_t, fixedShiftU8>(src_chan(Rect(i, j, kernel.width, kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y);
dst_chan.at<uint16_t>(j, i) = eval<uint16_t, fixedShift>(src_chan(Rect(i, j, kernel.width, kernel.height)), mode.kernel_x, mode.kernel_y);
else if (depth == CV_16S)
dst_chan.at<int16_t>(j, i) = eval<int16_t, fixedShiftU8>(src_chan(Rect(i, j, kernel.width, kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y);
dst_chan.at<int16_t>(j, i) = eval<int16_t, fixedShift>(src_chan(Rect(i, j, kernel.width, kernel.height)), mode.kernel_x, mode.kernel_y);
else if (depth == CV_32S)
dst_chan.at<int32_t>(j, i) = eval<int32_t, fixedShiftU8>(src_chan(Rect(i, j, kernel.width, kernel.height)), modes[modeind].kernel_x, modes[modeind].kernel_y);
dst_chan.at<int32_t>(j, i) = eval<int32_t, fixedShift>(src_chan(Rect(i, j, kernel.width, kernel.height)), mode.kernel_x, mode.kernel_y);
else
CV_Assert(0);
}
mixChannels(dst_chan, refdst, toFrom, 1);
}
cv::GaussianBlur(src_roi, dst, kernel, modes[modeind].sigma_x, modes[modeind].sigma_y, bordermodes[borderind]);
cv::GaussianBlur(src_roi, dst, kernel, mode.sigma_x, mode.sigma_y, bordermodes[borderind]);
EXPECT_GE(0, cvtest::norm(refdst, dst, cv::NORM_L1))
<< "GaussianBlur " << cn << "-chan mat " << drows << "x" << dcols << " by kernel " << kernel << " sigma(" << modes[modeind].sigma_x << ";" << modes[modeind].sigma_y << ") failed with max diff " << cvtest::norm(refdst, dst, cv::NORM_INF);
<< "GaussianBlur " << cn << "-chan mat " << drows << "x" << dcols << " by kernel " << kernel << " sigma(" << mode.sigma_x << ";" << mode.sigma_y << ") failed with max diff " << cvtest::norm(refdst, dst, cv::NORM_INF);
}
}
// Bit-exact GaussianBlur test for 8-bit images. Each mode pairs an image
// type/size, kernel size and sigmas with the expected Q8 fixed-point
// separable kernels taken from the vU8 table; checkMode() runs
// cv::GaussianBlur and compares it against a naive fixed-point reference
// built from those kernels.
TEST(GaussianBlur_Bitexact, Linear8U)
{
testmode modes[] = {
{ CV_8UC1, Size( 1, 1), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 2, 2), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 3, 1), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 1, 3), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 3, 3), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 3, 3), Size(5, 5), 0, 0, vector<int64_t>(vU8[2], vU8[2]+5), vector<int64_t>(vU8[2], vU8[2]+5) },
{ CV_8UC1, Size( 3, 3), Size(7, 7), 0, 0, vector<int64_t>(vU8[3], vU8[3]+7), vector<int64_t>(vU8[3], vU8[3]+7) },
{ CV_8UC1, Size( 5, 5), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector<int64_t>(vU8[2], vU8[2]+5), vector<int64_t>(vU8[2], vU8[2]+5) },
{ CV_8UC1, Size( 3, 5), Size(5, 5), 0, 0, vector<int64_t>(vU8[2], vU8[2]+5), vector<int64_t>(vU8[2], vU8[2]+5) },
{ CV_8UC1, Size( 5, 5), Size(5, 5), 0, 0, vector<int64_t>(vU8[2], vU8[2]+5), vector<int64_t>(vU8[2], vU8[2]+5) },
{ CV_8UC1, Size( 5, 5), Size(7, 7), 0, 0, vector<int64_t>(vU8[3], vU8[3]+7), vector<int64_t>(vU8[3], vU8[3]+7) },
{ CV_8UC1, Size( 7, 7), Size(7, 7), 0, 0, vector<int64_t>(vU8[3], vU8[3]+7), vector<int64_t>(vU8[3], vU8[3]+7) },
{ CV_8UC1, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC2, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC3, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC4, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU8[1], vU8[1]+3), vector<int64_t>(vU8[1], vU8[1]+3) },
{ CV_8UC1, Size( 256, 128), Size(5, 5), 0, 0, vector<int64_t>(vU8[2], vU8[2]+5), vector<int64_t>(vU8[2], vU8[2]+5) },
{ CV_8UC1, Size( 256, 128), Size(7, 7), 0, 0, vector<int64_t>(vU8[3], vU8[3]+7), vector<int64_t>(vU8[3], vU8[3]+7) },
{ CV_8UC1, Size( 256, 128), Size(9, 9), 0, 0, vector<int64_t>(vU8[4], vU8[4]+9), vector<int64_t>(vU8[4], vU8[4]+9) },
#ifdef CV_TEST_INACCURATE_GAUSSIAN_BLUR
// Non-zero sigma cases use asymmetric x/y kernels (vU8 rows 5-8).
{ CV_8UC1, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(vU8[5], vU8[5]+3), vector<int64_t>(vU8[6], vU8[6]+3) },
{ CV_8UC2, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(vU8[5], vU8[5]+3), vector<int64_t>(vU8[6], vU8[6]+3) },
{ CV_8UC3, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(vU8[5], vU8[5]+3), vector<int64_t>(vU8[6], vU8[6]+3) },
{ CV_8UC4, Size( 256, 128), Size(3, 3), 1.75, 0.875, vector<int64_t>(vU8[5], vU8[5]+3), vector<int64_t>(vU8[6], vU8[6]+3) },
{ CV_8UC1, Size( 256, 128), Size(5, 5), 0.375, 0.75, vector<int64_t>(vU8[7], vU8[7]+5), vector<int64_t>(vU8[8], vU8[8]+5) }
#endif
};
for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind)
{
checkMode<fixedShiftU8>(modes[modeind]);
}
}
// Bit-exact GaussianBlur test for 16-bit unsigned images. Each mode pairs
// an image type/size and kernel size with the expected Q16 fixed-point
// separable kernels from the vU16 table; checkMode() runs cv::GaussianBlur
// and compares it against a naive fixed-point reference built from those
// kernels.
TEST(GaussianBlur_Bitexact, Linear16U)
{
    testmode modes[] = {
        { CV_16UC1, Size( 1, 1), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 2, 2), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 3, 1), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 1, 3), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 3, 3), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 3, 3), Size(5, 5), 0, 0, vector<int64_t>(vU16[2], vU16[2]+5), vector<int64_t>(vU16[2], vU16[2]+5) },
        { CV_16UC1, Size( 3, 3), Size(7, 7), 0, 0, vector<int64_t>(vU16[3], vU16[3]+7), vector<int64_t>(vU16[3], vU16[3]+7) },
        { CV_16UC1, Size( 5, 5), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 5, 5), Size(5, 5), 0, 0, vector<int64_t>(vU16[2], vU16[2]+5), vector<int64_t>(vU16[2], vU16[2]+5) },
        { CV_16UC1, Size( 3, 5), Size(5, 5), 0, 0, vector<int64_t>(vU16[2], vU16[2]+5), vector<int64_t>(vU16[2], vU16[2]+5) },
        { CV_16UC1, Size( 5, 5), Size(5, 5), 0, 0, vector<int64_t>(vU16[2], vU16[2]+5), vector<int64_t>(vU16[2], vU16[2]+5) },
        { CV_16UC1, Size( 5, 5), Size(7, 7), 0, 0, vector<int64_t>(vU16[3], vU16[3]+7), vector<int64_t>(vU16[3], vU16[3]+7) },
        { CV_16UC1, Size( 7, 7), Size(7, 7), 0, 0, vector<int64_t>(vU16[3], vU16[3]+7), vector<int64_t>(vU16[3], vU16[3]+7) },
        { CV_16UC1, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC2, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC3, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC4, Size( 256, 128), Size(3, 3), 0, 0, vector<int64_t>(vU16[1], vU16[1]+3), vector<int64_t>(vU16[1], vU16[1]+3) },
        { CV_16UC1, Size( 256, 128), Size(5, 5), 0, 0, vector<int64_t>(vU16[2], vU16[2]+5), vector<int64_t>(vU16[2], vU16[2]+5) },
        { CV_16UC1, Size( 256, 128), Size(7, 7), 0, 0, vector<int64_t>(vU16[3], vU16[3]+7), vector<int64_t>(vU16[3], vU16[3]+7) },
        { CV_16UC1, Size( 256, 128), Size(9, 9), 0, 0, vector<int64_t>(vU16[4], vU16[4]+9), vector<int64_t>(vU16[4], vU16[4]+9) },
    };

    for (int modeind = 0, _modecnt = sizeof(modes) / sizeof(modes[0]); modeind < _modecnt; ++modeind)
    {
        // Use the named shift constant (value 16) instead of a magic number,
        // mirroring the Linear8U test which passes fixedShiftU8.
        checkMode<fixedShiftU16>(modes[modeind]);
    }
}
TEST(GaussianBlur_Bitexact, regression_15015)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册