From bd0732b1d03bcbc15d919d6dbe3dc45eac1b9514 Mon Sep 17 00:00:00 2001
From: Nicholas Ho <88894303+Nicholas-Ho-arm@users.noreply.github.com>
Date: Fri, 15 Oct 2021 12:47:53 +0100
Subject: [PATCH] Merge pull request #20740 from Nicholas-Ho-arm:3.4_SymmColumnVec_32f8u

* Add SymmColumnVec_32f8u

* Fix double to float warnings
---
 modules/imgproc/src/filter.simd.hpp | 81 ++++++++++++++++++++++++++++-
 1 file changed, 79 insertions(+), 2 deletions(-)

diff --git a/modules/imgproc/src/filter.simd.hpp b/modules/imgproc/src/filter.simd.hpp
index 94dbce060c..966de94c0b 100644
--- a/modules/imgproc/src/filter.simd.hpp
+++ b/modules/imgproc/src/filter.simd.hpp
@@ -1160,6 +1160,81 @@ struct SymmColumnVec_32s8u
     Mat kernel;
 };
 
+struct SymmColumnVec_32f8u // SIMD column pass for symmetric/anti-symmetric kernels: float source rows -> uchar output
+{
+    SymmColumnVec_32f8u() { symmetryType = 0; delta = 0; }
+    SymmColumnVec_32f8u(const Mat& _kernel, int _symmetryType, int, double _delta) // third argument is accepted but unused
+    {
+        symmetryType = _symmetryType;
+        kernel = _kernel;
+        delta = (float)_delta; // kept as float to match the float accumulators below
+        CV_Assert( (symmetryType & (KERNEL_SYMMETRICAL | KERNEL_ASYMMETRICAL)) != 0 );
+    }
+
+    int operator()(const uchar** _src, uchar* _dst, int width) const // returns the count of leading elements written to _dst
+    {
+        CV_INSTRUMENT_REGION();
+
+        int _ksize = kernel.rows + kernel.cols - 1;
+        if( _ksize == 1 ) return 0; // single-tap kernel: nothing is processed here
+        const int ksize2 = _ksize / 2;
+        const float* ky = kernel.ptr<float>() + ksize2; // ky[0] is the centre tap
+        int i = 0, k;
+        bool symmetrical = (symmetryType & KERNEL_SYMMETRICAL) != 0;
+        const float** src = (const float**)_src; // rows are read as float; src[-k]..src[k] are indexed below
+
+        if( symmetrical )
+        {
+            for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes ) // one v_uint8 store per pass, built from four float vectors
+            {
+                v_float32 v_ky0 = vx_setall_f32(ky[0]);
+                v_float32 v32_delta = vx_setall_f32(delta);
+                const float* S = src[0] + i;
+                v_float32 s0 = v_muladd(v_ky0, vx_load(S), v32_delta); // centre row * ky[0] + delta
+                v_float32 s1 = v_muladd(v_ky0, vx_load(S + v_float32::nlanes), v32_delta);
+                v_float32 s2 = v_muladd(v_ky0, vx_load(S + 2*v_float32::nlanes), v32_delta);
+                v_float32 s3 = v_muladd(v_ky0, vx_load(S + 3*v_float32::nlanes), v32_delta);
+                for( k = 1; k <= ksize2; k++ )
+                {
+                    v_float32 v_kyk = vx_setall_f32(ky[k]);
+                    const float* S0 = src[k] + i;
+                    const float* S1 = src[-k] + i;
+                    s0 = v_muladd(v_kyk, vx_load(S0) + vx_load(S1), s0); // rows +k and -k share ky[k], so they are summed first
+                    s1 = v_muladd(v_kyk, vx_load(S0 + v_float32::nlanes) + vx_load(S1 + v_float32::nlanes), s1);
+                    s2 = v_muladd(v_kyk, vx_load(S0 + 2*v_float32::nlanes) + vx_load(S1 + 2*v_float32::nlanes), s2);
+                    s3 = v_muladd(v_kyk, vx_load(S0 + 3*v_float32::nlanes) + vx_load(S1 + 3*v_float32::nlanes), s3);
+                }
+                v_store(_dst + i, v_pack_u(v_pack(v_round(s0), v_round(s1)), v_pack(v_round(s2), v_round(s3)))); // round, then narrow twice down to 8-bit
+            }
+        }
+        else
+        {
+            for( ; i <= width - v_uint8::nlanes; i += v_uint8::nlanes )
+            {
+                v_float32 s0 = vx_setall_f32(delta); // ky[0] is not applied in this branch; accumulators start at delta
+                v_float32 s1 = vx_setall_f32(delta);
+                v_float32 s2 = vx_setall_f32(delta);
+                v_float32 s3 = vx_setall_f32(delta);
+                for( k = 1; k <= ksize2; k++ )
+                {
+                    v_float32 v_kyk = vx_setall_f32(ky[k]);
+                    const float* S0 = src[k] + i;
+                    const float* S1 = src[-k] + i;
+                    s0 = v_muladd(v_kyk, vx_load(S0) - vx_load(S1), s0); // row +k minus row -k, scaled by ky[k]
+                    s1 = v_muladd(v_kyk, vx_load(S0 + v_float32::nlanes) - vx_load(S1 + v_float32::nlanes), s1);
+                    s2 = v_muladd(v_kyk, vx_load(S0 + 2*v_float32::nlanes) - vx_load(S1 + 2*v_float32::nlanes), s2);
+                    s3 = v_muladd(v_kyk, vx_load(S0 + 3*v_float32::nlanes) - vx_load(S1 + 3*v_float32::nlanes), s3);
+                }
+                v_store(_dst + i, v_pack_u(v_pack(v_round(s0), v_round(s1)), v_pack(v_round(s2), v_round(s3))));
+            }
+        }
+        return i;
+    }
+    int symmetryType;
+    float delta;
+    Mat kernel;
+};
+
 
 struct SymmColumnSmallVec_32s16s
 {
@@ -2341,6 +2416,7 @@ typedef RowNoVec RowVec_32f;
 typedef SymmRowSmallNoVec SymmRowSmallVec_8u32s;
 typedef SymmRowSmallNoVec SymmRowSmallVec_32f;
 typedef ColumnNoVec SymmColumnVec_32s8u;
+typedef ColumnNoVec SymmColumnVec_32f8u;
 typedef ColumnNoVec SymmColumnVec_32f16s;
 typedef ColumnNoVec SymmColumnVec_32f;
 typedef SymmColumnSmallNoVec SymmColumnSmallVec_32s16s;
@@ -3031,8 +3107,9 @@ Ptr<BaseColumnFilter> getLinearColumnFilter(
                 (kernel, anchor, delta, symmetryType, FixedPtCastEx<int, uchar>(bits),
                 SymmColumnVec_32s8u(kernel, symmetryType, bits, delta));
         if( ddepth == CV_8U && sdepth == CV_32F )
-            return makePtr<SymmetricalColumnFilter<Cast<float, uchar>, ColumnNoVec> >
-                (kernel, anchor, delta, symmetryType);
+            return makePtr<SymmetricalColumnFilter<Cast<float, uchar>, SymmColumnVec_32f8u> >
+                (kernel, anchor, delta, symmetryType, Cast<float, uchar>(),
+                SymmColumnVec_32f8u(kernel, symmetryType, 0, delta));
         if( ddepth == CV_8U && sdepth == CV_64F )
             return makePtr<SymmetricalColumnFilter<Cast<double, uchar>, ColumnNoVec> >
                 (kernel, anchor, delta, symmetryType);
-- 
GitLab