提交 8b44ee2c 编写于 作者: S shengwenxue

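Fix MSA sum overflow issue: widen the result types of v_reduce_sum and the final casts in the MSA sum macros so that the sum of all lanes is no longer truncated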

上级 9b2b2c88
......@@ -1037,12 +1037,12 @@ inline scalartype v_reduce_sum(const _Tpvec& a) \
return (scalartype)msa_sum_##suffix(a.val); \
}
/* Each invocation passes (vector type, scalar result type, msa_sum_ suffix).
 * Presumably the macro expands to the v_reduce_sum body shown just above, which
 * casts msa_sum_<suffix>(a.val) to the scalar result type - the macro's own
 * #define line is not visible here, so confirm against the full header.
 *
 * NOTE(review): this view lost its diff +/- markers. By unified-diff ordering
 * the next six invocations appear to be the pre-commit (removed) lines: their
 * result type is only as wide as a single lane, so the sum of all lanes can
 * overflow it (e.g. sixteen unsigned char lanes can add up to far more than 255). */
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned char, u8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, char, s8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned short, u16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, short, s16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int, s32)
/* Presumably the post-commit (added) lines: same vector types and suffixes as
 * the six invocations above, but each result type is the next wider integer
 * type. Only one of the two groups can exist in the real header, since the
 * two groups differ only in the scalar result type passed to the macro. */
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint8x16, unsigned short, u8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int8x16, short, s8)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint16x8, unsigned, u16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int16x8, int, s16)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_uint32x4, uint64_t, u32)
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_int32x4, int64_t, s32)
/* Appears once only, so it is most likely an unchanged context line: the
 * float32 reduction keeps float as its result type. */
OPENCV_HAL_IMPL_MSA_REDUCE_SUM(v_float32x4, float, f32)
inline uint64 v_reduce_sum(const v_uint64x2& a)
......
......@@ -719,7 +719,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
v2i64 _c; \
_b = __builtin_msa_hadd_s_w(__a, __a); \
_c = __builtin_msa_hadd_s_d(_b, _b); \
(int16_t)(_c[0] + _c[1]); \
(int32_t)(_c[0] + _c[1]); \
})
......@@ -736,7 +736,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
({ \
v2i64 _b; \
_b = __builtin_msa_hadd_s_d(__a, __a); \
(int32_t)(_b[0] + _b[1]); \
(int64_t)(_b[0] + _b[1]); \
})
/* uint8_t msa_sum_u8(v16u8 __a)*/
......@@ -756,7 +756,7 @@ typedef double v1f64 __attribute__ ((vector_size(8), aligned(8)));
v4i32 _c32; \
_b16 = __builtin_msa_hadd_s_h(__a, __a); \
_c32 = __builtin_msa_hadd_s_w(_b16, _b16); \
(int8_t)msa_sum_s32(_c32); \
(int16_t)msa_sum_s32(_c32); \
})
/* float msa_sum_f32(v4f32 __a)*/
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册