Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
28db4a22
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
28db4a22
编写于
9月 02, 2016
作者:
M
Maksim Shabunin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7175 from tomoaki0705:featureIntrinsic64
上级
1ac1e5be
7fef96be
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
643 addition
and
349 deletion
+643
-349
modules/core/include/opencv2/core/hal/intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+280
-2
modules/core/include/opencv2/core/hal/intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+10
-0
modules/core/test/test_intrin.cpp
modules/core/test/test_intrin.cpp
+6
-0
modules/imgproc/src/accum.cpp
modules/imgproc/src/accum.cpp
+347
-347
未找到文件。
modules/core/include/opencv2/core/hal/intrin_neon.hpp
浏览文件 @
28db4a22
...
...
@@ -53,6 +53,28 @@ namespace cv
//! @cond IGNORED
#define CV_SIMD128 1
#if defined(__aarch64__)
#define CV_SIMD128_64F 1
#else
#define CV_SIMD128_64F 0
#endif
#if CV_SIMD128_64F
#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
template <typename T> static inline \
_Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
template <typename T> static inline \
float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
uint8x16_t
,
u8
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
int8x16_t
,
s8
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
uint16x8_t
,
u16
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
int16x8_t
,
s16
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
uint32x4_t
,
u32
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
int32x4_t
,
s32
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
uint64x2_t
,
u64
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
int64x2_t
,
s64
)
OPENCV_HAL_IMPL_NEON_REINTERPRET
(
float32x4_t
,
f32
)
#endif
struct
v_uint8x16
{
...
...
@@ -232,6 +254,27 @@ struct v_int64x2
int64x2_t
val
;
};
#if CV_SIMD128_64F
struct
v_float64x2
{
typedef
double
lane_type
;
enum
{
nlanes
=
2
};
v_float64x2
()
{}
explicit
v_float64x2
(
float64x2_t
v
)
:
val
(
v
)
{}
v_float64x2
(
double
v0
,
double
v1
)
{
double
v
[]
=
{
v0
,
v1
};
val
=
vld1q_f64
(
v
);
}
double
get0
()
const
{
return
vgetq_lane_f64
(
val
,
0
);
}
float64x2_t
val
;
};
#endif
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
...
...
@@ -255,6 +298,21 @@ OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_INIT
(
uint64x2
,
uint64
,
u64
)
OPENCV_HAL_IMPL_NEON_INIT
(
int64x2
,
int64
,
s64
)
OPENCV_HAL_IMPL_NEON_INIT
(
float32x4
,
float
,
f32
)
#if CV_SIMD128_64F
#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
OPENCV_HAL_IMPL_NEON_INIT
(
float64x2
,
double
,
f64
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
uint8x16
,
u8
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
int8x16
,
s8
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
uint16x8
,
u16
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
int16x8
,
s16
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
uint32x4
,
u32
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
int32x4
,
s32
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
uint64x2
,
u64
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
int64x2
,
s64
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
float32x4
,
f32
)
OPENCV_HAL_IMPL_NEON_INIT_64
(
float64x2
,
f64
)
#endif
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
...
...
@@ -337,7 +395,13 @@ OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
-
,
v_int64x2
,
vsubq_s64
)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
+
,
v_uint64x2
,
vaddq_u64
)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
-
,
v_uint64x2
,
vsubq_u64
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_BIN_OP
(
/
,
v_float32x4
,
vdivq_f32
)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
+
,
v_float64x2
,
vaddq_f64
)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
-
,
v_float64x2
,
vsubq_f64
)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
*
,
v_float64x2
,
vmulq_f64
)
OPENCV_HAL_IMPL_NEON_BIN_OP
(
/
,
v_float64x2
,
vdivq_f64
)
#else
inline
v_float32x4
operator
/
(
const
v_float32x4
&
a
,
const
v_float32x4
&
b
)
{
float32x4_t
reciprocal
=
vrecpeq_f32
(
b
.
val
);
...
...
@@ -353,6 +417,7 @@ inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
a
.
val
=
vmulq_f32
(
a
.
val
,
reciprocal
);
return
a
;
}
#endif
inline
void
v_mul_expand
(
const
v_int16x8
&
a
,
const
v_int16x8
&
b
,
v_int32x4
&
c
,
v_int32x4
&
d
)
...
...
@@ -421,6 +486,18 @@ inline v_float32x4 operator ~ (const v_float32x4& a)
return
v_float32x4
(
vreinterpretq_f32_s32
(
vmvnq_s32
(
vreinterpretq_s32_f32
(
a
.
val
))));
}
#if CV_SIMD128_64F
inline
v_float32x4
v_sqrt
(
const
v_float32x4
&
x
)
{
return
v_float32x4
(
vsqrtq_f32
(
x
.
val
));
}
inline
v_float32x4
v_invsqrt
(
const
v_float32x4
&
x
)
{
v_float32x4
one
=
v_setall_f32
(
1.0
f
);
return
one
/
v_sqrt
(
x
);
}
#else
inline
v_float32x4
v_sqrt
(
const
v_float32x4
&
x
)
{
float32x4_t
x1
=
vmaxq_f32
(
x
.
val
,
vdupq_n_f32
(
FLT_MIN
));
...
...
@@ -437,10 +514,47 @@ inline v_float32x4 v_invsqrt(const v_float32x4& x)
e
=
vmulq_f32
(
vrsqrtsq_f32
(
vmulq_f32
(
x
.
val
,
e
),
e
),
e
);
return
v_float32x4
(
e
);
}
#endif
inline
v_float32x4
v_abs
(
v_float32x4
x
)
{
return
v_float32x4
(
vabsq_f32
(
x
.
val
));
}
#if CV_SIMD128_64F
#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \
inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
{ \
return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \
} \
inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
{ \
a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \
return a; \
}
OPENCV_HAL_IMPL_NEON_DBL_BIT_OP
(
&
,
vandq_s64
)
OPENCV_HAL_IMPL_NEON_DBL_BIT_OP
(
|
,
vorrq_s64
)
OPENCV_HAL_IMPL_NEON_DBL_BIT_OP
(
^
,
veorq_s64
)
inline
v_float64x2
operator
~
(
const
v_float64x2
&
a
)
{
return
v_float64x2
(
vreinterpretq_f64_s32
(
vmvnq_s32
(
vreinterpretq_s32_f64
(
a
.
val
))));
}
inline
v_float64x2
v_sqrt
(
const
v_float64x2
&
x
)
{
return
v_float64x2
(
vsqrtq_f64
(
x
.
val
));
}
inline
v_float64x2
v_invsqrt
(
const
v_float64x2
&
x
)
{
v_float64x2
one
=
v_setall_f64
(
1.0
f
);
return
one
/
v_sqrt
(
x
);
}
inline
v_float64x2
v_abs
(
v_float64x2
x
)
{
return
v_float64x2
(
vabsq_f64
(
x
.
val
));
}
#endif
// TODO: exp, log, sin, cos
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \
...
...
@@ -463,8 +577,23 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_int32x4
,
v_max
,
vmaxq_s32
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float32x4
,
v_min
,
vminq_f32
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float32x4
,
v_max
,
vmaxq_f32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float64x2
,
v_min
,
vminq_f64
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float64x2
,
v_max
,
vmaxq_f64
)
#endif
#if CV_SIMD128_64F
inline
int64x2_t
vmvnq_s64
(
int64x2_t
a
)
{
int64x2_t
vx
=
vreinterpretq_s64_u32
(
vdupq_n_u32
(
0xFFFFFFFF
));
return
veorq_s64
(
a
,
vx
);
}
inline
uint64x2_t
vmvnq_u64
(
uint64x2_t
a
)
{
uint64x2_t
vx
=
vreinterpretq_u64_u32
(
vdupq_n_u32
(
0xFFFFFFFF
));
return
veorq_u64
(
a
,
vx
);
}
#endif
#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
...
...
@@ -486,6 +615,11 @@ OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP
(
v_uint32x4
,
OPENCV_HAL_NOP
,
u32
,
u32
)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP
(
v_int32x4
,
vreinterpretq_s32_u32
,
s32
,
u32
)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP
(
v_float32x4
,
vreinterpretq_f32_u32
,
f32
,
u32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_INT_CMP_OP
(
v_uint64x2
,
OPENCV_HAL_NOP
,
u64
,
u64
)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP
(
v_int64x2
,
vreinterpretq_s64_u64
,
s64
,
u64
)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP
(
v_float64x2
,
vreinterpretq_f64_u64
,
f64
,
u64
)
#endif
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_uint8x16
,
v_add_wrap
,
vaddq_u8
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_int8x16
,
v_add_wrap
,
vaddq_s8
)
...
...
@@ -501,6 +635,9 @@ OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_uint16x8
,
v_absdiff
,
vabdq_u16
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_uint32x4
,
v_absdiff
,
vabdq_u32
)
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float32x4
,
v_absdiff
,
vabdq_f32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_BIN_FUNC
(
v_float64x2
,
v_absdiff
,
vabdq_f64
)
#endif
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
...
...
@@ -528,6 +665,24 @@ inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_
return
v_float32x4
(
vmlaq_f32
(
c
.
val
,
a
.
val
,
b
.
val
));
}
#if CV_SIMD128_64F
inline
v_float64x2
v_magnitude
(
const
v_float64x2
&
a
,
const
v_float64x2
&
b
)
{
v_float64x2
x
(
vaddq_f64
(
vmulq_f64
(
a
.
val
,
a
.
val
),
vmulq_f64
(
b
.
val
,
b
.
val
)));
return
v_sqrt
(
x
);
}
inline
v_float64x2
v_sqr_magnitude
(
const
v_float64x2
&
a
,
const
v_float64x2
&
b
)
{
return
v_float64x2
(
vaddq_f64
(
vmulq_f64
(
a
.
val
,
a
.
val
),
vmulq_f64
(
b
.
val
,
b
.
val
)));
}
inline
v_float64x2
v_muladd
(
const
v_float64x2
&
a
,
const
v_float64x2
&
b
,
const
v_float64x2
&
c
)
{
return
v_float64x2
(
vaddq_f64
(
c
.
val
,
vmulq_f64
(
a
.
val
,
b
.
val
)));
}
#endif
// trade efficiency for convenience
#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
inline _Tpvec operator << (const _Tpvec& a, int n) \
...
...
@@ -575,6 +730,9 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP
(
v_uint64x2
,
uint64
,
u64
)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP
(
v_int64x2
,
int64
,
s64
)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP
(
v_float32x4
,
float
,
f32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP
(
v_float64x2
,
double
,
f64
)
#endif
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, scalartype, func, scalar_func) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
...
...
@@ -627,6 +785,16 @@ inline int v_signmask(const v_int32x4& a)
{
return
v_signmask
(
v_reinterpret_as_u32
(
a
));
}
inline
int
v_signmask
(
const
v_float32x4
&
a
)
{
return
v_signmask
(
v_reinterpret_as_u32
(
a
));
}
#if CV_SIMD128_64F
inline
int
v_signmask
(
const
v_uint64x2
&
a
)
{
int64x1_t
m0
=
vdup_n_s64
(
0
);
uint64x2_t
v0
=
vshlq_u64
(
vshrq_n_u64
(
a
.
val
,
63
),
vcombine_s64
(
m0
,
m0
));
return
(
int
)
vgetq_lane_u64
(
v0
,
0
)
+
((
int
)
vgetq_lane_u64
(
v0
,
1
)
<<
1
);
}
inline
int
v_signmask
(
const
v_float64x2
&
a
)
{
return
v_signmask
(
v_reinterpret_as_u64
(
a
));
}
#endif
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
inline bool v_check_all(const v_##_Tpvec& a) \
...
...
@@ -645,6 +813,9 @@ inline bool v_check_any(const v_##_Tpvec& a) \
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY
(
uint8x16
,
u8
,
7
)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY
(
uint16x8
,
u16
,
15
)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY
(
uint32x4
,
u32
,
31
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY
(
uint64x2
,
u64
,
63
)
#endif
inline
bool
v_check_all
(
const
v_int8x16
&
a
)
{
return
v_check_all
(
v_reinterpret_as_u8
(
a
));
}
...
...
@@ -664,6 +835,17 @@ inline bool v_check_any(const v_int32x4& a)
inline
bool
v_check_any
(
const
v_float32x4
&
a
)
{
return
v_check_any
(
v_reinterpret_as_u32
(
a
));
}
#if CV_SIMD128_64F
inline
bool
v_check_all
(
const
v_int64x2
&
a
)
{
return
v_check_all
(
v_reinterpret_as_u64
(
a
));
}
inline
bool
v_check_all
(
const
v_float64x2
&
a
)
{
return
v_check_all
(
v_reinterpret_as_u64
(
a
));
}
inline
bool
v_check_any
(
const
v_int64x2
&
a
)
{
return
v_check_any
(
v_reinterpret_as_u64
(
a
));
}
inline
bool
v_check_any
(
const
v_float64x2
&
a
)
{
return
v_check_any
(
v_reinterpret_as_u64
(
a
));
}
#endif
#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
...
...
@@ -677,6 +859,9 @@ OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16)
OPENCV_HAL_IMPL_NEON_SELECT
(
v_uint32x4
,
u32
,
u32
)
OPENCV_HAL_IMPL_NEON_SELECT
(
v_int32x4
,
s32
,
u32
)
OPENCV_HAL_IMPL_NEON_SELECT
(
v_float32x4
,
f32
,
u32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_SELECT
(
v_float64x2
,
f64
,
u64
)
#endif
#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
...
...
@@ -710,6 +895,27 @@ inline v_int32x4 v_load_expand_q(const schar* ptr)
return
v_int32x4
(
vmovl_s16
(
v1
));
}
#if defined(__aarch64__)
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
b0.val = vzip1q_##suffix(a0.val, a1.val); \
b1.val = vzip2q_##suffix(a0.val, a1.val); \
} \
inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
} \
inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
} \
inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
}
#else
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
...
...
@@ -730,6 +936,7 @@ inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c,
c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
}
#endif
OPENCV_HAL_IMPL_NEON_UNPACKS
(
uint8x16
,
u8
)
OPENCV_HAL_IMPL_NEON_UNPACKS
(
int8x16
,
s8
)
...
...
@@ -738,6 +945,9 @@ OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16)
OPENCV_HAL_IMPL_NEON_UNPACKS
(
uint32x4
,
u32
)
OPENCV_HAL_IMPL_NEON_UNPACKS
(
int32x4
,
s32
)
OPENCV_HAL_IMPL_NEON_UNPACKS
(
float32x4
,
f32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_UNPACKS
(
float64x2
,
f64
)
#endif
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
template <int s> \
...
...
@@ -755,6 +965,9 @@ OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
OPENCV_HAL_IMPL_NEON_EXTRACT
(
uint64x2
,
u64
)
OPENCV_HAL_IMPL_NEON_EXTRACT
(
int64x2
,
s64
)
OPENCV_HAL_IMPL_NEON_EXTRACT
(
float32x4
,
f32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_EXTRACT
(
float64x2
,
f64
)
#endif
inline
v_int32x4
v_round
(
const
v_float32x4
&
a
)
{
...
...
@@ -782,6 +995,38 @@ inline v_int32x4 v_ceil(const v_float32x4& a)
inline
v_int32x4
v_trunc
(
const
v_float32x4
&
a
)
{
return
v_int32x4
(
vcvtq_s32_f32
(
a
.
val
));
}
#if CV_SIMD128_64F
inline
v_int32x4
v_round
(
const
v_float64x2
&
a
)
{
static
const
int32x2_t
zero
=
vdup_n_s32
(
0
);
return
v_int32x4
(
vcombine_s32
(
vmovn_s64
(
vcvtaq_s64_f64
(
a
.
val
)),
zero
));
}
inline
v_int32x4
v_floor
(
const
v_float64x2
&
a
)
{
static
const
int32x2_t
zero
=
vdup_n_s32
(
0
);
int64x2_t
a1
=
vcvtq_s64_f64
(
a
.
val
);
uint64x2_t
mask
=
vcgtq_f64
(
vcvtq_f64_s64
(
a1
),
a
.
val
);
a1
=
vaddq_s64
(
a1
,
vreinterpretq_s64_u64
(
mask
));
return
v_int32x4
(
vcombine_s32
(
vmovn_s64
(
a1
),
zero
));
}
inline
v_int32x4
v_ceil
(
const
v_float64x2
&
a
)
{
static
const
int32x2_t
zero
=
vdup_n_s32
(
0
);
int64x2_t
a1
=
vcvtq_s64_f64
(
a
.
val
);
uint64x2_t
mask
=
vcgtq_f64
(
a
.
val
,
vcvtq_f64_s64
(
a1
));
a1
=
vsubq_s64
(
a1
,
vreinterpretq_s64_u64
(
mask
));
return
v_int32x4
(
vcombine_s32
(
vmovn_s64
(
a1
),
zero
));
}
inline
v_int32x4
v_trunc
(
const
v_float64x2
&
a
)
{
static
const
int32x2_t
zero
=
vdup_n_s32
(
0
);
return
v_int32x4
(
vcombine_s32
(
vmovn_s64
(
vcvtaq_s64_f64
(
a
.
val
)),
zero
));
}
#endif
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
...
...
@@ -864,12 +1109,45 @@ OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_INTERLEAVED
(
uint32x4
,
unsigned
,
u32
)
OPENCV_HAL_IMPL_NEON_INTERLEAVED
(
int32x4
,
int
,
s32
)
OPENCV_HAL_IMPL_NEON_INTERLEAVED
(
float32x4
,
float
,
f32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_INTERLEAVED
(
float64x2
,
double
,
f64
)
#endif
inline
v_float32x4
v_cvt_f32
(
const
v_int32x4
&
a
)
{
return
v_float32x4
(
vcvtq_f32_s32
(
a
.
val
));
}
#if CV_SIMD128_64F
inline
v_float32x4
v_cvt_f32
(
const
v_float64x2
&
a
)
{
float32x2_t
zero
=
vdup_n_f32
(
0.0
f
);
return
v_float32x4
(
vcombine_f32
(
vcvt_f32_f64
(
a
.
val
),
zero
));
}
inline
v_float64x2
v_cvt_f64
(
const
v_int32x4
&
a
)
{
return
v_float64x2
(
vcvt_f64_f32
(
vcvt_f32_s32
(
vget_low_s32
(
a
.
val
))));
}
inline
v_float64x2
v_cvt_f64_high
(
const
v_int32x4
&
a
)
{
return
v_float64x2
(
vcvt_f64_f32
(
vcvt_f32_s32
(
vget_high_s32
(
a
.
val
))));
}
inline
v_float64x2
v_cvt_f64
(
const
v_float32x4
&
a
)
{
return
v_float64x2
(
vcvt_f64_f32
(
vget_low_f32
(
a
.
val
)));
}
inline
v_float64x2
v_cvt_f64_high
(
const
v_float32x4
&
a
)
{
return
v_float64x2
(
vcvt_f64_f32
(
vget_high_f32
(
a
.
val
)));
}
#endif
//! @endcond
}
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
浏览文件 @
28db4a22
...
...
@@ -1611,11 +1611,21 @@ inline v_float64x2 v_cvt_f64(const v_int32x4& a)
return
v_float64x2
(
_mm_cvtepi32_pd
(
a
.
val
));
}
inline
v_float64x2
v_cvt_f64_high
(
const
v_int32x4
&
a
)
{
return
v_float64x2
(
_mm_cvtepi32_pd
(
_mm_srli_si128
(
a
.
val
,
8
)));
}
inline
v_float64x2
v_cvt_f64
(
const
v_float32x4
&
a
)
{
return
v_float64x2
(
_mm_cvtps_pd
(
a
.
val
));
}
inline
v_float64x2
v_cvt_f64_high
(
const
v_float32x4
&
a
)
{
return
v_float64x2
(
_mm_cvtps_pd
(
_mm_castsi128_ps
(
_mm_srli_si128
(
_mm_castps_si128
(
a
.
val
),
8
))));
}
//! @endcond
}
...
...
modules/core/test/test_intrin.cpp
浏览文件 @
28db4a22
...
...
@@ -652,12 +652,18 @@ template<typename R> struct TheTest
dataA
*=
1.1
;
R
a
=
dataA
;
Rt
b
=
v_cvt_f64
(
a
);
Rt
c
=
v_cvt_f64_high
(
a
);
Data
<
Rt
>
resB
=
b
;
Data
<
Rt
>
resC
=
c
;
int
n
=
std
::
min
<
int
>
(
Rt
::
nlanes
,
R
::
nlanes
);
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_EQ
((
typename
Rt
::
lane_type
)
dataA
[
i
],
resB
[
i
]);
}
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
EXPECT_EQ
((
typename
Rt
::
lane_type
)
dataA
[
i
+
n
],
resC
[
i
]);
}
#endif
return
*
this
;
}
...
...
modules/imgproc/src/accum.cpp
浏览文件 @
28db4a22
此差异已折叠。
点击以展开。
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录