Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
a98ce86c
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a98ce86c
编写于
10月 02, 2013
作者:
A
Adrian Stratulat
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add NEON intrinsics to core/src/arithm.cpp
上级
1f0ca00d
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
105 addition
and
4 deletion
+105
-4
modules/core/src/arithm.cpp
modules/core/src/arithm.cpp
+105
-4
未找到文件。
modules/core/src/arithm.cpp
浏览文件 @
a98ce86c
...
...
@@ -54,21 +54,23 @@ namespace cv
struct
NOP
{};
#if CV_SSE2
#if CV_SSE2
|| CV_NEON
#define FUNCTOR_TEMPLATE(name) \
template<typename T> struct name {}
FUNCTOR_TEMPLATE
(
VLoadStore128
);
#if CV_SSE2
FUNCTOR_TEMPLATE
(
VLoadStore64
);
FUNCTOR_TEMPLATE
(
VLoadStore128Aligned
);
#endif
#endif
template
<
typename
T
,
class
Op
,
class
VOp
>
void
vBinOp
(
const
T
*
src1
,
size_t
step1
,
const
T
*
src2
,
size_t
step2
,
T
*
dst
,
size_t
step
,
Size
sz
)
{
#if CV_SSE2
#if CV_SSE2
|| CV_NEON
VOp
vop
;
#endif
Op
op
;
...
...
@@ -79,9 +81,11 @@ void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, si
{
int
x
=
0
;
#if CV_NEON || CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
)
{
#endif
for
(
;
x
<=
sz
.
width
-
32
/
(
int
)
sizeof
(
T
);
x
+=
32
/
sizeof
(
T
)
)
{
typename
VLoadStore128
<
T
>::
reg_type
r0
=
VLoadStore128
<
T
>::
load
(
src1
+
x
);
...
...
@@ -91,8 +95,10 @@ void vBinOp(const T* src1, size_t step1, const T* src2, size_t step2, T* dst, si
VLoadStore128
<
T
>::
store
(
dst
+
x
,
r0
);
VLoadStore128
<
T
>::
store
(
dst
+
x
+
16
/
sizeof
(
T
),
r1
);
}
#if CV_SSE2
}
#endif
#endif
#if CV_SSE2
if
(
USE_SSE2
)
{
...
...
@@ -125,7 +131,7 @@ template<typename T, class Op, class Op32>
void
vBinOp32
(
const
T
*
src1
,
size_t
step1
,
const
T
*
src2
,
size_t
step2
,
T
*
dst
,
size_t
step
,
Size
sz
)
{
#if CV_SSE2
#if CV_SSE2
|| CV_NEON
Op32
op32
;
#endif
Op
op
;
...
...
@@ -153,9 +159,11 @@ void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2,
}
}
#endif
#if CV_NEON || CV_SSE2
#if CV_SSE2
if
(
USE_SSE2
)
{
#endif
for
(
;
x
<=
sz
.
width
-
8
;
x
+=
8
)
{
typename
VLoadStore128
<
T
>::
reg_type
r0
=
VLoadStore128
<
T
>::
load
(
src1
+
x
);
...
...
@@ -165,8 +173,10 @@ void vBinOp32(const T* src1, size_t step1, const T* src2, size_t step2,
VLoadStore128
<
T
>::
store
(
dst
+
x
,
r0
);
VLoadStore128
<
T
>::
store
(
dst
+
x
+
4
,
r1
);
}
#if CV_SSE2
}
#endif
#endif
#if CV_ENABLE_UNROLLED
for
(
;
x
<=
sz
.
width
-
4
;
x
+=
4
)
{
...
...
@@ -383,7 +393,98 @@ FUNCTOR_TEMPLATE(VNot);
FUNCTOR_CLOSURE_1arg
(
VNot
,
uchar
,
return
_mm_xor_si128
(
_mm_set1_epi32
(
-
1
),
a
));
#endif
#if CV_SSE2
#if CV_NEON
#define FUNCTOR_LOADSTORE(name, template_arg, register_type, load_body, store_body)\
template <> \
struct name<template_arg>{ \
typedef register_type reg_type; \
static reg_type load(const template_arg * p) { return load_body (p);}; \
static void store(template_arg * p, reg_type v) { store_body (p, v);}; \
}
#define FUNCTOR_CLOSURE_2arg(name, template_arg, body)\
template<> \
struct name<template_arg> \
{ \
VLoadStore128<template_arg>::reg_type operator()( \
VLoadStore128<template_arg>::reg_type a, \
VLoadStore128<template_arg>::reg_type b) const \
{ \
return body; \
}; \
}
#define FUNCTOR_CLOSURE_1arg(name, template_arg, body)\
template<> \
struct name<template_arg> \
{ \
VLoadStore128<template_arg>::reg_type operator()( \
VLoadStore128<template_arg>::reg_type a, \
VLoadStore128<template_arg>::reg_type ) const \
{ \
return body; \
}; \
}
FUNCTOR_LOADSTORE
(
VLoadStore128
,
uchar
,
uint8x16_t
,
vld1q_u8
,
vst1q_u8
);
FUNCTOR_LOADSTORE
(
VLoadStore128
,
schar
,
int8x16_t
,
vld1q_s8
,
vst1q_s8
);
FUNCTOR_LOADSTORE
(
VLoadStore128
,
ushort
,
uint16x8_t
,
vld1q_u16
,
vst1q_u16
);
FUNCTOR_LOADSTORE
(
VLoadStore128
,
short
,
int16x8_t
,
vld1q_s16
,
vst1q_s16
);
FUNCTOR_LOADSTORE
(
VLoadStore128
,
int
,
int32x4_t
,
vld1q_s32
,
vst1q_s32
);
FUNCTOR_LOADSTORE
(
VLoadStore128
,
float
,
float32x4_t
,
vld1q_f32
,
vst1q_f32
);
FUNCTOR_TEMPLATE
(
VAdd
);
FUNCTOR_CLOSURE_2arg
(
VAdd
,
uchar
,
vqaddq_u8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAdd
,
schar
,
vqaddq_s8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAdd
,
ushort
,
vqaddq_u16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAdd
,
short
,
vqaddq_s16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAdd
,
int
,
vaddq_s32
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAdd
,
float
,
vaddq_f32
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VSub
);
FUNCTOR_CLOSURE_2arg
(
VSub
,
uchar
,
vqsubq_u8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VSub
,
schar
,
vqsubq_s8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VSub
,
ushort
,
vqsubq_u16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VSub
,
short
,
vqsubq_s16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VSub
,
int
,
vsubq_s32
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VSub
,
float
,
vsubq_f32
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VMin
);
FUNCTOR_CLOSURE_2arg
(
VMin
,
uchar
,
vminq_u8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMin
,
schar
,
vminq_s8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMin
,
ushort
,
vminq_u16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMin
,
short
,
vminq_s16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMin
,
int
,
vminq_s32
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMin
,
float
,
vminq_f32
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VMax
);
FUNCTOR_CLOSURE_2arg
(
VMax
,
uchar
,
vmaxq_u8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMax
,
schar
,
vmaxq_s8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMax
,
ushort
,
vmaxq_u16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMax
,
short
,
vmaxq_s16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMax
,
int
,
vmaxq_s32
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VMax
,
float
,
vmaxq_f32
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VAbsDiff
);
FUNCTOR_CLOSURE_2arg
(
VAbsDiff
,
uchar
,
vabdq_u8
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAbsDiff
,
schar
,
vqabsq_s8
(
vqsubq_s8
(
a
,
b
)));
FUNCTOR_CLOSURE_2arg
(
VAbsDiff
,
ushort
,
vabdq_u16
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAbsDiff
,
short
,
vqabsq_s16
(
vqsubq_s16
(
a
,
b
)));
FUNCTOR_CLOSURE_2arg
(
VAbsDiff
,
int
,
vabdq_s32
(
a
,
b
));
FUNCTOR_CLOSURE_2arg
(
VAbsDiff
,
float
,
vabdq_f32
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VAnd
);
FUNCTOR_CLOSURE_2arg
(
VAnd
,
uchar
,
vandq_u8
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VOr
);
FUNCTOR_CLOSURE_2arg
(
VOr
,
uchar
,
vorrq_u8
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VXor
);
FUNCTOR_CLOSURE_2arg
(
VXor
,
uchar
,
veorq_u8
(
a
,
b
));
FUNCTOR_TEMPLATE
(
VNot
);
FUNCTOR_CLOSURE_1arg
(
VNot
,
uchar
,
vmvnq_u8
(
a
));
#endif
#if CV_SSE2 || CV_NEON
#define IF_SIMD(op) op
#else
#define IF_SIMD(op) NOP
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录