Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
281ce441
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
281ce441
编写于
9月 23, 2014
作者:
V
Vadim Pisarevsky
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3250 from ilya-lavrenov:neon_convert_scale_abs
上级
1c0b9469
515be708
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
179 additions
and
2 deletions
+179
-2
modules/core/src/convert.cpp
modules/core/src/convert.cpp
+179
-2
未找到文件。
modules/core/src/convert.cpp
浏览文件 @
281ce441
...
@@ -1244,6 +1244,183 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
...
@@ -1244,6 +1244,183 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
#elif CV_NEON
#elif CV_NEON
// NEON specialization of cvtScaleAbs for uchar -> uchar:
// dst[i] = saturating-narrow( |src[i] * scale + shift| ), 16 pixels per
// loop iteration. Returns the index of the first unprocessed element so
// a scalar tail loop (outside this block) can finish the remainder.
template <>
struct cvtScaleAbs_SIMD<uchar, uchar, float>
{
    int operator () (const uchar * src, uchar * dst, int width,
                     float scale, float shift) const
    {
        int x = 0;
        // Broadcast the additive term once; scale is applied per vector
        // via vmulq_n_f32.
        float32x4_t v_shift = vdupq_n_f32(shift);
        for ( ; x <= width - 16; x += 16)
        {
            uint8x16_t v_src = vld1q_u8(src + x);
            // Widen u8 -> u16 (low 8 bytes of the 16-byte vector).
            uint16x8_t v_half = vmovl_u8(vget_low_u8(v_src));

            // Widen u16 -> u32, convert to f32, then compute |v*scale + shift|
            // for each group of four lanes.
            uint32x4_t v_quat = vmovl_u16(vget_low_u16(v_half));
            float32x4_t v_dst_0 = vmulq_n_f32(vcvtq_f32_u32(v_quat), scale);
            v_dst_0 = vabsq_f32(vaddq_f32(v_dst_0, v_shift));

            v_quat = vmovl_u16(vget_high_u16(v_half));
            float32x4_t v_dst_1 = vmulq_n_f32(vcvtq_f32_u32(v_quat), scale);
            v_dst_1 = vabsq_f32(vaddq_f32(v_dst_1, v_shift));

            // Same processing for the high 8 bytes of the source vector.
            v_half = vmovl_u8(vget_high_u8(v_src));

            v_quat = vmovl_u16(vget_low_u16(v_half));
            float32x4_t v_dst_2 = vmulq_n_f32(vcvtq_f32_u32(v_quat), scale);
            v_dst_2 = vabsq_f32(vaddq_f32(v_dst_2, v_shift));

            v_quat = vmovl_u16(vget_high_u16(v_half));
            float32x4_t v_dst_3 = vmulq_n_f32(vcvtq_f32_u32(v_quat), scale);
            v_dst_3 = vabsq_f32(vaddq_f32(v_dst_3, v_shift));

            // Narrow back down with saturation at each step:
            // f32 -> u32 (vcvtq), u32 -> u16 (vqmovn), u16 -> u8 (vqmovn).
            uint16x8_t v_dsti_0 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(v_dst_0)),
                                               vqmovn_u32(vcvtq_u32_f32(v_dst_1)));
            uint16x8_t v_dsti_1 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(v_dst_2)),
                                               vqmovn_u32(vcvtq_u32_f32(v_dst_3)));
            vst1q_u8(dst + x, vcombine_u8(vqmovn_u16(v_dsti_0), vqmovn_u16(v_dsti_1)));
        }
        return x;
    }
};
// NEON specialization of cvtScaleAbs for schar -> uchar:
// dst[i] = saturating-narrow( |src[i] * scale + shift| ), 16 pixels per
// loop iteration. The vabsq_f32 guarantees the value is non-negative
// before the float -> u32 conversion. Returns the index of the first
// unprocessed element so a scalar tail loop can finish the remainder.
template <>
struct cvtScaleAbs_SIMD<schar, uchar, float>
{
    int operator () (const schar * src, uchar * dst, int width,
                     float scale, float shift) const
    {
        int x = 0;
        // Broadcast the additive term once; scale is applied per vector
        // via vmulq_n_f32.
        float32x4_t v_shift = vdupq_n_f32(shift);
        for ( ; x <= width - 16; x += 16)
        {
            int8x16_t v_src = vld1q_s8(src + x);
            // Sign-extend s8 -> s16 (low 8 bytes of the 16-byte vector).
            int16x8_t v_half = vmovl_s8(vget_low_s8(v_src));

            // Sign-extend s16 -> s32, convert to f32, then |v*scale + shift|
            // for each group of four lanes.
            int32x4_t v_quat = vmovl_s16(vget_low_s16(v_half));
            float32x4_t v_dst_0 = vmulq_n_f32(vcvtq_f32_s32(v_quat), scale);
            v_dst_0 = vabsq_f32(vaddq_f32(v_dst_0, v_shift));

            v_quat = vmovl_s16(vget_high_s16(v_half));
            float32x4_t v_dst_1 = vmulq_n_f32(vcvtq_f32_s32(v_quat), scale);
            v_dst_1 = vabsq_f32(vaddq_f32(v_dst_1, v_shift));

            // Same processing for the high 8 bytes of the source vector.
            v_half = vmovl_s8(vget_high_s8(v_src));

            v_quat = vmovl_s16(vget_low_s16(v_half));
            float32x4_t v_dst_2 = vmulq_n_f32(vcvtq_f32_s32(v_quat), scale);
            v_dst_2 = vabsq_f32(vaddq_f32(v_dst_2, v_shift));

            v_quat = vmovl_s16(vget_high_s16(v_half));
            float32x4_t v_dst_3 = vmulq_n_f32(vcvtq_f32_s32(v_quat), scale);
            v_dst_3 = vabsq_f32(vaddq_f32(v_dst_3, v_shift));

            // Narrow back down with saturation at each step:
            // f32 -> u32 (vcvtq), u32 -> u16 (vqmovn), u16 -> u8 (vqmovn).
            uint16x8_t v_dsti_0 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(v_dst_0)),
                                               vqmovn_u32(vcvtq_u32_f32(v_dst_1)));
            uint16x8_t v_dsti_1 = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(v_dst_2)),
                                               vqmovn_u32(vcvtq_u32_f32(v_dst_3)));
            vst1q_u8(dst + x, vcombine_u8(vqmovn_u16(v_dsti_0), vqmovn_u16(v_dsti_1)));
        }
        return x;
    }
};
// NEON specialization of cvtScaleAbs for ushort -> uchar:
// dst[i] = saturating-narrow( |src[i] * scale + shift| ), processing
// eight pixels per iteration and returning the count handled here.
template <>
struct cvtScaleAbs_SIMD<ushort, uchar, float>
{
    int operator () (const ushort * src, uchar * dst, int width,
                     float scale, float shift) const
    {
        // Additive term broadcast once outside the loop.
        const float32x4_t v_bias = vdupq_n_f32(shift);
        int x = 0;

        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_in = vld1q_u16(src + x);

            // Lower four lanes: widen u16 -> u32, convert, |v*scale + shift|.
            float32x4_t v_lo = vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_in)));
            v_lo = vabsq_f32(vaddq_f32(vmulq_n_f32(v_lo, scale), v_bias));

            // Upper four lanes, identically.
            float32x4_t v_hi = vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_in)));
            v_hi = vabsq_f32(vaddq_f32(vmulq_n_f32(v_hi, scale), v_bias));

            // Saturating narrow chain: f32 -> u32 -> u16 -> u8, then store.
            uint16x8_t v_packed = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(v_lo)),
                                               vqmovn_u32(vcvtq_u32_f32(v_hi)));
            vst1_u8(dst + x, vqmovn_u16(v_packed));
        }

        return x;
    }
};
// NEON specialization of cvtScaleAbs for short -> uchar:
// dst[i] = saturating-narrow( |src[i] * scale + shift| ), processing
// eight pixels per iteration and returning the count handled here.
template <>
struct cvtScaleAbs_SIMD<short, uchar, float>
{
    int operator () (const short * src, uchar * dst, int width,
                     float scale, float shift) const
    {
        // Additive term broadcast once outside the loop.
        const float32x4_t v_bias = vdupq_n_f32(shift);
        int x = 0;

        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_in = vld1q_s16(src + x);

            // Lower four lanes: sign-extend s16 -> s32, convert,
            // then |v*scale + shift|.
            float32x4_t v_lo = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_in)));
            v_lo = vabsq_f32(vaddq_f32(vmulq_n_f32(v_lo, scale), v_bias));

            // Upper four lanes, identically.
            float32x4_t v_hi = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_in)));
            v_hi = vabsq_f32(vaddq_f32(vmulq_n_f32(v_hi, scale), v_bias));

            // Saturating narrow chain: f32 -> u32 -> u16 -> u8, then store.
            // vabsq_f32 above guarantees non-negative values for vcvtq_u32_f32.
            uint16x8_t v_packed = vcombine_u16(vqmovn_u32(vcvtq_u32_f32(v_lo)),
                                               vqmovn_u32(vcvtq_u32_f32(v_hi)));
            vst1_u8(dst + x, vqmovn_u16(v_packed));
        }

        return x;
    }
};
// NEON specialization of cvtScaleAbs for int -> uchar:
// dst[i] = saturating-narrow( |src[i] * scale + shift| ), processing
// eight pixels per iteration (two 4-lane loads) and returning the
// count handled here.
template <>
struct cvtScaleAbs_SIMD<int, uchar, float>
{
    int operator () (const int * src, uchar * dst, int width,
                     float scale, float shift) const
    {
        // Additive term broadcast once outside the loop.
        const float32x4_t v_bias = vdupq_n_f32(shift);
        int x = 0;

        for ( ; x <= width - 8; x += 8)
        {
            // First group of four: convert, |v*scale + shift|,
            // saturating narrow f32 -> u32 -> u16.
            float32x4_t v_lo = vmulq_n_f32(vcvtq_f32_s32(vld1q_s32(src + x)), scale);
            v_lo = vabsq_f32(vaddq_f32(v_lo, v_bias));
            uint16x4_t v_lo16 = vqmovn_u32(vcvtq_u32_f32(v_lo));

            // Second group of four, identically.
            float32x4_t v_hi = vmulq_n_f32(vcvtq_f32_s32(vld1q_s32(src + x + 4)), scale);
            v_hi = vabsq_f32(vaddq_f32(v_hi, v_bias));
            uint16x4_t v_hi16 = vqmovn_u32(vcvtq_u32_f32(v_hi));

            // Pack eight u16 lanes and narrow to u8 with saturation.
            vst1_u8(dst + x, vqmovn_u16(vcombine_u16(v_lo16, v_hi16)));
        }

        return x;
    }
};
template
<
>
template
<
>
struct
cvtScaleAbs_SIMD
<
float
,
uchar
,
float
>
struct
cvtScaleAbs_SIMD
<
float
,
uchar
,
float
>
{
{
...
@@ -1257,11 +1434,11 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
...
@@ -1257,11 +1434,11 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
{
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
),
scale
);
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmov
un_s32
(
vcvtq_s
32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmov
n_u32
(
vcvtq_u
32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
+
4
),
scale
);
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
+
4
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmov
un_s32
(
vcvtq_s
32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmov
n_u32
(
vcvtq_u
32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录