Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
345b1369
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
345b1369
编写于
9月 25, 2014
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
correct neon rounding
上级
4b3f2c19
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
65 addition
and
30 deletion
+65
-30
modules/core/include/opencv2/core/base.hpp
modules/core/include/opencv2/core/base.hpp
+35
-0
modules/core/src/convert.cpp
modules/core/src/convert.cpp
+30
-30
未找到文件。
modules/core/include/opencv2/core/base.hpp
浏览文件 @
345b1369
...
...
@@ -568,6 +568,41 @@ CV_EXPORTS int getIppStatus();
CV_EXPORTS
String
getIppErrorLocation
();
}
// ipp
#if CV_NEON
/**
 * Rounds each lane of a 2-lane float vector to the nearest signed 32-bit
 * integer; halfway cases are rounded away from zero.
 *
 * vcvt_s32_f32 converts by truncating toward zero, so a bias of 0.5f that
 * carries the sign of the corresponding input lane is added first.
 *
 * @param v input vector of two floats
 * @return vector of two rounded 32-bit signed integers
 */
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
    // Sign-bit mask (the INT32_MIN bit pattern). Spelled without a shift
    // because `1 << 31` overflows a signed int. Plain const locals are used
    // instead of function-local statics so that no guarded one-time
    // initialisation sits on this hot path.
    const int32x2_t v_sign = vdup_n_s32(-2147483647 - 1);
    const int32x2_t v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));

    // Bit pattern of 0.5f with each input lane's sign bit OR-ed in,
    // i.e. +0.5f for non-negative lanes and -0.5f for negative lanes.
    const int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));

    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
}
/**
 * Rounds each lane of a 4-lane float vector to the nearest signed 32-bit
 * integer; halfway cases are rounded away from zero.
 *
 * vcvtq_s32_f32 converts by truncating toward zero, so a bias of 0.5f that
 * carries the sign of the corresponding input lane is added first.
 *
 * @param v input vector of four floats
 * @return vector of four rounded 32-bit signed integers
 */
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // Sign-bit mask (the INT32_MIN bit pattern). Spelled without a shift
    // because `1 << 31` overflows a signed int. Plain const locals are used
    // instead of function-local statics so that no guarded one-time
    // initialisation sits on this hot path.
    const int32x4_t v_sign = vdupq_n_s32(-2147483647 - 1);
    const int32x4_t v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));

    // Bit pattern of 0.5f with each input lane's sign bit OR-ed in,
    // i.e. +0.5f for non-negative lanes and -0.5f for negative lanes.
    const int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));

    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
}
/**
 * Converts each lane of a 2-lane float vector to an unsigned 32-bit integer
 * after adding 0.5f, so that the truncating vcvt_u32_f32 conversion yields
 * the nearest integer (halfway cases round up).
 *
 * NOTE(review): the bias is always +0.5f, so this presumably expects
 * non-negative input lanes - confirm against callers.
 *
 * @param v input vector of two floats
 * @return vector of two rounded 32-bit unsigned integers
 */
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
    // A const local instead of a function-local static: the constant is a
    // single duplicate instruction and needs no guarded initialisation.
    const float32x2_t v_05 = vdup_n_f32(0.5f);

    return vcvt_u32_f32(vadd_f32(v, v_05));
}
/**
 * Converts each lane of a 4-lane float vector to an unsigned 32-bit integer
 * after adding 0.5f, so that the truncating vcvtq_u32_f32 conversion yields
 * the nearest integer (halfway cases round up).
 *
 * NOTE(review): the bias is always +0.5f, so this presumably expects
 * non-negative input lanes - confirm against callers.
 *
 * @param v input vector of four floats
 * @return vector of four rounded 32-bit unsigned integers
 */
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
    // A const local instead of a function-local static: the constant is a
    // single duplicate instruction and needs no guarded initialisation.
    const float32x4_t v_05 = vdupq_n_f32(0.5f);

    return vcvtq_u32_f32(vaddq_f32(v, v_05));
}
#endif
}
// cv
#endif //__OPENCV_CORE_BASE_HPP__
modules/core/src/convert.cpp
浏览文件 @
345b1369
...
...
@@ -1276,10 +1276,10 @@ struct cvtScaleAbs_SIMD<uchar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1320,10 +1320,10 @@ struct cvtScaleAbs_SIMD<schar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1353,8 +1353,8 @@ struct cvtScaleAbs_SIMD<ushort, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1384,8 +1384,8 @@ struct cvtScaleAbs_SIMD<short, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1407,11 +1407,11 @@ struct cvtScaleAbs_SIMD<int, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
)),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
+
4
)),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -1434,11 +1434,11 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
+
4
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -2011,12 +2011,12 @@ struct Cvt_SIMD<float, uchar>
for
(
;
x
<=
width
-
16
;
x
+=
16
)
{
int32x4_t
v_src1
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
int32x4_t
v_src3
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
8
));
int32x4_t
v_src4
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
12
));
uint8x8_t
v_dst1
=
vqmovn_u16
(
vcombine_u16
(
vqmov
un_s32
(
v_src1
),
vqmovun_s
32
(
v_src2
)));
uint8x8_t
v_dst2
=
vqmovn_u16
(
vcombine_u16
(
vqmov
un_s32
(
v_src3
),
vqmovun_s
32
(
v_src4
)));
uint32x4_t
v_src1
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
));
uint32x4_t
v_src2
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
uint32x4_t
v_src3
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
8
));
uint32x4_t
v_src4
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
12
));
uint8x8_t
v_dst1
=
vqmovn_u16
(
vcombine_u16
(
vqmov
n_u32
(
v_src1
),
vqmovn_u
32
(
v_src2
)));
uint8x8_t
v_dst2
=
vqmovn_u16
(
vcombine_u16
(
vqmov
n_u32
(
v_src3
),
vqmovn_u
32
(
v_src4
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
v_dst1
,
v_dst2
));
}
...
...
@@ -2033,10 +2033,10 @@ struct Cvt_SIMD<float, schar>
for
(
;
x
<=
width
-
16
;
x
+=
16
)
{
int32x4_t
v_src1
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
+
4
));
int32x4_t
v_src3
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
+
8
));
int32x4_t
v_src4
=
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
+
12
));
int32x4_t
v_src1
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
+
4
));
int32x4_t
v_src3
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
+
8
));
int32x4_t
v_src4
=
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
+
12
));
int8x8_t
v_dst1
=
vqmovn_s16
(
vcombine_s16
(
vqmovn_s32
(
v_src1
),
vqmovn_s32
(
v_src2
)));
int8x8_t
v_dst2
=
vqmovn_s16
(
vcombine_s16
(
vqmovn_s32
(
v_src3
),
vqmovn_s32
(
v_src4
)));
vst1q_s8
(
dst
+
x
,
vcombine_s8
(
v_dst1
,
v_dst2
));
...
...
@@ -2056,9 +2056,9 @@ struct Cvt_SIMD<float, ushort>
for
(
;
x
<=
width
-
8
;
x
+=
8
)
{
int32x4_t
v_src1
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
));
int32x4_t
v_src2
=
vcvtq_s
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
vst1q_u16
(
dst
+
x
,
vcombine_u16
(
vqmov
un_s32
(
v_src1
),
vqmovun_s
32
(
v_src2
)));
uint32x4_t
v_src1
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
));
uint32x4_t
v_src2
=
cv_vrndq_u
32_f32
(
vld1q_f32
(
src
+
x
+
4
));
vst1q_u16
(
dst
+
x
,
vcombine_u16
(
vqmov
n_u32
(
v_src1
),
vqmovn_u
32
(
v_src2
)));
}
return
x
;
...
...
@@ -2073,7 +2073,7 @@ struct Cvt_SIMD<float, int>
int
x
=
0
;
for
(
;
x
<=
width
-
4
;
x
+=
4
)
vst1q_s32
(
dst
+
x
,
vcvt
q_s32_f32
(
vld1q_f32
(
src
+
x
)));
vst1q_s32
(
dst
+
x
,
cv_vrnd
q_s32_f32
(
vld1q_f32
(
src
+
x
)));
return
x
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录