Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
047abb00
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
047abb00
编写于
9月 26, 2014
作者:
M
Maksim Shabunin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #3258 from ilya-lavrenov:neon_convert
上级
12059416
345b1369
变更
2
显示空白变更内容
内联
并排
Showing
2 changed files
with
530 additions
and
17 deletions
+530
-17
modules/core/include/opencv2/core/base.hpp
modules/core/include/opencv2/core/base.hpp
+35
-0
modules/core/src/convert.cpp
modules/core/src/convert.cpp
+495
-17
未找到文件。
modules/core/include/opencv2/core/base.hpp
浏览文件 @
047abb00
...
...
@@ -568,6 +568,41 @@ CV_EXPORTS int getIppStatus();
CV_EXPORTS
String
getIppErrorLocation
();
}
// ipp
#if CV_NEON
// Round each lane of a float32x2 to the nearest int32, halfway cases away
// from zero. vcvt_s32_f32 truncates toward zero, so we first add +/-0.5
// with the sign copied from the input lane.
inline int32x2_t cv_vrnd_s32_f32(float32x2_t v)
{
    // v_sign is the IEEE-754 sign-bit mask. Use (int)(1U << 31) instead of
    // (1 << 31): left-shifting 1 into the sign bit of a signed int is
    // undefined behavior; the unsigned shift plus conversion is well defined
    // on all supported targets and produces the same bit pattern.
    // v_05 is the bit pattern of 0.5f viewed as int32 so that the extracted
    // sign bit can be OR-ed into it.
    static int32x2_t v_sign = vdup_n_s32((int)(1U << 31)),
        v_05 = vreinterpret_s32_f32(vdup_n_f32(0.5f));

    // +/-0.5 carrying the sign of each lane of v.
    int32x2_t v_addition = vorr_s32(v_05, vand_s32(v_sign, vreinterpret_s32_f32(v)));
    return vcvt_s32_f32(vadd_f32(v, vreinterpret_f32_s32(v_addition)));
}
// Round each lane of a float32x4 to the nearest int32, halfway cases away
// from zero. vcvtq_s32_f32 truncates toward zero, so we first add +/-0.5
// with the sign copied from the input lane.
inline int32x4_t cv_vrndq_s32_f32(float32x4_t v)
{
    // v_sign is the IEEE-754 sign-bit mask. Use (int)(1U << 31) instead of
    // (1 << 31): left-shifting 1 into the sign bit of a signed int is
    // undefined behavior; the unsigned shift plus conversion is well defined
    // on all supported targets and produces the same bit pattern.
    // v_05 is the bit pattern of 0.5f viewed as int32 so that the extracted
    // sign bit can be OR-ed into it.
    static int32x4_t v_sign = vdupq_n_s32((int)(1U << 31)),
        v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));

    // +/-0.5 carrying the sign of each lane of v.
    int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(v)));
    return vcvtq_s32_f32(vaddq_f32(v, vreinterpretq_f32_s32(v_addition)));
}
// Round each lane of a float32x2 to the nearest uint32: add 0.5 and let
// vcvt_u32_f32 truncate toward zero. NOTE(review): correct only for
// non-negative inputs — callers appear to pass absolute values; confirm.
inline uint32x2_t cv_vrnd_u32_f32(float32x2_t v)
{
    static float32x2_t v_05 = vdup_n_f32(0.5f);
    return vcvt_u32_f32(vadd_f32(v, v_05));
}
// Round each lane of a float32x4 to the nearest uint32: add 0.5 and let
// vcvtq_u32_f32 truncate toward zero. NOTE(review): correct only for
// non-negative inputs — callers appear to pass absolute values; confirm.
inline uint32x4_t cv_vrndq_u32_f32(float32x4_t v)
{
    static float32x4_t v_05 = vdupq_n_f32(0.5f);
    return vcvtq_u32_f32(vaddq_f32(v, v_05));
}
#endif
}
// cv
#endif //__OPENCV_CORE_BASE_HPP__
modules/core/src/convert.cpp
浏览文件 @
047abb00
...
...
@@ -1276,10 +1276,10 @@ struct cvtScaleAbs_SIMD<uchar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1320,10 +1320,10 @@ struct cvtScaleAbs_SIMD<schar, uchar, float>
float32x4_t
v_dst_3
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_quat
),
scale
);
v_dst_3
=
vabsq_f32
(
vaddq_f32
(
v_dst_3
,
v_shift
));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_3
)));
uint16x8_t
v_dsti_0
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dsti_1
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_2
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_3
)));
vst1q_u8
(
dst
+
x
,
vcombine_u8
(
vqmovn_u16
(
v_dsti_0
),
vqmovn_u16
(
v_dsti_1
)));
}
...
...
@@ -1353,8 +1353,8 @@ struct cvtScaleAbs_SIMD<ushort, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_u32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1384,8 +1384,8 @@ struct cvtScaleAbs_SIMD<short, uchar, float>
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
v_half
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
)));
uint16x8_t
v_dst
=
vcombine_u16
(
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
)),
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
)));
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
}
...
...
@@ -1407,11 +1407,11 @@ struct cvtScaleAbs_SIMD<int, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
)),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vcvtq_f32_s32
(
vld1q_s32
(
src
+
x
+
4
)),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -1434,11 +1434,11 @@ struct cvtScaleAbs_SIMD<float, uchar, float>
{
float32x4_t
v_dst_0
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
),
scale
);
v_dst_0
=
vabsq_f32
(
vaddq_f32
(
v_dst_0
,
v_shift
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_0
));
uint16x4_t
v_dsti_0
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_0
));
float32x4_t
v_dst_1
=
vmulq_n_f32
(
vld1q_f32
(
src
+
x
+
4
),
scale
);
v_dst_1
=
vabsq_f32
(
vaddq_f32
(
v_dst_1
,
v_shift
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
vcvt
q_u32_f32
(
v_dst_1
));
uint16x4_t
v_dsti_1
=
vqmovn_u32
(
cv_vrnd
q_u32_f32
(
v_dst_1
));
uint16x8_t
v_dst
=
vcombine_u16
(
v_dsti_0
,
v_dsti_1
);
vst1_u8
(
dst
+
x
,
vqmovn_u16
(
v_dst
));
...
...
@@ -1491,6 +1491,7 @@ cvtScale_( const T* src, size_t sstep,
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
int
x
=
0
;
#if CV_ENABLE_UNROLLED
for
(
;
x
<=
size
.
width
-
4
;
x
+=
4
)
{
...
...
@@ -1604,16 +1605,493 @@ cvtScale_<short, int, float>( const short* src, size_t sstep,
}
}
// Generic fallback: no vectorized conversion is available for this (T, DT)
// pair. Reports zero elements processed so the caller's scalar loop
// converts the whole row.
template <typename T, typename DT> struct Cvt_SIMD
{
    int operator() (const T *, DT *, int) const
    {
        return 0;
    }
};
#if CV_NEON
// from uchar
//
// Each specialization converts as many full vectors as fit in `width`
// elements and returns the number of elements processed; the caller's
// scalar loop finishes the tail.

// uchar -> schar: zero-extend u8 -> u16, reinterpret as s16, saturate-narrow to s8.
template <> struct Cvt_SIMD<uchar, schar>
{
    int operator() (const uchar * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1_s8(dst + x, vqmovn_s16(vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + x)))));
        return x;
    }
};

// uchar -> ushort: a single zero-extending widen per 8 lanes.
template <> struct Cvt_SIMD<uchar, ushort>
{
    int operator() (const uchar * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1q_u16(dst + x, vmovl_u8(vld1_u8(src + x)));
        return x;
    }
};

// uchar -> short: zero-extend to u16, then reinterpret as s16 (all u8 values fit).
template <> struct Cvt_SIMD<uchar, short>
{
    int operator() (const uchar * src, short * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1q_s16(dst + x, vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + x))));
        return x;
    }
};

// uchar -> int: widen u8 -> u16, then each half u16 -> u32, stored as s32.
template <> struct Cvt_SIMD<uchar, int>
{
    int operator() (const uchar * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vmovl_u8(vld1_u8(src + x));
            vst1q_s32(dst + x, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_s32(dst + x + 4, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};

// uchar -> float: widen u8 -> u16 -> u32, then convert each half to f32.
template <> struct Cvt_SIMD<uchar, float>
{
    int operator() (const uchar * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vmovl_u8(vld1_u8(src + x));
            vst1q_f32(dst + x, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};
// from schar
//
// Each specialization converts as many full vectors as fit in `width`
// elements and returns the number of elements processed.

// schar -> uchar: sign-extend s8 -> s16, then saturate to u8 (negatives clamp to 0).
template <> struct Cvt_SIMD<schar, uchar>
{
    int operator() (const schar * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1_u8(dst + x, vqmovun_s16(vmovl_s8(vld1_s8(src + x))));
        return x;
    }
};

// schar -> short: a single sign-extending widen per 8 lanes.
template <> struct Cvt_SIMD<schar, short>
{
    int operator() (const schar * src, short * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
            vst1q_s16(dst + x, vmovl_s8(vld1_s8(src + x)));
        return x;
    }
};

// schar -> int: sign-extend s8 -> s16, then each half s16 -> s32.
template <> struct Cvt_SIMD<schar, int>
{
    int operator() (const schar * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vmovl_s8(vld1_s8(src + x));
            vst1q_s32(dst + x, vmovl_s16(vget_low_s16(v_src)));
            vst1q_s32(dst + x + 4, vmovl_s16(vget_high_s16(v_src)));
        }
        return x;
    }
};

// schar -> float: sign-extend s8 -> s16 -> s32, then convert each half to f32.
template <> struct Cvt_SIMD<schar, float>
{
    int operator() (const schar * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vmovl_s8(vld1_s8(src + x));
            vst1q_f32(dst + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src))));
        }
        return x;
    }
};
// from ushort
//
// Each specialization converts as many full vectors as fit in `width`
// elements and returns the number of elements processed.

// ushort -> uchar: load two u16 vectors, saturate-narrow each, store 16 bytes.
template <> struct Cvt_SIMD<ushort, uchar>
{
    int operator() (const ushort * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            uint16x8_t v_src1 = vld1q_u16(src + x), v_src2 = vld1q_u16(src + x + 8);
            vst1q_u8(dst + x, vcombine_u8(vqmovn_u16(v_src1), vqmovn_u16(v_src2)));
        }
        return x;
    }
};

// ushort -> int: zero-extend each half u16 -> u32, stored as s32 (values fit).
template <> struct Cvt_SIMD<ushort, int>
{
    int operator() (const ushort * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vld1q_u16(src + x);
            vst1q_s32(dst + x, vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_s32(dst + x + 4, vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};

// ushort -> float: zero-extend each half u16 -> u32, then convert to f32.
template <> struct Cvt_SIMD<ushort, float>
{
    int operator() (const ushort * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint16x8_t v_src = vld1q_u16(src + x);
            vst1q_f32(dst + x, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src))));
        }
        return x;
    }
};
// from short
//
// Each specialization converts as many full vectors as fit in `width`
// elements and returns the number of elements processed.

// short -> uchar: saturate-narrow two s16 vectors to u8 (negatives clamp to 0).
template <> struct Cvt_SIMD<short, uchar>
{
    int operator() (const short * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int16x8_t v_src1 = vld1q_s16(src + x), v_src2 = vld1q_s16(src + x + 8);
            vst1q_u8(dst + x, vcombine_u8(vqmovun_s16(v_src1), vqmovun_s16(v_src2)));
        }
        return x;
    }
};

// short -> schar: saturate-narrow two s16 vectors to s8.
template <> struct Cvt_SIMD<short, schar>
{
    int operator() (const short * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int16x8_t v_src1 = vld1q_s16(src + x), v_src2 = vld1q_s16(src + x + 8);
            vst1q_s8(dst + x, vcombine_s8(vqmovn_s16(v_src1), vqmovn_s16(v_src2)));
        }
        return x;
    }
};

// short -> ushort: widen each half to s32, then saturate to u16
// (negatives clamp to 0).
template <> struct Cvt_SIMD<short, ushort>
{
    int operator() (const short * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src + x);
            uint16x4_t v_dst1 = vqmovun_s32(vmovl_s16(vget_low_s16(v_src)));
            uint16x4_t v_dst2 = vqmovun_s32(vmovl_s16(vget_high_s16(v_src)));
            vst1q_u16(dst + x, vcombine_u16(v_dst1, v_dst2));
        }
        return x;
    }
};

// short -> int: sign-extend each half s16 -> s32.
template <> struct Cvt_SIMD<short, int>
{
    int operator() (const short * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src + x);
            vst1q_s32(dst + x, vmovl_s16(vget_low_s16(v_src)));
            vst1q_s32(dst + x + 4, vmovl_s16(vget_high_s16(v_src)));
        }
        return x;
    }
};

// short -> float: sign-extend each half s16 -> s32, then convert to f32.
template <> struct Cvt_SIMD<short, float>
{
    int operator() (const short * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src + x);
            vst1q_f32(dst + x, vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src))));
            vst1q_f32(dst + x + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src))));
        }
        return x;
    }
};
// from int
//
// Each specialization converts as many full vectors as fit in `width`
// elements and returns the number of elements processed.

// int -> uchar: saturate s32 -> u16 (vqmovun) then u16 -> u8 (vqmovn), 16 at a time.
template <> struct Cvt_SIMD<int, uchar>
{
    int operator() (const int * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            int32x4_t v_src3 = vld1q_s32(src + x + 8), v_src4 = vld1q_s32(src + x + 12);
            uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovun_s32(v_src1), vqmovun_s32(v_src2)));
            uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovun_s32(v_src3), vqmovun_s32(v_src4)));
            vst1q_u8(dst + x, vcombine_u8(v_dst1, v_dst2));
        }
        return x;
    }
};

// int -> schar: saturate s32 -> s16 then s16 -> s8, 16 at a time.
template <> struct Cvt_SIMD<int, schar>
{
    int operator() (const int * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            int32x4_t v_src3 = vld1q_s32(src + x + 8), v_src4 = vld1q_s32(src + x + 12);
            int8x8_t v_dst1 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src1), vqmovn_s32(v_src2)));
            int8x8_t v_dst2 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src3), vqmovn_s32(v_src4)));
            vst1q_s8(dst + x, vcombine_s8(v_dst1, v_dst2));
        }
        return x;
    }
};

// int -> ushort: saturate s32 -> u16 (negatives clamp to 0), 8 at a time.
template <> struct Cvt_SIMD<int, ushort>
{
    int operator() (const int * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            vst1q_u16(dst + x, vcombine_u16(vqmovun_s32(v_src1), vqmovun_s32(v_src2)));
        }
        return x;
    }
};

// int -> short: saturate s32 -> s16, 8 at a time.
template <> struct Cvt_SIMD<int, short>
{
    int operator() (const int * src, short * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            int32x4_t v_src1 = vld1q_s32(src + x), v_src2 = vld1q_s32(src + x + 4);
            vst1q_s16(dst + x, vcombine_s16(vqmovn_s32(v_src1), vqmovn_s32(v_src2)));
        }
        return x;
    }
};

// int -> float: direct lane-wise conversion, 4 at a time.
template <> struct Cvt_SIMD<int, float>
{
    int operator() (const int * src, float * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 4; x += 4)
            vst1q_f32(dst + x, vcvtq_f32_s32(vld1q_s32(src + x)));
        return x;
    }
};
// from float
//
// Uses the cv_vrndq_* helpers so float values are rounded to the nearest
// integer before narrowing. Each specialization returns the number of
// elements processed.

// float -> uchar: round to u32, then saturate u32 -> u16 -> u8, 16 at a time.
template <> struct Cvt_SIMD<float, uchar>
{
    int operator() (const float * src, uchar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            uint32x4_t v_src1 = cv_vrndq_u32_f32(vld1q_f32(src + x));
            uint32x4_t v_src2 = cv_vrndq_u32_f32(vld1q_f32(src + x + 4));
            uint32x4_t v_src3 = cv_vrndq_u32_f32(vld1q_f32(src + x + 8));
            uint32x4_t v_src4 = cv_vrndq_u32_f32(vld1q_f32(src + x + 12));
            uint8x8_t v_dst1 = vqmovn_u16(vcombine_u16(vqmovn_u32(v_src1), vqmovn_u32(v_src2)));
            uint8x8_t v_dst2 = vqmovn_u16(vcombine_u16(vqmovn_u32(v_src3), vqmovn_u32(v_src4)));
            vst1q_u8(dst + x, vcombine_u8(v_dst1, v_dst2));
        }
        return x;
    }
};

// float -> schar: round to s32, then saturate s32 -> s16 -> s8, 16 at a time.
template <> struct Cvt_SIMD<float, schar>
{
    int operator() (const float * src, schar * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 16; x += 16)
        {
            int32x4_t v_src1 = cv_vrndq_s32_f32(vld1q_f32(src + x));
            int32x4_t v_src2 = cv_vrndq_s32_f32(vld1q_f32(src + x + 4));
            int32x4_t v_src3 = cv_vrndq_s32_f32(vld1q_f32(src + x + 8));
            int32x4_t v_src4 = cv_vrndq_s32_f32(vld1q_f32(src + x + 12));
            int8x8_t v_dst1 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src1), vqmovn_s32(v_src2)));
            int8x8_t v_dst2 = vqmovn_s16(vcombine_s16(vqmovn_s32(v_src3), vqmovn_s32(v_src4)));
            vst1q_s8(dst + x, vcombine_s8(v_dst1, v_dst2));
        }
        return x;
    }
};

// float -> ushort: round to u32, then saturate u32 -> u16, 8 at a time.
template <> struct Cvt_SIMD<float, ushort>
{
    int operator() (const float * src, ushort * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 8; x += 8)
        {
            uint32x4_t v_src1 = cv_vrndq_u32_f32(vld1q_f32(src + x));
            uint32x4_t v_src2 = cv_vrndq_u32_f32(vld1q_f32(src + x + 4));
            vst1q_u16(dst + x, vcombine_u16(vqmovn_u32(v_src1), vqmovn_u32(v_src2)));
        }
        return x;
    }
};

// float -> int: round to s32, 4 at a time.
template <> struct Cvt_SIMD<float, int>
{
    int operator() (const float * src, int * dst, int width) const
    {
        int x = 0;
        for ( ; x <= width - 4; x += 4)
            vst1q_s32(dst + x, cv_vrndq_s32_f32(vld1q_f32(src + x)));
        return x;
    }
};
#endif
template
<
typename
T
,
typename
DT
>
static
void
cvt_
(
const
T
*
src
,
size_t
sstep
,
DT
*
dst
,
size_t
dstep
,
Size
size
)
{
sstep
/=
sizeof
(
src
[
0
]);
dstep
/=
sizeof
(
dst
[
0
]);
Cvt_SIMD
<
T
,
DT
>
vop
;
for
(
;
size
.
height
--
;
src
+=
sstep
,
dst
+=
dstep
)
{
int
x
=
0
;
int
x
=
vop
(
src
,
dst
,
size
.
width
)
;
#if CV_ENABLE_UNROLLED
for
(
;
x
<=
size
.
width
-
4
;
x
+=
4
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录