Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
903ec0ec
O
Opencv
项目概览
Greenplum
/
Opencv
8 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
903ec0ec
编写于
2月 13, 2023
作者:
M
Maksim Shabunin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
RISC-V: support RVV 0.7 in mainline RVV intrinsics
上级
47293f28
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
54 addition
and
35 deletion
+54
-35
modules/core/include/opencv2/core/hal/intrin_rvv.hpp
modules/core/include/opencv2/core/hal/intrin_rvv.hpp
+47
-23
modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
+7
-12
未找到文件。
modules/core/include/opencv2/core/hal/intrin_rvv.hpp
浏览文件 @
903ec0ec
...
...
@@ -10,13 +10,27 @@
#include <algorithm>
// Building for T-Head C906 core with RVV 0.7.1 using toolchain
// https://github.com/T-head-Semi/xuantie-gnu-toolchain
// with option '-march=rv64gcv0p7'
#ifdef __THEAD_VERSION__
# if __riscv_v == 7000
# include <fenv.h>
# define CV_RVV_THEAD_0_7
# endif
#endif
namespace
cv
{
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#define CV_SIMD128 1
#define CV_SIMD128_64F 1
#ifndef CV_RVV_THEAD_0_7
# define CV_SIMD128_64F 1
#else
# define CV_SIMD128_64F 0
#endif
//////////// Unsupported native intrinsics in C++ ////////////
// The following types have been defined in clang, but not in GCC yet.
...
...
@@ -1001,14 +1015,17 @@ OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET(uint64x2, float32x4, u64, f32, u, f, 6
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
int8x16
,
float32x4
,
s8
,
f32
,
i
,
f
,
8
,
32
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
int16x8
,
float32x4
,
s16
,
f32
,
i
,
f
,
16
,
32
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
int64x2
,
float32x4
,
s64
,
f32
,
i
,
f
,
64
,
32
)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
uint8x16
,
float64x2
,
u8
,
f64
,
u
,
f
,
8
,
64
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
uint16x8
,
float64x2
,
u16
,
f64
,
u
,
f
,
16
,
64
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
uint32x4
,
float64x2
,
u32
,
f64
,
u
,
f
,
32
,
64
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
int8x16
,
float64x2
,
s8
,
f64
,
i
,
f
,
8
,
64
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
int16x8
,
float64x2
,
s16
,
f64
,
i
,
f
,
16
,
64
)
OPENCV_HAL_IMPL_RVV_TWO_TIMES_REINTERPRET
(
int32x4
,
float64x2
,
s32
,
f64
,
i
,
f
,
32
,
64
)
#endif
// Three times reinterpret
#if CV_SIMD128_64F
inline
v_float32x4
v_reinterpret_as_f32
(
const
v_float64x2
&
v
)
\
{
\
return
v_float32x4
(
vreinterpret_v_u32m1_f32m1
(
vreinterpret_v_u64m1_u32m1
(
vreinterpret_v_f64m1_u64m1
(
v
))));
\
...
...
@@ -1017,6 +1034,7 @@ inline v_float64x2 v_reinterpret_as_f64(const v_float32x4& v) \
{
\
return
v_float64x2
(
vreinterpret_v_u64m1_f64m1
(
vreinterpret_v_u32m1_u64m1
(
vreinterpret_v_f32m1_u32m1
(
v
))));
\
}
#endif
////////////// Extract //////////////
...
...
@@ -1920,13 +1938,15 @@ inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, suffix, shift, vl) \
inline bool v_check_all(const _Tpvec& a) \
{ \
v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl))); \
return (v.val[0] | v.val[1]) == 0; \
auto v0 = vsrl_vx_##suffix##m1(vnot_v_##suffix##m1(a, vl), shift, vl); \
v_uint32x4 v = v_uint32x4(v_reinterpret_as_u32(_Tpvec(v0))); \
return (v.val[0] | v.val[1] | v.val[2] | v.val[3]) == 0; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
v_uint64x2 v = v_uint64x2(vreinterpret_v_##suffix##m1_u64m1(vsrl_vx_##suffix##m1(a, shift, vl))); \
return (v.val[0] | v.val[1]) != 0; \
auto v0 = vsrl_vx_##suffix##m1(a, shift, vl); \
v_uint32x4 v = v_uint32x4(v_reinterpret_as_u32(_Tpvec(v0))); \
return (v.val[0] | v.val[1] | v.val[2] | v.val[3]) != 0; \
}
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY
(
v_uint8x16
,
u8
,
7
,
16
)
...
...
@@ -2042,28 +2062,18 @@ OPENCV_HAL_IMPL_RVV_ABSDIFF(v_float64x2, absdiff)
OPENCV_HAL_IMPL_RVV_ABSDIFF
(
v_int8x16
,
absdiffs
)
OPENCV_HAL_IMPL_RVV_ABSDIFF
(
v_int16x8
,
absdiffs
)
// use reinterpret instead of c-style casting.
#ifndef __clang__
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(ivec, uvec, itype, utype, isuf, usuf, vlen) \
inline uvec v_absdiff(const ivec& a, const ivec& b) \
{ \
return _rTpvec(rshr(vreinterpret_v_i##width##m2_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \
itype max = vmax_vv_##isuf(a, b, vlen); \
itype min = vmin_vv_##isuf(a, b, vlen); \
return uvec(vreinterpret_v_##isuf##_##usuf(vsub_vv_##isuf(max, min, vlen))); \
}
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int8x16
,
v_uint8x16
,
vuint16m2_t
,
vwsub_vv_i16m2
,
vnclipu_wx_u8m1
,
16
,
16
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int16x8
,
v_uint16x8
,
vuint32m2_t
,
vwsub_vv_i32m2
,
vnclipu_wx_u16m1
,
32
,
8
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int32x4
,
v_uint32x4
,
vuint64m2_t
,
vwsub_vv_i64m2
,
vnclipu_wx_u32m1
,
64
,
4
)
#else
#define OPENCV_HAL_IMPL_RVV_ABSDIFF_S(_Tpvec, _rTpvec, _nwTpvec, sub, rshr, width, vl) \
inline _rTpvec v_absdiff(const _Tpvec& a, const _Tpvec& b) \
{ \
return _rTpvec(rshr(vreinterpret_u##width##m2(sub(v_max(a, b), v_min(a, b), vl)), 0, vl)); \
}
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int8x16
,
v_uint8x16
,
vint8m1_t
,
vuint8m1_t
,
i8m1
,
u8m1
,
16
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int16x8
,
v_uint16x8
,
vint16m1_t
,
vuint16m1_t
,
i16m1
,
u16m1
,
8
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int32x4
,
v_uint32x4
,
vint32m1_t
,
vuint32m1_t
,
i32m1
,
u32m1
,
4
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int8x16
,
v_uint8x16
,
vuint16m2_t
,
vwsub_vv_i16m2
,
vnclipu_wx_u8m1
,
16
,
16
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int16x8
,
v_uint16x8
,
vuint32m2_t
,
vwsub_vv_i32m2
,
vnclipu_wx_u16m1
,
32
,
8
)
OPENCV_HAL_IMPL_RVV_ABSDIFF_S
(
v_int32x4
,
v_uint32x4
,
vuint64m2_t
,
vwsub_vv_i64m2
,
vnclipu_wx_u32m1
,
64
,
4
)
#endif
#define OPENCV_HAL_IMPL_RVV_ABS(_Tprvec, _Tpvec, suffix) \
inline _Tprvec v_abs(const _Tpvec& a) \
{ \
...
...
@@ -2902,7 +2912,14 @@ inline v_int32x4 v_ceil(const v_float32x4& a)
inline
v_int32x4
v_trunc
(
const
v_float32x4
&
a
)
{
#ifndef CV_RVV_THEAD_0_7
return
v_int32x4
(
vfcvt_rtz_x_f_v_i32m1
(
a
,
4
));
#else
const
int
old_round
=
fesetround
(
FE_TOWARDZERO
);
vint32m1_t
val
=
vfcvt_x_f_v_i32m1
(
a
,
4
);
fesetround
(
old_round
);
return
v_int32x4
(
val
);
#endif
}
#if CV_SIMD128_64F
#ifndef __clang__
...
...
@@ -2938,7 +2955,14 @@ inline v_int32x4 v_trunc(const v_float64x2& a)
{
double
arr
[
4
]
=
{
a
.
val
[
0
],
a
.
val
[
1
],
0
,
0
};
vfloat64m2_t
tmp
=
vle64_v_f64m2
(
arr
,
4
);
#ifndef CV_RVV_THEAD_0_7
return
v_int32x4
(
vfncvt_rtz_x_f_w_i32m1
(
tmp
,
4
));
#else
const
int
old_round
=
fesetround
(
FE_TOWARDZERO
);
vint32m1_t
val
=
vfncvt_x_f_w_i32m1
(
tmp
,
4
);
fesetround
(
old_round
);
return
v_int32x4
(
val
);
#endif
}
#else
...
...
modules/core/include/opencv2/core/hal/intrin_rvv071.hpp
浏览文件 @
903ec0ec
...
...
@@ -19,7 +19,7 @@ namespace cv
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#define CV_SIMD128 1
#define CV_SIMD128_64F
1
#define CV_SIMD128_64F
0
//////////// Types ////////////
struct
v_uint8x16
{
...
...
@@ -2021,23 +2021,18 @@ inline v_int32x4 v_load_expand_q(const schar* ptr)
c
=
vwadd_vv_i32m2
(
vget_i16m2_i16m1
(
b
,
0
),
vmv_v_x_i16m1
(
0
,
4
),
4
);
\
return
v_int32x4
(
vget_i32m2_i32m1
(
c
,
0
));
}
#define VITL_16 (vuint64m2_t){0x1303120211011000, 0x1707160615051404, 0x1B0B1A0A19091808, 0x1F0F1E0E1D0D1C0C}
#define VITL_8 (vuint64m2_t){0x0009000100080000, 0x000B0003000A0002, 0x000D0005000C0004, 0x000F0007000E0006}
#define VITL_4 (vuint64m2_t){0x0000000400000000, 0x0000000500000001, 0x0000000600000002, 0x0000000700000003}
#define VITL_2 (vuint64m2_t){0, 2, 1, 3}
#define LOW_4 0x0000000100000000, 0x0000000500000004
#define LOW_8 0x0003000200010000, 0x000B000A00090008
#define LOW_16 0x0706050403020100, 0x1716151413121110
#define HIGH_4 0x0000000300000002, 0x0000000700000006
#define HIGH_8 0x0007000600050004, 0x000F000E000D000C
#define HIGH_16 0x0F0E0D0C0B0A0908, 0x1F1E1D1C1B1A1918
#define VITL_16 (vuint32m2_t){0x11011000, 0x13031202, 0x15051404, 0x17071606, 0x19091808, 0x1B0B1A0A, 0x1D0D1C0C, 0x1F0F1E0E}
#define VITL_8 (vuint32m2_t){0x00080000, 0x00090001, 0x000A0002, 0x000B0003, 0x000C0004, 0x000D0005, 0x000E0006, 0x000F0007}
#define VITL_4 (vuint32m2_t){0x00000000, 0x00000004, 0x00000001, 0x00000005, 0x00000002, 0x00000006, 0x00000003, 0x00000007}
#define VITL_2 (vuint32m2_t){0, 0, 2, 0, 1, 0, 3, 0}
#define OPENCV_HAL_IMPL_RISCVV_UNPACKS(_Tpvec, _Tp, _T, _UTp, _UT, num, num2, len, numh) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
v##_Tp##m2_t tmp = vundefined_##_T##m2();\
tmp = vset_##_T##m2(tmp, 0, a0.val); \
tmp = vset_##_T##m2(tmp, 1, a1.val); \
vuint
64
m2_t mask = VITL_##num; \
vuint
32
m2_t mask = VITL_##num; \
tmp = (v##_Tp##m2_t)vrgather_vv_##_T##m2((v##_Tp##m2_t)tmp, (v##_UTp##m2_t)mask, num2); \
b0.val = vget_##_T##m2_##_T##m1(tmp, 0); \
b1.val = vget_##_T##m2_##_T##m1(tmp, 1); \
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录