Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
b12c1451
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b12c1451
编写于
3月 19, 2023
作者:
M
Maksim Shabunin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
RISC-V: allow building scalable RVV support with GCC, LLVM 16 support
上级
2b32eee3
变更
6
展开全部
隐藏空白更改
内联
并排
Showing
6 changed file
with
25475 addition
and
79 deletion
+25475
-79
cmake/checks/cpu_rvv.cpp
cmake/checks/cpu_rvv.cpp
+6
-1
modules/core/include/opencv2/core/hal/intrin_rvv.hpp
modules/core/include/opencv2/core/hal/intrin_rvv.hpp
+9
-0
modules/core/include/opencv2/core/hal/intrin_rvv_010_compat_non-policy.hpp
...ude/opencv2/core/hal/intrin_rvv_010_compat_non-policy.hpp
+24395
-0
modules/core/include/opencv2/core/hal/intrin_rvv_010_compat_overloaded-non-policy.hpp
.../core/hal/intrin_rvv_010_compat_overloaded-non-policy.hpp
+768
-0
modules/core/include/opencv2/core/hal/intrin_rvv_compat_overloaded.hpp
...include/opencv2/core/hal/intrin_rvv_compat_overloaded.hpp
+207
-0
modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
...les/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
+90
-78
未找到文件。
cmake/checks/cpu_rvv.cpp
浏览文件 @
b12c1451
...
...
@@ -6,12 +6,17 @@
#endif
#if defined CV_RVV
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#define vreinterpret_v_u64m1_u8m1 __riscv_vreinterpret_v_u64m1_u8m1
#define vle64_v_u64m1 __riscv_vle64_v_u64m1
#define vle32_v_f32m1 __riscv_vle32_v_f32m1
#define vfmv_f_s_f32m1_f32 __riscv_vfmv_f_s_f32m1_f32
#endif
int
test
()
{
const
float
src
[]
=
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
};
uint64_t
ptr
[
2
]
=
{
0x0908060504020100
,
0xFFFFFFFF0E0D0C0A
};
vuint8m1_t
a
=
vreinterpret_v_u64m1_u8m1
(
vle64_v_u64m1
(
ptr
,
2
));
//vuint8m1_t a = (vuint8m1_t)vle64_v_u64m1(ptr, 2);
vfloat32m1_t
val
=
vle32_v_f32m1
((
const
float
*
)(
src
),
4
);
return
(
int
)
vfmv_f_s_f32m1_f32
(
val
);
}
...
...
modules/core/include/opencv2/core/hal/intrin_rvv.hpp
浏览文件 @
b12c1451
...
...
@@ -10,6 +10,15 @@
#include <algorithm>
// RVV intrinsics have been renamed in version 0.11, so we need to include
// compatibility headers:
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif
// Building for T-Head C906 core with RVV 0.7.1 using toolchain
// https://github.com/T-head-Semi/xuantie-gnu-toolchain
// with option '-march=rv64gcv0p7'
...
...
modules/core/include/opencv2/core/hal/intrin_rvv_010_compat_non-policy.hpp
0 → 100644
浏览文件 @
b12c1451
此差异已折叠。
点击以展开。
modules/core/include/opencv2/core/hal/intrin_rvv_010_compat_overloaded-non-policy.hpp
0 → 100644
浏览文件 @
b12c1451
此差异已折叠。
点击以展开。
modules/core/include/opencv2/core/hal/intrin_rvv_compat_overloaded.hpp
0 → 100644
浏览文件 @
b12c1451
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
#define OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
// This file requires VTraits to be defined for vector types
// Defines an overloaded-style `vand(op1, op2, vl)` for vector type REG that
// forwards to the explicitly suffixed intrinsic `vand_vv_<SUF>`.
#define OPENCV_HAL_IMPL_RVV_FUN_AND(REG, SUF) \
    static inline REG vand(const REG& op1, const REG& op2, size_t vl) \
    { \
        return vand_vv_##SUF(op1, op2, vl); \
    }

// Signed / unsigned LMUL=1 integer vectors of every element width.
OPENCV_HAL_IMPL_RVV_FUN_AND(vint8m1_t, i8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint8m1_t, u8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint16m1_t, i16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint16m1_t, u16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint32m1_t, i32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint32m1_t, u32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint64m1_t, i64m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint64m1_t, u64m1)
// Defines `vloxe<ISUF>(base, bindex, vl)` returning REG. The generated function
// forwards to the explicitly suffixed intrinsic `vloxe<ISUF>_v_<SUF>`.
//   REG  - result vector type (its VTraits<REG>::lane_type is the pointee of `base`)
//   SUF  - type suffix of the result vector, e.g. i8m1
//   INDX - vector type of the index operand
//   ISUF - index-width part of the intrinsic name, e.g. i8 or i32
#define OPENCV_HAL_IMPL_RVV_FUN_LOXEI(REG, SUF, INDX, ISUF) \
    static inline REG vloxe##ISUF(const VTraits<REG>::lane_type *base, INDX bindex, size_t vl) \
    { \
        return vloxe##ISUF##_v_##SUF(base, bindex, vl); \
    }

// Signed results, 8-bit indices.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m4_t, i8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m8_t, i8m8, vuint8m8_t, i8)
// Signed results, 32-bit indices.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint16m1_t, i16m1, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m1_t, i32m1, vuint32m1_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m2_t, i32m2, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m4_t, i32m4, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m8_t, i32m8, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint64m1_t, i64m1, vuint32mf2_t, i32)
// Unsigned results, 8-bit indices.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m1_t, u8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m2_t, u8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m4_t, u8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m8_t, u8m8, vuint8m8_t, i8)
// Floating-point results, 32-bit indices.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat32m1_t, f32m1, vuint32m1_t, i32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat64m1_t, f64m1, vuint32mf2_t, i32)
#endif
// Defines overloaded-style `vmul(op1, op2, vl)` for the m1, m2, m4 and m8
// variants of the vector family REG (REG is the type name without the
// `m<N>_t` tail, SUF the matching suffix without `m<N>`).
// For each variant two overloads are produced:
//   - vector * vector, forwarding to `vmul_vv_<SUF>m<N>`
//   - vector * scalar, forwarding to `vmul_vx_<SUF>m<N>`
#define OPENCV_HAL_IMPL_RVV_FUN_MUL(REG, SUF) \
    static inline REG##m1_t vmul(const REG##m1_t& op1, const REG##m1_t& op2, size_t vl) \
    { \
        return vmul_vv_##SUF##m1(op1, op2, vl); \
    } \
    static inline REG##m1_t vmul(const REG##m1_t& op1, VTraits<REG##m1_t>::lane_type op2, size_t vl) \
    { \
        return vmul_vx_##SUF##m1(op1, op2, vl); \
    } \
    static inline REG##m2_t vmul(const REG##m2_t& op1, const REG##m2_t& op2, size_t vl) \
    { \
        return vmul_vv_##SUF##m2(op1, op2, vl); \
    } \
    static inline REG##m2_t vmul(const REG##m2_t& op1, VTraits<REG##m2_t>::lane_type op2, size_t vl) \
    { \
        return vmul_vx_##SUF##m2(op1, op2, vl); \
    } \
    static inline REG##m4_t vmul(const REG##m4_t& op1, const REG##m4_t& op2, size_t vl) \
    { \
        return vmul_vv_##SUF##m4(op1, op2, vl); \
    } \
    static inline REG##m4_t vmul(const REG##m4_t& op1, VTraits<REG##m4_t>::lane_type op2, size_t vl) \
    { \
        return vmul_vx_##SUF##m4(op1, op2, vl); \
    } \
    static inline REG##m8_t vmul(const REG##m8_t& op1, const REG##m8_t& op2, size_t vl) \
    { \
        return vmul_vv_##SUF##m8(op1, op2, vl); \
    } \
    static inline REG##m8_t vmul(const REG##m8_t& op1, VTraits<REG##m8_t>::lane_type op2, size_t vl) \
    { \
        return vmul_vx_##SUF##m8(op1, op2, vl); \
    }

// 8-, 16- and 32-bit integer families, signed and unsigned.
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint32, u32)
// Defines `vreinterpret_<SUF1>m<N>(src)` for N in {1, 2, 4, 8}.
//   REG1/SUF1 - destination vector family and suffix (without `m<N>`)
//   REG2/SUF2 - source vector family and suffix (without `m<N>`)
// Each generated function forwards to the explicitly suffixed intrinsic
// `vreinterpret_v_<SUF2>m<N>_<SUF1>m<N>`.
#define OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(REG1, SUF1, REG2, SUF2) \
    static inline REG1##m1_t vreinterpret_##SUF1##m1(const REG2##m1_t& src) \
    { \
        return vreinterpret_v_##SUF2##m1_##SUF1##m1(src); \
    } \
    static inline REG1##m2_t vreinterpret_##SUF1##m2(const REG2##m2_t& src) \
    { \
        return vreinterpret_v_##SUF2##m2_##SUF1##m2(src); \
    } \
    static inline REG1##m4_t vreinterpret_##SUF1##m4(const REG2##m4_t& src) \
    { \
        return vreinterpret_v_##SUF2##m4_##SUF1##m4(src); \
    } \
    static inline REG1##m8_t vreinterpret_##SUF1##m8(const REG2##m8_t& src) \
    { \
        return vreinterpret_v_##SUF2##m8_##SUF1##m8(src); \
    }

// unsigned -> signed, same element width
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint8, i8, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint16, i16, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vuint32, u32)
// 32-bit integer <-> float
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vfloat32, f32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vfloat32, f32)
// to u8
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint64, u64)
// to u16
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint64, u64)
// to u32
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint64, u64)
// Defines `vse<SZ>(base, value, vl)` for vector type REG, forwarding to the
// explicitly suffixed intrinsic `vse<SZ>_v_<SUF>m1`.
//   REG - vector type (its VTraits<REG>::lane_type is the pointee of `base`)
//   SUF - element suffix without the `m1` tail, e.g. u8
//   SZ  - element width that appears in the intrinsic name, e.g. 8
#define OPENCV_HAL_IMPL_RVV_FUN_STORE(REG, SUF, SZ) \
    static inline void vse##SZ(VTraits<REG>::lane_type *base, REG value, size_t vl) \
    { \
        vse##SZ##_v_##SUF##m1(base, value, vl); \
    }

OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint8, u8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int8, i8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint16, u16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int16, i16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint32, u32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int32, i32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint64, u64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int64, i64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float32, f32, 32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float64, f64, 64)
#endif
// Defines `vmv_x(reg)` for integer vector type REG; the result has type
// VTraits<REG>::lane_type and comes from `vmv_x_s_<SUF>m1_<SUF>`.
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(REG, SUF) \
    static inline VTraits<REG>::lane_type vmv_x(const REG& reg) \
    { \
        return vmv_x_s_##SUF##m1_##SUF(reg); \
    }

// Floating-point counterpart: defines `vfmv_f(reg)`, forwarding to
// `vfmv_f_s_<SUF>m1_<SUF>`.
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(REG, SUF) \
    static inline VTraits<REG>::lane_type vfmv_f(const REG& reg) \
    { \
        return vfmv_f_s_##SUF##m1_##SUF(reg); \
    }

OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int64, i64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float32, f32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float64, f64)
#endif
// Defines `vslidedown(dst, src, offset, vl)` and `vslideup(dst, src, offset, vl)`
// for vector type REG. They forward, argument for argument, to
// `vslidedown_vx_<SUF>m1` and `vslideup_vx_<SUF>m1` respectively.
#define OPENCV_HAL_IMPL_RVV_FUN_SLIDE(REG, SUF) \
    static inline REG vslidedown(const REG& dst, const REG& src, size_t offset, size_t vl) \
    { \
        return vslidedown_vx_##SUF##m1(dst, src, offset, vl); \
    } \
    static inline REG vslideup(const REG& dst, const REG& src, size_t offset, size_t vl) \
    { \
        return vslideup_vx_##SUF##m1(dst, src, offset, vl); \
    }

OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float32, f32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int64, i64)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float64, f64)
#endif
// Vector-by-scalar `vmul` overload for the fractional-LMUL type vuint32mf2_t;
// forwards to `vmul_vx_u32mf2`.
static inline vuint32mf2_t vmul(const vuint32mf2_t& op1, uint32_t op2, size_t vl)
{
    const vuint32mf2_t product = vmul_vx_u32mf2(op1, op2, vl);
    return product;
}
// Reinterprets a vint32mf2_t value as vuint32mf2_t by forwarding to
// `vreinterpret_v_i32mf2_u32mf2`.
static inline vuint32mf2_t vreinterpret_u32mf2(vint32mf2_t val)
{
    const vuint32mf2_t as_unsigned = vreinterpret_v_i32mf2_u32mf2(val);
    return as_unsigned;
}
#endif //OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
modules/core/include/opencv2/core/hal/intrin_rvv_scalable.hpp
浏览文件 @
b12c1451
...
...
@@ -13,6 +13,14 @@
#include <vector>
#include <opencv2/core/check.hpp>
// RVV intrinsics have been renamed in version 0.11, so we need to include
// compatibility headers:
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif
#if defined(__GNUC__) && !defined(__clang__)
// FIXIT: eliminate massive warnings from templates
// GCC from 'rvv-next': riscv64-unknown-linux-gnu-g++ (g42df3464463) 12.0.1 20220505 (prerelease)
...
...
@@ -52,89 +60,93 @@ using uint = unsigned int;
// Local integer aliases: `uint64` names `unsigned long int`, `int64` names `long int`.
using uint64 = unsigned long int;
using int64  = long int;
static
const
int
__cv_rvv_e8_nlanes
=
vsetvlmax_e8m1
();
static
const
int
__cv_rvv_e16_nlanes
=
vsetvlmax_e16m1
();
static
const
int
__cv_rvv_e32_nlanes
=
vsetvlmax_e32m1
();
static
const
int
__cv_rvv_e64_nlanes
=
vsetvlmax_e64m1
();
// Lane-count constants, one per element width (e8/e16/e32/e64) and register
// group size (m1/m2/m4/m8). Each is initialised from the matching
// vsetvlmax_<suffix>() call.

// LMUL = 1
static const int __cv_rvv_e8m1_nlanes  = vsetvlmax_e8m1();
static const int __cv_rvv_e16m1_nlanes = vsetvlmax_e16m1();
static const int __cv_rvv_e32m1_nlanes = vsetvlmax_e32m1();
static const int __cv_rvv_e64m1_nlanes = vsetvlmax_e64m1();

// LMUL = 2
static const int __cv_rvv_e8m2_nlanes  = vsetvlmax_e8m2();
static const int __cv_rvv_e16m2_nlanes = vsetvlmax_e16m2();
static const int __cv_rvv_e32m2_nlanes = vsetvlmax_e32m2();
static const int __cv_rvv_e64m2_nlanes = vsetvlmax_e64m2();

// LMUL = 4
static const int __cv_rvv_e8m4_nlanes  = vsetvlmax_e8m4();
static const int __cv_rvv_e16m4_nlanes = vsetvlmax_e16m4();
static const int __cv_rvv_e32m4_nlanes = vsetvlmax_e32m4();
static const int __cv_rvv_e64m4_nlanes = vsetvlmax_e64m4();

// LMUL = 8
static const int __cv_rvv_e8m8_nlanes  = vsetvlmax_e8m8();
static const int __cv_rvv_e16m8_nlanes = vsetvlmax_e16m8();
static const int __cv_rvv_e32m8_nlanes = vsetvlmax_e32m8();
static const int __cv_rvv_e64m8_nlanes = vsetvlmax_e64m8();
template
<
class
T
>
struct
VTraits
;
template
<
>
struct
VTraits
<
v_uint8
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e8_nlanes
;
}
using
lane_type
=
uchar
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
8
;
// Specialises VTraits for the vector type REG.
//   TYP - scalar type exposed as `lane_type`
//   SUF - suffix selecting the `__cv_rvv_<SUF>_nlanes` constant returned by vlanes()
//   SZ  - divisor applied to CV_RVV_MAX_VLEN to obtain `max_nlanes`
#define OPENCV_HAL_IMPL_RVV_TRAITS(REG, TYP, SUF, SZ) \
    template <> \
    struct VTraits<REG> \
    { \
        using lane_type = TYP; \
        static const int max_nlanes = CV_RVV_MAX_VLEN/SZ; \
        static inline int vlanes() \
        { \
            return __cv_rvv_##SUF##_nlanes; \
        } \
    };
template
<
>
struct
VTraits
<
v_int8
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e8_nlanes
;
}
using
lane_type
=
schar
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
8
;
};
template
<
>
struct
VTraits
<
v_uint16
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e16_nlanes
;
}
using
lane_type
=
ushort
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
16
;
};
template
<
>
struct
VTraits
<
v_int16
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e16_nlanes
;
}
using
lane_type
=
short
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
16
;
};
template
<
>
struct
VTraits
<
v_uint32
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e32_nlanes
;
}
using
lane_type
=
uint
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
32
;
};
template
<
>
struct
VTraits
<
v_int32
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e32_nlanes
;
}
using
lane_type
=
int
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
32
;
};
// VTraits specialisations for every integer and 32-bit float vector type,
// at register group sizes m1, m2, m4 and m8.

// 8-bit lanes
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m1_t, int8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m2_t, int8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m4_t, int8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m8_t, int8_t, e8m8, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m1_t, uint8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m2_t, uint8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m4_t, uint8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m8_t, uint8_t, e8m8, 8)

// 16-bit lanes
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m1_t, int16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m2_t, int16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m4_t, int16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m8_t, int16_t, e16m8, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m1_t, uint16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m2_t, uint16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m4_t, uint16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m8_t, uint16_t, e16m8, 16)

// 32-bit integer lanes
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m1_t, int32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m2_t, int32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m4_t, int32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m8_t, int32_t, e32m8, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m1_t, uint32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m2_t, uint32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m4_t, uint32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m8_t, uint32_t, e32m8, 32)

// 64-bit integer lanes
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m1_t, int64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m2_t, int64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m4_t, int64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m8_t, int64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m1_t, uint64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m2_t, uint64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m4_t, uint64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m8_t, uint64_t, e64m8, 64)

// 32-bit float lanes
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m1_t, float, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m2_t, float, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m4_t, float, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m8_t, float, e32m8, 32)
template
<
>
struct
VTraits
<
v_float32
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e32_nlanes
;
}
using
lane_type
=
float
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
32
;
};
template
<
>
struct
VTraits
<
v_uint64
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e64_nlanes
;
}
using
lane_type
=
uint64
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
64
;
};
template
<
>
struct
VTraits
<
v_int64
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e64_nlanes
;
}
using
lane_type
=
int64
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
64
;
};
#if CV_SIMD_SCALABLE_64F
template
<
>
struct
VTraits
<
v_float64
>
{
static
inline
int
vlanes
()
{
return
__cv_rvv_e64_nlanes
;
}
using
lane_type
=
double
;
static
const
int
max_nlanes
=
CV_RVV_MAX_VLEN
/
64
;
};
// VTraits specialisations for the 64-bit float vector types (m1, m2, m4, m8).
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m1_t, double, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m2_t, double, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m4_t, double, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m8_t, double, e64m8, 64)
#endif
// LLVM/Clang defines "overloaded intrinsics" e.g. 'vand(op1, op2)'
// GCC does not have these functions, so we need to implement them manually
// We implement only selected subset required to build current state of the code
// Included inside namespace cv::
#ifndef __riscv_v_intrinsic_overloading
#include "intrin_rvv_compat_overloaded.hpp"
#endif // __riscv_v_intrinsic_overloading
//////////// get0 ////////////
#define OPENCV_HAL_IMPL_RVV_GRT0_INT(_Tpvec, _Tp) \
inline _Tp v_get0(const v_##_Tpvec& v) \
...
...
@@ -435,7 +447,7 @@ inline _Tpvec v_lut(const _Tp* tab, const int* idx) \
inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
{ \
std::vector<uint> idx_; \
for (
size_
t i = 0; i < VTraits<v_int16>::vlanes(); ++i) { \
for (
in
t i = 0; i < VTraits<v_int16>::vlanes(); ++i) { \
idx_.push_back(idx[i]); \
idx_.push_back(idx[i]+1); \
} \
...
...
@@ -445,7 +457,7 @@ inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
inline _Tpvec v_lut_quads(const _Tp* tab, const int* idx) \
{ \
std::vector<uint> idx_; \
for (
size_
t i = 0; i < VTraits<v_int32>::vlanes(); ++i) { \
for (
in
t i = 0; i < VTraits<v_int32>::vlanes(); ++i) { \
idx_.push_back(idx[i]); \
idx_.push_back(idx[i]+1); \
idx_.push_back(idx[i]+2); \
...
...
@@ -479,7 +491,7 @@ inline v_uint64 v_lut_quads(const uint64* tab, const int* idx) { return v_reinte
////////////// Pack boolean ////////////////////
inline
v_uint8
v_pack_b
(
const
v_uint16
&
a
,
const
v_uint16
&
b
)
{
return
vnsrl
(
vset
(
vlmul_ext_u16m2
(
a
),
1
,
b
),
0
,
VTraits
<
v_uint8
>::
vlanes
());
return
vnsrl
(
vset
(
vlmul_ext_
v_u16m1_
u16m2
(
a
),
1
,
b
),
0
,
VTraits
<
v_uint8
>::
vlanes
());
}
inline
v_uint8
v_pack_b
(
const
v_uint32
&
a
,
const
v_uint32
&
b
,
...
...
@@ -1074,11 +1086,11 @@ inline v_float64 v_muladd(const v_float64& a, const v_float64& b, const v_float6
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \
inline bool v_check_all(const _Tpvec& a) \
{ \
return vcpop(vmslt(a, 0, vl), vl) == vl; \
return
(int)
vcpop(vmslt(a, 0, vl), vl) == vl; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
return vcpop(vmslt(a, 0, vl), vl) != 0; \
return
(int)
vcpop(vmslt(a, 0, vl), vl) != 0; \
}
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY
(
v_int8
,
VTraits
<
v_int8
>::
vlanes
())
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录