Commit b12c1451 authored by Maksim Shabunin

RISC-V: allow building scalable RVV support with GCC, LLVM 16 support

Parent commit: 2b32eee3
......@@ -6,12 +6,17 @@
#endif
#if defined CV_RVV
// From RVV intrinsic API v0.11 (__riscv_v_intrinsic >= 11000) the intrinsics
// were renamed with a "__riscv_" prefix; map the old names used in test()
// below onto the new ones so this probe compiles with both API versions.
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#define vreinterpret_v_u64m1_u8m1 __riscv_vreinterpret_v_u64m1_u8m1
#define vle64_v_u64m1 __riscv_vle64_v_u64m1
#define vle32_v_f32m1 __riscv_vle32_v_f32m1
#define vfmv_f_s_f32m1_f32 __riscv_vfmv_f_s_f32m1_f32
#endif
// Compile-time feature probe: exercises the RVV load, reinterpret and
// scalar-extract intrinsics needed by the OpenCV RVV HAL. Only successful
// compilation matters; at runtime it returns 0 (lane 0 of an all-zero
// float vector).
int test()
{
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
// Load two 64-bit lanes and view them as a byte vector ('a' is unused
// beyond forcing the intrinsic calls to be instantiated).
vuint8m1_t a = vreinterpret_v_u64m1_u8m1(vle64_v_u64m1(ptr, 2));
//vuint8m1_t a = (vuint8m1_t)vle64_v_u64m1(ptr, 2);
// Load four floats and extract lane 0 as a scalar.
vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
return (int)vfmv_f_s_f32m1_f32(val);
}
......
......@@ -10,6 +10,15 @@
#include <algorithm>
// RVV intrinsics have been renamed in version 0.11, so we need to include
// compatibility headers:
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif
// Building for T-Head C906 core with RVV 0.7.1 using toolchain
// https://github.com/T-head-Semi/xuantie-gnu-toolchain
// with option '-march=rv64gcv0p7'
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
#define OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
// This file requires VTraits to be defined for vector types
// vand(op1, op2, vl): overloaded wrapper around the type-suffixed
// vand_vv_* intrinsics (element-wise bitwise AND). Needed for compilers
// without the overloaded RVV intrinsic API (see the guard that includes
// this header).
#define OPENCV_HAL_IMPL_RVV_FUN_AND(REG, SUF) \
inline static REG vand(const REG & op1, const REG & op2, size_t vl) \
{ \
return vand_vv_##SUF(op1, op2, vl); \
}
// Instantiate vand for every integer element width at LMUL=1.
OPENCV_HAL_IMPL_RVV_FUN_AND(vint8m1_t, i8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint8m1_t, u8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint16m1_t, i16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint16m1_t, u16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint32m1_t, i32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint32m1_t, u32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint64m1_t, i64m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint64m1_t, u64m1)
// vloxei<W>(base, bindex, vl): overloaded wrapper around the indexed
// (ordered) load intrinsics vloxei{8,32}_v_*.
//   REG  - result vector type, SUF - its intrinsic suffix,
//   INDX - index vector type,  ISUF - index element width ("i8"/"i32").
#define OPENCV_HAL_IMPL_RVV_FUN_LOXEI(REG, SUF, INDX, ISUF) \
inline static REG vloxe##ISUF(const VTraits<REG>::lane_type *base, INDX bindex, size_t vl) \
{ \
return vloxe##ISUF##_v_##SUF(base, bindex, vl); \
}
// Byte-indexed loads of signed 8-bit data.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m4_t, i8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m8_t, i8m8, vuint8m8_t, i8)
// 32-bit-indexed loads; note the index LMUL scales with the element/result
// LMUL ratio (e.g. i8m1 result needs u32m4 indices).
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint16m1_t, i16m1, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m1_t, i32m1, vuint32m1_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m2_t, i32m2, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m4_t, i32m4, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m8_t, i32m8, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint64m1_t, i64m1, vuint32mf2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m1_t, u8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m2_t, u8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m4_t, u8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m8_t, u8m8, vuint8m8_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat32m1_t, f32m1, vuint32m1_t, i32)
// Double-precision gather only when scalable 64-bit float support is on.
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat64m1_t, f64m1, vuint32mf2_t, i32)
#endif
// vmul(op1, op2, vl): overloaded multiply wrappers. For each base type the
// macro emits vector-vector (vmul_vv_*) and vector-scalar (vmul_vx_*)
// overloads at LMUL 1, 2, 4 and 8. REG is the type prefix without the LMUL
// suffix (e.g. "vint8"), SUF the intrinsic suffix prefix (e.g. "i8").
#define OPENCV_HAL_IMPL_RVV_FUN_MUL(REG, SUF) \
inline static REG##m1_t vmul(const REG##m1_t & op1, const REG##m1_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m1(op1, op2, vl); \
} \
inline static REG##m1_t vmul(const REG##m1_t & op1, VTraits<REG##m1_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m1(op1, op2, vl); \
} \
inline static REG##m2_t vmul(const REG##m2_t & op1, const REG##m2_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m2(op1, op2, vl); \
} \
inline static REG##m2_t vmul(const REG##m2_t & op1, VTraits<REG##m2_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m2(op1, op2, vl); \
} \
inline static REG##m4_t vmul(const REG##m4_t & op1, const REG##m4_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m4(op1, op2, vl); \
} \
inline static REG##m4_t vmul(const REG##m4_t & op1, VTraits<REG##m4_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m4(op1, op2, vl); \
} \
inline static REG##m8_t vmul(const REG##m8_t & op1, const REG##m8_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m8(op1, op2, vl); \
} \
inline static REG##m8_t vmul(const REG##m8_t & op1, VTraits<REG##m8_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m8(op1, op2, vl); \
}
// Only 8/16/32-bit integer multiplies are needed; the fractional-LMUL u32mf2
// case is handled by a standalone overload near the end of this header.
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint32, u32)
// vreinterpret_<dst>(src): overloaded bit-preserving reinterpret wrappers,
// emitted at LMUL 1, 2, 4 and 8. REG1/SUF1 name the destination type,
// REG2/SUF2 the source type (overload resolution selects on the argument).
#define OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(REG1, SUF1, REG2, SUF2) \
inline static REG1##m1_t vreinterpret_##SUF1##m1(const REG2##m1_t & src) \
{\
return vreinterpret_v_##SUF2##m1_##SUF1##m1(src); \
} \
inline static REG1##m2_t vreinterpret_##SUF1##m2(const REG2##m2_t & src) \
{\
return vreinterpret_v_##SUF2##m2_##SUF1##m2(src); \
} \
inline static REG1##m4_t vreinterpret_##SUF1##m4(const REG2##m4_t & src) \
{\
return vreinterpret_v_##SUF2##m4_##SUF1##m4(src); \
} \
inline static REG1##m8_t vreinterpret_##SUF1##m8(const REG2##m8_t & src) \
{\
return vreinterpret_v_##SUF2##m8_##SUF1##m8(src); \
}
// Same-width signed/unsigned and int/float conversions.
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint8, i8, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint16, i16, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vfloat32, f32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vfloat32, f32)
// Cross-width unsigned views (u8/u16/u32/u64 of each other).
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint64, u64)
// vse<SZ>(base, value, vl): overloaded wrappers over the unit-stride store
// intrinsics vse{8,16,32,64}_v_*m1.
// NOTE(review): REG here is an OpenCV wrapper type (v_uint8, ...) rather
// than a raw RVV type; this assumes each v_* aliases the corresponding
// *m1_t vector type so both VTraits<REG> and the _v_*m1 intrinsic apply —
// confirm against the typedefs elsewhere in the HAL.
#define OPENCV_HAL_IMPL_RVV_FUN_STORE(REG, SUF, SZ) \
inline static void vse##SZ(VTraits<REG>::lane_type *base, REG value, size_t vl) \
{ \
return vse##SZ##_v_##SUF##m1(base, value, vl); \
}
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint8, u8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int8, i8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint16, u16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int16, i16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint32, u32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int32, i32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint64, u64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int64, i64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float32, f32, 32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float64, f64, 64)
#endif
// vmv_x(reg) / vfmv_f(reg): extract lane 0 of a vector as a scalar via
// vmv_x_s_* (integers) and vfmv_f_s_* (floats). Separate macros are needed
// because the intrinsic name differs between integer and float types.
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(REG, SUF) \
inline static VTraits<REG>::lane_type vmv_x(const REG & reg) \
{\
return vmv_x_s_##SUF##m1_##SUF(reg); \
}
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(REG, SUF) \
inline static VTraits<REG>::lane_type vfmv_f(const REG & reg) \
{\
return vfmv_f_s_##SUF##m1_##SUF(reg); \
}
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int64, i64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float32, f32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float64, f64)
#endif
// vslidedown/vslideup(dst, src, offset, vl): overloaded wrappers over the
// scalar-offset slide intrinsics vslidedown_vx_* / vslideup_vx_*.
#define OPENCV_HAL_IMPL_RVV_FUN_SLIDE(REG, SUF) \
inline static REG vslidedown(const REG & dst, const REG & src, size_t offset, size_t vl) \
{ \
return vslidedown_vx_##SUF##m1(dst, src, offset, vl); \
} \
inline static REG vslideup(const REG & dst, const REG & src, size_t offset, size_t vl) \
{ \
return vslideup_vx_##SUF##m1(dst, src, offset, vl); \
}
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float32, f32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int64, i64)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float64, f64)
#endif
// vmul: vector-scalar multiply for the fractional-LMUL u32mf2 type, which
// OPENCV_HAL_IMPL_RVV_FUN_MUL (whole-register LMUL only) does not cover.
inline static vuint32mf2_t vmul(const vuint32mf2_t & op1, uint32_t op2, size_t vl)
{
    const vuint32mf2_t product = vmul_vx_u32mf2(op1, op2, vl);
    return product;
}
// vreinterpret_u32mf2: bit-preserving view of an i32mf2 vector as u32mf2
// (fractional-LMUL case not covered by OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET).
inline static vuint32mf2_t vreinterpret_u32mf2(vint32mf2_t val)
{
    const vuint32mf2_t as_unsigned = vreinterpret_v_i32mf2_u32mf2(val);
    return as_unsigned;
}
#endif //OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
......@@ -13,6 +13,14 @@
#include <vector>
#include <opencv2/core/check.hpp>
// RVV intrinsics have been renamed in version 0.11, so we need to include
// compatibility headers:
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif
#if defined(__GNUC__) && !defined(__clang__)
// FIXIT: eliminate massive warnigs from templates
// GCC from 'rvv-next': riscv64-unknown-linux-gnu-g++ (g42df3464463) 12.0.1 20220505 (prerelease)
......@@ -52,89 +60,93 @@ using uint = unsigned int;
using uint64 = unsigned long int;
using int64 = long int;
// Cache the hardware lane counts (VLMAX) once at startup: vsetvlmax_eXmY()
// returns the maximum number of elements of width X at LMUL Y for the
// current vector length.
// NOTE(review): the unsuffixed __cv_rvv_eN_nlanes names duplicate the m1
// variants below — presumably the pre-rename names from this diff; confirm
// only one set remains in the final file.
static const int __cv_rvv_e8_nlanes = vsetvlmax_e8m1();
static const int __cv_rvv_e16_nlanes = vsetvlmax_e16m1();
static const int __cv_rvv_e32_nlanes = vsetvlmax_e32m1();
static const int __cv_rvv_e64_nlanes = vsetvlmax_e64m1();
static const int __cv_rvv_e8m1_nlanes = vsetvlmax_e8m1();
static const int __cv_rvv_e16m1_nlanes = vsetvlmax_e16m1();
static const int __cv_rvv_e32m1_nlanes = vsetvlmax_e32m1();
static const int __cv_rvv_e64m1_nlanes = vsetvlmax_e64m1();
static const int __cv_rvv_e8m2_nlanes = vsetvlmax_e8m2();
static const int __cv_rvv_e16m2_nlanes = vsetvlmax_e16m2();
static const int __cv_rvv_e32m2_nlanes = vsetvlmax_e32m2();
static const int __cv_rvv_e64m2_nlanes = vsetvlmax_e64m2();
static const int __cv_rvv_e8m4_nlanes = vsetvlmax_e8m4();
static const int __cv_rvv_e16m4_nlanes = vsetvlmax_e16m4();
static const int __cv_rvv_e32m4_nlanes = vsetvlmax_e32m4();
static const int __cv_rvv_e64m4_nlanes = vsetvlmax_e64m4();
static const int __cv_rvv_e8m8_nlanes = vsetvlmax_e8m8();
static const int __cv_rvv_e16m8_nlanes = vsetvlmax_e16m8();
static const int __cv_rvv_e32m8_nlanes = vsetvlmax_e32m8();
static const int __cv_rvv_e64m8_nlanes = vsetvlmax_e64m8();
// Primary template, deliberately undefined: traits (lane count, lane type,
// maximum lane count) exist only via the explicit specializations below.
template <class T>
struct VTraits;
template <>
struct VTraits<v_uint8>
{
static inline int vlanes() { return __cv_rvv_e8_nlanes; }
using lane_type = uchar;
static const int max_nlanes = CV_RVV_MAX_VLEN/8;
// Generates a VTraits specialization for one RVV vector type:
//   REG - vector type, TYP - scalar lane type,
//   SUF - nlanes-variable suffix (e.g. e8m1), SZ - element width in bits.
// vlanes() is the runtime lane count; max_nlanes bounds it via the
// configured maximum VLEN.
#define OPENCV_HAL_IMPL_RVV_TRAITS(REG, TYP, SUF, SZ) \
template <> \
struct VTraits<REG> \
{ \
static inline int vlanes() { return __cv_rvv_##SUF##_nlanes; } \
using lane_type = TYP; \
static const int max_nlanes = CV_RVV_MAX_VLEN/SZ; \
};
// Hand-written specializations for the OpenCV wrapper types v_int8..v_int32.
// NOTE(review): these look like the pre-macro versions this diff replaces
// with OPENCV_HAL_IMPL_RVV_TRAITS; if v_int8 etc. alias the *m1_t RVV types
// they would collide with the macro-generated specializations — confirm
// which set the final file keeps.
template <>
struct VTraits<v_int8>
{
static inline int vlanes() { return __cv_rvv_e8_nlanes; }
using lane_type = schar;
static const int max_nlanes = CV_RVV_MAX_VLEN/8;
};
template <>
struct VTraits<v_uint16>
{
static inline int vlanes() { return __cv_rvv_e16_nlanes; }
using lane_type = ushort;
static const int max_nlanes = CV_RVV_MAX_VLEN/16;
};
template <>
struct VTraits<v_int16>
{
static inline int vlanes() { return __cv_rvv_e16_nlanes; }
using lane_type = short;
static const int max_nlanes = CV_RVV_MAX_VLEN/16;
};
template <>
struct VTraits<v_uint32>
{
static inline int vlanes() { return __cv_rvv_e32_nlanes; }
using lane_type = uint;
static const int max_nlanes = CV_RVV_MAX_VLEN/32;
};
template <>
struct VTraits<v_int32>
{
static inline int vlanes() { return __cv_rvv_e32_nlanes; }
using lane_type = int;
static const int max_nlanes = CV_RVV_MAX_VLEN/32;
};
// Generate VTraits for every native RVV vector type: all four element
// widths (8/16/32/64-bit, signed and unsigned) at LMUL 1, 2, 4 and 8,
// plus single-precision float.
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m1_t, int8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m2_t, int8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m4_t, int8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m8_t, int8_t, e8m8, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m1_t, uint8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m2_t, uint8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m4_t, uint8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m8_t, uint8_t, e8m8, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m1_t, int16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m2_t, int16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m4_t, int16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m8_t, int16_t, e16m8, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m1_t, uint16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m2_t, uint16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m4_t, uint16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m8_t, uint16_t, e16m8, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m1_t, int32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m2_t, int32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m4_t, int32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m8_t, int32_t, e32m8, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m1_t, uint32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m2_t, uint32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m4_t, uint32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m8_t, uint32_t, e32m8, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m1_t, int64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m2_t, int64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m4_t, int64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m8_t, int64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m1_t, uint64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m2_t, uint64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m4_t, uint64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m8_t, uint64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m1_t, float, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m2_t, float, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m4_t, float, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m8_t, float, e32m8, 32)
// Hand-written specializations for the wrapper types v_float32 / v_uint64 /
// v_int64 (these use the unsuffixed __cv_rvv_e*_nlanes variables above).
template <>
struct VTraits<v_float32>
{
static inline int vlanes() { return __cv_rvv_e32_nlanes; }
using lane_type = float;
static const int max_nlanes = CV_RVV_MAX_VLEN/32;
};
template <>
struct VTraits<v_uint64>
{
static inline int vlanes() { return __cv_rvv_e64_nlanes; }
using lane_type = uint64;
static const int max_nlanes = CV_RVV_MAX_VLEN/64;
};
template <>
struct VTraits<v_int64>
{
static inline int vlanes() { return __cv_rvv_e64_nlanes; }
using lane_type = int64;
static const int max_nlanes = CV_RVV_MAX_VLEN/64;
};
// Double-precision traits are compiled only when scalable 64-bit float
// support is enabled (target has the V extension with ELEN>=64 FP).
#if CV_SIMD_SCALABLE_64F
template <>
struct VTraits<v_float64>
{
static inline int vlanes() { return __cv_rvv_e64_nlanes; }
using lane_type = double;
static const int max_nlanes = CV_RVV_MAX_VLEN/64;
};
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m1_t, double, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m2_t, double, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m4_t, double, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m8_t, double, e64m8, 64)
#endif
// LLVM/Clang defines "overloaded intrinsics" e.g. 'vand(op1, op2)'
// GCC does not have these functions, so we need to implement them manually
// We implement only selected subset required to build current state of the code
// Included inside namespace cv::
#ifndef __riscv_v_intrinsic_overloading
#include "intrin_rvv_compat_overloaded.hpp"
#endif // __riscv_v_intrinsic_overloading
//////////// get0 ////////////
#define OPENCV_HAL_IMPL_RVV_GRT0_INT(_Tpvec, _Tp) \
inline _Tp v_get0(const v_##_Tpvec& v) \
......@@ -435,7 +447,7 @@ inline _Tpvec v_lut(const _Tp* tab, const int* idx) \
inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
{ \
std::vector<uint> idx_; \
for (size_t i = 0; i < VTraits<v_int16>::vlanes(); ++i) { \
for (int i = 0; i < VTraits<v_int16>::vlanes(); ++i) { \
idx_.push_back(idx[i]); \
idx_.push_back(idx[i]+1); \
} \
......@@ -445,7 +457,7 @@ inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
inline _Tpvec v_lut_quads(const _Tp* tab, const int* idx) \
{ \
std::vector<uint> idx_; \
for (size_t i = 0; i < VTraits<v_int32>::vlanes(); ++i) { \
for (int i = 0; i < VTraits<v_int32>::vlanes(); ++i) { \
idx_.push_back(idx[i]); \
idx_.push_back(idx[i]+1); \
idx_.push_back(idx[i]+2); \
......@@ -479,7 +491,7 @@ inline v_uint64 v_lut_quads(const uint64* tab, const int* idx) { return v_reinte
////////////// Pack boolean ////////////////////
inline v_uint8 v_pack_b(const v_uint16& a, const v_uint16& b)
{
return vnsrl(vset(vlmul_ext_u16m2(a),1,b), 0, VTraits<v_uint8>::vlanes());
return vnsrl(vset(vlmul_ext_v_u16m1_u16m2(a),1,b), 0, VTraits<v_uint8>::vlanes());
}
inline v_uint8 v_pack_b(const v_uint32& a, const v_uint32& b,
......@@ -1074,11 +1086,11 @@ inline v_float64 v_muladd(const v_float64& a, const v_float64& b, const v_float6
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \
inline bool v_check_all(const _Tpvec& a) \
{ \
return vcpop(vmslt(a, 0, vl), vl) == vl; \
return (int)vcpop(vmslt(a, 0, vl), vl) == vl; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
return vcpop(vmslt(a, 0, vl), vl) != 0; \
return (int)vcpop(vmslt(a, 0, vl), vl) != 0; \
}
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int8, VTraits<v_int8>::vlanes())
......
Markdown is supported
Attachment upload: 0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register or sign in.