Commit b12c1451 authored by Maksim Shabunin

RISC-V: allow building scalable RVV support with GCC, LLVM 16 support

Parent commit: 2b32eee3
......@@ -6,12 +6,17 @@
#endif
#if defined CV_RVV
// From RVV intrinsic API v0.11 (__riscv_v_intrinsic >= 11000) the intrinsics
// were renamed with a "__riscv_" prefix; map the old names used in test()
// below onto the new ones so this probe compiles with both API versions.
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#define vreinterpret_v_u64m1_u8m1 __riscv_vreinterpret_v_u64m1_u8m1
#define vle64_v_u64m1 __riscv_vle64_v_u64m1
#define vle32_v_f32m1 __riscv_vle32_v_f32m1
#define vfmv_f_s_f32m1_f32 __riscv_vfmv_f_s_f32m1_f32
#endif
// Compile-time feature probe: exercises the RVV load, reinterpret and
// scalar-extract intrinsics needed by the OpenCV RVV HAL. Only successful
// compilation matters; at runtime it returns 0 (lane 0 of an all-zero
// float vector).
int test()
{
const float src[] = { 0.0f, 0.0f, 0.0f, 0.0f };
uint64_t ptr[2] = {0x0908060504020100, 0xFFFFFFFF0E0D0C0A};
// Load two 64-bit lanes and view them as a byte vector ('a' is unused
// beyond forcing the intrinsic calls to be instantiated).
vuint8m1_t a = vreinterpret_v_u64m1_u8m1(vle64_v_u64m1(ptr, 2));
//vuint8m1_t a = (vuint8m1_t)vle64_v_u64m1(ptr, 2);
// Load four floats and extract lane 0 as a scalar.
vfloat32m1_t val = vle32_v_f32m1((const float*)(src), 4);
return (int)vfmv_f_s_f32m1_f32(val);
}
......
......@@ -10,6 +10,15 @@
#include <algorithm>
// RVV intrinsics have been renamed in version 0.11, so we need to include
// compatibility headers:
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif
// Building for T-Head C906 core with RVV 0.7.1 using toolchain
// https://github.com/T-head-Semi/xuantie-gnu-toolchain
// with option '-march=rv64gcv0p7'
......
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
#define OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
// This file requires VTraits to be defined for vector types
// vand(op1, op2, vl): overloaded wrapper around the type-suffixed
// vand_vv_* intrinsics (element-wise bitwise AND). Needed for compilers
// without the overloaded RVV intrinsic API (see the guard that includes
// this header).
#define OPENCV_HAL_IMPL_RVV_FUN_AND(REG, SUF) \
inline static REG vand(const REG & op1, const REG & op2, size_t vl) \
{ \
return vand_vv_##SUF(op1, op2, vl); \
}
// Instantiate vand for every integer element width at LMUL=1.
OPENCV_HAL_IMPL_RVV_FUN_AND(vint8m1_t, i8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint8m1_t, u8m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint16m1_t, i16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint16m1_t, u16m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint32m1_t, i32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint32m1_t, u32m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vint64m1_t, i64m1)
OPENCV_HAL_IMPL_RVV_FUN_AND(vuint64m1_t, u64m1)
// vloxei<W>(base, bindex, vl): overloaded wrapper around the indexed
// (ordered) load intrinsics vloxei{8,32}_v_*.
//   REG  - result vector type, SUF - its intrinsic suffix,
//   INDX - index vector type,  ISUF - index element width ("i8"/"i32").
#define OPENCV_HAL_IMPL_RVV_FUN_LOXEI(REG, SUF, INDX, ISUF) \
inline static REG vloxe##ISUF(const VTraits<REG>::lane_type *base, INDX bindex, size_t vl) \
{ \
return vloxe##ISUF##_v_##SUF(base, bindex, vl); \
}
// Byte-indexed loads of signed 8-bit data.
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m4_t, i8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m8_t, i8m8, vuint8m8_t, i8)
// 32-bit-indexed loads; note the index LMUL scales with the element/result
// LMUL ratio (e.g. i8m1 result needs u32m4 indices).
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m1_t, i8m1, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint8m2_t, i8m2, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint16m1_t, i16m1, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m1_t, i32m1, vuint32m1_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m2_t, i32m2, vuint32m2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m4_t, i32m4, vuint32m4_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint32m8_t, i32m8, vuint32m8_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vint64m1_t, i64m1, vuint32mf2_t, i32)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m1_t, u8m1, vuint8m1_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m2_t, u8m2, vuint8m2_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m4_t, u8m4, vuint8m4_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vuint8m8_t, u8m8, vuint8m8_t, i8)
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat32m1_t, f32m1, vuint32m1_t, i32)
// Double-precision gather only when scalable 64-bit float support is on.
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_LOXEI(vfloat64m1_t, f64m1, vuint32mf2_t, i32)
#endif
// vmul(op1, op2, vl): overloaded multiply wrappers. For each base type the
// macro emits vector-vector (vmul_vv_*) and vector-scalar (vmul_vx_*)
// overloads at LMUL 1, 2, 4 and 8. REG is the type prefix without the LMUL
// suffix (e.g. "vint8"), SUF the intrinsic suffix prefix (e.g. "i8").
#define OPENCV_HAL_IMPL_RVV_FUN_MUL(REG, SUF) \
inline static REG##m1_t vmul(const REG##m1_t & op1, const REG##m1_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m1(op1, op2, vl); \
} \
inline static REG##m1_t vmul(const REG##m1_t & op1, VTraits<REG##m1_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m1(op1, op2, vl); \
} \
inline static REG##m2_t vmul(const REG##m2_t & op1, const REG##m2_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m2(op1, op2, vl); \
} \
inline static REG##m2_t vmul(const REG##m2_t & op1, VTraits<REG##m2_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m2(op1, op2, vl); \
} \
inline static REG##m4_t vmul(const REG##m4_t & op1, const REG##m4_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m4(op1, op2, vl); \
} \
inline static REG##m4_t vmul(const REG##m4_t & op1, VTraits<REG##m4_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m4(op1, op2, vl); \
} \
inline static REG##m8_t vmul(const REG##m8_t & op1, const REG##m8_t & op2, size_t vl) \
{ \
return vmul_vv_##SUF##m8(op1, op2, vl); \
} \
inline static REG##m8_t vmul(const REG##m8_t & op1, VTraits<REG##m8_t>::lane_type op2, size_t vl) \
{ \
return vmul_vx_##SUF##m8(op1, op2, vl); \
}
// Only 8/16/32-bit integer multiplies are needed; the fractional-LMUL u32mf2
// case is handled by a standalone overload near the end of this header.
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_MUL(vuint32, u32)
// vreinterpret_<dst>(src): overloaded bit-preserving reinterpret wrappers,
// emitted at LMUL 1, 2, 4 and 8. REG1/SUF1 name the destination type,
// REG2/SUF2 the source type (overload resolution selects on the argument).
#define OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(REG1, SUF1, REG2, SUF2) \
inline static REG1##m1_t vreinterpret_##SUF1##m1(const REG2##m1_t & src) \
{\
return vreinterpret_v_##SUF2##m1_##SUF1##m1(src); \
} \
inline static REG1##m2_t vreinterpret_##SUF1##m2(const REG2##m2_t & src) \
{\
return vreinterpret_v_##SUF2##m2_##SUF1##m2(src); \
} \
inline static REG1##m4_t vreinterpret_##SUF1##m4(const REG2##m4_t & src) \
{\
return vreinterpret_v_##SUF2##m4_##SUF1##m4(src); \
} \
inline static REG1##m8_t vreinterpret_##SUF1##m8(const REG2##m8_t & src) \
{\
return vreinterpret_v_##SUF2##m8_##SUF1##m8(src); \
}
// Same-width signed/unsigned and int/float conversions.
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint8, i8, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint16, i16, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vfloat32, f32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vfloat32, f32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vint32, i32, vfloat32, f32)
// Cross-width unsigned views (u8/u16/u32/u64 of each other).
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vint8, i8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint8, u8, vuint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vint16, i16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint16, u16, vuint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vint32, i32)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET(vuint32, u32, vuint64, u64)
// vse<SZ>(base, value, vl): overloaded wrappers over the unit-stride store
// intrinsics vse{8,16,32,64}_v_*m1.
// NOTE(review): REG here is an OpenCV wrapper type (v_uint8, ...) rather
// than a raw RVV type; this assumes each v_* aliases the corresponding
// *m1_t vector type so both VTraits<REG> and the _v_*m1 intrinsic apply —
// confirm against the typedefs elsewhere in the HAL.
#define OPENCV_HAL_IMPL_RVV_FUN_STORE(REG, SUF, SZ) \
inline static void vse##SZ(VTraits<REG>::lane_type *base, REG value, size_t vl) \
{ \
return vse##SZ##_v_##SUF##m1(base, value, vl); \
}
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint8, u8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int8, i8, 8)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint16, u16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int16, i16, 16)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint32, u32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int32, i32, 32)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_uint64, u64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_int64, i64, 64)
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float32, f32, 32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_STORE(v_float64, f64, 64)
#endif
// vmv_x(reg) / vfmv_f(reg): extract lane 0 of a vector as a scalar via
// vmv_x_s_* (integers) and vfmv_f_s_* (floats). Separate macros are needed
// because the intrinsic name differs between integer and float types.
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(REG, SUF) \
inline static VTraits<REG>::lane_type vmv_x(const REG & reg) \
{\
return vmv_x_s_##SUF##m1_##SUF(reg); \
}
#define OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(REG, SUF) \
inline static VTraits<REG>::lane_type vfmv_f(const REG & reg) \
{\
return vfmv_f_s_##SUF##m1_##SUF(reg); \
}
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT(v_int64, i64)
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float32, f32)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_EXTRACT_F(v_float64, f64)
#endif
// vslidedown/vslideup(dst, src, offset, vl): overloaded wrappers over the
// scalar-offset slide intrinsics vslidedown_vx_* / vslideup_vx_*.
#define OPENCV_HAL_IMPL_RVV_FUN_SLIDE(REG, SUF) \
inline static REG vslidedown(const REG & dst, const REG & src, size_t offset, size_t vl) \
{ \
return vslidedown_vx_##SUF##m1(dst, src, offset, vl); \
} \
inline static REG vslideup(const REG & dst, const REG & src, size_t offset, size_t vl) \
{ \
return vslideup_vx_##SUF##m1(dst, src, offset, vl); \
}
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint8, u8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int8, i8)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint16, u16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int16, i16)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint32, u32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int32, i32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float32, f32)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_uint64, u64)
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_int64, i64)
#if CV_SIMD_SCALABLE_64F
OPENCV_HAL_IMPL_RVV_FUN_SLIDE(v_float64, f64)
#endif
// vmul: vector-scalar multiply for the fractional-LMUL u32mf2 type, which
// OPENCV_HAL_IMPL_RVV_FUN_MUL (whole-register LMUL only) does not cover.
inline static vuint32mf2_t vmul(const vuint32mf2_t & op1, uint32_t op2, size_t vl)
{
    const vuint32mf2_t product = vmul_vx_u32mf2(op1, op2, vl);
    return product;
}
// vreinterpret_u32mf2: bit-preserving view of an i32mf2 vector as u32mf2
// (fractional-LMUL case not covered by OPENCV_HAL_IMPL_RVV_FUN_REINTERPRET).
inline static vuint32mf2_t vreinterpret_u32mf2(vint32mf2_t val)
{
    const vuint32mf2_t as_unsigned = vreinterpret_v_i32mf2_u32mf2(val);
    return as_unsigned;
}
#endif //OPENCV_HAL_INTRIN_RVV_COMPAT_OVERLOAD_HPP
......@@ -13,6 +13,14 @@
#include <vector>
#include <opencv2/core/check.hpp>
// RVV intrinsics have been renamed in version 0.11, so we need to include
// compatibility headers:
// https://github.com/riscv-non-isa/rvv-intrinsic-doc/tree/master/auto-generated/rvv-v0p10-compatible-headers
#if defined(__riscv_v_intrinsic) && __riscv_v_intrinsic>10999
#include "intrin_rvv_010_compat_non-policy.hpp"
#include "intrin_rvv_010_compat_overloaded-non-policy.hpp"
#endif
#if defined(__GNUC__) && !defined(__clang__)
// FIXIT: eliminate massive warnigs from templates
// GCC from 'rvv-next': riscv64-unknown-linux-gnu-g++ (g42df3464463) 12.0.1 20220505 (prerelease)
......@@ -52,89 +60,93 @@ using uint = unsigned int;
using uint64 = unsigned long int;
using int64 = long int;
// Cache the hardware lane counts (VLMAX) once at startup: vsetvlmax_eXmY()
// returns the maximum number of elements of width X at LMUL Y for the
// current vector length.
// NOTE(review): the unsuffixed __cv_rvv_eN_nlanes names duplicate the m1
// variants below — presumably the pre-rename names from this diff; confirm
// only one set remains in the final file.
static const int __cv_rvv_e8_nlanes = vsetvlmax_e8m1();
static const int __cv_rvv_e16_nlanes = vsetvlmax_e16m1();
static const int __cv_rvv_e32_nlanes = vsetvlmax_e32m1();
static const int __cv_rvv_e64_nlanes = vsetvlmax_e64m1();
static const int __cv_rvv_e8m1_nlanes = vsetvlmax_e8m1();
static const int __cv_rvv_e16m1_nlanes = vsetvlmax_e16m1();
static const int __cv_rvv_e32m1_nlanes = vsetvlmax_e32m1();
static const int __cv_rvv_e64m1_nlanes = vsetvlmax_e64m1();
static const int __cv_rvv_e8m2_nlanes = vsetvlmax_e8m2();
static const int __cv_rvv_e16m2_nlanes = vsetvlmax_e16m2();
static const int __cv_rvv_e32m2_nlanes = vsetvlmax_e32m2();
static const int __cv_rvv_e64m2_nlanes = vsetvlmax_e64m2();
static const int __cv_rvv_e8m4_nlanes = vsetvlmax_e8m4();
static const int __cv_rvv_e16m4_nlanes = vsetvlmax_e16m4();
static const int __cv_rvv_e32m4_nlanes = vsetvlmax_e32m4();
static const int __cv_rvv_e64m4_nlanes = vsetvlmax_e64m4();
static const int __cv_rvv_e8m8_nlanes = vsetvlmax_e8m8();
static const int __cv_rvv_e16m8_nlanes = vsetvlmax_e16m8();
static const int __cv_rvv_e32m8_nlanes = vsetvlmax_e32m8();
static const int __cv_rvv_e64m8_nlanes = vsetvlmax_e64m8();
// Primary template, deliberately undefined: traits (lane count, lane type,
// maximum lane count) exist only via the explicit specializations below.
template <class T>
struct VTraits;
template <>
struct VTraits<v_uint8>
{
static inline int vlanes() { return __cv_rvv_e8_nlanes; }
using lane_type = uchar;
static const int max_nlanes = CV_RVV_MAX_VLEN/8;
// Generates a VTraits specialization for one RVV vector type:
//   REG - vector type, TYP - scalar lane type,
//   SUF - nlanes-variable suffix (e.g. e8m1), SZ - element width in bits.
// vlanes() is the runtime lane count; max_nlanes bounds it via the
// configured maximum VLEN.
#define OPENCV_HAL_IMPL_RVV_TRAITS(REG, TYP, SUF, SZ) \
template <> \
struct VTraits<REG> \
{ \
static inline int vlanes() { return __cv_rvv_##SUF##_nlanes; } \
using lane_type = TYP; \
static const int max_nlanes = CV_RVV_MAX_VLEN/SZ; \
};
// Hand-written specializations for the OpenCV wrapper types v_int8..v_int32.
// NOTE(review): these look like the pre-macro versions this diff replaces
// with OPENCV_HAL_IMPL_RVV_TRAITS; if v_int8 etc. alias the *m1_t RVV types
// they would collide with the macro-generated specializations — confirm
// which set the final file keeps.
template <>
struct VTraits<v_int8>
{
static inline int vlanes() { return __cv_rvv_e8_nlanes; }
using lane_type = schar;
static const int max_nlanes = CV_RVV_MAX_VLEN/8;
};
template <>
struct VTraits<v_uint16>
{
static inline int vlanes() { return __cv_rvv_e16_nlanes; }
using lane_type = ushort;
static const int max_nlanes = CV_RVV_MAX_VLEN/16;
};
template <>
struct VTraits<v_int16>
{
static inline int vlanes() { return __cv_rvv_e16_nlanes; }
using lane_type = short;
static const int max_nlanes = CV_RVV_MAX_VLEN/16;
};
template <>
struct VTraits<v_uint32>
{
static inline int vlanes() { return __cv_rvv_e32_nlanes; }
using lane_type = uint;
static const int max_nlanes = CV_RVV_MAX_VLEN/32;
};
template <>
struct VTraits<v_int32>
{
static inline int vlanes() { return __cv_rvv_e32_nlanes; }
using lane_type = int;
static const int max_nlanes = CV_RVV_MAX_VLEN/32;
};
// Generate VTraits for every native RVV vector type: all four element
// widths (8/16/32/64-bit, signed and unsigned) at LMUL 1, 2, 4 and 8,
// plus single-precision float.
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m1_t, int8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m2_t, int8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m4_t, int8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint8m8_t, int8_t, e8m8, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m1_t, uint8_t, e8m1, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m2_t, uint8_t, e8m2, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m4_t, uint8_t, e8m4, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint8m8_t, uint8_t, e8m8, 8)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m1_t, int16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m2_t, int16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m4_t, int16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint16m8_t, int16_t, e16m8, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m1_t, uint16_t, e16m1, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m2_t, uint16_t, e16m2, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m4_t, uint16_t, e16m4, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint16m8_t, uint16_t, e16m8, 16)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m1_t, int32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m2_t, int32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m4_t, int32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint32m8_t, int32_t, e32m8, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m1_t, uint32_t, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m2_t, uint32_t, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m4_t, uint32_t, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint32m8_t, uint32_t, e32m8, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m1_t, int64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m2_t, int64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m4_t, int64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vint64m8_t, int64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m1_t, uint64_t, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m2_t, uint64_t, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m4_t, uint64_t, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vuint64m8_t, uint64_t, e64m8, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m1_t, float, e32m1, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m2_t, float, e32m2, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m4_t, float, e32m4, 32)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat32m8_t, float, e32m8, 32)
// Hand-written specializations for the wrapper types v_float32 / v_uint64 /
// v_int64 (these use the unsuffixed __cv_rvv_e*_nlanes variables above).
template <>
struct VTraits<v_float32>
{
static inline int vlanes() { return __cv_rvv_e32_nlanes; }
using lane_type = float;
static const int max_nlanes = CV_RVV_MAX_VLEN/32;
};
template <>
struct VTraits<v_uint64>
{
static inline int vlanes() { return __cv_rvv_e64_nlanes; }
using lane_type = uint64;
static const int max_nlanes = CV_RVV_MAX_VLEN/64;
};
template <>
struct VTraits<v_int64>
{
static inline int vlanes() { return __cv_rvv_e64_nlanes; }
using lane_type = int64;
static const int max_nlanes = CV_RVV_MAX_VLEN/64;
};
// Double-precision traits are compiled only when scalable 64-bit float
// support is enabled (target has the V extension with ELEN>=64 FP).
#if CV_SIMD_SCALABLE_64F
template <>
struct VTraits<v_float64>
{
static inline int vlanes() { return __cv_rvv_e64_nlanes; }
using lane_type = double;
static const int max_nlanes = CV_RVV_MAX_VLEN/64;
};
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m1_t, double, e64m1, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m2_t, double, e64m2, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m4_t, double, e64m4, 64)
OPENCV_HAL_IMPL_RVV_TRAITS(vfloat64m8_t, double, e64m8, 64)
#endif
// LLVM/Clang defines "overloaded intrinsics" e.g. 'vand(op1, op2)'
// GCC does not have these functions, so we need to implement them manually
// We implement only selected subset required to build current state of the code
// Included inside namespace cv::
#ifndef __riscv_v_intrinsic_overloading
#include "intrin_rvv_compat_overloaded.hpp"
#endif // __riscv_v_intrinsic_overloading
//////////// get0 ////////////
#define OPENCV_HAL_IMPL_RVV_GRT0_INT(_Tpvec, _Tp) \
inline _Tp v_get0(const v_##_Tpvec& v) \
......@@ -435,7 +447,7 @@ inline _Tpvec v_lut(const _Tp* tab, const int* idx) \
inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
{ \
std::vector<uint> idx_; \
for (size_t i = 0; i < VTraits<v_int16>::vlanes(); ++i) { \
for (int i = 0; i < VTraits<v_int16>::vlanes(); ++i) { \
idx_.push_back(idx[i]); \
idx_.push_back(idx[i]+1); \
} \
......@@ -445,7 +457,7 @@ inline _Tpvec v_lut_pairs(const _Tp* tab, const int* idx) \
inline _Tpvec v_lut_quads(const _Tp* tab, const int* idx) \
{ \
std::vector<uint> idx_; \
for (size_t i = 0; i < VTraits<v_int32>::vlanes(); ++i) { \
for (int i = 0; i < VTraits<v_int32>::vlanes(); ++i) { \
idx_.push_back(idx[i]); \
idx_.push_back(idx[i]+1); \
idx_.push_back(idx[i]+2); \
......@@ -479,7 +491,7 @@ inline v_uint64 v_lut_quads(const uint64* tab, const int* idx) { return v_reinte
////////////// Pack boolean ////////////////////
inline v_uint8 v_pack_b(const v_uint16& a, const v_uint16& b)
{
return vnsrl(vset(vlmul_ext_u16m2(a),1,b), 0, VTraits<v_uint8>::vlanes());
return vnsrl(vset(vlmul_ext_v_u16m1_u16m2(a),1,b), 0, VTraits<v_uint8>::vlanes());
}
inline v_uint8 v_pack_b(const v_uint32& a, const v_uint32& b,
......@@ -1074,11 +1086,11 @@ inline v_float64 v_muladd(const v_float64& a, const v_float64& b, const v_float6
#define OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(_Tpvec, vl) \
inline bool v_check_all(const _Tpvec& a) \
{ \
return vcpop(vmslt(a, 0, vl), vl) == vl; \
return (int)vcpop(vmslt(a, 0, vl), vl) == vl; \
} \
inline bool v_check_any(const _Tpvec& a) \
{ \
return vcpop(vmslt(a, 0, vl), vl) != 0; \
return (int)vcpop(vmslt(a, 0, vl), vl) != 0; \
}
OPENCV_HAL_IMPL_RVV_CHECK_ALLANY(v_int8, VTraits<v_int8>::vlanes())
......
Markdown is supported
Attachment upload: 0%
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
To comment, please register or sign in.