提交 5873d5f5 编写于 作者: M Megvii Engine Team

feat(gi): add more gi api

GitOrigin-RevId: e2ae8c0873366bc920eb0ab3398f41f995c34ffd
上级 cfc41648
...@@ -1750,6 +1750,42 @@ GI_FLOAT32_V3_t GiLoadUzipFloat32V3(const float* ptr) { ...@@ -1750,6 +1750,42 @@ GI_FLOAT32_V3_t GiLoadUzipFloat32V3(const float* ptr) {
#endif #endif
} }
//! Load consecutive floats starting at \p ptr and de-interleave them with a
//! stride of four into the four vectors of a GI_FLOAT32_V4_t.
//!
//! In the SSE2 and scalar fallback paths 16 floats are read and lane j of
//! ret.val[i] receives ptr[4 * j + i], i.e. the 4x4 tile at \p ptr is
//! transposed. The NEON and RVV paths delegate to the native 4-way
//! de-interleaving load of the respective instruction set.
GI_FORCEINLINE
GI_FLOAT32_V4_t GiLoadUzipFloat32V4(const float* ptr) {
#if defined(GI_NEON_INTRINSICS)
    return vld4q_f32(ptr);
#elif defined(GI_SSE2_INTRINSICS)
    //! Read four contiguous rows of four floats each.
    const GI_FLOAT32_t row0 = GiLoadFloat32(ptr);
    const GI_FLOAT32_t row1 = GiLoadFloat32(ptr + 4);
    const GI_FLOAT32_t row2 = GiLoadFloat32(ptr + 8);
    const GI_FLOAT32_t row3 = GiLoadFloat32(ptr + 12);

    //! Interleave row pairs: lo01 = {r0[0], r1[0], r0[1], r1[1]}, and so on.
    const __m128 lo01 = _mm_unpacklo_ps(row0, row1);
    const __m128 lo23 = _mm_unpacklo_ps(row2, row3);
    const __m128 hi01 = _mm_unpackhi_ps(row0, row1);
    const __m128 hi23 = _mm_unpackhi_ps(row2, row3);

    //! Stitch the 64-bit halves together so that each output vector holds
    //! one column of the original 4x4 tile.
    GI_FLOAT32_V4_t out;
    out.val[0] = _mm_movelh_ps(lo01, lo23);
    out.val[1] = _mm_movehl_ps(lo23, lo01);
    out.val[2] = _mm_movelh_ps(hi01, hi23);
    out.val[3] = _mm_movehl_ps(hi23, hi01);
    return out;
#elif defined(GI_RVV_INTRINSICS)
    return vlseg4e32_v_f32m1x4(ptr, GI_SIMD_LEN_BYTE / sizeof(float));
#else
    //! Scalar reference path: column `col` of the 4x4 tile becomes vector `col`.
    GI_FLOAT32_V4_t out;
    for (size_t col = 0; col < 4; col++) {
        const float* src = ptr + col;
        out.val[col][0] = src[0];
        out.val[col][1] = src[4];
        out.val[col][2] = src[8];
        out.val[col][3] = src[12];
    }
    return out;
#endif
}
GI_FORCEINLINE GI_FORCEINLINE
void GiStoreZipFloat32V3(float* ptr, GI_FLOAT32_V3_t val) { void GiStoreZipFloat32V3(float* ptr, GI_FLOAT32_V3_t val) {
#if defined(GI_NEON_INTRINSICS) #if defined(GI_NEON_INTRINSICS)
......
...@@ -4208,7 +4208,26 @@ TEST_F(FALLBACK, GiLoadUzipFloat32V3) { ...@@ -4208,7 +4208,26 @@ TEST_F(FALLBACK, GiLoadUzipFloat32V3) {
naive.push_back(s0[9 + i]); naive.push_back(s0[9 + i]);
} }
assert_eq((float*)&ret, naive); assert_eq((float*)&ret, naive, SIMD_LEN * 3);
}
// Checks GiLoadUzipFloat32V4 against a scalar reference: the i-th result
// vector is expected to hold s0[0 + i], s0[4 + i], s0[8 + i], s0[12 + i].
TEST_F(FALLBACK, GiLoadUzipFloat32V4) {
GI_FLOAT32_V4_t ret;
// 16 input values, i.e. four groups of four floats.
std::vector<float> s0{1.1f, 2.2f, 3.5f, 4.9f, 2312.1f, 345.244f, 3.59f, -12.8f,
2.2f, 6.0f, 90.0f, 89.3f, 2.1f, -3.5f, 4.9f, -2312.1f};
// Size the input to SIMD_LEN * 4 elements before handing it to the load.
s0.resize(SIMD_LEN * 4);
// NOTE(review): presumably overwrites `ret` so stale contents cannot make the
// comparison pass by accident; force_memset_ret is defined elsewhere, confirm.
force_memset_ret((void*)&ret, GI_SIMD_LEN_BYTE * 4);
ret = GiLoadUzipFloat32V4(s0.data());
// Build the expected output in the order the four result vectors are laid out
// in memory: for each i, the elements at stride 4 starting from offset i.
std::vector<float> naive;
for (size_t i = 0; i < 4; i++) {
naive.push_back(s0[0 + i]);
naive.push_back(s0[4 + i]);
naive.push_back(s0[8 + i]);
naive.push_back(s0[12 + i]);
}
// Compare `ret`, viewed as a flat float array, against the reference; the third
// argument is presumably the number of floats to compare — confirm in assert_eq.
assert_eq((float*)&ret, naive, SIMD_LEN * 4);
} }
TEST_F(FALLBACK, GiStoreZipFloat32V3) { TEST_F(FALLBACK, GiStoreZipFloat32V3) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册