提交 a811059b 编写于 作者: A Alexander Alekhin

Merge pull request #13336 from sergiud:core_sse_immediates_gcc-5.4.0

...@@ -1965,13 +1965,11 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4& ...@@ -1965,13 +1965,11 @@ inline void v_load_deinterleave(const unsigned* ptr, v_uint32x4& a, v_uint32x4&
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b) inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b)
{ {
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1);
__m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1 __m128 u0 = _mm_loadu_ps(ptr); // a0 b0 a1 b1
__m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3 __m128 u1 = _mm_loadu_ps((ptr + 4)); // a2 b2 a3 b3
a.val = _mm_shuffle_ps(u0, u1, mask_lo); // a0 a1 a2 a3 a.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(2, 0, 2, 0)); // a0 a1 a2 a3
b.val = _mm_shuffle_ps(u0, u1, mask_hi); // b0 b1 ab b3 b.val = _mm_shuffle_ps(u0, u1, _MM_SHUFFLE(3, 1, 3, 1)); // b0 b1 ab b3
} }
inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c) inline void v_load_deinterleave(const float* ptr, v_float32x4& a, v_float32x4& b, v_float32x4& c)
......
...@@ -567,7 +567,7 @@ inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m ...@@ -567,7 +567,7 @@ inline void _mm_deinterleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1) inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1)
{ {
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1); enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) };
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
__m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); __m128 layer2_chunk2 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
...@@ -588,7 +588,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12 ...@@ -588,7 +588,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
__m128 & v_g1, __m128 & v_b0, __m128 & v_b1) __m128 & v_g1, __m128 & v_b0, __m128 & v_b1)
{ {
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1); enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) };
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
__m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); __m128 layer2_chunk3 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
...@@ -615,7 +615,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, ...@@ -615,7 +615,7 @@ inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1, inline void _mm_interleave_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m128 & v_g1,
__m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1) __m128 & v_b0, __m128 & v_b1, __m128 & v_a0, __m128 & v_a1)
{ {
const int mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1); enum { mask_lo = _MM_SHUFFLE(2, 0, 2, 0), mask_hi = _MM_SHUFFLE(3, 1, 3, 1) };
__m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo); __m128 layer2_chunk0 = _mm_shuffle_ps(v_r0, v_r1, mask_lo);
__m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi); __m128 layer2_chunk4 = _mm_shuffle_ps(v_r0, v_r1, mask_hi);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册