Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
31827d8d
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
31827d8d
编写于
1月 12, 2015
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fixed typo
上级
bc394e75
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
55 addition
and
55 deletion
+55
-55
modules/core/include/opencv2/core/cvdef.h
modules/core/include/opencv2/core/cvdef.h
+1
-1
modules/core/include/opencv2/core/sse_utils.hpp
modules/core/include/opencv2/core/sse_utils.hpp
+24
-24
modules/imgproc/src/color.cpp
modules/imgproc/src/color.cpp
+30
-30
未找到文件。
modules/core/include/opencv2/core/cvdef.h
浏览文件 @
31827d8d
...
...
@@ -138,7 +138,7 @@
#if defined __SSE2__ || defined _M_X64 || (defined _M_IX86_FP && _M_IX86_FP >= 2)
# include <emmintrin.h>
# define CV_MMX
# define CV_MMX
1
# define CV_SSE 1
# define CV_SSE2 1
# if defined __SSE3__ || (defined _MSC_VER && _MSC_VER >= 1500)
...
...
modules/core/include/opencv2/core/sse_utils.hpp
浏览文件 @
31827d8d
...
...
@@ -48,8 +48,8 @@
#if CV_SSE2
inline
void
_mm_deinterl
iv
_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
inline
void
_mm_deinterl
eave
_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
{
__m128i
layer1_chunk0
=
_mm_unpacklo_epi8
(
v_r0
,
v_g1
);
__m128i
layer1_chunk1
=
_mm_unpackhi_epi8
(
v_r0
,
v_g1
);
...
...
@@ -87,8 +87,8 @@ inline void _mm_deinterliv_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
v_b1
=
_mm_unpackhi_epi8
(
layer4_chunk2
,
layer4_chunk5
);
}
inline
void
_mm_deinterl
iv
_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
inline
void
_mm_deinterl
eave
_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
{
__m128i
layer1_chunk0
=
_mm_unpacklo_epi8
(
v_r0
,
v_b0
);
__m128i
layer1_chunk1
=
_mm_unpackhi_epi8
(
v_r0
,
v_b0
);
...
...
@@ -136,8 +136,8 @@ inline void _mm_deinterliv_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
v_a1
=
_mm_unpackhi_epi8
(
layer4_chunk3
,
layer4_chunk7
);
}
inline
void
_mm_interl
iv
e_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
inline
void
_mm_interl
eave
e_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
{
__m128i
v_mask
=
_mm_set1_epi16
(
0x00ff
);
...
...
@@ -177,8 +177,8 @@ inline void _mm_interlive_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
v_b1
=
_mm_packus_epi16
(
_mm_srli_epi16
(
layer1_chunk4
,
8
),
_mm_srli_epi16
(
layer1_chunk5
,
8
));
}
inline
void
_mm_interl
iv
e_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
inline
void
_mm_interl
eave
e_epi8
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
{
__m128i
v_mask
=
_mm_set1_epi16
(
0x00ff
);
...
...
@@ -228,8 +228,8 @@ inline void _mm_interlive_epi8(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, _
v_a1
=
_mm_packus_epi16
(
_mm_srli_epi16
(
layer1_chunk6
,
8
),
_mm_srli_epi16
(
layer1_chunk7
,
8
));
}
inline
void
_mm_deinterl
iv
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
inline
void
_mm_deinterl
eave
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
{
__m128i
layer1_chunk0
=
_mm_unpacklo_epi16
(
v_r0
,
v_g1
);
__m128i
layer1_chunk1
=
_mm_unpackhi_epi16
(
v_r0
,
v_g1
);
...
...
@@ -260,8 +260,8 @@ inline void _mm_deinterliv_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
v_b1
=
_mm_unpackhi_epi16
(
layer3_chunk2
,
layer3_chunk5
);
}
inline
void
_mm_deinterl
iv
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
inline
void
_mm_deinterl
eave
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
{
__m128i
layer1_chunk0
=
_mm_unpacklo_epi16
(
v_r0
,
v_b0
);
__m128i
layer1_chunk1
=
_mm_unpackhi_epi16
(
v_r0
,
v_b0
);
...
...
@@ -300,8 +300,8 @@ inline void _mm_deinterliv_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
v_a1
=
_mm_unpackhi_epi16
(
layer3_chunk3
,
layer3_chunk7
);
}
inline
void
_mm_interl
iv
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
inline
void
_mm_interl
eave
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
)
{
__m128i
v_mask
=
_mm_set1_epi32
(
0x0000ffff
);
...
...
@@ -334,8 +334,8 @@ inline void _mm_interliv_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0,
v_b1
=
_mm_packus_epi32
(
_mm_srli_epi32
(
layer1_chunk4
,
16
),
_mm_srli_epi32
(
layer1_chunk5
,
16
));
}
inline
void
_mm_interl
iv
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
inline
void
_mm_interl
eave
_epi16
(
__m128i
&
v_r0
,
__m128i
&
v_r1
,
__m128i
&
v_g0
,
__m128i
&
v_g1
,
__m128i
&
v_b0
,
__m128i
&
v_b1
,
__m128i
&
v_a0
,
__m128i
&
v_a1
)
{
__m128i
v_mask
=
_mm_set1_epi32
(
0x0000ffff
);
...
...
@@ -376,8 +376,8 @@ inline void _mm_interliv_epi16(__m128i & v_r0, __m128i & v_r1, __m128i & v_g0, _
v_a1
=
_mm_packus_epi32
(
_mm_srli_epi32
(
layer1_chunk6
,
16
),
_mm_srli_epi32
(
layer1_chunk7
,
16
));
}
inline
void
_mm_deinterl
iv
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
)
inline
void
_mm_deinterl
eave
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
)
{
__m128
layer1_chunk0
=
_mm_unpacklo_ps
(
v_r0
,
v_g1
);
__m128
layer1_chunk1
=
_mm_unpackhi_ps
(
v_r0
,
v_g1
);
...
...
@@ -401,8 +401,8 @@ inline void _mm_deinterliv_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
v_b1
=
_mm_unpackhi_ps
(
layer2_chunk2
,
layer2_chunk5
);
}
inline
void
_mm_deinterl
iv
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
,
__m128
&
v_a0
,
__m128
&
v_a1
)
inline
void
_mm_deinterl
eave
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
,
__m128
&
v_a0
,
__m128
&
v_a1
)
{
__m128
layer1_chunk0
=
_mm_unpacklo_ps
(
v_r0
,
v_b0
);
__m128
layer1_chunk1
=
_mm_unpackhi_ps
(
v_r0
,
v_b0
);
...
...
@@ -432,8 +432,8 @@ inline void _mm_deinterliv_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0, __m12
v_a1
=
_mm_unpackhi_ps
(
layer2_chunk3
,
layer2_chunk7
);
}
inline
void
_mm_interl
iv
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
)
inline
void
_mm_interl
eave
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
)
{
const
int
mask_lo
=
_MM_SHUFFLE
(
2
,
0
,
2
,
0
),
mask_hi
=
_MM_SHUFFLE
(
3
,
1
,
3
,
1
);
...
...
@@ -459,8 +459,8 @@ inline void _mm_interliv_ps(__m128 & v_r0, __m128 & v_r1, __m128 & v_g0,
v_b1
=
_mm_shuffle_ps
(
layer1_chunk4
,
layer1_chunk5
,
mask_hi
);
}
inline
void
_mm_interl
iv
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
,
__m128
&
v_a0
,
__m128
&
v_a1
)
inline
void
_mm_interl
eave
_ps
(
__m128
&
v_r0
,
__m128
&
v_r1
,
__m128
&
v_g0
,
__m128
&
v_g1
,
__m128
&
v_b0
,
__m128
&
v_b1
,
__m128
&
v_a0
,
__m128
&
v_a1
)
{
const
int
mask_lo
=
_MM_SHUFFLE
(
2
,
0
,
2
,
0
),
mask_hi
=
_MM_SHUFFLE
(
3
,
1
,
3
,
1
);
...
...
modules/imgproc/src/color.cpp
浏览文件 @
31827d8d
...
...
@@ -1504,7 +1504,7 @@ struct RGB2Gray<ushort>
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
32
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
40
));
_mm_deinterl
iv
_epi16
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi16
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
__m128i
v_gray0
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -1531,7 +1531,7 @@ struct RGB2Gray<ushort>
__m128i
v_a0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
48
));
__m128i
v_a1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
56
));
_mm_deinterl
iv
_epi16
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
,
v_a0
,
v_a1
);
_mm_deinterl
eave
_epi16
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
,
v_a0
,
v_a1
);
__m128i
v_gray0
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -1596,7 +1596,7 @@ struct RGB2Gray<float>
__m128
v_b0
=
_mm_loadu_ps
(
src
+
16
);
__m128
v_b1
=
_mm_loadu_ps
(
src
+
20
);
_mm_deinterl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
__m128
v_gray0
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -1623,7 +1623,7 @@ struct RGB2Gray<float>
__m128
v_a0
=
_mm_loadu_ps
(
src
+
24
);
__m128
v_a1
=
_mm_loadu_ps
(
src
+
28
);
_mm_deinterl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
,
v_a0
,
v_a1
);
_mm_deinterl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
,
v_a0
,
v_a1
);
__m128
v_gray0
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -1821,7 +1821,7 @@ struct RGB2YCrCb_f<float>
__m128
v_b0
=
_mm_loadu_ps
(
src
+
16
);
__m128
v_b1
=
_mm_loadu_ps
(
src
+
20
);
_mm_deinterl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
__m128
v_y0
,
v_cr0
,
v_cb0
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -1831,7 +1831,7 @@ struct RGB2YCrCb_f<float>
process
(
v_r1
,
v_g1
,
v_b1
,
v_y1
,
v_cr1
,
v_cb1
);
_mm_interl
iv
_ps
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
_mm_interl
eave
_ps
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
_mm_storeu_ps
(
dst
+
i
,
v_y0
);
_mm_storeu_ps
(
dst
+
i
+
4
,
v_y1
);
...
...
@@ -2186,7 +2186,7 @@ struct RGB2YCrCb_i<uchar>
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
64
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
80
));
_mm_deinterl
iv
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
__m128i
v_y0
=
v_zero
,
v_cr0
=
v_zero
,
v_cb0
=
v_zero
;
process
(
_mm_unpacklo_epi8
(
v_r0
,
v_zero
),
...
...
@@ -2218,7 +2218,7 @@ struct RGB2YCrCb_i<uchar>
__m128i
v_cr_1
=
_mm_packus_epi16
(
v_cr0
,
v_cr1
);
__m128i
v_cb_1
=
_mm_packus_epi16
(
v_cb0
,
v_cb1
);
_mm_interl
iv
e_epi8
(
v_y_0
,
v_y_1
,
v_cr_0
,
v_cr_1
,
v_cb_0
,
v_cb_1
);
_mm_interl
eave
e_epi8
(
v_y_0
,
v_y_1
,
v_cr_0
,
v_cr_1
,
v_cb_0
,
v_cb_1
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
i
),
v_y_0
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
i
+
16
),
v_y_1
);
...
...
@@ -2328,7 +2328,7 @@ struct RGB2YCrCb_i<ushort>
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
32
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
40
));
_mm_deinterl
iv
_epi16
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi16
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
__m128i
v_y0
=
v_zero
,
v_cr0
=
v_zero
,
v_cb0
=
v_zero
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -2338,7 +2338,7 @@ struct RGB2YCrCb_i<ushort>
process
(
v_r1
,
v_g1
,
v_b1
,
v_y1
,
v_cr1
,
v_cb1
);
_mm_interl
iv
_epi16
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
_mm_interl
eave
_epi16
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
i
),
v_y0
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
i
+
8
),
v_y1
);
...
...
@@ -2536,7 +2536,7 @@ struct YCrCb2RGB_f<float>
__m128
v_cb0
=
_mm_loadu_ps
(
src
+
i
+
16
);
__m128
v_cb1
=
_mm_loadu_ps
(
src
+
i
+
20
);
_mm_deinterl
iv
_ps
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
_mm_deinterl
eave
_ps
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
__m128
v_r0
,
v_g0
,
v_b0
;
process
(
v_y0
,
v_cr0
,
v_cb0
,
...
...
@@ -2546,7 +2546,7 @@ struct YCrCb2RGB_f<float>
process
(
v_y1
,
v_cr1
,
v_cb1
,
v_r1
,
v_g1
,
v_b1
);
_mm_interl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_interl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_storeu_ps
(
dst
,
v_r0
);
_mm_storeu_ps
(
dst
+
4
,
v_r1
);
...
...
@@ -2949,7 +2949,7 @@ struct YCrCb2RGB_i<uchar>
__m128i
v_cb0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
+
64
));
__m128i
v_cb1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
+
80
));
_mm_deinterl
iv
_epi8
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
_mm_deinterl
eave
_epi8
(
v_y0
,
v_y1
,
v_cr0
,
v_cr1
,
v_cb0
,
v_cb1
);
__m128i
v_r_0
=
v_zero
,
v_g_0
=
v_zero
,
v_b_0
=
v_zero
;
process
(
_mm_unpacklo_epi8
(
v_y0
,
v_zero
),
...
...
@@ -2987,7 +2987,7 @@ struct YCrCb2RGB_i<uchar>
std
::
swap
(
v_r1
,
v_b1
);
}
_mm_interl
iv
e_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_interl
eave
e_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
v_r0
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
16
),
v_r1
);
...
...
@@ -3210,7 +3210,7 @@ struct RGB2XYZ_f<float>
__m128
v_b0
=
_mm_loadu_ps
(
src
+
16
);
__m128
v_b1
=
_mm_loadu_ps
(
src
+
20
);
_mm_deinterl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
__m128
v_x0
,
v_y0
,
v_z0
;
process
(
v_r0
,
v_g0
,
v_b0
,
...
...
@@ -3220,7 +3220,7 @@ struct RGB2XYZ_f<float>
process
(
v_r1
,
v_g1
,
v_b1
,
v_x1
,
v_y1
,
v_z1
);
_mm_interl
iv
_ps
(
v_x0
,
v_x1
,
v_y0
,
v_y1
,
v_z0
,
v_z1
);
_mm_interl
eave
_ps
(
v_x0
,
v_x1
,
v_y0
,
v_y1
,
v_z0
,
v_z1
);
_mm_storeu_ps
(
dst
+
i
,
v_x0
);
_mm_storeu_ps
(
dst
+
i
+
4
,
v_x1
);
...
...
@@ -3636,7 +3636,7 @@ struct XYZ2RGB_f<float>
__m128
v_z0
=
_mm_loadu_ps
(
src
+
i
+
16
);
__m128
v_z1
=
_mm_loadu_ps
(
src
+
i
+
20
);
_mm_deinterl
iv
_ps
(
v_x0
,
v_x1
,
v_y0
,
v_y1
,
v_z0
,
v_z1
);
_mm_deinterl
eave
_ps
(
v_x0
,
v_x1
,
v_y0
,
v_y1
,
v_z0
,
v_z1
);
__m128
v_r0
,
v_g0
,
v_b0
;
process
(
v_x0
,
v_y0
,
v_z0
,
...
...
@@ -3646,7 +3646,7 @@ struct XYZ2RGB_f<float>
process
(
v_x1
,
v_y1
,
v_z1
,
v_r1
,
v_g1
,
v_b1
);
_mm_interl
iv
_ps
(
v_b0
,
v_b1
,
v_g0
,
v_g1
,
v_r0
,
v_r1
);
_mm_interl
eave
_ps
(
v_b0
,
v_b1
,
v_g0
,
v_g1
,
v_r0
,
v_r1
);
_mm_storeu_ps
(
dst
,
v_b0
);
_mm_storeu_ps
(
dst
+
4
,
v_b1
);
...
...
@@ -4216,7 +4216,7 @@ struct HSV2RGB_b
v_g1
=
_mm_mul_ps
(
v_g1
,
v_scale_inv
);
v_b1
=
_mm_mul_ps
(
v_b1
,
v_scale_inv
);
_mm_interl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_interl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_store_ps
(
buf
,
v_r0
);
_mm_store_ps
(
buf
+
4
,
v_r1
);
...
...
@@ -4267,7 +4267,7 @@ struct HSV2RGB_b
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
64
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
80
));
_mm_deinterl
iv
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
process
(
_mm_unpacklo_epi8
(
v_r0
,
v_zero
),
_mm_unpacklo_epi8
(
v_g0
,
v_zero
),
...
...
@@ -4461,7 +4461,7 @@ struct RGB2HLS_b
__m128
v_s0f
=
_mm_load_ps
(
buf
+
16
);
__m128
v_s1f
=
_mm_load_ps
(
buf
+
20
);
_mm_deinterl
iv
_ps
(
v_h0f
,
v_h1f
,
v_l0f
,
v_l1f
,
v_s0f
,
v_s1f
);
_mm_deinterl
eave
_ps
(
v_h0f
,
v_h1f
,
v_l0f
,
v_l1f
,
v_s0f
,
v_s1f
);
v_l0f
=
_mm_mul_ps
(
v_l0f
,
v_scale
);
v_l1f
=
_mm_mul_ps
(
v_l1f
,
v_scale
);
...
...
@@ -4584,7 +4584,7 @@ struct RGB2HLS_b
__m128i
v_l1
=
_mm_packus_epi16
(
v_l_0
,
v_l_1
);
__m128i
v_s1
=
_mm_packus_epi16
(
v_s_0
,
v_s_1
);
_mm_interl
iv
e_epi8
(
v_h0
,
v_h1
,
v_l0
,
v_l1
,
v_s0
,
v_s1
);
_mm_interl
eave
e_epi8
(
v_h0
,
v_h1
,
v_l0
,
v_l1
,
v_s0
,
v_s1
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
j
),
v_h0
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
j
+
16
),
v_h1
);
...
...
@@ -4716,7 +4716,7 @@ struct HLS2RGB_b
v_g1
=
_mm_mul_ps
(
v_g1
,
v_scale_inv
);
v_b1
=
_mm_mul_ps
(
v_b1
,
v_scale_inv
);
_mm_interl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_interl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_store_ps
(
buf
,
v_r0
);
_mm_store_ps
(
buf
+
4
,
v_r1
);
...
...
@@ -4767,7 +4767,7 @@ struct HLS2RGB_b
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
64
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
80
));
_mm_deinterl
iv
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
process
(
_mm_unpacklo_epi8
(
v_r0
,
v_zero
),
_mm_unpacklo_epi8
(
v_g0
,
v_zero
),
...
...
@@ -5215,7 +5215,7 @@ struct Lab2RGB_b
v_b0
=
_mm_sub_ps
(
v_b0
,
v_128
);
v_b1
=
_mm_sub_ps
(
v_b1
,
v_128
);
_mm_interl
iv
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_interl
eave
_ps
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_store_ps
(
buf
,
v_r0
);
_mm_store_ps
(
buf
+
4
,
v_r1
);
...
...
@@ -5266,7 +5266,7 @@ struct Lab2RGB_b
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
64
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
80
));
_mm_deinterl
iv
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
process
(
_mm_unpacklo_epi8
(
v_r0
,
v_zero
),
_mm_unpacklo_epi8
(
v_g0
,
v_zero
),
...
...
@@ -5568,7 +5568,7 @@ struct RGB2Luv_b
__m128
v_v0f
=
_mm_load_ps
(
buf
+
16
);
__m128
v_v1f
=
_mm_load_ps
(
buf
+
20
);
_mm_deinterl
iv
_ps
(
v_l0f
,
v_l1f
,
v_u0f
,
v_u1f
,
v_v0f
,
v_v1f
);
_mm_deinterl
eave
_ps
(
v_l0f
,
v_l1f
,
v_u0f
,
v_u1f
,
v_v0f
,
v_v1f
);
v_l0f
=
_mm_mul_ps
(
v_l0f
,
v_scale
);
v_l1f
=
_mm_mul_ps
(
v_l1f
,
v_scale
);
...
...
@@ -5694,7 +5694,7 @@ struct RGB2Luv_b
__m128i
v_u1
=
_mm_packus_epi16
(
v_u_0
,
v_u_1
);
__m128i
v_v1
=
_mm_packus_epi16
(
v_v_0
,
v_v_1
);
_mm_interl
iv
e_epi8
(
v_l0
,
v_l1
,
v_u0
,
v_u1
,
v_v0
,
v_v1
);
_mm_interl
eave
e_epi8
(
v_l0
,
v_l1
,
v_u0
,
v_u1
,
v_v0
,
v_v1
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
j
),
v_l0
);
_mm_storeu_si128
((
__m128i
*
)(
dst
+
j
+
16
),
v_l1
);
...
...
@@ -5775,7 +5775,7 @@ struct Luv2RGB_b
v_v0
=
_mm_sub_ps
(
_mm_mul_ps
(
v_v0
,
v_coeff2
),
v_140
);
v_v1
=
_mm_sub_ps
(
_mm_mul_ps
(
v_v1
,
v_coeff2
),
v_140
);
_mm_interl
iv
_ps
(
v_l0
,
v_l1
,
v_u0
,
v_u1
,
v_v0
,
v_v1
);
_mm_interl
eave
_ps
(
v_l0
,
v_l1
,
v_u0
,
v_u1
,
v_v0
,
v_v1
);
_mm_store_ps
(
buf
,
v_l0
);
_mm_store_ps
(
buf
+
4
,
v_l1
);
...
...
@@ -5826,7 +5826,7 @@ struct Luv2RGB_b
__m128i
v_b0
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
64
));
__m128i
v_b1
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
j
+
80
));
_mm_deinterl
iv
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
_mm_deinterl
eave
_epi8
(
v_r0
,
v_r1
,
v_g0
,
v_g1
,
v_b0
,
v_b1
);
process
(
_mm_unpacklo_epi8
(
v_r0
,
v_zero
),
_mm_unpacklo_epi8
(
v_g0
,
v_zero
),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录