Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
0f716cdf
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
0f716cdf
编写于
8月 19, 2016
作者:
V
Vadim Pisarevsky
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #7108 from K-Shinotsuka:issue16
上级
93e5947f
3faaabd6
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
151 addition
and
0 deletion
+151
-0
modules/imgproc/src/color.cpp
modules/imgproc/src/color.cpp
+151
-0
未找到文件。
modules/imgproc/src/color.cpp
浏览文件 @
0f716cdf
...
...
@@ -2987,6 +2987,72 @@ struct YCrCb2RGB_i<uchar>
haveSIMD
=
checkHardwareSupport
(
CV_CPU_SSE2
);
}
#if CV_SSE4_1
    // Converts the packed bytes held in v_src[0] / v_src[1] in place.
    //
    //   v_src     - two registers, read as input and overwritten with the result.
    //               The callers visible in this file load 16 bytes into v_src[0]
    //               and 8 bytes into the low half of v_src[1].
    //   v_shuffle - v_shuffle[0] is the byte-selection mask applied to each
    //               source window before the multiply-add; v_shuffle[1] is the
    //               mask used to pick the bytes that are added back afterwards
    //               (named "y" in the original code, presumably the luma
    //               samples - confirm against the mask definition).
    //   v_coeffs  - three coefficient registers, applied cyclically
    //               (0, 1, 2, 0, 1, 2) to the six widened registers.
    //
    // Uses the members v_zero, v_delta, v_delta2 and the constant yuv_shift,
    // all of which are declared outside this block.
    void process(__m128i* v_src, __m128i* v_shuffle, __m128i* v_coeffs) const
    {
        const __m128i src_lo    = v_src[0];
        const __m128i src_hi    = v_src[1];
        const __m128i pair_mask = v_shuffle[0];
        const __m128i base_mask = v_shuffle[1];

        // Three source windows (byte offsets 0, 8 and 16 of the src_hi:src_lo
        // concatenation), each rearranged by the pair mask.
        __m128i window[3];
        window[0] = _mm_shuffle_epi8(src_lo, pair_mask);
        window[1] = _mm_shuffle_epi8(_mm_alignr_epi8(src_hi, src_lo, 8), pair_mask);
        window[2] = _mm_shuffle_epi8(src_hi, pair_mask);

        // Bytes selected by the second mask, widened to 16 bits. The last
        // group is taken from a window shifted by 15 bytes and then moved
        // down by one byte before widening.
        const __m128i base_first = _mm_shuffle_epi8(src_lo, base_mask);
        const __m128i base_last  = _mm_srli_si128(
            _mm_shuffle_epi8(_mm_alignr_epi8(src_hi, src_lo, 15), base_mask), 1);

        __m128i base[3];
        base[0] = _mm_unpacklo_epi8(base_first, v_zero);
        base[1] = _mm_unpackhi_epi8(base_first, v_zero);
        base[2] = _mm_unpacklo_epi8(base_last, v_zero);

        // Widen every window into two registers of 16-bit lanes.
        __m128i acc[6];
        for (int w = 0; w < 3; ++w)
        {
            acc[2 * w]     = _mm_unpacklo_epi8(window[w], v_zero);
            acc[2 * w + 1] = _mm_unpackhi_epi8(window[w], v_zero);
        }

        // Per register: subtract v_delta, multiply-add adjacent 16-bit lanes
        // against the matching coefficients, add v_delta2, then shift right
        // arithmetically by yuv_shift.
        for (int k = 0; k < 6; ++k)
        {
            __m128i t = _mm_sub_epi16(acc[k], v_delta);
            t = _mm_madd_epi16(t, v_coeffs[k % 3]);
            t = _mm_add_epi32(t, v_delta2);
            acc[k] = _mm_srai_epi32(t, yuv_shift);
        }

        // Narrow each register pair back to 16 bits (signed saturation) and
        // add the widened base bytes.
        __m128i sum16[3];
        for (int w = 0; w < 3; ++w)
            sum16[w] = _mm_add_epi16(_mm_packs_epi32(acc[2 * w], acc[2 * w + 1]), base[w]);

        // Final narrowing to bytes with unsigned saturation. The second output
        // register carries the same 8 bytes in both of its halves.
        v_src[0] = _mm_packus_epi16(sum16[0], sum16[1]);
        v_src[1] = _mm_packus_epi16(sum16[2], sum16[2]);
    }
#endif // CV_SSE4_1
// 16s x 8
void
process
(
__m128i
v_y
,
__m128i
v_cr
,
__m128i
v_cb
,
__m128i
&
v_r
,
__m128i
&
v_g
,
__m128i
&
v_b
)
const
...
...
@@ -3040,6 +3106,91 @@ struct YCrCb2RGB_i<uchar>
int
C0
=
coeffs
[
0
],
C1
=
coeffs
[
1
],
C2
=
coeffs
[
2
],
C3
=
coeffs
[
3
];
n
*=
3
;
#if CV_SSE4_1
if
(
checkHardwareSupport
(
CV_CPU_SSE4_1
)
&&
useSSE
)
{
__m128i
v_shuffle
[
2
];
v_shuffle
[
0
]
=
_mm_set_epi8
(
0x8
,
0x7
,
0x7
,
0x6
,
0x6
,
0x5
,
0x5
,
0x4
,
0x4
,
0x3
,
0x3
,
0x2
,
0x2
,
0x1
,
0x1
,
0x0
);
v_shuffle
[
1
]
=
_mm_set_epi8
(
0xf
,
0xc
,
0xc
,
0xc
,
0x9
,
0x9
,
0x9
,
0x6
,
0x6
,
0x6
,
0x3
,
0x3
,
0x3
,
0x0
,
0x0
,
0x0
);
__m128i
v_coeffs
[
3
];
v_coeffs
[
0
]
=
_mm_set_epi16
((
short
)
C0
,
0
,
0
,
(
short
)
C3
,
(
short
)
C2
,
(
short
)
C1
,
(
short
)
C0
,
0
);
v_coeffs
[
1
]
=
_mm_set_epi16
((
short
)
C2
,
(
short
)
C1
,
(
short
)
C0
,
0
,
0
,
(
short
)
C3
,
(
short
)
C2
,
(
short
)
C1
);
v_coeffs
[
2
]
=
_mm_set_epi16
(
0
,
(
short
)
C3
,
(
short
)
C2
,
(
short
)
C1
,
(
short
)
C0
,
0
,
0
,
(
short
)
C3
);
if
(
dcn
==
3
)
{
if
(
bidx
==
0
)
{
__m128i
v_shuffle_dst
=
_mm_set_epi8
(
0xf
,
0xc
,
0xd
,
0xe
,
0x9
,
0xa
,
0xb
,
0x6
,
0x7
,
0x8
,
0x3
,
0x4
,
0x5
,
0x0
,
0x1
,
0x2
);
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
__m128i
v_dst
[
2
];
v_dst
[
0
]
=
_mm_shuffle_epi8
(
v_src
[
0
],
v_shuffle_dst
);
v_dst
[
1
]
=
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
v_src
[
1
],
v_src
[
0
],
15
),
v_shuffle_dst
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
_mm_alignr_epi8
(
v_dst
[
1
],
_mm_slli_si128
(
v_dst
[
0
],
1
),
1
));
_mm_storel_epi64
((
__m128i
*
)(
dst
+
16
),
_mm_srli_si128
(
v_dst
[
1
],
1
));
}
}
else
{
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
v_src
[
0
]);
_mm_storel_epi64
((
__m128i
*
)(
dst
+
16
),
v_src
[
1
]);
}
}
}
else
{
if
(
bidx
==
0
)
{
__m128i
v_shuffle_dst
=
_mm_set_epi8
(
0x0
,
0xa
,
0xb
,
0xc
,
0x0
,
0x7
,
0x8
,
0x9
,
0x0
,
0x4
,
0x5
,
0x6
,
0x0
,
0x1
,
0x2
,
0x3
);
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
v_src
[
0
],
v_alpha
,
15
),
v_shuffle_dst
));
_mm_storeu_si128
((
__m128i
*
)(
dst
+
16
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
_mm_alignr_epi8
(
v_src
[
1
],
v_src
[
0
],
12
),
v_alpha
,
15
),
v_shuffle_dst
));
}
}
else
{
__m128i
v_shuffle_dst
=
_mm_set_epi8
(
0x0
,
0xc
,
0xb
,
0xa
,
0x0
,
0x9
,
0x8
,
0x7
,
0x0
,
0x6
,
0x5
,
0x4
,
0x0
,
0x3
,
0x2
,
0x1
);
for
(
;
i
<=
n
-
24
;
i
+=
24
,
dst
+=
dcn
*
8
)
{
__m128i
v_src
[
2
];
v_src
[
0
]
=
_mm_loadu_si128
((
__m128i
const
*
)(
src
+
i
));
v_src
[
1
]
=
_mm_loadl_epi64
((
__m128i
const
*
)(
src
+
i
+
16
));
process
(
v_src
,
v_shuffle
,
v_coeffs
);
_mm_storeu_si128
((
__m128i
*
)(
dst
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
v_src
[
0
],
v_alpha
,
15
),
v_shuffle_dst
));
_mm_storeu_si128
((
__m128i
*
)(
dst
+
16
),
_mm_shuffle_epi8
(
_mm_alignr_epi8
(
_mm_alignr_epi8
(
v_src
[
1
],
v_src
[
0
],
12
),
v_alpha
,
15
),
v_shuffle_dst
));
}
}
}
}
else
#endif // CV_SSE4_1
if
(
haveSIMD
&&
useSSE
)
{
for
(
;
i
<=
n
-
96
;
i
+=
96
,
dst
+=
dcn
*
32
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录