Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
1466621f
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1466621f
编写于
10月 27, 2014
作者:
A
Alexander Karsakov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Added loading 4 pixels in line instead of 2 to RGB[A] -> YUV(420) kernel
上级
60367907
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
71 additions
and
8 deletions
+71
-8
modules/imgproc/src/color.cpp
modules/imgproc/src/color.cpp
+14
-3
modules/imgproc/src/opencl/cvtcolor.cl
modules/imgproc/src/opencl/cvtcolor.cl
+57
-5
未找到文件。
modules/imgproc/src/color.cpp
浏览文件 @
1466621f
...
...
@@ -4857,6 +4857,7 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIy
=
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
)
?
4
:
1
;
int
pxPerWIx
=
1
;
size_t
globalsize
[]
=
{
src
.
cols
,
(
src
.
rows
+
pxPerWIy
-
1
)
/
pxPerWIy
};
cv
::
String
opts
=
format
(
"-D depth=%d -D scn=%d -D PIX_PER_WI_Y=%d "
,
...
...
@@ -5025,10 +5026,20 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert
(
sz
.
width
%
2
==
0
&&
sz
.
height
%
2
==
0
);
dstSz
=
Size
(
sz
.
width
,
sz
.
height
/
2
*
3
);
globalsize
[
0
]
=
dstSz
.
width
/
2
;
globalsize
[
1
]
=
(
dstSz
.
height
/
3
+
pxPerWIy
-
1
)
/
pxPerWIy
;
_dst
.
create
(
dstSz
,
CV_MAKETYPE
(
depth
,
dcn
));
dst
=
_dst
.
getUMat
();
if
(
dev
.
isIntel
()
&&
src
.
cols
%
4
==
0
&&
src
.
step
%
4
==
0
&&
src
.
offset
%
4
==
0
&&
dst
.
step
%
4
==
0
&&
dst
.
offset
%
4
==
0
)
{
pxPerWIx
=
2
;
}
globalsize
[
0
]
=
dstSz
.
width
/
(
2
*
pxPerWIx
);
globalsize
[
1
]
=
(
dstSz
.
height
/
3
+
pxPerWIy
-
1
)
/
pxPerWIy
;
k
.
create
(
"RGB2YUV_YV12_IYUV"
,
ocl
::
imgproc
::
cvtcolor_oclsrc
,
opts
+
format
(
"-D dcn=%d -D bidx=%d -D uidx=%d"
,
dcn
,
bidx
,
uidx
));
break
;
opts
+
format
(
"-D dcn=%d -D bidx=%d -D uidx=%d -D PIX_PER_WI_X=%d"
,
dcn
,
bidx
,
uidx
,
pxPerWIx
));
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
::
WriteOnly
(
dst
));
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
case
COLOR_YUV2RGB_UYVY
:
case
COLOR_YUV2BGR_UYVY
:
case
COLOR_YUV2RGBA_UYVY
:
case
COLOR_YUV2BGRA_UYVY
:
case
COLOR_YUV2RGB_YUY2
:
case
COLOR_YUV2BGR_YUY2
:
case
COLOR_YUV2RGB_YVYU
:
case
COLOR_YUV2BGR_YVYU
:
...
...
modules/imgproc/src/opencl/cvtcolor.cl
浏览文件 @
1466621f
...
...
@@ -119,6 +119,10 @@ enum
#
define
yidx
0
#
endif
#
ifndef
PIX_PER_WI_X
#
define
PIX_PER_WI_X
1
#
endif
#
define
__CAT
(
x,
y
)
x##y
#
define
CAT
(
x,
y
)
__CAT
(
x,
y
)
...
...
@@ -454,7 +458,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
__global uchar* dstptr, int dst_step, int dst_offset,
int rows, int cols)
{
int x = get_global_id(0);
int x = get_global_id(0)
* PIX_PER_WI_X
;
int y = get_global_id(1) * PIX_PER_WI_Y;
if (x < cols/2)
...
...
@@ -463,6 +467,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
int ydst_index = mad24(y << 1, dst_step, (x << 1) + dst_offset);
int y_rows = rows / 3 * 2;
int vsteps[2] = { cols >> 1, dst_step - (cols >> 1)};
__constant float* coeffs = c_RGB2YUVCoeffs_420;
#pragma unroll
for (int cy = 0; cy < PIX_PER_WI_Y; ++cy)
...
...
@@ -477,12 +482,61 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
__global uchar* udst = dstptr + mad24(y_rows + (y>>1), dst_step, dst_offset + (y%2)*(cols >> 1) + x);
__global uchar* vdst = udst + mad24(y_rows >> 2, dst_step, y_rows % 4 ? vsteps[y%2] : 0);
#if PIX_PER_WI_X == 2
int s11 = *((__global const int*) src1);
int s12 = *((__global const int*) src1 + 1);
int s13 = *((__global const int*) src1 + 2);
#if scn == 4
int s14 = *((__global const int*) src1 + 3);
#endif
int s21 = *((__global const int*) src2);
int s22 = *((__global const int*) src2 + 1);
int s23 = *((__global const int*) src2 + 2);
#if scn == 4
int s24 = *((__global const int*) src2 + 3);
#endif
float src_pix1[scn * 4], src_pix2[scn * 4];
*((float4*) src_pix1) = convert_float4(as_uchar4(s11));
*((float4*) src_pix1 + 1) = convert_float4(as_uchar4(s12));
*((float4*) src_pix1 + 2) = convert_float4(as_uchar4(s13));
#if scn == 4
*((float4*) src_pix1 + 3) = convert_float4(as_uchar4(s14));
#endif
*((float4*) src_pix2) = convert_float4(as_uchar4(s21));
*((float4*) src_pix2 + 1) = convert_float4(as_uchar4(s22));
*((float4*) src_pix2 + 2) = convert_float4(as_uchar4(s23));
#if scn == 4
*((float4*) src_pix2 + 3) = convert_float4(as_uchar4(s24));
#endif
uchar4 y1, y2;
y1.x = convert_uchar_sat(fma(coeffs[0], src_pix1[ 2-bidx], fma(coeffs[1], src_pix1[ 1], fma(coeffs[2], src_pix1[ bidx], 16.5f))));
y1.y = convert_uchar_sat(fma(coeffs[0], src_pix1[ scn+2-bidx], fma(coeffs[1], src_pix1[ scn+1], fma(coeffs[2], src_pix1[ scn+bidx], 16.5f))));
y1.z = convert_uchar_sat(fma(coeffs[0], src_pix1[2*scn+2-bidx], fma(coeffs[1], src_pix1[2*scn+1], fma(coeffs[2], src_pix1[2*scn+bidx], 16.5f))));
y1.w = convert_uchar_sat(fma(coeffs[0], src_pix1[3*scn+2-bidx], fma(coeffs[1], src_pix1[3*scn+1], fma(coeffs[2], src_pix1[3*scn+bidx], 16.5f))));
y2.x = convert_uchar_sat(fma(coeffs[0], src_pix2[ 2-bidx], fma(coeffs[1], src_pix2[ 1], fma(coeffs[2], src_pix2[ bidx], 16.5f))));
y2.y = convert_uchar_sat(fma(coeffs[0], src_pix2[ scn+2-bidx], fma(coeffs[1], src_pix2[ scn+1], fma(coeffs[2], src_pix2[ scn+bidx], 16.5f))));
y2.z = convert_uchar_sat(fma(coeffs[0], src_pix2[2*scn+2-bidx], fma(coeffs[1], src_pix2[2*scn+1], fma(coeffs[2], src_pix2[2*scn+bidx], 16.5f))));
y2.w = convert_uchar_sat(fma(coeffs[0], src_pix2[3*scn+2-bidx], fma(coeffs[1], src_pix2[3*scn+1], fma(coeffs[2], src_pix2[3*scn+bidx], 16.5f))));
*((__global int*) ydst1) = as_int(y1);
*((__global int*) ydst2) = as_int(y2);
float uv[4] = { fma(coeffs[3], src_pix1[ 2-bidx], fma(coeffs[4], src_pix1[ 1], fma(coeffs[5], src_pix1[ bidx], 128.5f))),
fma(coeffs[5], src_pix1[ 2-bidx], fma(coeffs[6], src_pix1[ 1], fma(coeffs[7], src_pix1[ bidx], 128.5f))),
fma(coeffs[3], src_pix1[2*scn+2-bidx], fma(coeffs[4], src_pix1[2*scn+1], fma(coeffs[5], src_pix1[2*scn+bidx], 128.5f))),
fma(coeffs[5], src_pix1[2*scn+2-bidx], fma(coeffs[6], src_pix1[2*scn+1], fma(coeffs[7], src_pix1[2*scn+bidx], 128.5f))) };
udst[0] = convert_uchar_sat(uv[uidx] );
vdst[0] = convert_uchar_sat(uv[1 - uidx]);
udst[1] = convert_uchar_sat(uv[2 + uidx]);
vdst[1] = convert_uchar_sat(uv[3 - uidx]);
#else
float4 src_pix1 = convert_float4(vload4(0, src1));
float4 src_pix2 = convert_float4(vload4(0, src1+scn));
float4 src_pix3 = convert_float4(vload4(0, src2));
float4 src_pix4 = convert_float4(vload4(0, src2+scn));
__constant float* coeffs = c_RGB2YUVCoeffs_420;
ydst1[0] = convert_uchar_sat(fma(coeffs[0], src_pix1.R_COMP, fma(coeffs[1], src_pix1.G_COMP, fma(coeffs[2], src_pix1.B_COMP, 16.5f))));
ydst1[1] = convert_uchar_sat(fma(coeffs[0], src_pix2.R_COMP, fma(coeffs[1], src_pix2.G_COMP, fma(coeffs[2], src_pix2.B_COMP, 16.5f))));
ydst2[0] = convert_uchar_sat(fma(coeffs[0], src_pix3.R_COMP, fma(coeffs[1], src_pix3.G_COMP, fma(coeffs[2], src_pix3.B_COMP, 16.5f))));
...
...
@@ -493,7 +547,7 @@ __kernel void RGB2YUV_YV12_IYUV(__global const uchar* srcptr, int src_step, int
udst[0] = convert_uchar_sat(uv[uidx] );
vdst[0] = convert_uchar_sat(uv[1-uidx]);
#endif
++y;
src_index += 2*src_step;
ydst_index += 2*dst_step;
...
...
@@ -522,7 +576,6 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
float U = ((float) src[uidx]) - HALF_MAX;
float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX;
__constant float* coeffs = c_YUV2RGBCoeffs_420;
float ruv = fma(coeffs[4], V, 0.5f);
float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f));
...
...
@@ -535,7 +588,6 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of
#if dcn == 4
dst[3] = 255;
#endif
float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0];
dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv);
dst[dcn + 1] = convert_uchar_sat(y01 + guv);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录