Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
8c91a1af
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
8c91a1af
编写于
12月 19, 2013
作者:
A
Andrey Pavlenko
提交者:
OpenCV Buildbot
12月 19, 2013
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2005 from krodyush:pullreq/2.4-opt-131114-extractCovData
上级
22a3cf0f
de431609
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
417 addition
and
128 deletion
+417
-128
modules/ocl/src/imgproc.cpp
modules/ocl/src/imgproc.cpp
+103
-53
modules/ocl/src/opencl/imgproc_sobel3.cl
modules/ocl/src/opencl/imgproc_sobel3.cl
+314
-75
未找到文件。
modules/ocl/src/imgproc.cpp
浏览文件 @
8c91a1af
...
...
@@ -1033,67 +1033,117 @@ namespace cv
else
scale
=
1.
/
scale
;
if
(
ksize
>
0
)
const
int
sobel_lsz
=
16
;
if
((
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_32FC1
)
&&
(
ksize
==
3
||
ksize
==
5
||
ksize
==
7
||
ksize
==-
1
)
&&
src
.
wholerows
>
sobel_lsz
+
(
ksize
>>
1
)
&&
src
.
wholecols
>
sobel_lsz
+
(
ksize
>>
1
))
{
Context
*
clCxt
=
Context
::
getContext
();
if
(
clCxt
->
supportsFeature
(
FEATURE_CL_INTEL_DEVICE
)
&&
src
.
type
()
==
CV_8UC1
&&
src
.
cols
%
8
==
0
&&
src
.
rows
%
8
==
0
&&
ksize
==
3
&&
(
borderType
==
cv
::
BORDER_REFLECT
||
borderType
==
cv
::
BORDER_REPLICATE
||
borderType
==
cv
::
BORDER_REFLECT101
||
borderType
==
cv
::
BORDER_WRAP
))
Dx
.
create
(
src
.
size
(),
CV_32FC1
);
Dy
.
create
(
src
.
size
(),
CV_32FC1
);
CV_Assert
(
Dx
.
rows
==
Dy
.
rows
&&
Dx
.
cols
==
Dy
.
cols
);
size_t
lt2
[
3
]
=
{
sobel_lsz
,
sobel_lsz
,
1
};
size_t
gt2
[
3
]
=
{
lt2
[
0
]
*
(
1
+
(
src
.
cols
-
1
)
/
lt2
[
0
]),
lt2
[
1
]
*
(
1
+
(
src
.
rows
-
1
)
/
lt2
[
1
]),
1
};
unsigned
int
src_pitch
=
src
.
step
;
unsigned
int
Dx_pitch
=
Dx
.
step
;
unsigned
int
Dy_pitch
=
Dy
.
step
;
int
src_offset_x
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_offset_y
=
src
.
offset
/
src
.
step
;
float
_scale
=
scale
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
src_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset_x
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset_y
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dx
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
offset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
Dx_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dy
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dy
.
offset
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
Dy_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
wholecols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
wholerows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
Dx
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
_scale
));
string
option
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d"
,(
int
)
lt2
[
0
],(
int
)
lt2
[
1
]);
switch
(
src
.
type
())
{
Dx
.
create
(
src
.
size
(),
CV_32FC1
);
Dy
.
create
(
src
.
size
(),
CV_32FC1
);
const
unsigned
int
block_x
=
8
;
const
unsigned
int
block_y
=
8
;
unsigned
int
src_pitch
=
src
.
step
;
unsigned
int
dst_pitch
=
Dx
.
cols
;
float
_scale
=
scale
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dx
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
Dy
.
data
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
src_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_uint
)
,
(
void
*
)
&
dst_pitch
));
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_float
)
,
(
void
*
)
&
_scale
));
size_t
gt2
[
3
]
=
{
src
.
cols
,
src
.
rows
,
1
},
lt2
[
3
]
=
{
block_x
,
block_y
,
1
};
string
option
=
"-D BLK_X=8 -D BLK_Y=8"
;
switch
(
borderType
)
{
case
cv
::
BORDER_REPLICATE
:
option
+=
" -D BORDER_REPLICATE"
;
break
;
case
cv
::
BORDER_REFLECT
:
option
+=
" -D BORDER_REFLECT"
;
break
;
case
cv
::
BORDER_REFLECT101
:
option
+=
" -D BORDER_REFLECT101"
;
break
;
case
cv
::
BORDER_WRAP
:
option
+=
" -D BORDER_WRAP"
;
break
;
}
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_sobel3
,
"sobel3"
,
gt2
,
lt2
,
args
,
-
1
,
-
1
,
option
.
c_str
()
);
case
CV_8UC1
:
option
+=
" -D SRCTYPE=uchar"
;
break
;
case
CV_32FC1
:
option
+=
" -D SRCTYPE=float"
;
break
;
}
else
switch
(
borderType
)
{
Sobel
(
src
,
Dx
,
CV_32F
,
1
,
0
,
ksize
,
scale
,
0
,
borderType
);
Sobel
(
src
,
Dy
,
CV_32F
,
0
,
1
,
ksize
,
scale
,
0
,
borderType
);
case
cv
::
BORDER_CONSTANT
:
option
+=
" -D BORDER_CONSTANT"
;
break
;
case
cv
::
BORDER_REPLICATE
:
option
+=
" -D BORDER_REPLICATE"
;
break
;
case
cv
::
BORDER_REFLECT
:
option
+=
" -D BORDER_REFLECT"
;
break
;
case
cv
::
BORDER_REFLECT101
:
option
+=
" -D BORDER_REFLECT_101"
;
break
;
case
cv
::
BORDER_WRAP
:
option
+=
" -D BORDER_WRAP"
;
break
;
default:
CV_Error
(
CV_StsBadFlag
,
"BORDER type is not supported!"
);
break
;
}
string
kernel_name
;
switch
(
ksize
)
{
case
-
1
:
option
+=
" -D SCHARR"
;
kernel_name
=
"sobel3"
;
break
;
case
3
:
kernel_name
=
"sobel3"
;
break
;
case
5
:
kernel_name
=
"sobel5"
;
break
;
case
7
:
kernel_name
=
"sobel7"
;
break
;
default:
CV_Error
(
CV_StsBadFlag
,
"Kernel size is not supported!"
);
break
;
}
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_sobel3
,
kernel_name
,
gt2
,
lt2
,
args
,
-
1
,
-
1
,
option
.
c_str
()
);
}
else
{
Scharr
(
src
,
Dx
,
CV_32F
,
1
,
0
,
scale
,
0
,
borderType
);
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
scale
,
0
,
borderType
);
if
(
ksize
>
0
)
{
Sobel
(
src
,
Dx
,
CV_32F
,
1
,
0
,
ksize
,
scale
,
0
,
borderType
);
Sobel
(
src
,
Dy
,
CV_32F
,
0
,
1
,
ksize
,
scale
,
0
,
borderType
);
}
else
{
Scharr
(
src
,
Dx
,
CV_32F
,
1
,
0
,
scale
,
0
,
borderType
);
Scharr
(
src
,
Dy
,
CV_32F
,
0
,
1
,
scale
,
0
,
borderType
);
}
}
CV_Assert
(
Dx
.
offset
==
0
&&
Dy
.
offset
==
0
);
}
...
...
modules/ocl/src/opencl/imgproc_sobel3.cl
浏览文件 @
8c91a1af
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro
for
border
type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
BORDER_REPLICATE
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#
ifdef
BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#
define
EXTRAPOLATE
(
x,
maxV
)
#
elif
defined
BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
max
(
min
((
x
)
,
(
maxV
)
-
1
)
,
0
)
; \
}
#
elif
defined
BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
(
(
x
)
+
(
maxV
)
)
%
(
maxV
)
; \
}
#
elif
defined
BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
min
(
mad24
((
maxV
)
-1
,
2
,
-
(
x
))
+1
,
max
((
x
)
,
-
(
x
)
-1
)
)
; \
}
#
elif
defined
BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
min
(
mad24
((
maxV
)
-1
,
2
,
-
(
x
))
,
max
((
x
)
,
-
(
x
))
)
; \
}
#
else
#
error
No
extrapolation
method
#
endif
#
ifdef
BORDER_REFLECT101
//BORDER_REFLECT101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#
define
SRC
(
_x,_y
)
convert_float
(((
global
SRCTYPE*
)(
Src+
(
_y
)
*SrcPitch
))
[_x]
)
#
ifdef
BORDER_WRAP
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#
ifdef
BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
(
_x
)
<0
| (_x) >= (r_edge) |
(
_y
)
<0
| (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
#define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
#define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
#define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
int srcX = x + srcOffsetX - (kernel_border); \
int srcY = y + srcOffsetY - (kernel_border); \
int xb = srcX; \
int yb = srcY; \
\
EXTRAPOLATE(xb, (width)); \
EXTRAPOLATE(yb, (height)); \
lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
\
if(lix < ((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
EXTRAPOLATE(xb,(width)); \
lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(liy< ((kernel_border)*2)) \
{ \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(yb, (height)); \
lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(xb,(width)); \
EXTRAPOLATE(yb,(height)); \
lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
}
__kernel void sobel3(
__global uchar* Src,
__global
float*
DstX,
__global
float*
DstY,
int
width,
int
height,
uint
srcStride,
uint
dstStride,
float
scale
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+2][BLK_X+2];
...
...
@@ -47,62 +99,249 @@ __kernel void sobel3(
int lix = get_local_id(0);
int liy = get_local_id(1);
int
gix
=
get_group_id
(
0
)
;
int
giy
=
get_group_id
(
1
)
;
int
id_x
=
get_global_id
(
0
)
;
int
id_y
=
get_global_id
(
1
)
;
lsmem[liy+1][lix+1]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x
]
)
;
int
id_y_h
=
ADDR_H
(
id_y-1,
0
,
height
)
;
int
id_y_b
=
ADDR_B
(
id_y+1,
height,id_y+1
)
;
int
id_x_l
=
ADDR_L
(
id_x-1,
0
,
width
)
;
int
id_x_r
=
ADDR_R
(
id_x+1,
width,id_x+1
)
;
if
(
liy==0
)
{
lsmem[0][lix+1]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x
]
)
;
if
(
lix==0
)
lsmem[0][0]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[0][BLK_X+1]=convert_float
(
Src[
id_y_h
*
srcStride
+
id_x_r
]
)
;
}
else
if
(
liy==BLK_Y-1
)
{
lsmem[BLK_Y+1][lix+1]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x
]
)
;
if
(
lix==0
)
lsmem[BLK_Y+1][0]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[BLK_Y+1][BLK_X+1]=convert_float
(
Src[
id_y_b
*
srcStride
+
id_x_r
]
)
;
}
if
(
lix==0
)
lsmem[liy+1][0]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x_l
]
)
;
else
if
(
lix==BLK_X-1
)
lsmem[liy+1][BLK_X+1]
=
convert_float
(
Src[
id_y
*
srcStride
+
id_x_r
]
)
;
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float u1 = lsmem[liy][lix];
float u2 = lsmem[liy][lix+1];
float u3 = lsmem[liy][lix+2];
float m1 = lsmem[liy+1][lix];
float
m2
=
lsmem[liy+1][lix+1]
;
float m3 = lsmem[liy+1][lix+2];
float b1 = lsmem[liy+2][lix];
float b2 = lsmem[liy+2][lix+1];
float b3 = lsmem[liy+2][lix+2];
//m2
*
scale
;//
float
dx
=
mad
(
2.0f,
m3
-
m1,
u3
-
u1
+
b3
-
b1
)
;
DstX[
id_y
*
dstStride
+
id_x
]
=
dx
*
scale
;
//calc and store dx and dy;//
#ifdef SCHARR
DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
#else
DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
#endif
}
__kernel void sobel5(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+4][BLK_X+4];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float t1 = lsmem[liy][lix];
float t2 = lsmem[liy][lix+1];
float t3 = lsmem[liy][lix+2];
float t4 = lsmem[liy][lix+3];
float t5 = lsmem[liy][lix+4];
float u1 = lsmem[liy+1][lix];
float u2 = lsmem[liy+1][lix+1];
float u3 = lsmem[liy+1][lix+2];
float u4 = lsmem[liy+1][lix+3];
float u5 = lsmem[liy+1][lix+4];
float m1 = lsmem[liy+2][lix];
float m2 = lsmem[liy+2][lix+1];
float m4 = lsmem[liy+2][lix+3];
float m5 = lsmem[liy+2][lix+4];
float l1 = lsmem[liy+3][lix];
float l2 = lsmem[liy+3][lix+1];
float l3 = lsmem[liy+3][lix+2];
float l4 = lsmem[liy+3][lix+3];
float l5 = lsmem[liy+3][lix+4];
float b1 = lsmem[liy+4][lix];
float b2 = lsmem[liy+4][lix+1];
float b3 = lsmem[liy+4][lix+2];
float b4 = lsmem[liy+4][lix+3];
float b5 = lsmem[liy+4][lix+4];
//calc and store dx and dy;//
DSTX(x,y) = scale *
mad(12.0f, m4 - m2,
mad(6.0f, m5 - m1,
mad(8.0f, u4 - u2 + l4 - l2,
mad(4.0f, u5 - u1 + l5 - l1,
mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
)
)
)
);
DSTY(x,y) = scale *
mad(12.0f, l3 - u3,
mad(6.0f, b3 - t3,
mad(8.0f, l2 - u2 + l4 - u4,
mad(4.0f, b2 - t2 + b4 - t4,
mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
)
)
)
);
}
__kernel void sobel7(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+6][BLK_X+6];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth |
|
y
>=dstHeight
)
return
;
float
tt1
=
lsmem[liy][lix]
;
float
tt2
=
lsmem[liy][lix+1]
;
float
tt3
=
lsmem[liy][lix+2]
;
float
tt4
=
lsmem[liy][lix+3]
;
float
tt5
=
lsmem[liy][lix+4]
;
float
tt6
=
lsmem[liy][lix+5]
;
float
tt7
=
lsmem[liy][lix+6]
;
float
t1
=
lsmem[liy+1][lix]
;
float
t2
=
lsmem[liy+1][lix+1]
;
float
t3
=
lsmem[liy+1][lix+2]
;
float
t4
=
lsmem[liy+1][lix+3]
;
float
t5
=
lsmem[liy+1][lix+4]
;
float
t6
=
lsmem[liy+1][lix+5]
;
float
t7
=
lsmem[liy+1][lix+6]
;
float
u1
=
lsmem[liy+2][lix]
;
float
u2
=
lsmem[liy+2][lix+1]
;
float
u3
=
lsmem[liy+2][lix+2]
;
float
u4
=
lsmem[liy+2][lix+3]
;
float
u5
=
lsmem[liy+2][lix+4]
;
float
u6
=
lsmem[liy+2][lix+5]
;
float
u7
=
lsmem[liy+2][lix+6]
;
float
m1
=
lsmem[liy+3][lix]
;
float
m2
=
lsmem[liy+3][lix+1]
;
float
m3
=
lsmem[liy+3][lix+2]
;
float
m5
=
lsmem[liy+3][lix+4]
;
float
m6
=
lsmem[liy+3][lix+5]
;
float
m7
=
lsmem[liy+3][lix+6]
;
float
l1
=
lsmem[liy+4][lix]
;
float
l2
=
lsmem[liy+4][lix+1]
;
float
l3
=
lsmem[liy+4][lix+2]
;
float
l4
=
lsmem[liy+4][lix+3]
;
float
l5
=
lsmem[liy+4][lix+4]
;
float
l6
=
lsmem[liy+4][lix+5]
;
float
l7
=
lsmem[liy+4][lix+6]
;
float
b1
=
lsmem[liy+5][lix]
;
float
b2
=
lsmem[liy+5][lix+1]
;
float
b3
=
lsmem[liy+5][lix+2]
;
float
b4
=
lsmem[liy+5][lix+3]
;
float
b5
=
lsmem[liy+5][lix+4]
;
float
b6
=
lsmem[liy+5][lix+5]
;
float
b7
=
lsmem[liy+5][lix+6]
;
float
bb1
=
lsmem[liy+6][lix]
;
float
bb2
=
lsmem[liy+6][lix+1]
;
float
bb3
=
lsmem[liy+6][lix+2]
;
float
bb4
=
lsmem[liy+6][lix+3]
;
float
bb5
=
lsmem[liy+6][lix+4]
;
float
bb6
=
lsmem[liy+6][lix+5]
;
float
bb7
=
lsmem[liy+6][lix+6]
;
//calc
and
store
dx
and
dy
DSTX
(
x,y
)
=
scale
*
mad
(
100.0f,
m5
-
m3,
mad
(
80.0f,
m6
-
m2,
mad
(
20.0f,
m7
-
m1,
mad
(
75.0f,
u5
-
u3
+
l5
-
l3,
mad
(
60.0f,
u6
-
u2
+
l6
-
l2,
mad
(
15.0f,
u7
-
u1
+
l7
-
l1,
mad
(
30.0f,
t5
-
t3
+
b5
-
b3,
mad
(
24.0f,
t6
-
t2
+
b6
-
b2,
mad
(
6.0f,
t7
-
t1
+
b7
-
b1,
mad
(
5.0f,
tt5
-
tt3
+
bb5
-
bb3,
mad
(
4.0f,
tt6
-
tt2
+
bb6
-
bb2,
tt7
-
tt1
+
bb7
-
bb1
)
)
)
)
)
)
)
)
)
)
)
;
float
dy
=
mad
(
2.0f,
b2
-
u2,
b1
-
u1
+
b3
-
u3
)
;
DstY[
id_y
*
dstStride
+
id_x
]
=
dy
*
scale
;
}
\ No newline at end of file
DSTY
(
x,y
)
=
scale
*
mad
(
100.0f,
l4
-
u4,
mad
(
80.0f,
b4
-
t4,
mad
(
20.0f,
bb4
-
tt4,
mad
(
75.0f,
l5
-
u5
+
l3
-
u3,
mad
(
60.0f,
b5
-
t5
+
b3
-
t3,
mad
(
15.0f,
bb5
-
tt5
+
bb3
-
tt3,
mad
(
30.0f,
l6
-
u6
+
l2
-
u2,
mad
(
24.0f,
b6
-
t6
+
b2
-
t2,
mad
(
6.0f,
bb6
-
tt6
+
bb2
-
tt2,
mad
(
5.0f,
l7
-
u7
+
l1
-
u1,
mad
(
4.0f,
b7
-
t7
+
b1
-
t1,
bb7
-
tt7
+
bb1
-
tt1
)
)
)
)
)
)
)
)
)
)
)
;
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录