Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
e75cd74f
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e75cd74f
编写于
6月 30, 2014
作者:
V
vbystricky
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize OpenCL version of Laplacian filter for kernel size great than 3
上级
3b59a105
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
271 addition
and
4 deletion
+271
-4
modules/imgproc/src/deriv.cpp
modules/imgproc/src/deriv.cpp
+86
-4
modules/imgproc/src/opencl/laplacian5.cl
modules/imgproc/src/opencl/laplacian5.cl
+185
-0
未找到文件。
modules/imgproc/src/deriv.cpp
浏览文件 @
e75cd74f
...
...
@@ -643,21 +643,103 @@ void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy,
namespace
cv
{
#define LAPLACIAN_LOCAL_MEM(tileX, tileY, ksize, elsize) (((tileX) + 2 * (int)((ksize) / 2)) * (3 * (tileY) + 2 * (int)((ksize) / 2)) * elsize)
static
bool
ocl_Laplacian5
(
InputArray
_src
,
OutputArray
_dst
,
const
Mat
&
kd
,
const
Mat
&
ks
,
double
scale
,
double
delta
,
int
borderType
,
int
depth
,
int
ddepth
)
{
const
size_t
tileSizeX
=
16
;
const
size_t
tileSizeYmin
=
8
;
const
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
stype
=
_src
.
type
();
int
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
esz
=
CV_ELEM_SIZE
(
stype
);
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
return
false
;
Mat
kernelX
=
kd
.
reshape
(
1
,
1
);
if
(
kernelX
.
cols
%
2
!=
1
)
return
false
;
Mat
kernelY
=
ks
.
reshape
(
1
,
1
);
if
(
kernelY
.
cols
%
2
!=
1
)
return
false
;
CV_Assert
(
kernelX
.
cols
==
kernelY
.
cols
);
size_t
wgs
=
dev
.
maxWorkGroupSize
();
size_t
lmsz
=
dev
.
localMemSize
();
size_t
src_step
=
_src
.
step
(),
src_offset
=
_src
.
offset
();
if
(((
src_offset
%
src_step
)
%
esz
==
0
)
&&
(
(
borderType
==
BORDER_CONSTANT
||
borderType
==
BORDER_REPLICATE
)
||
(
borderType
==
BORDER_REFLECT
||
borderType
==
BORDER_WRAP
||
borderType
==
BORDER_REFLECT_101
)
&&
(
_src
.
cols
()
>=
kernelX
.
cols
&&
_src
.
rows
()
>=
kernelY
.
cols
)
)
&&
(
tileSizeX
*
tileSizeYmin
<=
wgs
)
&&
(
LAPLACIAN_LOCAL_MEM
(
tileSizeX
,
tileSizeYmin
,
kernelX
.
cols
,
cn
*
4
)
<=
lmsz
)
)
{
Size
size
=
_src
.
size
(),
wholeSize
;
Point
origin
;
int
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
int
wdepth
=
CV_32F
;
size_t
tileSizeY
=
wgs
/
tileSizeX
;
while
((
tileSizeX
*
tileSizeY
>
wgs
)
||
(
LAPLACIAN_LOCAL_MEM
(
tileSizeX
,
tileSizeY
,
kernelX
.
cols
,
cn
*
4
)
>
lmsz
))
{
tileSizeY
/=
2
;
}
size_t
lt2
[
2
]
=
{
tileSizeX
,
tileSizeY
};
size_t
gt2
[
2
]
=
{
lt2
[
0
]
*
(
1
+
(
size
.
width
-
1
)
/
lt2
[
0
]),
lt2
[
1
]
};
char
cvt
[
2
][
40
];
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
String
opts
=
cv
::
format
(
"-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s"
" -D convertToWT=%s -D convertToDT=%s"
" -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s -D CN=%d "
,
(
int
)
lt2
[
0
],
(
int
)
lt2
[
1
],
kernelX
.
cols
/
2
,
ocl
::
kernelToStr
(
kernelX
,
wdepth
,
"KERNEL_MATRIX_X"
).
c_str
(),
ocl
::
kernelToStr
(
kernelY
,
wdepth
,
"KERNEL_MATRIX_Y"
).
c_str
(),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
borderMap
[
borderType
],
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wdepth
),
cn
);
ocl
::
Kernel
k
(
"laplacian"
,
ocl
::
imgproc
::
laplacian5_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
_dst
.
create
(
size
,
dtype
);
UMat
dst
=
_dst
.
getUMat
();
int
src_offset_x
=
static_cast
<
int
>
((
src_offset
%
src_step
)
/
esz
);
int
src_offset_y
=
static_cast
<
int
>
(
src_offset
/
src_step
);
src
.
locateROI
(
wholeSize
,
origin
);
k
.
args
(
ocl
::
KernelArg
::
PtrReadOnly
(
src
),
(
int
)
src_step
,
src_offset_x
,
src_offset_y
,
wholeSize
.
height
,
wholeSize
.
width
,
ocl
::
KernelArg
::
WriteOnly
(
dst
),
static_cast
<
float
>
(
scale
),
static_cast
<
float
>
(
delta
));
return
k
.
run
(
2
,
gt2
,
lt2
,
false
);
}
int
iscale
=
cvRound
(
scale
),
idelta
=
cvRound
(
delta
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
floatCoeff
=
std
::
fabs
(
delta
-
idelta
)
>
DBL_EPSILON
||
std
::
fabs
(
scale
-
iscale
)
>
DBL_EPSILON
;
int
cn
=
_src
.
channels
(),
wdepth
=
std
::
max
(
depth
,
floatCoeff
?
CV_32F
:
CV_32S
),
kercn
=
1
;
bool
floatCoeff
=
std
::
fabs
(
delta
-
idelta
)
>
DBL_EPSILON
||
std
::
fabs
(
scale
-
iscale
)
>
DBL_EPSILON
;
int
wdepth
=
std
::
max
(
depth
,
floatCoeff
?
CV_32F
:
CV_32S
),
kercn
=
1
;
if
(
!
doubleSupport
&&
wdepth
==
CV_64F
)
return
false
;
char
cvt
[
2
][
40
];
ocl
::
Kernel
k
(
"sumConvert"
,
ocl
::
imgproc
::
laplacian5_oclsrc
,
format
(
"-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
format
(
"-D ONLY_SUM_CONVERT "
"-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d "
"-D convertToWT=%s -D convertToDT=%s%s"
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
...
...
modules/imgproc/src/opencl/laplacian5.cl
浏览文件 @
e75cd74f
...
...
@@ -5,8 +5,11 @@
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
define
noconvert
#
ifdef
ONLY_SUM_CONVERT
__kernel
void
sumConvert
(
__global
const
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
...
...
@@ -32,3 +35,185 @@ __kernel void sumConvert(__global const uchar * src1ptr, int src1_step, int src1
#
endif
}
}
#
else
#
if
CN==1
#
define
srcT
srcT1
#
define
WT
WT1
#
define
dstT
dstT1
#
else
#
define
__CAT
(
x,
y
)
x##y
#
define
CAT
(
x,
y
)
__CAT
(
x,
y
)
#
define
srcT
CAT
(
srcT1,
CN
)
#
define
WT
CAT
(
WT1,
CN
)
#
define
dstT
CAT
(
dstT1,
CN
)
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro
for
border
type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
BORDER_CONSTANT
//
CCCCCC|abcdefgh|CCCCCCC
#
define
EXTRAPOLATE
(
x,
maxV
)
#
elif
defined
BORDER_REPLICATE
//
aaaaaa|abcdefgh|hhhhhhh
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
max
(
min
((
x
)
,
(
maxV
)
-
1
)
,
0
)
; \
}
#
elif
defined
BORDER_WRAP
//
cdefgh|abcdefgh|abcdefg
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
(
(
x
)
+
(
maxV
)
)
%
(
maxV
)
; \
}
#
elif
defined
BORDER_REFLECT
//
fedcba|abcdefgh|hgfedcb
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
min
(((
maxV
)
-1
)
*2-
(
x
)
+1
,
max
((
x
)
,
-
(
x
)
-1
)
)
; \
}
#
elif
defined
BORDER_REFLECT_101
//
gfedcb|abcdefgh|gfedcba
#
define
EXTRAPOLATE
(
x,
maxV
)
\
{
\
(
x
)
=
min
(((
maxV
)
-1
)
*2-
(
x
)
,
max
((
x
)
,
-
(
x
))
)
; \
}
#
else
#
error
No
extrapolation
method
#
endif
#
if
CN
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
SRCSIZE
(
int
)
sizeof
(
srcT
)
#
define
DSTSIZE
(
int
)
sizeof
(
dstT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
SRCSIZE
(
int
)
sizeof
(
srcT1
)
*3
#
define
DSTSIZE
(
int
)
sizeof
(
dstT1
)
*3
#
endif
#
define
SRC
(
_x,_y
)
convertToWT
(
loadpix
(
Src
+
mad24
(
_y,
src_step,
SRCSIZE
*
_x
)))
#
ifdef
BORDER_CONSTANT
//
CCCCCC|abcdefgh|CCCCCCC
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
(
_x
)
<0
| (_x) >= (r_edge) |
(
_y
)
<0
|
(
_y
)
>=
(
t_edge
)
?
(
const_v
)
:
SRC
((
_x
)
,
(
_y
))
#
else
#
define
ELEM
(
_x,_y,r_edge,t_edge,const_v
)
SRC
((
_x
)
,
(
_y
))
#
endif
//
horizontal
and
vertical
filter
kernels
//
should
be
defined
on
host
during
compile
time
to
avoid
overhead
#
define
DIG
(
a
)
a,
__constant
WT1
mat_kernelX[]
=
{
KERNEL_MATRIX_X
}
;
__constant
WT1
mat_kernelY[]
=
{
KERNEL_MATRIX_Y
}
;
__kernel
void
laplacian
(
__global
uchar*
Src,
int
src_step,
int
srcOffsetX,
int
srcOffsetY,
int
height,
int
width,
__global
uchar*
Dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols,
WT1
scale,
WT1
delta
)
{
__local
WT
lsmem[BLK_Y
+
2
*
RADIUS][BLK_X
+
2
*
RADIUS]
;
__local
WT
lsmemDy1[BLK_Y][BLK_X
+
2
*
RADIUS]
;
__local
WT
lsmemDy2[BLK_Y][BLK_X
+
2
*
RADIUS]
;
int
lix
=
get_local_id
(
0
)
;
int
liy
=
get_local_id
(
1
)
;
int
x
=
get_global_id
(
0
)
;
int
srcX
=
x
+
srcOffsetX
-
RADIUS
;
int
clocY
=
liy
;
do
{
int
yb
=
clocY
+
srcOffsetY
-
RADIUS
;
EXTRAPOLATE
(
yb,
(
height
))
;
int
clocX
=
lix
;
int
cSrcX
=
srcX
;
do
{
int
xb
=
cSrcX
;
EXTRAPOLATE
(
xb,
(
width
))
;
lsmem[clocY][clocX]
=
ELEM
(
xb,
yb,
(
width
)
,
(
height
)
,
0
)
;
clocX
+=
BLK_X
;
cSrcX
+=
BLK_X
;
}
while
(
clocX
<
BLK_X+
(
RADIUS*2
))
;
clocY
+=
BLK_Y
;
}
while
(
clocY
<
BLK_Y+
(
RADIUS*2
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
WT
scale_v
=
(
WT
)
scale
;
WT
delta_v
=
(
WT
)
delta
;
for
(
int
y
=
0
; y < dst_rows; y+=BLK_Y)
{
int
i,
clocX
=
lix
;
WT
sum1
=
(
WT
)
0
;
WT
sum2
=
(
WT
)
0
;
do
{
sum1
=
(
WT
)
0
;
sum2
=
(
WT
)
0
;
for
(
i=0
; i<=2*RADIUS; i++)
{
sum1
=
mad
(
lsmem[liy
+
i][clocX],
mat_kernelY[i],
sum1
)
;
sum2
=
mad
(
lsmem[liy
+
i][clocX],
mat_kernelX[i],
sum2
)
;
}
lsmemDy1[liy][clocX]
=
sum1
;
lsmemDy2[liy][clocX]
=
sum2
;
clocX
+=
BLK_X
;
}
while
(
clocX
<
BLK_X+
(
RADIUS*2
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
((
x
<
dst_cols
)
&&
(
y
+
liy
<
dst_rows
))
{
sum1
=
(
WT
)
0
;
sum2
=
(
WT
)
0
;
for
(
i=0
; i<=2*RADIUS; i++)
{
sum1
=
mad
(
lsmemDy1[liy][lix+i],
mat_kernelX[i],
sum1
)
;
sum2
=
mad
(
lsmemDy2[liy][lix+i],
mat_kernelY[i],
sum2
)
;
}
WT
sum
=
mad
(
scale_v,
(
sum1
+
sum2
)
,
delta_v
)
;
storepix
(
convertToDT
(
sum
)
,
Dst
+
mad24
(
y
+
liy,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
}
for
(
int
i
=
liy
*
BLK_X
+
lix
; i < (RADIUS*2) * (BLK_X+(RADIUS*2)); i += BLK_X * BLK_Y)
{
int
clocX
=
i
%
(
BLK_X+
(
RADIUS*2
))
;
int
clocY
=
i
/
(
BLK_X+
(
RADIUS*2
))
;
lsmem[clocY][clocX]
=
lsmem[clocY
+
BLK_Y][clocX]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
yb
=
y
+
liy
+
BLK_Y
+
srcOffsetY
+
RADIUS
;
EXTRAPOLATE
(
yb,
(
height
))
;
clocX
=
lix
;
int
cSrcX
=
x
+
srcOffsetX
-
RADIUS
;
do
{
int
xb
=
cSrcX
;
EXTRAPOLATE
(
xb,
(
width
))
;
lsmem[liy
+
2*RADIUS][clocX]
=
ELEM
(
xb,
yb,
(
width
)
,
(
height
)
,
0
)
;
clocX
+=
BLK_X
;
cSrcX
+=
BLK_X
;
}
while
(
clocX
<
BLK_X+
(
RADIUS*2
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
#
endif
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录