Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
c424d360
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c424d360
编写于
6月 12, 2014
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimized cv::boxFilter
上级
45f21e4f
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
411 addition
and
37 deletion
+411
-37
modules/imgproc/src/opencl/boxFilterSmall.cl
modules/imgproc/src/opencl/boxFilterSmall.cl
+305
-0
modules/imgproc/src/smooth.cpp
modules/imgproc/src/smooth.cpp
+106
-37
未找到文件。
modules/imgproc/src/opencl/boxFilterSmall.cl
0 → 100755
浏览文件 @
c424d360
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
BORDER_REPLICATE
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT_101
//BORDER_REFLECT_101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
//blur
function
does
not
support
BORDER_WRAP
#
ifdef
BORDER_WRAP
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#
endif
#
ifdef
BORDER_ISOLATED
#
define
ISOLATED_MIN
(
VAL
)
(
VAL
)
#
else
#
define
ISOLATED_MIN
(
VAL
)
0
#
endif
#
ifdef
EXTRA_EXTRAPOLATION
//
border
>
src
image
size
#
ifdef
BORDER_CONSTANT
//
None
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
{
\
x
=
max
(
min
(
x,
maxX
-
1
)
,
minX
)
; \
y
=
max
(
min
(
y,
maxY
-
1
)
,
minY
)
; \
}
#
elif
defined
BORDER_WRAP
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
{
\
if
(
x
<
minX
)
\
x
-=
((
x
-
maxX
+
1
)
/
maxX
)
*
maxX
; \
if
(
x
>=
maxX
)
\
x
%=
maxX
; \
if
(
y
<
minY
)
\
y
-=
((
y
-
maxY
+
1
)
/
maxY
)
*
maxY
; \
if
(
y
>=
maxY
)
\
y
%=
maxY
; \
}
#
elif
defined
(
BORDER_REFLECT
)
|
| defined(BORDER_REFLECT_101)
#define EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, delta) \
{ \
if (maxX - minX == 1) \
x = minX; \
else \
do \
{ \
if (x < minX) \
x = minX - (x - minX) - 1 + delta; \
else \
x = maxX - 1 - (x - maxX) - delta; \
} \
while (x >= maxX || x < minX); \
\
if (maxY - minY == 1) \
y = minY; \
else \
do \
{ \
if (y < minY) \
y = minY - (y - minY) - 1 + delta; \
else \
y = maxY - 1 - (y - maxY) - delta; \
} \
while (y >= maxY || y < minY); \
}
#ifdef BORDER_REFLECT
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 0)
#elif defined(BORDER_REFLECT_101) || defined(BORDER_REFLECT101)
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) EXTRAPOLATE_(x, y, minX, minY, maxX, maxY, 1)
#endif
#else
#error No extrapolation method
#endif
#else
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
{ \
int _row = y - ISOLATED_MIN(minY), _col = x - ISOLATED_MIN(minX); \
_row = ADDR_H(_row, 0, maxY - ISOLATED_MIN(minY)); \
_row = ADDR_B(_row, maxY - ISOLATED_MIN(minY), _row); \
y = _row + ISOLATED_MIN(minY); \
\
_col = ADDR_L(_col, 0, maxX - ISOLATED_MIN(minX)); \
_col = ADDR_R(_col, maxX - ISOLATED_MIN(minX), _col); \
x = _col + ISOLATED_MIN(minX); \
}
#endif
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#if cn != 3
#define loadpix(addr) *(__global const srcT *)(addr)
#define storepix(val, addr) *(__global dstT *)(addr) = val
#define SRCSIZE (int)sizeof(srcT)
#define DSTSIZE (int)sizeof(dstT)
#else
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
#define SRCSIZE (int)sizeof(srcT1) * cn
#define DSTSIZE (int)sizeof(dstT1) * cn
#endif
#define noconvert
struct RectCoords
{
int x1, y1, x2, y2;
};
#ifdef BORDER_ISOLATED
inline bool isBorder(const struct RectCoords bounds, int2 coord, int numPixels)
{
return coord.x < bounds.x1 || coord.y < bounds.y1 || coord.x + numPixels > bounds.x2 || coord.y >= bounds.y2;
}
#else
inline bool isBorder(const struct RectCoords bounds, int2 coord, int numPixels)
{
return coord.x < 0 || coord.y < 0 || coord.x + numPixels > bounds.x2 || coord.y >= bounds.y2;
}
#endif
inline WT getBorderPixel(const struct RectCoords bounds, int2 coord,
__global const uchar * srcptr, int srcstep)
{
#ifdef BORDER_CONSTANT
return (WT)(0);
#else
int selected_col = coord.x;
int selected_row = coord.y;
EXTRAPOLATE(selected_col, selected_row,
bounds.x1, bounds.y1,
bounds.x2, bounds.y2);
__global const uchar* ptr = srcptr + mad24(selected_row, srcstep, selected_col * SRCSIZE);
return convertToWT(loadpix(ptr));
#endif
}
inline WT readSrcPixelSingle(int2 pos, __global const uchar * srcptr,
int srcstep, const struct RectCoords srcCoords)
{
if (!isBorder(srcCoords, pos, 1))
{
__global const uchar * ptr = srcptr + mad24(pos.y, srcstep, pos.x * SRCSIZE);
return convertToWT(loadpix(ptr));
}
else
return getBorderPixel(srcCoords, pos, srcptr, srcstep);
}
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define vload1(OFFSET, PTR) (*(PTR + OFFSET))
#define PX_LOAD_VEC_TYPE CAT(srcT1, PX_LOAD_VEC_SIZE)
#define PX_LOAD_FLOAT_VEC_TYPE CAT(WT1, PX_LOAD_VEC_SIZE)
#define PX_LOAD_FLOAT_VEC_CONV CAT(convert_, PX_LOAD_FLOAT_VEC_TYPE)
#define PX_LOAD CAT(vload, PX_LOAD_VEC_SIZE)
#define float1 float
inline PX_LOAD_FLOAT_VEC_TYPE readSrcPixelGroup(int2 pos, __global const uchar * srcptr,
int srcstep, const struct RectCoords srcCoords)
{
__global const srcT1 * ptr = (__global const srcT1 *)
(srcptr + mad24(pos.y, srcstep, pos.x * SRCSIZE));
return PX_LOAD_FLOAT_VEC_CONV(PX_LOAD(0, ptr));
}
// Macros to ensure unrolled loops
#define LOOP1(VAR, STMT) (STMT); (VAR)++;
#define LOOP2(VAR, STMT) LOOP1(VAR, STMT); (STMT); (VAR)++;
#define LOOP3(VAR, STMT) LOOP2(VAR, STMT); (STMT); (VAR)++;
#define LOOP4(VAR, STMT) LOOP3(VAR, STMT); (STMT); (VAR)++;
#define LOOP5(VAR, STMT) LOOP4(VAR, STMT); (STMT); (VAR)++;
#define LOOP6(VAR, STMT) LOOP5(VAR, STMT); (STMT); (VAR)++;
#define LOOP7(VAR, STMT) LOOP6(VAR, STMT); (STMT); (VAR)++;
#define LOOP8(VAR, STMT) LOOP7(VAR, STMT); (STMT); (VAR)++;
#define LOOP9(VAR, STMT) LOOP8(VAR, STMT); (STMT); (VAR)++;
#define LOOP10(VAR, STMT) LOOP9(VAR, STMT); (STMT); (VAR)++;
#define LOOP11(VAR, STMT) LOOP10(VAR, STMT); (STMT); (VAR)++;
#define LOOP12(VAR, STMT) LOOP11(VAR, STMT); (STMT); (VAR)++;
#define LOOP13(VAR, STMT) LOOP12(VAR, STMT); (STMT); (VAR)++;
#define LOOP(N, VAR, STMT) CAT(LOOP, N)((VAR), (STMT))
__kernel void boxFilterSmall(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
__global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols
#ifdef NORMALIZE
, float alpha
#endif
)
{
// for non-isolated border: offsetX, offsetY, wholeX, wholeY
const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY };
const int startX = get_global_id(0) * PX_PER_WI_X;
const int startY = get_global_id(1) * PX_PER_WI_Y;
if (startX >= cols |
|
startY
>=
rows
)
return
;
WT
privateData[PX_PER_WI_Y
+
KERNEL_SIZE_Y
-
1][PRIV_DATA_WIDTH]
;
//
Load
all
of
the
pixels
needed
for
the
calculation
int
py
=
0
;
LOOP
(
PX_LOAD_Y_ITERATIONS,
py,
{
int
y
=
startY
+
py
;
int
px
=
0
;
LOOP
(
PX_LOAD_X_ITERATIONS,
px,
{
int
x
=
startX
+
(
px
*
PX_LOAD_NUM_PX
)
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x
-
ANCHOR_X,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
if
(
!isBorder
(
srcCoords,
srcPos,
PX_LOAD_NUM_PX
))
{
PX_LOAD_FLOAT_VEC_TYPE
p
=
readSrcPixelGroup
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
#
ifdef
SQR
*
((
PX_LOAD_FLOAT_VEC_TYPE
*
)
&privateData[py][px
*
PX_LOAD_NUM_PX]
)
=
p
*
p
;
#
else
*
((
PX_LOAD_FLOAT_VEC_TYPE
*
)
&privateData[py][px
*
PX_LOAD_NUM_PX]
)
=
p
;
#
endif
}
else
{
int
lx
=
0
;
LOOP
(
PX_LOAD_NUM_PX,
lx,
{
WT
p
=
readSrcPixelSingle
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
#
ifdef
SQR
*
((
WT*
)
&privateData[py][px
*
PX_LOAD_NUM_PX
+
lx]
)
=
p
*
p
;
#
else
*
((
WT*
)
&privateData[py][px
*
PX_LOAD_NUM_PX
+
lx]
)
=
p
;
#
endif
srcPos.x++
;
}
)
;
}
}
)
;
}
)
;
//
Use
the
stored
pixels
to
compute
the
results
py
=
0
;
LOOP
(
PX_PER_WI_Y,
py,
{
int
y
=
startY
+
py
;
int
px
=
0
;
LOOP
(
PX_PER_WI_X,
px,
{
int
x
=
startX
+
px
;
int
sy
=
0
;
int
kernelIndex
=
0
;
WT
total_sum
=
(
WT
)(
0
)
;
LOOP
(
KERNEL_SIZE_Y,
sy,
{
int
sx
=
0
;
LOOP
(
KERNEL_SIZE_X,
sx,
{
total_sum
+=
privateData[py
+
sy][px
+
sx]
;
}
)
;
}
)
;
__global
dstT
*
dstPtr
=
(
__global
dstT
*
)(
dstptr
+
mad24
(
y,
dst_step,
mad24
(
x,
DSTSIZE,
dst_offset
)))
;
#
ifdef
NORMALIZE
total_sum
*=
(
WT
)(
alpha
)
;
#
endif
storepix
(
convertToDstT
(
total_sum
)
,
dstPtr
)
;
}
)
;
}
)
;
}
modules/imgproc/src/smooth.cpp
浏览文件 @
c424d360
...
...
@@ -629,12 +629,14 @@ struct ColumnSum<int, ushort> :
#ifdef HAVE_OPENCL
#define DIVUP(total, grain) ((total + grain - 1) / (grain))
#define ROUNDUP(sz, n) ((sz) + (n) - 1 - (((sz) + (n) - 1) % (n)))
static
bool
ocl_boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
int
borderType
,
bool
normalize
,
bool
sqr
=
false
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
=
CV_ELEM_SIZE
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
()
.
doubleFPConfig
()
>
0
;
bool
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
...
...
@@ -653,11 +655,12 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size
size
=
_src
.
size
(),
wholeSize
;
bool
isolated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
borderType
&=
~
BORDER_ISOLATED
;
int
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
ddepth
,
sdepth
));
int
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
ddepth
,
sdepth
)),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
);
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
size_t
globalsize
[
2
]
=
{
size
.
width
,
size
.
height
};
size_t
localsize
[
2
]
=
{
0
,
1
}
;
size_t
localsize
_general
[
2
]
=
{
0
,
1
},
*
localsize
=
NULL
;
UMat
src
=
_src
.
getUMat
();
if
(
!
isolated
)
...
...
@@ -674,46 +677,110 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
int
tryWorkItems
=
(
int
)
maxWorkItemSizes
[
0
];
ocl
::
Kernel
kernel
;
for
(
;
;
)
{
int
BLOCK_SIZE_X
=
tryWorkItems
,
BLOCK_SIZE_Y
=
std
::
min
(
ksize
.
height
*
10
,
size
.
height
);
while
(
BLOCK_SIZE_X
>
32
&&
BLOCK_SIZE_X
>=
ksize
.
width
*
2
&&
BLOCK_SIZE_X
>
size
.
width
*
2
)
BLOCK_SIZE_X
/=
2
;
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE_X
/
8
&&
BLOCK_SIZE_Y
*
computeUnits
*
32
<
size
.
height
)
BLOCK_SIZE_Y
*=
2
;
if
(
ksize
.
width
>
BLOCK_SIZE_X
||
w
<
ksize
.
width
||
h
<
ksize
.
height
)
if
(
dev
.
isIntel
()
&&
!
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_CPU
)
&&
((
ksize
.
width
<
5
&&
ksize
.
height
<
5
&&
esz
<=
4
)
||
(
ksize
.
width
==
5
&&
ksize
.
height
==
5
&&
cn
==
1
)))
{
if
(
w
<
ksize
.
width
||
h
<
ksize
.
height
)
return
false
;
char
cvt
[
2
][
50
];
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
" -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
" -D ST1=%s -D DT1=%s -D cn=%d"
,
BLOCK_SIZE_X
,
BLOCK_SIZE_Y
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
1
]),
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
borderMap
[
borderType
],
isolated
?
" -D BORDER_ISOLATED"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
cn
);
localsize
[
0
]
=
BLOCK_SIZE_X
;
globalsize
[
0
]
=
DIVUP
(
size
.
width
,
BLOCK_SIZE_X
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE_X
;
globalsize
[
1
]
=
DIVUP
(
size
.
height
,
BLOCK_SIZE_Y
);
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
opts
);
if
(
kernel
.
empty
())
return
false
;
// Figure out what vector size to use for loading the pixels.
int
pxLoadNumPixels
=
cn
!=
1
||
size
.
width
%
4
?
1
:
4
;
int
pxLoadVecSize
=
cn
*
pxLoadNumPixels
;
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE_X
<
(
int
)
kernelWorkGroupSize
)
// Figure out how many pixels per work item to compute in X and Y
// directions. Too many and we run out of registers.
int
pxPerWorkItemX
=
1
,
pxPerWorkItemY
=
1
;
if
(
cn
<=
2
&&
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
)
{
pxPerWorkItemX
=
size
.
width
%
8
?
size
.
width
%
4
?
size
.
width
%
2
?
1
:
2
:
4
:
8
;
pxPerWorkItemY
=
size
.
height
%
2
?
1
:
2
;
}
else
if
(
cn
<
4
||
(
ksize
.
width
<=
4
&&
ksize
.
height
<=
4
))
{
pxPerWorkItemX
=
size
.
width
%
2
?
1
:
2
;
pxPerWorkItemY
=
size
.
height
%
2
?
1
:
2
;
}
globalsize
[
0
]
=
size
.
width
/
pxPerWorkItemX
;
globalsize
[
1
]
=
size
.
height
/
pxPerWorkItemY
;
// Need some padding in the private array for pixels
int
privDataWidth
=
ROUNDUP
(
pxPerWorkItemX
+
ksize
.
width
-
1
,
pxLoadNumPixels
);
// Make the global size a nice round number so the runtime can pick
// from reasonable choices for the workgroup size
const
int
wgRound
=
256
;
globalsize
[
0
]
=
ROUNDUP
(
globalsize
[
0
],
wgRound
);
char
build_options
[
1024
],
cvt
[
2
][
40
];
sprintf
(
build_options
,
"-D cn=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
"-D PX_LOAD_VEC_SIZE=%d -D PX_LOAD_NUM_PX=%d "
"-D PX_PER_WI_X=%d -D PX_PER_WI_Y=%d -D PRIV_DATA_WIDTH=%d -D %s -D %s "
"-D PX_LOAD_X_ITERATIONS=%d -D PX_LOAD_Y_ITERATIONS=%d "
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
"-D convertToWT=%s -D convertToDstT=%s%s%s"
,
cn
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
pxLoadVecSize
,
pxLoadNumPixels
,
pxPerWorkItemX
,
pxPerWorkItemY
,
privDataWidth
,
borderMap
[
borderType
],
isolated
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
,
privDataWidth
/
pxLoadNumPixels
,
pxPerWorkItemY
+
ksize
.
height
-
1
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
typeToStr
(
wdepth
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
);
if
(
!
kernel
.
create
(
"boxFilterSmall"
,
cv
::
ocl
::
imgproc
::
boxFilterSmall_oclsrc
,
build_options
))
return
false
;
}
else
{
localsize
=
localsize_general
;
for
(
;
;
)
{
int
BLOCK_SIZE_X
=
tryWorkItems
,
BLOCK_SIZE_Y
=
std
::
min
(
ksize
.
height
*
10
,
size
.
height
);
while
(
BLOCK_SIZE_X
>
32
&&
BLOCK_SIZE_X
>=
ksize
.
width
*
2
&&
BLOCK_SIZE_X
>
size
.
width
*
2
)
BLOCK_SIZE_X
/=
2
;
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE_X
/
8
&&
BLOCK_SIZE_Y
*
computeUnits
*
32
<
size
.
height
)
BLOCK_SIZE_Y
*=
2
;
if
(
ksize
.
width
>
BLOCK_SIZE_X
||
w
<
ksize
.
width
||
h
<
ksize
.
height
)
return
false
;
char
cvt
[
2
][
50
];
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s"
" -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s%s"
" -D ST1=%s -D DT1=%s -D cn=%d"
,
BLOCK_SIZE_X
,
BLOCK_SIZE_Y
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
1
]),
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
borderMap
[
borderType
],
isolated
?
" -D BORDER_ISOLATED"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
normalize
?
" -D NORMALIZE"
:
""
,
sqr
?
" -D SQR"
:
""
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
),
cn
);
localsize
[
0
]
=
BLOCK_SIZE_X
;
globalsize
[
0
]
=
DIVUP
(
size
.
width
,
BLOCK_SIZE_X
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE_X
;
globalsize
[
1
]
=
DIVUP
(
size
.
height
,
BLOCK_SIZE_Y
);
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
opts
);
if
(
kernel
.
empty
())
return
false
;
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE_X
<
(
int
)
kernelWorkGroupSize
)
return
false
;
tryWorkItems
=
(
int
)
kernelWorkGroupSize
;
tryWorkItems
=
(
int
)
kernelWorkGroupSize
;
}
}
_dst
.
create
(
size
,
CV_MAKETYPE
(
ddepth
,
cn
));
...
...
@@ -736,6 +803,8 @@ static bool ocl_boxFilter( InputArray _src, OutputArray _dst, int ddepth,
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
false
);
}
#undef ROUNDUP
#endif
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录