Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
68f5dd41
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
68f5dd41
编写于
1月 30, 2014
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
simplified cv::boxFilter OpenCL impl
上级
afa62c41
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
129 addition
and
338 deletion
+129
-338
modules/imgproc/src/opencl/boxFilter.cl
modules/imgproc/src/opencl/boxFilter.cl
+53
-221
modules/imgproc/src/smooth.cpp
modules/imgproc/src/smooth.cpp
+63
-106
modules/imgproc/test/ocl/test_boxfilter.cpp
modules/imgproc/test/ocl/test_boxfilter.cpp
+13
-11
未找到文件。
modules/imgproc/src/opencl/boxFilter.cl
浏览文件 @
68f5dd41
...
...
@@ -39,45 +39,15 @@
//
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro
for
border
type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#
ifdef
BORDER_REPLICATE
//BORDER_REPLICATE:
aaaaaa|abcdefgh|hhhhhhh
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
l_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
r_edge
)
-1
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
t_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
b_edge
)
-1
:
(
addr
))
#
endif
#
ifdef
BORDER_REFLECT
//BORDER_REFLECT:
fedcba|abcdefgh|hgfedcb
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-1+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
-1
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-1+
((
b_edge
)
<<1
)
:
(
addr
))
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
ifdef
BORDER_REFLECT_101
//BORDER_REFLECT_101:
gfedcb|abcdefgh|gfedcba
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
-
(
i
)
-2+
((
r_edge
)
<<1
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
-
(
i
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
-
(
i
)
-2+
((
b_edge
)
<<1
)
:
(
addr
))
#
endif
//blur
function
does
not
support
BORDER_WRAP
#
ifdef
BORDER_WRAP
//BORDER_WRAP:
cdefgh|abcdefgh|abcdefg
#
define
ADDR_L
(
i,
l_edge,
r_edge
)
((
i
)
<
(
l_edge
)
?
(
i
)
+
(
r_edge
)
:
(
i
))
#
define
ADDR_R
(
i,
r_edge,
addr
)
((
i
)
>=
(
r_edge
)
?
(
i
)
-
(
r_edge
)
:
(
addr
))
#
define
ADDR_H
(
i,
t_edge,
b_edge
)
((
i
)
<
(
t_edge
)
?
(
i
)
+
(
b_edge
)
:
(
i
))
#
define
ADDR_B
(
i,
b_edge,
addr
)
((
i
)
>=
(
b_edge
)
?
(
i
)
-
(
b_edge
)
:
(
addr
))
#
endif
#
ifdef
EXTRA_EXTRAPOLATION
//
border
>
src
image
size
#
ifdef
BORDER_CONSTANT
//
None
#
elif
defined
BORDER_REPLICATE
#
define
EXTRAPOLATE
(
x,
y,
minX,
minY,
maxX,
maxY
)
\
{
\
...
...
@@ -131,248 +101,110 @@
#
else
#
error
No
extrapolation
method
#
endif
#else
#define EXTRAPOLATE(x, y, minX, minY, maxX, maxY) \
{ \
int _row = y - minY, _col = x - minX; \
_row = ADDR_H(_row, 0, maxY - minY); \
_row = ADDR_B(_row, maxY - minY, _row); \
y = _row + minY; \
\
_col = ADDR_L(_col, 0, maxX - minX); \
_col = ADDR_R(_col, maxX - minX, _col); \
x = _col + minX; \
}
#endif
#if USE_DOUBLE
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#define FPTYPE double
#define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
#else
#define FPTYPE float
#define CONVERT_TO_FPTYPE CAT(convert_float, VEC_SIZE)
#endif
#if DATA_DEPTH == 0
#define BASE_TYPE uchar
#elif DATA_DEPTH == 1
#define BASE_TYPE char
#elif DATA_DEPTH == 2
#define BASE_TYPE ushort
#elif DATA_DEPTH == 3
#define BASE_TYPE short
#elif DATA_DEPTH == 4
#define BASE_TYPE int
#elif DATA_DEPTH == 5
#define BASE_TYPE float
#elif DATA_DEPTH == 6
#define BASE_TYPE double
#else
#error data_depth
#endif
#define __CAT(x, y) x##y
#define CAT(x, y) __CAT(x, y)
#define uchar1 uchar
#define char1 char
#define ushort1 ushort
#define short1 short
#define int1 int
#define float1 float
#define double1 double
#define convert_uchar1_sat_rte convert_uchar_sat_rte
#define convert_char1_sat_rte convert_char_sat_rte
#define convert_ushort1_sat_rte convert_ushort_sat_rte
#define convert_short1_sat_rte convert_short_sat_rte
#define convert_int1_sat_rte convert_int_sat_rte
#define convert_float1
#define convert_double1
#if DATA_DEPTH == 5 |
|
DATA_DEPTH
==
6
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
#
else
#
define
CONVERT_TO_TYPE
CAT
(
CAT
(
CAT
(
convert_,
BASE_TYPE
)
,
VEC_SIZE
)
,
_sat_rte
)
#
endif
#
define
VEC_SIZE
DATA_CHAN
#
define
VEC_TYPE
CAT
(
BASE_TYPE,
VEC_SIZE
)
#
define
TYPE
VEC_TYPE
#
define
SCALAR_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
INTERMEDIATE_TYPE
CAT
(
FPTYPE,
VEC_SIZE
)
#
define
TYPE_SIZE
(
VEC_SIZE*sizeof
(
BASE_TYPE
))
#
define
noconvert
struct
RectCoords
{
int
x1,
y1,
x2,
y2
;
}
;
//#define
DEBUG
#
ifdef
DEBUG
#
define
DEBUG_ONLY
(
x
)
x
#
define
ASSERT
(
condition
)
do
{
if
(
!
(
condition
))
{
printf
(
"BUG in boxFilter kernel (global=%d,%d): "
#
condition
"\n"
,
get_global_id
(
0
)
,
get_global_id
(
1
))
; } } while (0)
#
else
#
define
DEBUG_ONLY
(
x
)
#
define
ASSERT
(
condition
)
#
endif
inline
INTERMEDIATE_TYPE
readSrcPixel
(
int2
pos,
__global
const
uchar*
srcptr,
int
srcstep,
const
struct
RectCoords
srcCoords
#
ifdef
BORDER_CONSTANT
,
SCALAR_TYPE
borderValue
#
endif
)
inline
WT
readSrcPixel
(
int2
pos,
__global
const
uchar
*
srcptr,
int
src_step,
const
struct
RectCoords
srcCoords
)
{
#
ifdef
BORDER_ISOLATED
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
srcCoords.x1
&&
pos.y
>=
srcCoords.y1
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
else
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
#
endif
{
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
sizeof
(
TYPE
))
;
return
CONVERT_TO_FPTYPE
(
*ptr
)
;
int
src_index
=
mad24
(
pos.y,
src_step,
pos.x
*
(
int
)
sizeof
(
ST
))
;
return
convertToWT
(
*
(
__global
const
ST
*
)(
srcptr
+
src_index
)
)
;
}
else
{
#
ifdef
BORDER_CONSTANT
return
borderValue
;
return
(
WT
)(
0
)
;
#
else
int
selected_col
=
pos.x
;
int
selected_row
=
pos.y
;
int
selected_col
=
pos.x,
selected_row
=
pos.y
;
EXTRAPOLATE
(
selected_col,
selected_row,
#
ifdef
BORDER_ISOLATED
srcCoords.x1,
srcCoords.y1,
srcCoords.x1,
srcCoords.y1,
#
else
0
,
0
,
0
,
0
,
#
endif
srcCoords.x2,
srcCoords.y2
)
;
srcCoords.x2,
srcCoords.y2
)
;
//
debug
border
mapping
//printf
(
"pos=%d,%d --> %d, %d\n"
,
pos.x,
pos.y,
selected_col,
selected_row
)
;
pos
=
(
int2
)(
selected_col,
selected_row
)
;
if
(
pos.x
>=
0
&&
pos.y
>=
0
&&
pos.x
<
srcCoords.x2
&&
pos.y
<
srcCoords.y2
)
{
__global
TYPE*
ptr
=
(
__global
TYPE*
)(
srcptr
+
pos.y
*
srcstep
+
pos.x
*
sizeof
(
TYPE
))
;
return
CONVERT_TO_FPTYPE
(
*ptr
)
;
}
else
{
//
for
debug
only
DEBUG_ONLY
(
printf
(
"BUG in boxFilter kernel\n"
))
;
return
(
FPTYPE
)(
0.0f
)
;
}
int
src_index
=
mad24
(
selected_row,
src_step,
selected_col
*
(
int
)
sizeof
(
ST
))
;
return
convertToWT
(
*
(
__global
const
ST
*
)(
srcptr
+
src_index
))
;
#
endif
}
}
//
INPUT
PARAMETER:
BLOCK_SIZE_Y
(
via
defines
)
__kernel
__attribute__
((
reqd_work_group_size
(
LOCAL_SIZE,
1
,
1
)))
void
boxFilter
(
__global
const
uchar*
srcptr,
int
srcstep,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar*
dstptr,
int
dststep,
int
dstoffset,
int
rows,
int
cols,
#
ifdef
BORDER_CONSTANT
SCALAR_TYPE
borderValue,
__kernel
void
boxFilter
(
__global
const
uchar
*
srcptr,
int
src_step,
int
srcOffsetX,
int
srcOffsetY,
int
srcEndX,
int
srcEndY,
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
rows,
int
cols
#
ifdef
NORMALIZE
,
float
alpha
#
endif
FPTYPE
alpha
)
)
{
const
struct
RectCoords
srcCoords
=
{srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
int
x
=
get_local_id
(
0
)
+
(
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
const
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
const
struct
RectCoords
srcCoords
=
{
srcOffsetX,
srcOffsetY,
srcEndX,
srcEndY
}
; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
const
int
local_id
=
get_local_id
(
0
)
;
INTERMEDIATE_TYPE
data[KERNEL_SIZE_Y]
;
__local
INTERMEDIATE_TYPE
sumOfCols[LOCAL_SIZE]
;
int
x
=
get_local_id
(
0
)
+
(
LOCAL_SIZE_X
-
(
KERNEL_SIZE_X
-
1
))
*
get_group_id
(
0
)
-
ANCHOR_X
;
int
y
=
get_global_id
(
1
)
*
BLOCK_SIZE_Y
;
int
local_id
=
get_local_id
(
0
)
;
WT
data[KERNEL_SIZE_Y]
;
__local
WT
sumOfCols[LOCAL_SIZE_X]
;
int2
srcPos
=
(
int2
)(
srcCoords.x1
+
x,
srcCoords.y1
+
y
-
ANCHOR_Y
)
;
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
{
data[sy]
=
readSrcPixel
(
srcPos,
srcptr,
srcstep,
srcCoords
#
ifdef
BORDER_CONSTANT
,
borderValue
#
endif
)
;
}
INTERMEDIATE_TYPE
tmp_sum
=
0
;
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++)
{
tmp_sum
+=
(
data[sy]
)
;
}
#
pragma
unroll
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++, srcPos.y++)
data[sy]
=
readSrcPixel
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
WT
tmp_sum
=
(
WT
)(
0
)
;
#
pragma
unroll
for
(
int
sy
=
0
; sy < KERNEL_SIZE_Y; sy++)
tmp_sum
+=
data[sy]
;
sumOfCols[local_id]
=
tmp_sum
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
2
pos
=
(
int2
)(
x,
y
)
;
__global
TYPE*
dstPtr
=
(
__global
TYPE*
)(
dstptr
+
pos.y
*
dststep
+
dstoffset
+
pos.x
*
TYPE_SIZE/*sizeof
(
TYPE
)
*/
)
; // Pointer can be out of bounds!
int
dst_index
=
mad24
(
y,
dst_step,
x
*
(
int
)
sizeof
(
DT
)
+
dst_offset
)
;
__global
DT
*
dst
=
(
__global
DT
*
)(
dstptr
+
dst_index
)
;
int
sy_index
=
0
; // current index in data[] array
int
stepsY
=
min
(
rows
-
pos.y,
BLOCK_SIZE_Y
)
;
ASSERT
(
stepsY
>
0
)
;
for
(
; ;)
for
(
int
i
=
0
,
stepY
=
min
(
rows
-
y,
BLOCK_SIZE_Y
)
; i < stepY; ++i)
{
ASSERT
(
pos.y
<
rows
)
;
if
(
local_id
>=
ANCHOR_X
&&
local_id
<
LOCAL_SIZE
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
&&
pos.x
>=
0
&&
pos.x
<
cols
)
if
(
local_id
>=
ANCHOR_X
&&
local_id
<
LOCAL_SIZE_X
-
(
KERNEL_SIZE_X
-
1
-
ANCHOR_X
)
&&
x
>=
0
&&
x
<
cols
)
{
ASSERT
(
pos.y
>=
0
&&
pos.y
<
rows
)
;
WT
total_sum
=
(
WT
)(
0
)
;
INTERMEDIATE_TYPE
total_sum
=
0
;
#
pragma
unroll
#
pragma
unroll
for
(
int
sx
=
0
; sx < KERNEL_SIZE_X; sx++)
{
total_sum
+=
sumOfCols[local_id
+
sx
-
ANCHOR_X]
;
}
*dstPtr
=
CONVERT_TO_TYPE
(((
INTERMEDIATE_TYPE
)
alpha
)
*
total_sum
)
;
}
#
if
BLOCK_SIZE_Y
==
1
break
;
#
if
def
NORMALIZE
dst[0]
=
convertToDT
((
WT
)(
alpha
)
*
total_sum
)
;
#
else
if
(
--stepsY
==
0
)
break
;
dst[0]
=
convertToDT
(
total_sum
)
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
tmp_sum
=
sumOfCols[local_id]
; // TODO FIX IT: workaround for BUG in OpenCL compiler
//
only
works
with
scalars:
ASSERT
(
fabs
(
tmp_sum
-
sumOfCols[local_id]
)
<
(
INTERMEDIATE_TYPE
)
1e-6
)
;
tmp_sum
=
sumOfCols[local_id]
;
tmp_sum
-=
data[sy_index]
;
data[sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
srcstep,
srcCoords
#
ifdef
BORDER_CONSTANT
,
borderValue
#
endif
)
;
data[sy_index]
=
readSrcPixel
(
srcPos,
srcptr,
src_step,
srcCoords
)
;
srcPos.y++
;
tmp_sum
+=
data[sy_index]
;
sumOfCols[local_id]
=
tmp_sum
;
sy_index
=
(
sy_index
+
1
<
KERNEL_SIZE_Y
)
?
sy_index
+
1
:
0
;
sy_index
=
sy_index
+
1
<
KERNEL_SIZE_Y
?
sy_index
+
1
:
0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
next
line
DEBUG_ONLY
(
pos.y++
)
;
dstPtr
=
(
__global
TYPE*
)((
__global
char*
)
dstPtr
+
dststep
)
; // Pointer can be out of bounds!
#
endif
//
BLOCK_SIZE_Y
==
1
dst
=
(
__global
DT
*
)((
__global
uchar
*
)
dst
+
dst_step
)
;
}
}
modules/imgproc/src/smooth.cpp
浏览文件 @
68f5dd41
...
...
@@ -611,155 +611,114 @@ template<> struct ColumnSum<int, ushort> : public BaseColumnFilter
std
::
vector
<
int
>
sum
;
};
#ifdef HAVE_OPENCL
#define DIVUP(total, grain) ((total + grain - 1) / (grain))
static
bool
ocl_boxFilter
(
InputArray
_src
,
OutputArray
_dst
,
int
ddepth
,
Size
ksize
,
Point
anchor
,
int
borderType
)
Size
ksize
,
Point
anchor
,
int
borderType
,
bool
normalize
)
{
int
type
=
_src
.
type
();
int
cn
=
CV_MAT_CN
(
type
);
if
((
1
!=
cn
)
&&
(
2
!=
cn
)
&&
(
4
!=
cn
))
return
false
;
//TODO
int
type
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
=
CV_ELEM_SIZE
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
int
sdepth
=
CV_MAT_DEPTH
(
type
);
if
(
ddepth
<
0
)
if
(
ddepth
<
0
)
ddepth
=
sdepth
;
else
if
(
ddepth
!=
sdepth
)
if
(
!
(
cn
==
1
||
cn
==
2
||
cn
==
4
)
||
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
||
_src
.
offset
()
%
esz
!=
0
||
_src
.
step
()
%
esz
!=
0
)
return
false
;
if
(
anchor
.
x
<
0
)
if
(
anchor
.
x
<
0
)
anchor
.
x
=
ksize
.
width
/
2
;
if
(
anchor
.
y
<
0
)
if
(
anchor
.
y
<
0
)
anchor
.
y
=
ksize
.
height
/
2
;
ocl
::
Kernel
kernel
;
//Normalize the result by default
int
computeUnits
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
float
alpha
=
1.0
f
/
(
ksize
.
height
*
ksize
.
width
);
bool
isIsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bool
useDouble
=
(
CV_64F
==
sdepth
);
const
cv
::
ocl
::
Device
&
device
=
cv
::
ocl
::
Device
::
getDefault
();
int
doubleFPConfig
=
device
.
doubleFPConfig
();
if
(
useDouble
&&
(
0
==
doubleFPConfig
))
return
false
;
// may be we have to check is (0 != (CL_FP_SOFT_FLOAT & doubleFPConfig)) ?
const
char
*
btype
=
NULL
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
//CV_Error(CV_StsUnsupportedFormat, "BORDER_WRAP is not supported!");
return
false
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
cv
::
Size
sz
=
_src
.
size
();
Size
size
=
_src
.
size
(),
wholeSize
;
bool
isolated
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
borderType
&=
~
BORDER_ISOLATED
;
int
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
ddepth
,
sdepth
));
size_t
globalsize
[
2
]
=
{
sz
.
width
,
sz
.
height
};
size_t
localsize
[
2
]
=
{
0
,
1
};
const
char
*
const
borderMap
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
0
,
"BORDER_REFLECT_101"
};
size_t
globalsize
[
2
]
=
{
size
.
width
,
size
.
height
};
size_t
localsize
[
2
]
=
{
0
,
1
};
UMat
src
;
Size
wholeSize
;
if
(
!
is
IsolatedBorder
)
UMat
src
=
_src
.
getUMat
()
;
if
(
!
is
olated
)
{
src
=
_src
.
getUMat
();
Point
ofs
;
src
.
locateROI
(
wholeSize
,
ofs
);
}
size_t
maxWorkItemSizes
[
32
];
device
.
maxWorkItemSizes
(
maxWorkItemSizes
);
size_t
tryWorkItems
=
maxWorkItemSizes
[
0
];
for
(;;)
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
sz
.
width
*
2
)
BLOCK_SIZE
/=
2
;
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
device
.
maxComputeUnits
()
*
32
<
(
size_t
)
sz
.
height
)
BLOCK_SIZE_Y
*=
2
;
int
h
=
isolated
?
size
.
height
:
wholeSize
.
height
;
int
w
=
isolated
?
size
.
width
:
wholeSize
.
width
;
if
((
size_t
)
ksize
.
width
>
BLOCK_SIZE
)
return
false
;
size_t
maxWorkItemSizes
[
32
];
ocl
::
Device
::
getDefault
().
maxWorkItemSizes
(
maxWorkItemSizes
);
int
tryWorkItems
=
(
int
)
maxWorkItemSizes
[
0
];
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
(
int
)
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
(
int
)
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
isIsolatedBorder
?
sz
.
height
:
wholeSize
.
height
;
int
w
=
isIsolatedBorder
?
sz
.
width
:
wholeSize
.
width
;
ocl
::
Kernel
kernel
;
for
(
;
;
)
{
int
BLOCK_SIZE_X
=
tryWorkItems
,
BLOCK_SIZE_Y
=
8
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
while
(
BLOCK_SIZE_X
>
32
&&
BLOCK_SIZE_X
>=
ksize
.
width
*
2
&&
BLOCK_SIZE_X
>
size
.
width
*
2
)
BLOCK_SIZE_X
/=
2
;
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE_X
/
8
&&
BLOCK_SIZE_Y
*
computeUnits
*
32
<
size
.
height
)
BLOCK_SIZE_Y
*=
2
;
if
(
(
w
<
ksize
.
width
)
||
(
h
<
ksize
.
height
)
)
if
(
ksize
.
width
>
BLOCK_SIZE_X
||
w
<
ksize
.
width
||
h
<
ksize
.
height
)
return
false
;
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
sdepth
,
cn
,
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
char
cvt
[
2
][
50
];
String
opts
=
format
(
"-D LOCAL_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D ST=%s -D DT=%s -D WT=%s -D convertToDT=%s -D convertToWT=%s "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s%s%s%s"
,
BLOCK_SIZE_X
,
BLOCK_SIZE_Y
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
ddepth
,
cn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
1
]),
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
borderMap
[
borderType
],
isolated
?
" -D BORDER_ISOLATED"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
normalize
?
" -D NORMALIZE"
:
""
);
localsize
[
0
]
=
BLOCK_SIZE
;
globalsize
[
0
]
=
DIVUP
(
s
z
.
width
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
;
globalsize
[
1
]
=
DIVUP
(
s
z
.
height
,
BLOCK_SIZE_Y
);
localsize
[
0
]
=
BLOCK_SIZE
_X
;
globalsize
[
0
]
=
DIVUP
(
s
ize
.
width
,
BLOCK_SIZE_X
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE_X
;
globalsize
[
1
]
=
DIVUP
(
s
ize
.
height
,
BLOCK_SIZE_Y
);
cv
::
String
errmsg
;
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
build_options
);
kernel
.
create
(
"boxFilter"
,
cv
::
ocl
::
imgproc
::
boxFilter_oclsrc
,
opts
);
size_t
kernelWorkGroupSize
=
kernel
.
workGroupSize
();
if
(
localsize
[
0
]
<=
kernelWorkGroupSize
)
break
;
if
(
BLOCK_SIZE
<
kernelWorkGroupSize
)
if
(
BLOCK_SIZE_X
<
(
int
)
kernelWorkGroupSize
)
return
false
;
tryWorkItems
=
kernelWorkGroupSize
;
tryWorkItems
=
(
int
)
kernelWorkGroupSize
;
}
_dst
.
create
(
s
z
,
CV_MAKETYPE
(
ddepth
,
cn
));
_dst
.
create
(
s
ize
,
CV_MAKETYPE
(
ddepth
,
cn
));
UMat
dst
=
_dst
.
getUMat
();
if
(
src
.
empty
())
src
=
_src
.
getUMat
();
int
idxArg
=
0
;
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
int
idxArg
=
kernel
.
set
(
0
,
ocl
::
KernelArg
::
PtrReadOnly
(
src
));
idxArg
=
kernel
.
set
(
idxArg
,
(
int
)
src
.
step
);
int
srcOffsetX
=
(
int
)((
src
.
offset
%
src
.
step
)
/
src
.
elemSize
());
int
srcOffsetY
=
(
int
)(
src
.
offset
/
src
.
step
);
int
srcEndX
=
(
isIsolatedBorder
?
(
srcOffsetX
+
sz
.
width
)
:
wholeSize
.
width
)
;
int
srcEndY
=
(
isIsolatedBorder
?
(
srcOffsetY
+
sz
.
height
)
:
wholeSize
.
height
)
;
int
srcEndX
=
isolated
?
srcOffsetX
+
size
.
width
:
wholeSize
.
width
;
int
srcEndY
=
isolated
?
srcOffsetY
+
size
.
height
:
wholeSize
.
height
;
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcOffsetY
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndX
);
idxArg
=
kernel
.
set
(
idxArg
,
srcEndY
);
idxArg
=
kernel
.
set
(
idxArg
,
ocl
::
KernelArg
::
WriteOnly
(
dst
));
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
int
cnocl
=
(
3
==
cn
)
?
4
:
cn
;
if
(
useDouble
)
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValueDouble
[
0
],
sizeof
(
double
)
*
cnocl
);
else
idxArg
=
kernel
.
set
(
idxArg
,
(
void
*
)
&
borderValue
[
0
],
sizeof
(
float
)
*
cnocl
);
}
if
(
useDouble
)
idxArg
=
kernel
.
set
(
idxArg
,
(
double
)
alpha
);
else
if
(
normalize
)
idxArg
=
kernel
.
set
(
idxArg
,
(
float
)
alpha
);
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
tru
e
);
return
kernel
.
run
(
2
,
globalsize
,
localsize
,
fals
e
);
}
#endif
}
...
...
@@ -862,9 +821,7 @@ void cv::boxFilter( InputArray _src, OutputArray _dst, int ddepth,
Size
ksize
,
Point
anchor
,
bool
normalize
,
int
borderType
)
{
bool
use_opencl
=
ocl
::
useOpenCL
()
&&
_dst
.
isUMat
()
&&
normalize
;
if
(
use_opencl
&&
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
)
)
return
;
CV_OCL_RUN
(
_dst
.
isUMat
(),
ocl_boxFilter
(
_src
,
_dst
,
ddepth
,
ksize
,
anchor
,
borderType
,
normalize
))
Mat
src
=
_src
.
getMat
();
int
sdepth
=
src
.
depth
(),
cn
=
src
.
channels
();
...
...
modules/imgproc/test/ocl/test_boxfilter.cpp
浏览文件 @
68f5dd41
...
...
@@ -56,32 +56,34 @@ enum
/////////////////////////////////////////////////////////////////////////////////////////////////
// boxFilter
PARAM_TEST_CASE
(
BoxFilter
,
MatDepth
,
Channels
,
BorderType
,
bool
)
PARAM_TEST_CASE
(
BoxFilter
,
MatDepth
,
Channels
,
BorderType
,
bool
,
bool
)
{
static
const
int
kernelMinSize
=
2
;
static
const
int
kernelMaxSize
=
10
;
int
type
;
int
depth
,
cn
;
Size
ksize
;
Size
dsize
;
Point
anchor
;
int
borderType
;
bool
useRoi
;
bool
normalize
,
useRoi
;
TEST_DECLARE_INPUT_PARAMETER
(
src
)
TEST_DECLARE_OUTPUT_PARAMETER
(
dst
)
virtual
void
SetUp
()
{
type
=
CV_MAKE_TYPE
(
GET_PARAM
(
0
),
GET_PARAM
(
1
));
depth
=
GET_PARAM
(
0
);
cn
=
GET_PARAM
(
1
);
borderType
=
GET_PARAM
(
2
);
// only not isolated border tested, because CPU module doesn't support isolated border case.
useRoi
=
GET_PARAM
(
3
);
normalize
=
GET_PARAM
(
3
);
useRoi
=
GET_PARAM
(
4
);
}
void
random_roi
()
{
int
type
=
CV_MAKE_TYPE
(
depth
,
cn
);
dsize
=
randomSize
(
1
,
MAX_VALUE
);
ksize
=
randomSize
(
kernelMinSize
,
kernelMaxSize
);
Size
roiSize
=
randomSize
(
ksize
.
width
,
MAX_VALUE
,
ksize
.
height
,
MAX_VALUE
);
...
...
@@ -100,8 +102,7 @@ PARAM_TEST_CASE(BoxFilter, MatDepth, Channels, BorderType, bool)
void
Near
(
double
threshold
=
0.0
)
{
EXPECT_MAT_NEAR
(
dst
,
udst
,
threshold
);
EXPECT_MAT_NEAR
(
dst_roi
,
udst_roi
,
threshold
);
OCL_EXPECT_MATS_NEAR
(
dst
,
threshold
)
}
};
...
...
@@ -111,10 +112,10 @@ OCL_TEST_P(BoxFilter, Mat)
{
random_roi
();
OCL_OFF
(
cv
::
boxFilter
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
anchor
,
tru
e
,
borderType
));
OCL_ON
(
cv
::
boxFilter
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
anchor
,
tru
e
,
borderType
));
OCL_OFF
(
cv
::
boxFilter
(
src_roi
,
dst_roi
,
-
1
,
ksize
,
anchor
,
normaliz
e
,
borderType
));
OCL_ON
(
cv
::
boxFilter
(
usrc_roi
,
udst_roi
,
-
1
,
ksize
,
anchor
,
normaliz
e
,
borderType
));
Near
(
1.0
);
Near
(
depth
<=
CV_32S
?
1
:
1e-3
);
}
}
...
...
@@ -127,6 +128,7 @@ OCL_INSTANTIATE_TEST_CASE_P(ImageProc, BoxFilter,
(
BorderType
)
BORDER_REPLICATE
,
(
BorderType
)
BORDER_REFLECT
,
(
BorderType
)
BORDER_REFLECT_101
),
Bool
(),
Bool
()
// ROI
)
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录