Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
OpenCV
opencv
提交
7b0f018a
O
opencv
项目概览
OpenCV
/
opencv
上一次同步 10 个月
通知
995
Star
71100
Fork
55580
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
7b0f018a
编写于
10月 30, 2013
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ocl: adjust worksize for filter2D and boxFilter
上级
8a4f1bbb
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
240 addition
and
184 deletion
+240
-184
modules/ocl/include/opencv2/ocl/private/util.hpp
modules/ocl/include/opencv2/ocl/private/util.hpp
+4
-0
modules/ocl/src/cl_operations.cpp
modules/ocl/src/cl_operations.cpp
+16
-4
modules/ocl/src/filtering.cpp
modules/ocl/src/filtering.cpp
+220
-180
未找到文件。
modules/ocl/include/opencv2/ocl/private/util.hpp
浏览文件 @
7b0f018a
...
...
@@ -103,7 +103,11 @@ CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
);
CV_EXPORTS
cl_kernel
openCLGetKernelFromSource
(
const
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
const
char
*
build_options
);
CV_EXPORTS
cl_kernel
openCLGetKernelFromSource
(
Context
*
ctx
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
int
channels
,
int
depth
,
const
char
*
build_options
);
CV_EXPORTS
void
openCLVerifyKernel
(
const
Context
*
clCxt
,
cl_kernel
kernel
,
size_t
*
localThreads
);
CV_EXPORTS
void
openCLExecuteKernel
(
Context
*
ctx
,
cl_kernel
kernel
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
);
CV_EXPORTS
void
openCLExecuteKernel
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
&
args
,
int
globalcols
,
int
globalrows
,
size_t
blockSize
=
16
,
int
kernel_expand_depth
=
-
1
,
int
kernel_expand_channel
=
-
1
);
CV_EXPORTS
void
openCLExecuteKernel_
(
Context
*
clCxt
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
std
::
string
kernelName
,
...
...
modules/ocl/src/cl_operations.cpp
浏览文件 @
7b0f018a
...
...
@@ -336,8 +336,7 @@ static std::string removeDuplicatedWhiteSpaces(const char * buildOptions)
return
opt
;
}
void
openCLExecuteKernel_
(
Context
*
ctx
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
cl_kernel
openCLGetKernelFromSource
(
Context
*
ctx
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
int
channels
,
int
depth
,
const
char
*
build_options
)
{
//construct kernel name
...
...
@@ -350,10 +349,14 @@ void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, str
idxStr
<<
"_D"
<<
depth
;
kernelName
+=
idxStr
.
str
();
cl_kernel
kernel
;
std
::
string
fixedOptions
=
removeDuplicatedWhiteSpaces
(
build_options
);
kernel
=
openCLGetKernelFromSource
(
ctx
,
source
,
kernelName
,
fixedOptions
.
c_str
());
cl_kernel
kernel
=
openCLGetKernelFromSource
(
ctx
,
source
,
kernelName
,
fixedOptions
.
c_str
());
return
kernel
;
}
void
openCLExecuteKernel
(
Context
*
ctx
,
cl_kernel
kernel
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
)
{
if
(
localThreads
!=
NULL
)
{
globalThreads
[
0
]
=
roundUp
(
globalThreads
[
0
],
localThreads
[
0
]);
...
...
@@ -399,6 +402,15 @@ void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, str
openCLSafeCall
(
clReleaseKernel
(
kernel
));
}
void
openCLExecuteKernel_
(
Context
*
ctx
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
,
const
char
*
build_options
)
{
cl_kernel
kernel
=
openCLGetKernelFromSource
(
ctx
,
source
,
kernelName
,
channels
,
depth
,
build_options
);
openCLExecuteKernel
(
ctx
,
kernel
,
globalThreads
,
localThreads
,
args
);
}
void
openCLExecuteKernel
(
Context
*
ctx
,
const
cv
::
ocl
::
ProgramEntry
*
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
)
...
...
modules/ocl/src/filtering.cpp
浏览文件 @
7b0f018a
...
...
@@ -578,104 +578,124 @@ static void GPUFilter2D(const oclMat &src, oclMat &dst, const Mat &kernel,
kernelDataFloat
.
size
()
*
sizeof
(
float
),
1
,
clMemcpyHostToDevice
);
}
size_t
BLOCK_SIZE
=
src
.
clCxt
->
getDeviceInfo
().
maxWorkItemSizes
[
0
];
size_t
tryWorkItems
=
src
.
clCxt
->
getDeviceInfo
().
maxWorkItemSizes
[
0
];
do
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
src
.
cols
*
2
)
BLOCK_SIZE
/=
2
;
#if 1 // TODO Mode with several blocks requires a much more VGPRs, so this optimization is not actual for the current devices
size_t
BLOCK_SIZE_Y
=
1
;
size_t
BLOCK_SIZE_Y
=
1
;
#else
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
#endif
CV_Assert
((
size_t
)
ksize
.
width
<=
BLOCK_SIZE
);
CV_Assert
((
size_t
)
ksize
.
width
<=
BLOCK_SIZE
);
bool
isIsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
bool
isIsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
cl_uint
stepBytes
=
src
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
stepBytes
));
int
offsetXBytes
=
src
.
offset
%
src
.
step
;
int
offsetX
=
offsetXBytes
/
src
.
elemSize
();
CV_Assert
((
int
)(
offsetX
*
src
.
elemSize
())
==
offsetXBytes
);
int
offsetY
=
src
.
offset
/
src
.
step
;
int
endX
=
(
offsetX
+
src
.
cols
);
int
endY
=
(
offsetY
+
src
.
rows
);
cl_int
rect
[
4
]
=
{
offsetX
,
offsetY
,
endX
,
endY
};
if
(
!
isIsolatedBorder
)
{
rect
[
2
]
=
src
.
wholecols
;
rect
[
3
]
=
src
.
wholerows
;
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
rect
[
0
]));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
cl_uint
_stepBytes
=
dst
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
_stepBytes
));
int
_offsetXBytes
=
dst
.
offset
%
dst
.
step
;
int
_offsetX
=
_offsetXBytes
/
dst
.
elemSize
();
CV_Assert
((
int
)(
_offsetX
*
dst
.
elemSize
())
==
_offsetXBytes
);
int
_offsetY
=
dst
.
offset
/
dst
.
step
;
int
_endX
=
(
_offsetX
+
dst
.
cols
);
int
_endY
=
(
_offsetY
+
dst
.
rows
);
cl_int
_rect
[
4
]
=
{
_offsetX
,
_offsetY
,
_endX
,
_endY
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
_rect
[
0
]));
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
if
(
useDouble
)
args
.
push_back
(
make_pair
(
sizeof
(
double
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValue
[
0
]));
else
args
.
push_back
(
make_pair
(
sizeof
(
float
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValueDouble
[
0
]));
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
cl_uint
stepBytes
=
src
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
stepBytes
));
int
offsetXBytes
=
src
.
offset
%
src
.
step
;
int
offsetX
=
offsetXBytes
/
src
.
elemSize
();
CV_Assert
((
int
)(
offsetX
*
src
.
elemSize
())
==
offsetXBytes
);
int
offsetY
=
src
.
offset
/
src
.
step
;
int
endX
=
(
offsetX
+
src
.
cols
);
int
endY
=
(
offsetY
+
src
.
rows
);
cl_int
rect
[
4
]
=
{
offsetX
,
offsetY
,
endX
,
endY
};
if
(
!
isIsolatedBorder
)
{
rect
[
2
]
=
src
.
wholecols
;
rect
[
3
]
=
src
.
wholerows
;
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
rect
[
0
]));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
cl_uint
_stepBytes
=
dst
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
_stepBytes
));
int
_offsetXBytes
=
dst
.
offset
%
dst
.
step
;
int
_offsetX
=
_offsetXBytes
/
dst
.
elemSize
();
CV_Assert
((
int
)(
_offsetX
*
dst
.
elemSize
())
==
_offsetXBytes
);
int
_offsetY
=
dst
.
offset
/
dst
.
step
;
int
_endX
=
(
_offsetX
+
dst
.
cols
);
int
_endY
=
(
_offsetY
+
dst
.
rows
);
cl_int
_rect
[
4
]
=
{
_offsetX
,
_offsetY
,
_endX
,
_endY
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
_rect
[
0
]));
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
if
(
useDouble
)
args
.
push_back
(
make_pair
(
sizeof
(
double
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValue
[
0
]));
else
args
.
push_back
(
make_pair
(
sizeof
(
float
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValueDouble
[
0
]));
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
oclKernelParameter
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
oclKernelParameter
.
data
));
const
char
*
btype
=
NULL
;
const
char
*
btype
=
NULL
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
CV_Error
(
CV_StsUnsupportedFormat
,
"BORDER_WRAP is not supported!"
);
return
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
CV_Error
(
CV_StsUnsupportedFormat
,
"BORDER_WRAP is not supported!"
);
return
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
isIsolatedBorder
?
src
.
rows
:
src
.
wholerows
;
int
w
=
isIsolatedBorder
?
src
.
cols
:
src
.
wholecols
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d "
"-D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
src
.
depth
(),
src
.
oclchannels
(),
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
size_t
lt
[
3
]
=
{
BLOCK_SIZE
,
1
,
1
};
size_t
gt
[
3
]
=
{
divUp
(
dst
.
cols
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
,
divUp
(
dst
.
rows
,
BLOCK_SIZE_Y
),
1
};
cl_kernel
kernel
=
openCLGetKernelFromSource
(
src
.
clCxt
,
&
filtering_filter2D
,
"filter2D"
,
-
1
,
-
1
,
build_options
);
size_t
kernelWorkGroupSize
;
openCLSafeCall
(
clGetKernelWorkGroupInfo
(
kernel
,
getClDeviceID
(
src
.
clCxt
),
CL_KERNEL_WORK_GROUP_SIZE
,
sizeof
(
size_t
),
&
kernelWorkGroupSize
,
0
));
if
(
lt
[
0
]
>
kernelWorkGroupSize
)
{
clReleaseKernel
(
kernel
);
CV_Assert
(
BLOCK_SIZE
>
kernelWorkGroupSize
);
tryWorkItems
=
kernelWorkGroupSize
;
continue
;
}
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
isIsolatedBorder
?
src
.
rows
:
src
.
wholerows
;
int
w
=
isIsolatedBorder
?
src
.
cols
:
src
.
wholecols
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d "
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D KERNEL_SIZE_Y2_ALIGNED=%d "
"-D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
src
.
depth
(),
src
.
oclchannels
(),
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
kernel_size_y2_aligned
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
size_t
gt
[
3
]
=
{
divUp
(
dst
.
cols
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
,
divUp
(
dst
.
rows
,
BLOCK_SIZE_Y
),
1
},
lt
[
3
]
=
{
BLOCK_SIZE
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
filtering_filter2D
,
"filter2D"
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
openCLExecuteKernel
(
src
.
clCxt
,
kernel
,
gt
,
lt
,
args
);
// kernel will be released here
}
while
(
false
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
ocl
::
getLinearFilter_GPU
(
int
/*srcType*/
,
int
/*dstType*/
,
const
Mat
&
kernel
,
const
Size
&
ksize
,
...
...
@@ -770,106 +790,126 @@ static void GPUFilterBox(const oclMat &src, oclMat &dst,
(
src
.
rows
==
dst
.
rows
));
CV_Assert
(
src
.
oclchannels
()
==
dst
.
oclchannels
());
size_t
BLOCK_SIZE
=
src
.
clCxt
->
getDeviceInfo
().
maxWorkItemSizes
[
0
];
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
CV_Assert
((
size_t
)
ksize
.
width
<=
BLOCK_SIZE
);
bool
isIsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
cl_uint
stepBytes
=
src
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
stepBytes
));
int
offsetXBytes
=
src
.
offset
%
src
.
step
;
int
offsetX
=
offsetXBytes
/
src
.
elemSize
();
CV_Assert
((
int
)(
offsetX
*
src
.
elemSize
())
==
offsetXBytes
);
int
offsetY
=
src
.
offset
/
src
.
step
;
int
endX
=
(
offsetX
+
src
.
cols
);
int
endY
=
(
offsetY
+
src
.
rows
);
cl_int
rect
[
4
]
=
{
offsetX
,
offsetY
,
endX
,
endY
};
if
(
!
isIsolatedBorder
)
{
rect
[
2
]
=
src
.
wholecols
;
rect
[
3
]
=
src
.
wholerows
;
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
rect
[
0
]));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
cl_uint
_stepBytes
=
dst
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
_stepBytes
));
int
_offsetXBytes
=
dst
.
offset
%
dst
.
step
;
int
_offsetX
=
_offsetXBytes
/
dst
.
elemSize
();
CV_Assert
((
int
)(
_offsetX
*
dst
.
elemSize
())
==
_offsetXBytes
);
int
_offsetY
=
dst
.
offset
/
dst
.
step
;
int
_endX
=
(
_offsetX
+
dst
.
cols
);
int
_endY
=
(
_offsetY
+
dst
.
rows
);
cl_int
_rect
[
4
]
=
{
_offsetX
,
_offsetY
,
_endX
,
_endY
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
_rect
[
0
]));
bool
useDouble
=
src
.
depth
()
==
CV_64F
;
size_t
tryWorkItems
=
src
.
clCxt
->
getDeviceInfo
().
maxWorkItemSizes
[
0
];
do
{
size_t
BLOCK_SIZE
=
tryWorkItems
;
while
(
BLOCK_SIZE
>
32
&&
BLOCK_SIZE
>=
(
size_t
)
ksize
.
width
*
2
&&
BLOCK_SIZE
>
(
size_t
)
src
.
cols
*
2
)
BLOCK_SIZE
/=
2
;
size_t
BLOCK_SIZE_Y
=
8
;
// TODO Check heuristic value on devices
while
(
BLOCK_SIZE_Y
<
BLOCK_SIZE
/
8
&&
BLOCK_SIZE_Y
*
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
*
32
<
(
size_t
)
src
.
rows
)
BLOCK_SIZE_Y
*=
2
;
CV_Assert
((
size_t
)
ksize
.
width
<=
BLOCK_SIZE
);
bool
isIsolatedBorder
=
(
borderType
&
BORDER_ISOLATED
)
!=
0
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
cl_uint
stepBytes
=
src
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
stepBytes
));
int
offsetXBytes
=
src
.
offset
%
src
.
step
;
int
offsetX
=
offsetXBytes
/
src
.
elemSize
();
CV_Assert
((
int
)(
offsetX
*
src
.
elemSize
())
==
offsetXBytes
);
int
offsetY
=
src
.
offset
/
src
.
step
;
int
endX
=
(
offsetX
+
src
.
cols
);
int
endY
=
(
offsetY
+
src
.
rows
);
cl_int
rect
[
4
]
=
{
offsetX
,
offsetY
,
endX
,
endY
};
if
(
!
isIsolatedBorder
)
{
rect
[
2
]
=
src
.
wholecols
;
rect
[
3
]
=
src
.
wholerows
;
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
rect
[
0
]));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
cl_uint
_stepBytes
=
dst
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_uint
),
(
void
*
)
&
_stepBytes
));
int
_offsetXBytes
=
dst
.
offset
%
dst
.
step
;
int
_offsetX
=
_offsetXBytes
/
dst
.
elemSize
();
CV_Assert
((
int
)(
_offsetX
*
dst
.
elemSize
())
==
_offsetXBytes
);
int
_offsetY
=
dst
.
offset
/
dst
.
step
;
int
_endX
=
(
_offsetX
+
dst
.
cols
);
int
_endY
=
(
_offsetY
+
dst
.
rows
);
cl_int
_rect
[
4
]
=
{
_offsetX
,
_offsetY
,
_endX
,
_endY
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
*
4
,
(
void
*
)
&
_rect
[
0
]));
bool
useDouble
=
src
.
depth
()
==
CV_64F
;
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
if
(
useDouble
)
args
.
push_back
(
make_pair
(
sizeof
(
double
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValue
[
0
]));
else
args
.
push_back
(
make_pair
(
sizeof
(
float
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValueDouble
[
0
]));
}
float
borderValue
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
double
borderValueDouble
[
4
]
=
{
0
,
0
,
0
,
0
};
// DON'T move into 'if' body
if
((
borderType
&
~
BORDER_ISOLATED
)
==
BORDER_CONSTANT
)
{
double
alphaDouble
=
alpha
;
// DON'T move into 'if' body
if
(
useDouble
)
args
.
push_back
(
make_pair
(
sizeof
(
double
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValue
[
0
]
));
args
.
push_back
(
make_pair
(
sizeof
(
double
)
,
(
void
*
)
&
alphaDouble
));
else
args
.
push_back
(
make_pair
(
sizeof
(
float
)
*
src
.
oclchannels
(),
(
void
*
)
&
borderValueDouble
[
0
]));
}
args
.
push_back
(
make_pair
(
sizeof
(
float
),
(
void
*
)
&
alpha
));
double
alphaDouble
=
alpha
;
// DON'T move into 'if' body
if
(
useDouble
)
args
.
push_back
(
make_pair
(
sizeof
(
double
),
(
void
*
)
&
alphaDouble
));
else
args
.
push_back
(
make_pair
(
sizeof
(
float
),
(
void
*
)
&
alpha
));
const
char
*
btype
=
NULL
;
const
char
*
btype
=
NULL
;
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
CV_Error
(
CV_StsUnsupportedFormat
,
"BORDER_WRAP is not supported!"
);
return
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
switch
(
borderType
&
~
BORDER_ISOLATED
)
{
case
BORDER_CONSTANT
:
btype
=
"BORDER_CONSTANT"
;
break
;
case
BORDER_REPLICATE
:
btype
=
"BORDER_REPLICATE"
;
break
;
case
BORDER_REFLECT
:
btype
=
"BORDER_REFLECT"
;
break
;
case
BORDER_WRAP
:
CV_Error
(
CV_StsUnsupportedFormat
,
"BORDER_WRAP is not supported!"
);
return
;
case
BORDER_REFLECT101
:
btype
=
"BORDER_REFLECT_101"
;
break
;
}
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
isIsolatedBorder
?
src
.
rows
:
src
.
wholerows
;
int
w
=
isIsolatedBorder
?
src
.
cols
:
src
.
wholecols
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
CV_Assert
(
w
>=
ksize
.
width
&&
h
>=
ksize
.
height
);
// TODO Other cases are not tested well
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
src
.
depth
(),
src
.
oclchannels
(),
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
size_t
lt
[
3
]
=
{
BLOCK_SIZE
,
1
,
1
};
size_t
gt
[
3
]
=
{
divUp
(
dst
.
cols
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
,
divUp
(
dst
.
rows
,
BLOCK_SIZE_Y
),
1
};
cl_kernel
kernel
=
openCLGetKernelFromSource
(
src
.
clCxt
,
&
filtering_boxFilter
,
"boxFilter"
,
-
1
,
-
1
,
build_options
);
size_t
kernelWorkGroupSize
;
openCLSafeCall
(
clGetKernelWorkGroupInfo
(
kernel
,
getClDeviceID
(
src
.
clCxt
),
CL_KERNEL_WORK_GROUP_SIZE
,
sizeof
(
size_t
),
&
kernelWorkGroupSize
,
0
));
if
(
lt
[
0
]
>
kernelWorkGroupSize
)
{
clReleaseKernel
(
kernel
);
CV_Assert
(
BLOCK_SIZE
>
kernelWorkGroupSize
);
tryWorkItems
=
kernelWorkGroupSize
;
continue
;
}
int
requiredTop
=
anchor
.
y
;
int
requiredLeft
=
BLOCK_SIZE
;
// not this: anchor.x;
int
requiredBottom
=
ksize
.
height
-
1
-
anchor
.
y
;
int
requiredRight
=
BLOCK_SIZE
;
// not this: ksize.width - 1 - anchor.x;
int
h
=
isIsolatedBorder
?
src
.
rows
:
src
.
wholerows
;
int
w
=
isIsolatedBorder
?
src
.
cols
:
src
.
wholecols
;
bool
extra_extrapolation
=
h
<
requiredTop
||
h
<
requiredBottom
||
w
<
requiredLeft
||
w
<
requiredRight
;
CV_Assert
(
w
>=
ksize
.
width
&&
h
>=
ksize
.
height
);
// TODO Other cases are not tested well
char
build_options
[
1024
];
sprintf
(
build_options
,
"-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D DATA_DEPTH=%d -D DATA_CHAN=%d -D USE_DOUBLE=%d -D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d -D %s -D %s -D %s"
,
(
int
)
BLOCK_SIZE
,
(
int
)
BLOCK_SIZE_Y
,
src
.
depth
(),
src
.
oclchannels
(),
useDouble
?
1
:
0
,
anchor
.
x
,
anchor
.
y
,
ksize
.
width
,
ksize
.
height
,
btype
,
extra_extrapolation
?
"EXTRA_EXTRAPOLATION"
:
"NO_EXTRA_EXTRAPOLATION"
,
isIsolatedBorder
?
"BORDER_ISOLATED"
:
"NO_BORDER_ISOLATED"
);
size_t
gt
[
3
]
=
{
divUp
(
dst
.
cols
,
BLOCK_SIZE
-
(
ksize
.
width
-
1
))
*
BLOCK_SIZE
,
divUp
(
dst
.
rows
,
BLOCK_SIZE_Y
),
1
},
lt
[
3
]
=
{
BLOCK_SIZE
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
filtering_boxFilter
,
"boxFilter"
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
openCLExecuteKernel
(
src
.
clCxt
,
kernel
,
gt
,
lt
,
args
);
// kernel will be released here
}
while
(
false
);
}
Ptr
<
BaseFilter_GPU
>
cv
::
ocl
::
getBoxFilter_GPU
(
int
/*srcType*/
,
int
/*dstType*/
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录