Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
e6e817e6
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e6e817e6
编写于
3月 28, 2014
作者:
A
Andrey Pavlenko
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Revert "Merge pull request #1779 from perping:integral_2.4"
This reverts commit
54ea5bba
, reversing changes made to
28e0d3d7
.
上级
a1d15192
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
110 addition
and
201 deletion
+110
-201
modules/ocl/doc/image_processing.rst
modules/ocl/doc/image_processing.rst
+4
-4
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+4
-4
modules/ocl/perf/perf_match_template.cpp
modules/ocl/perf/perf_match_template.cpp
+2
-2
modules/ocl/src/haar.cpp
modules/ocl/src/haar.cpp
+4
-43
modules/ocl/src/imgproc.cpp
modules/ocl/src/imgproc.cpp
+15
-34
modules/ocl/src/match_template.cpp
modules/ocl/src/match_template.cpp
+6
-18
modules/ocl/src/opencl/imgproc_integral.cl
modules/ocl/src/opencl/imgproc_integral.cl
+67
-76
modules/ocl/test/test_imgproc.cpp
modules/ocl/test/test_imgproc.cpp
+8
-20
未找到文件。
modules/ocl/doc/image_processing.rst
浏览文件 @
e6e817e6
...
@@ -65,15 +65,15 @@ ocl::integral
...
@@ -65,15 +65,15 @@ ocl::integral
-----------------
-----------------
Computes an integral image.
Computes an integral image.
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum
, int sdepth=-1
)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum
, int sdepth=-1
)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum)
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
:param sum: Integral image containing 32-bit unsigned integer
or 32-bit floating-point
.
:param sum: Integral image containing 32-bit unsigned integer
values packed into ``CV_32SC1``
.
:param sqsum: Sqsum values is ``CV_32FC1``
or ``CV_64FC1``
type.
:param sqsum: Sqsum values is ``CV_32FC1`` type.
.. seealso:: :ocv:func:`integral`
.. seealso:: :ocv:func:`integral`
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
浏览文件 @
e6e817e6
...
@@ -859,10 +859,10 @@ namespace cv
...
@@ -859,10 +859,10 @@ namespace cv
CV_EXPORTS
void
warpPerspective
(
const
oclMat
&
src
,
oclMat
&
dst
,
const
Mat
&
M
,
Size
dsize
,
int
flags
=
INTER_LINEAR
);
CV_EXPORTS
void
warpPerspective
(
const
oclMat
&
src
,
oclMat
&
dst
,
const
Mat
&
M
,
Size
dsize
,
int
flags
=
INTER_LINEAR
);
//! computes the integral image and integral for the squared image
//! computes the integral image and integral for the squared image
// sum will
support CV_32S, CV_32F, sqsum - support CV32F, CV_64F
// sum will
have CV_32S type, sqsum - CV32F type
// supports only CV_8UC1 source type
// supports only CV_8UC1 source type
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
,
int
sdepth
=-
1
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
int
sdepth
=-
1
);
CV_EXPORTS
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
);
CV_EXPORTS
void
cornerHarris
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
CV_EXPORTS
void
cornerHarris
(
const
oclMat
&
src
,
oclMat
&
dst
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
CV_EXPORTS
void
cornerHarris_dxdy
(
const
oclMat
&
src
,
oclMat
&
dst
,
oclMat
&
Dx
,
oclMat
&
Dy
,
CV_EXPORTS
void
cornerHarris_dxdy
(
const
oclMat
&
src
,
oclMat
&
dst
,
oclMat
&
Dx
,
oclMat
&
Dy
,
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
int
blockSize
,
int
ksize
,
double
k
,
int
bordertype
=
cv
::
BORDER_DEFAULT
);
...
@@ -936,7 +936,7 @@ namespace cv
...
@@ -936,7 +936,7 @@ namespace cv
Size
m_maxSize
;
Size
m_maxSize
;
vector
<
CvSize
>
sizev
;
vector
<
CvSize
>
sizev
;
vector
<
float
>
scalev
;
vector
<
float
>
scalev
;
oclMat
gimg1
,
gsum
,
gsqsum
,
gsqsum_t
;
oclMat
gimg1
,
gsum
,
gsqsum
;
void
*
buffers
;
void
*
buffers
;
};
};
...
...
modules/ocl/perf/perf_match_template.cpp
浏览文件 @
e6e817e6
...
@@ -109,13 +109,13 @@ OCL_PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate,
...
@@ -109,13 +109,13 @@ OCL_PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate,
oclDst
.
download
(
dst
);
oclDst
.
download
(
dst
);
SANITY_CHECK
(
dst
,
3
e-2
);
SANITY_CHECK
(
dst
,
2
e-2
);
}
}
else
if
(
RUN_PLAIN_IMPL
)
else
if
(
RUN_PLAIN_IMPL
)
{
{
TEST_CYCLE
()
cv
::
matchTemplate
(
src
,
templ
,
dst
,
CV_TM_CCORR_NORMED
);
TEST_CYCLE
()
cv
::
matchTemplate
(
src
,
templ
,
dst
,
CV_TM_CCORR_NORMED
);
SANITY_CHECK
(
dst
,
3
e-2
);
SANITY_CHECK
(
dst
,
2
e-2
);
}
}
else
else
OCL_PERF_ELSE
OCL_PERF_ELSE
...
...
modules/ocl/src/haar.cpp
浏览文件 @
e6e817e6
...
@@ -747,15 +747,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -747,15 +747,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
oclMat
gsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32SC1
);
oclMat
gsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32SC1
);
oclMat
gsqsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32FC1
);
oclMat
gsqsum
(
totalheight
+
4
,
gimg
.
cols
+
1
,
CV_32FC1
);
int
sdepth
=
0
;
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
sdepth
=
CV_64FC1
;
else
sdepth
=
CV_32FC1
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
oclMat
gsqsum_t
(
totalheight
+
4
,
gimg
.
cols
+
1
,
type
);
cl_mem
stagebuffer
;
cl_mem
stagebuffer
;
cl_mem
nodebuffer
;
cl_mem
nodebuffer
;
cl_mem
candidatebuffer
;
cl_mem
candidatebuffer
;
...
@@ -763,7 +754,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -763,7 +754,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
cv
::
Rect
roi
,
roi2
;
cv
::
Rect
roi
,
roi2
;
cv
::
Mat
imgroi
,
imgroisq
;
cv
::
Mat
imgroi
,
imgroisq
;
cv
::
ocl
::
oclMat
resizeroi
,
gimgroi
,
gimgroisq
;
cv
::
ocl
::
oclMat
resizeroi
,
gimgroi
,
gimgroisq
;
int
grp_per_CU
=
12
;
int
grp_per_CU
=
12
;
size_t
blocksize
=
8
;
size_t
blocksize
=
8
;
...
@@ -783,7 +773,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -783,7 +773,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
roi2
=
Rect
(
0
,
0
,
sz
.
width
-
1
,
sz
.
height
-
1
);
roi2
=
Rect
(
0
,
0
,
sz
.
width
-
1
,
sz
.
height
-
1
);
resizeroi
=
gimg1
(
roi2
);
resizeroi
=
gimg1
(
roi2
);
gimgroi
=
gsum
(
roi
);
gimgroi
=
gsum
(
roi
);
gimgroisq
=
gsqsum
_t
(
roi
);
gimgroisq
=
gsqsum
(
roi
);
int
width
=
gimgroi
.
cols
-
1
-
cascade
->
orig_window_size
.
width
;
int
width
=
gimgroi
.
cols
-
1
-
cascade
->
orig_window_size
.
width
;
int
height
=
gimgroi
.
rows
-
1
-
cascade
->
orig_window_size
.
height
;
int
height
=
gimgroi
.
rows
-
1
-
cascade
->
orig_window_size
.
height
;
scaleinfo
[
i
].
width_height
=
(
width
<<
16
)
|
height
;
scaleinfo
[
i
].
width_height
=
(
width
<<
16
)
|
height
;
...
@@ -797,13 +787,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -797,13 +787,8 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
scaleinfo
[
i
].
factor
=
factor
;
scaleinfo
[
i
].
factor
=
factor
;
cv
::
ocl
::
resize
(
gimg
,
resizeroi
,
Size
(
sz
.
width
-
1
,
sz
.
height
-
1
),
0
,
0
,
INTER_LINEAR
);
cv
::
ocl
::
resize
(
gimg
,
resizeroi
,
Size
(
sz
.
width
-
1
,
sz
.
height
-
1
),
0
,
0
,
INTER_LINEAR
);
cv
::
ocl
::
integral
(
resizeroi
,
gimgroi
,
gimgroisq
);
cv
::
ocl
::
integral
(
resizeroi
,
gimgroi
,
gimgroisq
);
indexy
+=
sz
.
height
;
indexy
+=
sz
.
height
;
}
}
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
...
@@ -1040,12 +1025,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
...
@@ -1040,12 +1025,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
int
n_factors
=
0
;
int
n_factors
=
0
;
oclMat
gsum
;
oclMat
gsum
;
oclMat
gsqsum
;
oclMat
gsqsum
;
oclMat
gsqsum_t
;
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum_t
);
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
CvSize
sz
;
CvSize
sz
;
vector
<
CvSize
>
sizev
;
vector
<
CvSize
>
sizev
;
vector
<
float
>
scalev
;
vector
<
float
>
scalev
;
...
@@ -1320,16 +1300,12 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1320,16 +1300,12 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
roi2
=
Rect
(
0
,
0
,
sz
.
width
-
1
,
sz
.
height
-
1
);
roi2
=
Rect
(
0
,
0
,
sz
.
width
-
1
,
sz
.
height
-
1
);
resizeroi
=
gimg1
(
roi2
);
resizeroi
=
gimg1
(
roi2
);
gimgroi
=
gsum
(
roi
);
gimgroi
=
gsum
(
roi
);
gimgroisq
=
gsqsum
_t
(
roi
);
gimgroisq
=
gsqsum
(
roi
);
cv
::
ocl
::
resize
(
gimg
,
resizeroi
,
Size
(
sz
.
width
-
1
,
sz
.
height
-
1
),
0
,
0
,
INTER_LINEAR
);
cv
::
ocl
::
resize
(
gimg
,
resizeroi
,
Size
(
sz
.
width
-
1
,
sz
.
height
-
1
),
0
,
0
,
INTER_LINEAR
);
cv
::
ocl
::
integral
(
resizeroi
,
gimgroi
,
gimgroisq
);
cv
::
ocl
::
integral
(
resizeroi
,
gimgroi
,
gimgroisq
);
indexy
+=
sz
.
height
;
indexy
+=
sz
.
height
;
}
}
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
gcascade
=
(
GpuHidHaarClassifierCascade
*
)(
cascade
->
hid_cascade
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
stage
=
(
GpuHidHaarStageClassifier
*
)(
gcascade
+
1
);
...
@@ -1391,11 +1367,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
...
@@ -1391,11 +1367,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
}
}
else
else
{
{
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum_t
);
cv
::
ocl
::
integral
(
gimg
,
gsum
,
gsqsum
);
if
(
gsqsum_t
.
depth
()
==
CV_64F
)
gsqsum_t
.
convertTo
(
gsqsum
,
CV_32FC1
);
else
gsqsum
=
gsqsum_t
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
gcascade
=
(
GpuHidHaarClassifierCascade
*
)
cascade
->
hid_cascade
;
...
@@ -1621,7 +1593,6 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
...
@@ -1621,7 +1593,6 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
gimg1
.
release
();
gimg1
.
release
();
gsum
.
release
();
gsum
.
release
();
gsqsum
.
release
();
gsqsum
.
release
();
gsqsum_t
.
release
();
}
}
else
if
(
!
(
m_flags
&
CV_HAAR_SCALE_IMAGE
)
&&
(
flags
&
CV_HAAR_SCALE_IMAGE
))
else
if
(
!
(
m_flags
&
CV_HAAR_SCALE_IMAGE
)
&&
(
flags
&
CV_HAAR_SCALE_IMAGE
))
{
{
...
@@ -1696,16 +1667,6 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
...
@@ -1696,16 +1667,6 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
gsum
.
create
(
totalheight
+
4
,
cols
+
1
,
CV_32SC1
);
gsum
.
create
(
totalheight
+
4
,
cols
+
1
,
CV_32SC1
);
gsqsum
.
create
(
totalheight
+
4
,
cols
+
1
,
CV_32FC1
);
gsqsum
.
create
(
totalheight
+
4
,
cols
+
1
,
CV_32FC1
);
int
sdepth
=
0
;
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_DOUBLE
))
sdepth
=
CV_64FC1
;
else
sdepth
=
CV_32FC1
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
gsqsum_t
.
create
(
totalheight
+
4
,
cols
+
1
,
type
);
scaleinfo
=
(
detect_piramid_info
*
)
malloc
(
sizeof
(
detect_piramid_info
)
*
loopcount
);
scaleinfo
=
(
detect_piramid_info
*
)
malloc
(
sizeof
(
detect_piramid_info
)
*
loopcount
);
for
(
int
i
=
0
;
i
<
loopcount
;
i
++
)
for
(
int
i
=
0
;
i
<
loopcount
;
i
++
)
{
{
...
...
modules/ocl/src/imgproc.cpp
浏览文件 @
e6e817e6
...
@@ -898,7 +898,7 @@ namespace cv
...
@@ -898,7 +898,7 @@ namespace cv
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// integral
// integral
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
,
int
sdepth
)
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
oclMat
&
sqsum
)
{
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
if
(
!
src
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
if
(
!
src
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
...
@@ -907,11 +907,6 @@ namespace cv
...
@@ -907,11 +907,6 @@ namespace cv
return
;
return
;
}
}
if
(
sdepth
<=
0
)
sdepth
=
CV_32S
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
int
vlen
=
4
;
int
vlen
=
4
;
int
offset
=
src
.
offset
/
vlen
;
int
offset
=
src
.
offset
/
vlen
;
int
pre_invalid
=
src
.
offset
%
vlen
;
int
pre_invalid
=
src
.
offset
%
vlen
;
...
@@ -919,26 +914,17 @@ namespace cv
...
@@ -919,26 +914,17 @@ namespace cv
oclMat
t_sum
,
t_sqsum
;
oclMat
t_sum
,
t_sqsum
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
depth
=
src
.
depth
()
==
CV_8U
?
CV_32S
:
CV_64F
;
char
build_option
[
250
];
int
type
=
CV_MAKE_TYPE
(
depth
,
1
);
if
(
Context
::
getContext
()
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
))
{
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_64FC1
);
sqsum
.
create
(
h
,
w
,
CV_64FC1
);
sprintf
(
build_option
,
"-D TYPE=double -D TYPE4=double4 -D convert_TYPE4=convert_double4"
);
}
else
{
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_32FC1
);
sqsum
.
create
(
h
,
w
,
CV_32FC1
);
sprintf
(
build_option
,
"-D TYPE=float -D TYPE4=float4 -D convert_TYPE4=convert_float4"
);
}
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
sum
.
create
(
h
,
w
,
type
);
int
sum_offset
=
sum
.
offset
/
sum
.
elemSize
();
t_sqsum
.
create
(
src
.
cols
,
src
.
rows
,
CV_32FC1
);
int
sqsum_offset
=
sqsum
.
offset
/
sqsum
.
elemSize
();
sqsum
.
create
(
h
,
w
,
CV_32FC1
);
int
sum_offset
=
sum
.
offset
/
vlen
;
int
sqsum_offset
=
sqsum
.
offset
/
vlen
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
...
@@ -950,9 +936,8 @@ namespace cv
...
@@ -950,9 +936,8 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sqsum
.
step
));
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_cols"
,
gt
,
lt
,
args
,
-
1
,
sdepth
,
build_option
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_cols"
,
gt
,
lt
,
args
,
-
1
,
depth
);
args
.
clear
();
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
...
@@ -962,16 +947,15 @@ namespace cv
...
@@ -962,16 +947,15 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sqsum_offset
));
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_rows"
,
gt2
,
lt2
,
args
,
-
1
,
sdepth
,
build_option
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral
,
"integral_rows"
,
gt2
,
lt2
,
args
,
-
1
,
depth
);
}
}
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
,
int
sdepth
)
void
integral
(
const
oclMat
&
src
,
oclMat
&
sum
)
{
{
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
CV_Assert
(
src
.
type
()
==
CV_8UC1
);
int
vlen
=
4
;
int
vlen
=
4
;
...
@@ -979,13 +963,10 @@ namespace cv
...
@@ -979,13 +963,10 @@ namespace cv
int
pre_invalid
=
src
.
offset
%
vlen
;
int
pre_invalid
=
src
.
offset
%
vlen
;
int
vcols
=
(
pre_invalid
+
src
.
cols
+
vlen
-
1
)
/
vlen
;
int
vcols
=
(
pre_invalid
+
src
.
cols
+
vlen
-
1
)
/
vlen
;
if
(
sdepth
<=
0
)
sdepth
=
CV_32S
;
sdepth
=
CV_MAT_DEPTH
(
sdepth
);
int
type
=
CV_MAKE_TYPE
(
sdepth
,
1
);
oclMat
t_sum
;
oclMat
t_sum
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
w
=
src
.
cols
+
1
,
h
=
src
.
rows
+
1
;
int
depth
=
src
.
depth
()
==
CV_8U
?
CV_32S
:
CV_32F
;
int
type
=
CV_MAKE_TYPE
(
depth
,
1
);
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
t_sum
.
create
(
src
.
cols
,
src
.
rows
,
type
);
sum
.
create
(
h
,
w
,
type
);
sum
.
create
(
h
,
w
,
type
);
...
@@ -1001,7 +982,7 @@ namespace cv
...
@@ -1001,7 +982,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
t_sum
.
step
));
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
size_t
gt
[
3
]
=
{((
vcols
+
1
)
/
2
)
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_cols"
,
gt
,
lt
,
args
,
-
1
,
s
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_cols"
,
gt
,
lt
,
args
,
-
1
,
depth
);
args
.
clear
();
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
t_sum
.
data
));
...
@@ -1012,7 +993,7 @@ namespace cv
...
@@ -1012,7 +993,7 @@ namespace cv
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
sum_offset
));
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
size_t
gt2
[
3
]
=
{
t_sum
.
cols
*
32
,
1
,
1
},
lt2
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_rows"
,
gt2
,
lt2
,
args
,
-
1
,
s
depth
);
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_integral_sum
,
"integral_sum_rows"
,
gt2
,
lt2
,
args
,
-
1
,
depth
);
}
}
/////////////////////// corner //////////////////////////////
/////////////////////// corner //////////////////////////////
...
...
modules/ocl/src/match_template.cpp
浏览文件 @
e6e817e6
...
@@ -245,15 +245,12 @@ namespace cv
...
@@ -245,15 +245,12 @@ namespace cv
void
matchTemplate_CCORR_NORMED
(
void
matchTemplate_CCORR_NORMED
(
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
const
oclMat
&
image
,
const
oclMat
&
templ
,
oclMat
&
result
,
MatchTemplateBuf
&
buf
)
{
{
cv
::
ocl
::
oclMat
temp
;
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
matchTemplate_CCORR
(
image
,
templ
,
result
,
buf
);
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
],
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
integral
(
image
.
reshape
(
1
),
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
temp
.
convertTo
(
buf
.
image_sqsums
[
0
],
CV_32FC1
);
else
buf
.
image_sqsums
[
0
]
=
temp
;
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
unsigned
long
long
templ_sqsum
=
(
unsigned
long
long
)
sqrSum
(
templ
.
reshape
(
1
))[
0
];
Context
*
clCxt
=
image
.
clCxt
;
Context
*
clCxt
=
image
.
clCxt
;
...
@@ -419,12 +416,7 @@ namespace cv
...
@@ -419,12 +416,7 @@ namespace cv
{
{
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
buf
.
image_sqsums
.
resize
(
1
);
cv
::
ocl
::
oclMat
temp
;
integral
(
image
,
buf
.
image_sums
[
0
],
buf
.
image_sqsums
[
0
]);
integral
(
image
,
buf
.
image_sums
[
0
],
temp
);
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
buf
.
image_sqsums
[
0
],
CV_32FC1
);
else
buf
.
image_sqsums
[
0
]
=
temp
;
templ_sum
[
0
]
=
(
float
)
sum
(
templ
)[
0
];
templ_sum
[
0
]
=
(
float
)
sum
(
templ
)[
0
];
...
@@ -460,14 +452,10 @@ namespace cv
...
@@ -460,14 +452,10 @@ namespace cv
templ_sum
*=
scale
;
templ_sum
*=
scale
;
buf
.
image_sums
.
resize
(
buf
.
images
.
size
());
buf
.
image_sums
.
resize
(
buf
.
images
.
size
());
buf
.
image_sqsums
.
resize
(
buf
.
images
.
size
());
buf
.
image_sqsums
.
resize
(
buf
.
images
.
size
());
cv
::
ocl
::
oclMat
temp
;
for
(
int
i
=
0
;
i
<
image
.
oclchannels
();
i
++
)
for
(
int
i
=
0
;
i
<
image
.
oclchannels
();
i
++
)
{
{
integral
(
buf
.
images
[
i
],
buf
.
image_sums
[
i
],
temp
);
integral
(
buf
.
images
[
i
],
buf
.
image_sums
[
i
],
buf
.
image_sqsums
[
i
]);
if
(
temp
.
depth
()
==
CV_64F
)
temp
.
convertTo
(
buf
.
image_sqsums
[
i
],
CV_32FC1
);
else
buf
.
image_sqsums
[
i
]
=
temp
;
}
}
switch
(
image
.
oclchannels
())
switch
(
image
.
oclchannels
())
...
...
modules/ocl/src/opencl/imgproc_integral.cl
浏览文件 @
e6e817e6
...
@@ -49,9 +49,6 @@
...
@@ -49,9 +49,6 @@
#
elif
defined
(
cl_khr_fp64
)
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
CONVERT
(
step
)
((
step
)
>>1
)
#
else
#
define
CONVERT
(
step
)
((
step
))
#
endif
#
endif
#
define
LSIZE
256
#
define
LSIZE
256
...
@@ -64,17 +61,17 @@
...
@@ -64,17 +61,17 @@
#
define
GET_CONFLICT_OFFSET
(
lid
)
((
lid
)
>>
LOG_NUM_BANKS
)
#
define
GET_CONFLICT_OFFSET
(
lid
)
((
lid
)
>>
LOG_NUM_BANKS
)
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
TYPE
*sqsum,
kernel
void
integral_cols_D4
(
__global
uchar4
*src,__global
int
*sum
,
__global
float
*sqsum,
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
,int
dst1_step
)
int
src_offset,int
pre_invalid,int
rows,int
cols,int
src_step,int
dst_step
)
{
{
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int4
src_t[2],
sum_t[2]
;
int4
src_t[2],
sum_t[2]
;
TYPE
4
sqsum_t[2]
;
float
4
sqsum_t[2]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
int4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
TYPE
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
int*
sum_p
;
__local
int*
sum_p
;
__local
TYPE
*
sqsum_p
;
__local
float
*
sqsum_p
;
src_step
=
src_step
>>
2
;
src_step
=
src_step
>>
2
;
gid
=
gid
<<
1
;
gid
=
gid
<<
1
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
...
@@ -83,17 +80,17 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
...
@@ -83,17 +80,17 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid
+
1
,
cols
-
1
)
]
)
:
0
)
;
src_t[1]
=
(
i
+
lid
<
rows
?
convert_int4
(
src[src_offset
+
(
lid+i
)
*
src_step
+
min
(
gid
+
1
,
cols
-
1
)
]
)
:
0
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[0]
=
(
i
==
0
?
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sum[0][bf_loc]
=
src_t[0]
;
lm_sqsum[0][bf_loc]
=
convert_
TYPE
4
(
src_t[0]
*
src_t[0]
)
;
lm_sqsum[0][bf_loc]
=
convert_
float
4
(
src_t[0]
*
src_t[0]
)
;
lm_sum[1][bf_loc]
=
src_t[1]
;
lm_sum[1][bf_loc]
=
src_t[1]
;
lm_sqsum[1][bf_loc]
=
convert_
TYPE
4
(
src_t[1]
*
src_t[1]
)
;
lm_sqsum[1][bf_loc]
=
convert_
float
4
(
src_t[1]
*
src_t[1]
)
;
int
offset
=
1
;
int
offset
=
1
;
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
for
(
int
d
=
LSIZE
>>
1
; d > 0; d>>=1)
...
@@ -134,8 +131,7 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
...
@@ -134,8 +131,7 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
}
}
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/4,
loc_s1
=
loc_s0
+
dst_step
;
int
loc_s0
=
gid
*
dst_step
+
i
+
lid
-
1
-
pre_invalid
*
dst_step
/
4
,
loc_s1
=
loc_s0
+
dst_step
;
int
loc_sq0
=
gid
*
CONVERT
(
dst1_step
)
+
i
+
lid
-
1
-
pre_invalid
*
dst1_step
/
sizeof
(
TYPE
)
,
loc_sq1
=
loc_sq0
+
CONVERT
(
dst1_step
)
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
{
lm_sum[0][bf_loc]
+=
sum_t[0]
;
lm_sum[0][bf_loc]
+=
sum_t[0]
;
...
@@ -143,20 +139,20 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
...
@@ -143,20 +139,20 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
sum_p
=
(
__local
int*
)(
&
(
lm_sum[0][bf_loc]
))
;
sum_p
=
(
__local
int*
)(
&
(
lm_sum[0][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
for
(
int
k
=
0
; k < 4; k++)
{
{
if
(
gid
*
4
+
k
>=
cols
+
pre_invalid
|
| gid * 4 + k < pre_invalid) continue;
if
(
gid
*
4
+
k
>=
cols
+
pre_invalid
|
| gid * 4 + k < pre_invalid) continue;
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
sum[loc_s0 + k * dst_step / 4] = sum_p[k];
sqsum[loc_s
q0 + k * dst1_step / sizeof(TYPE)
] = sqsum_p[k];
sqsum[loc_s
0 + k * dst_step / 4
] = sqsum_p[k];
}
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[1][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
for(int k = 0; k < 4; k++)
{
{
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
if(gid * 4 + k + 4 >= cols + pre_invalid) break;
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
sum[loc_s1 + k * dst_step / 4] = sum_p[k];
sqsum[loc_s
q1 + k * dst1_step / sizeof(TYPE)
] = sqsum_p[k];
sqsum[loc_s
1 + k * dst_step / 4
] = sqsum_p[k];
}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
...
@@ -164,32 +160,30 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
...
@@ -164,32 +160,30 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TY
}
}
kernel void integral_rows_D4(__global int4 *srcsum,__global
TYPE
4 * srcsqsum,__global int *sum ,
kernel void integral_rows_D4(__global int4 *srcsum,__global
float
4 * srcsqsum,__global int *sum ,
__global
TYPE *sqsum,int rows,int cols,int src_step,int src1
_step,int sum_step,
__global
float *sqsum,int rows,int cols,int src
_step,int sum_step,
int sqsum_step,int sum_offset,int sqsum_offset)
int sqsum_step,int sum_offset,int sqsum_offset)
{
{
int lid = get_local_id(0);
int lid = get_local_id(0);
int gid = get_group_id(0);
int gid = get_group_id(0);
int4 src_t[2], sum_t[2];
int4 src_t[2], sum_t[2];
TYPE
4 sqsrc_t[2],sqsum_t[2];
float
4 sqsrc_t[2],sqsum_t[2];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local int4 lm_sum[2][LSIZE + LOG_LSIZE];
__local
TYPE
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local
float
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local int *sum_p;
__local int *sum_p;
__local
TYPE
*sqsum_p;
__local
float
*sqsum_p;
src_step = src_step >> 4;
src_step = src_step >> 4;
src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
gid <<= 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
for(int i = 0; i < rows; i =i + LSIZE_1)
{
{
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid
* 2
] : (int4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src
1_step + gid ] : (TYPE
4)0;
sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src
_step + gid * 2] : (float
4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid + 1] : (int4)0;
src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid
* 2
+ 1] : (int4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src
1_step + gid + 1] : (TYPE
4)0;
sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src
_step + gid * 2 + 1] : (float
4)0;
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
float
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
float
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
...
@@ -245,18 +239,17 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__g
...
@@ -245,18 +239,17 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__g
}
}
if(i + lid == 0)
if(i + lid == 0)
{
{
int loc0 = gid * sum_step;
int loc0 = gid
* 2
* sum_step;
int loc1 = gid
* CONVERT(sqsum_step)
;
int loc1 = gid
* 2 * sqsum_step
;
for(int k = 1; k <= 8; k++)
for(int k = 1; k <= 8; k++)
{
{
if(gid *
4
+ k > cols) break;
if(gid *
8
+ k > cols) break;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
sum[sum_offset + loc0 + k * sum_step / 4] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step /
sizeof(TYPE)
] = 0;
sqsum[sqsum_offset + loc1 + k * sqsum_step /
4
] = 0;
}
}
}
}
int loc_s0 = sum_offset + gid * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
int loc_sq0 = sqsum_offset + gid * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
if(lid > 0 && (i+lid) <= rows)
if(lid > 0 && (i+lid) <= rows)
{
{
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[0][bf_loc] += sum_t[0];
...
@@ -264,37 +257,37 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__g
...
@@ -264,37 +257,37 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__g
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[0][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
for(int k = 0; k < 4; k++)
{
{
if(gid *
4
+ k >= cols) break;
if(gid *
8
+ k >= cols) break;
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
sum[loc_s0 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq0 + k * sqsum_step /
sizeof(TYPE)
] = sqsum_p[k];
sqsum[loc_sq0 + k * sqsum_step /
4
] = sqsum_p[k];
}
}
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[1][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[1][bf_loc]));
for(int k = 0; k < 4; k++)
for(int k = 0; k < 4; k++)
{
{
if(gid *
4
+ 4 + k >= cols) break;
if(gid *
8
+ 4 + k >= cols) break;
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
sum[loc_s1 + k * sum_step / 4] = sum_p[k];
sqsum[loc_sq1 + k * sqsum_step /
sizeof(TYPE)
] = sqsum_p[k];
sqsum[loc_sq1 + k * sqsum_step /
4
] = sqsum_p[k];
}
}
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
}
}
}
}
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
TYPE
*sqsum,
kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
float
*sqsum,
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step
, int dst1_step
)
int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
{
{
int lid = get_local_id(0);
int lid = get_local_id(0);
int gid = get_group_id(0);
int gid = get_group_id(0);
float4 src_t[2], sum_t[2];
float4 src_t[2], sum_t[2];
TYPE
4 sqsum_t[2];
float
4 sqsum_t[2];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local float4 lm_sum[2][LSIZE + LOG_LSIZE];
__local
TYPE
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local
float
4 lm_sqsum[2][LSIZE + LOG_LSIZE];
__local float* sum_p;
__local float* sum_p;
__local
TYPE
* sqsum_p;
__local
float
* sqsum_p;
src_step = src_step >> 2;
src_step = src_step >> 2;
gid = gid << 1;
gid = gid << 1;
for(int i = 0; i < rows; i =i + LSIZE_1)
for(int i = 0; i < rows; i =i + LSIZE_1)
...
@@ -303,17 +296,17 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
...
@@ -303,17 +296,17 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sqsum_t[0] = (i == 0 ? (
float
4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sum_t[1] = (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
TYPE
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
sqsum_t[1] = (i == 0 ? (
float
4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
lm_sum[0][bf_loc] = src_t[0];
lm_sum[0][bf_loc] = src_t[0];
lm_sqsum[0][bf_loc] = convert_
TYPE
4(src_t[0] * src_t[0]);
lm_sqsum[0][bf_loc] = convert_
float
4(src_t[0] * src_t[0]);
lm_sum[1][bf_loc] = src_t[1];
lm_sum[1][bf_loc] = src_t[1];
lm_sqsum[1][bf_loc] = convert_
TYPE
4(src_t[1] * src_t[1]);
lm_sqsum[1][bf_loc] = convert_
float
4(src_t[1] * src_t[1]);
int offset = 1;
int offset = 1;
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
for(int d = LSIZE >> 1 ; d > 0; d>>=1)
...
@@ -355,7 +348,6 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
...
@@ -355,7 +348,6 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
if(lid > 0 && (i+lid) <= rows)
if(lid > 0 && (i+lid) <= rows)
{
{
lm_sum[0][bf_loc] += sum_t[0];
lm_sum[0][bf_loc] += sum_t[0];
...
@@ -363,20 +355,20 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
...
@@ -363,20 +355,20 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[0][bf_loc] += sqsum_t[0];
lm_sqsum[1][bf_loc] += sqsum_t[1];
lm_sqsum[1][bf_loc] += sqsum_t[1];
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
sqsum_p = (__local
TYPE
*)(&(lm_sqsum[0][bf_loc]));
sqsum_p = (__local
float
*)(&(lm_sqsum[0][bf_loc]));
for(int k = 0; k < 4; k++)
for(int k = 0; k < 4; k++)
{
{
if(gid * 4 + k >= cols + pre_invalid |
|
gid
*
4
+
k
<
pre_invalid
)
continue
;
if(gid * 4 + k >= cols + pre_invalid |
|
gid
*
4
+
k
<
pre_invalid
)
continue
;
sum[loc_s0
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sum[loc_s0
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sqsum[loc_s
q0
+
k
*
dst1_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
sqsum[loc_s
0
+
k
*
dst_step
/
4
]
=
sqsum_p[k]
;
}
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
for
(
int
k
=
0
; k < 4; k++)
{
{
if
(
gid
*
4
+
k
+
4
>=
cols
+
pre_invalid
)
break
;
if
(
gid
*
4
+
k
+
4
>=
cols
+
pre_invalid
)
break
;
sum[loc_s1
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sum[loc_s1
+
k
*
dst_step
/
4]
=
sum_p[k]
;
sqsum[loc_s
q1
+
k
*
dst1_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
sqsum[loc_s
1
+
k
*
dst_step
/
4
]
=
sqsum_p[k]
;
}
}
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
@@ -384,31 +376,30 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
...
@@ -384,31 +376,30 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
}
}
kernel
void
integral_rows_D5
(
__global
float4
*srcsum,__global
TYPE
4
*
srcsqsum,__global
float
*sum
,
kernel
void
integral_rows_D5
(
__global
float4
*srcsum,__global
float
4
*
srcsqsum,__global
float
*sum
,
__global
TYPE
*sqsum,int
rows,int
cols,int
src_step,int
src1_step,
int
sum_step,
__global
float
*sqsum,int
rows,int
cols,int
src_step,
int
sum_step,
int
sqsum_step,int
sum_offset,int
sqsum_offset
)
int
sqsum_step,int
sum_offset,int
sqsum_offset
)
{
{
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
float4
src_t[2],
sum_t[2]
;
float4
src_t[2],
sum_t[2]
;
TYPE
4
sqsrc_t[2],sqsum_t[2]
;
float
4
sqsrc_t[2],sqsum_t[2]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
float4
lm_sum[2][LSIZE
+
LOG_LSIZE]
;
__local
TYPE
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
4
lm_sqsum[2][LSIZE
+
LOG_LSIZE]
;
__local
float
*sum_p
;
__local
float
*sum_p
;
__local
TYPE
*sqsum_p
;
__local
float
*sqsum_p
;
src_step
=
src_step
>>
4
;
src_step
=
src_step
>>
4
;
src1_step
=
(
src1_step
/
sizeof
(
TYPE
))
>>
2
;
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
for
(
int
i
=
0
; i < rows; i =i + LSIZE_1)
{
{
src_t[0]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2]
:
(
float4
)
0
;
src_t[0]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2]
:
(
float4
)
0
;
sqsrc_t[0]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
1_step
+
gid
*
2]
:
(
TYPE
4
)
0
;
sqsrc_t[0]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
_step
+
gid
*
2]
:
(
float
4
)
0
;
src_t[1]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2
+
1]
:
(
float4
)
0
;
src_t[1]
=
i
+
lid
<
rows
?
srcsum[
(
lid+i
)
*
src_step
+
gid
*
2
+
1]
:
(
float4
)
0
;
sqsrc_t[1]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
1_step
+
gid
*
2
+
1]
:
(
TYPE
4
)
0
;
sqsrc_t[1]
=
i
+
lid
<
rows
?
srcsqsum[
(
lid+i
)
*
src
_step
+
gid
*
2
+
1]
:
(
float
4
)
0
;
sum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[0]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[0]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[0][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sum_t[1]
=
(
i
==
0
?
(
float4
)
0
:
lm_sum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
TYPE
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
sqsum_t[1]
=
(
i
==
0
?
(
float
4
)
0
:
lm_sqsum[1][LSIZE_2
+
LOG_LSIZE]
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
int
bf_loc
=
lid
+
GET_CONFLICT_OFFSET
(
lid
)
;
...
@@ -465,16 +456,16 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,_
...
@@ -465,16 +456,16 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,_
if
(
i
+
lid
==
0
)
if
(
i
+
lid
==
0
)
{
{
int
loc0
=
gid
*
2
*
sum_step
;
int
loc0
=
gid
*
2
*
sum_step
;
int
loc1
=
gid
*
2
*
CONVERT
(
sqsum_step
)
;
int
loc1
=
gid
*
2
*
sqsum_step
;
for
(
int
k
=
1
; k <= 8; k++)
for
(
int
k
=
1
; k <= 8; k++)
{
{
if
(
gid
*
8
+
k
>
cols
)
break
;
if
(
gid
*
8
+
k
>
cols
)
break
;
sum[sum_offset
+
loc0
+
k
*
sum_step
/
4]
=
0
;
sum[sum_offset
+
loc0
+
k
*
sum_step
/
4]
=
0
;
sqsum[sqsum_offset
+
loc1
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
0
;
sqsum[sqsum_offset
+
loc1
+
k
*
sqsum_step
/
4
]
=
0
;
}
}
}
}
int
loc_s0
=
sum_offset
+
gid
*
2
*
sum_step
+
sum_step
/
4
+
i
+
lid,
loc_s1
=
loc_s0
+
sum_step
;
int
loc_s0
=
sum_offset
+
gid
*
2
*
sum_step
+
sum_step
/
4
+
i
+
lid,
loc_s1
=
loc_s0
+
sum_step
;
int
loc_sq0
=
sqsum_offset
+
gid
*
2
*
CONVERT
(
sqsum_step
)
+
sqsum_step
/
sizeof
(
TYPE
)
+
i
+
lid,
loc_sq1
=
loc_sq0
+
CONVERT
(
sqsum_step
)
;
int
loc_sq0
=
sqsum_offset
+
gid
*
2
*
sqsum_step
+
sqsum_step
/
4
+
i
+
lid,
loc_sq1
=
loc_sq0
+
sqsum_step
;
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
if
(
lid
>
0
&&
(
i+lid
)
<=
rows
)
{
{
lm_sum[0][bf_loc]
+=
sum_t[0]
;
lm_sum[0][bf_loc]
+=
sum_t[0]
;
...
@@ -482,20 +473,20 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,_
...
@@ -482,20 +473,20 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,_
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[0][bf_loc]
+=
sqsum_t[0]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
lm_sqsum[1][bf_loc]
+=
sqsum_t[1]
;
sum_p
=
(
__local
float*
)(
&
(
lm_sum[0][bf_loc]
))
;
sum_p
=
(
__local
float*
)(
&
(
lm_sum[0][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[0][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
for
(
int
k
=
0
; k < 4; k++)
{
{
if
(
gid
*
8
+
k
>=
cols
)
break
;
if
(
gid
*
8
+
k
>=
cols
)
break
;
sum[loc_s0
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sum[loc_s0
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sqsum[loc_sq0
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
sqsum[loc_sq0
+
k
*
sqsum_step
/
4
]
=
sqsum_p[k]
;
}
}
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sum_p
=
(
__local
float*
)(
&
(
lm_sum[1][bf_loc]
))
;
sqsum_p
=
(
__local
TYPE
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
sqsum_p
=
(
__local
float
*
)(
&
(
lm_sqsum[1][bf_loc]
))
;
for
(
int
k
=
0
; k < 4; k++)
for
(
int
k
=
0
; k < 4; k++)
{
{
if
(
gid
*
8
+
4
+
k
>=
cols
)
break
;
if
(
gid
*
8
+
4
+
k
>=
cols
)
break
;
sum[loc_s1
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sum[loc_s1
+
k
*
sum_step
/
4]
=
sum_p[k]
;
sqsum[loc_sq1
+
k
*
sqsum_step
/
sizeof
(
TYPE
)
]
=
sqsum_p[k]
;
sqsum[loc_sq1
+
k
*
sqsum_step
/
4
]
=
sqsum_p[k]
;
}
}
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
modules/ocl/test/test_imgproc.cpp
浏览文件 @
e6e817e6
...
@@ -295,33 +295,23 @@ OCL_TEST_P(CornerHarris, Mat)
...
@@ -295,33 +295,23 @@ OCL_TEST_P(CornerHarris, Mat)
//////////////////////////////////integral/////////////////////////////////////////////////
//////////////////////////////////integral/////////////////////////////////////////////////
struct
Integral
:
typedef
ImgprocTestBase
Integral
;
public
ImgprocTestBase
{
int
sdepth
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
blockSize
=
GET_PARAM
(
1
);
sdepth
=
GET_PARAM
(
2
);
useRoi
=
GET_PARAM
(
3
);
}
};
OCL_TEST_P
(
Integral
,
Mat1
)
OCL_TEST_P
(
Integral
,
Mat1
)
{
{
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
{
random_roi
();
random_roi
();
ocl
::
integral
(
gsrc_roi
,
gdst_roi
,
sdepth
);
ocl
::
integral
(
gsrc_roi
,
gdst_roi
);
integral
(
src_roi
,
dst_roi
,
sdepth
);
integral
(
src_roi
,
dst_roi
);
Near
();
Near
();
}
}
}
}
OCL_TEST_P
(
Integral
,
Mat2
)
// TODO wrong output type
OCL_TEST_P
(
Integral
,
DISABLED_Mat2
)
{
{
Mat
dst1
;
Mat
dst1
;
ocl
::
oclMat
gdst1
;
ocl
::
oclMat
gdst1
;
...
@@ -330,12 +320,10 @@ OCL_TEST_P(Integral, Mat2)
...
@@ -330,12 +320,10 @@ OCL_TEST_P(Integral, Mat2)
{
{
random_roi
();
random_roi
();
integral
(
src_roi
,
dst
_roi
,
dst1
,
sdepth
);
integral
(
src_roi
,
dst
1
,
dst_roi
);
ocl
::
integral
(
gsrc_roi
,
gdst
_roi
,
gdst1
,
sdepth
);
ocl
::
integral
(
gsrc_roi
,
gdst
1
,
gdst_roi
);
Near
();
Near
();
if
(
gdst1
.
clCxt
->
supportsFeature
(
ocl
::
FEATURE_CL_DOUBLE
))
EXPECT_MAT_NEAR
(
dst1
,
Mat
(
gdst1
),
0.
);
}
}
}
}
...
@@ -575,7 +563,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
...
@@ -575,7 +563,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Integral
,
Combine
(
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Integral
,
Combine
(
Values
((
MatType
)
CV_8UC1
),
// TODO does not work with CV_32F, CV_64F
Values
((
MatType
)
CV_8UC1
),
// TODO does not work with CV_32F, CV_64F
Values
(
0
),
// not used
Values
(
0
),
// not used
Values
(
(
MatType
)
CV_32SC1
,
(
MatType
)
CV_32FC1
),
Values
(
0
),
// not used
Bool
()));
Bool
()));
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Threshold
,
Combine
(
INSTANTIATE_TEST_CASE_P
(
Imgproc
,
Threshold
,
Combine
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录