Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
69a0b5dd
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
69a0b5dd
编写于
4月 15, 2013
作者:
Y
yao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add OclCascadeClassifierBuf interface
上级
abe2ea59
变更
5
展开全部
显示空白变更内容
内联
并排
Showing
5 changed files
with
956 additions
and
499 deletions
+956
-499
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+38
-0
modules/ocl/src/haar.cpp
modules/ocl/src/haar.cpp
+587
-128
modules/ocl/src/opencl/haarobjectdetect.cl
modules/ocl/src/opencl/haarobjectdetect.cl
+142
-176
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+141
-145
modules/ocl/test/test_haar.cpp
modules/ocl/test/test_haar.cpp
+48
-50
未找到文件。
modules/ocl/include/opencv2/ocl/ocl.hpp
浏览文件 @
69a0b5dd
...
...
@@ -802,6 +802,44 @@ namespace cv
int
minNeighbors
,
int
flags
,
CvSize
minSize
=
cvSize
(
0
,
0
),
CvSize
maxSize
=
cvSize
(
0
,
0
));
};
/**
 * Cascade classifier variant for the OpenCL module that exposes a
 * detectMultiScale() overload taking an oclMat and an explicit release().
 *
 * The object caches the parameters of a detection run (image size, flags,
 * scale factor, min/max object size) together with per-scale vectors and
 * device matrices as private members.
 * NOTE(review): presumably these are reused across successive
 * detectMultiScale() calls while the parameters stay the same; confirm
 * against the implementation in haar.cpp.
 */
class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier
{
public:
    /**
     * Creates an empty, not-yet-initialized classifier.
     *
     * Every scalar member receives a defined value (listed in declaration
     * order) so that no member is read while indeterminate before the first
     * detection run. m_minSize, m_maxSize, sizev, scalev and the oclMat
     * members are default-constructed.
     */
    OclCascadeClassifierBuf()
        : m_rows(0),
          m_cols(0),
          m_flags(0),
          m_loopcount(0),
          m_nodenum(0),
          findBiggestObject(false),
          initialized(false),
          m_scaleFactor(0),
          buffers(NULL)
    {}

    // NOTE(review): the destructor intentionally does nothing here; whatever
    // `buffers` points to is presumably freed only by an explicit release().
    // Confirm in haar.cpp whether release() is safe to call from the
    // destructor before changing this.
    ~OclCascadeClassifierBuf() {}

    /**
     * Runs multi-scale detection on a device image.
     *
     * @param image        input image held in an oclMat
     * @param faces        output vector that receives the detected rectangles
     * @param scaleFactor  scale step between pyramid levels (default 1.1)
     * @param minNeighbors neighbour threshold used when grouping (default 3)
     * @param flags        detection flags (default 0)
     * @param minSize      minimum object size (default Size())
     * @param maxSize      maximum object size (default Size())
     */
    void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
                          double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
                          Size minSize = Size(), Size maxSize = Size());

    /** Releases the resources held by this object (implemented in haar.cpp). */
    void release();

private:
    // Internal set-up helpers; their definitions live in haar.cpp.
    void Init(const int rows, const int cols, double scaleFactor, int flags,
              const int outputsz, const size_t localThreads[],
              CvSize minSize, CvSize maxSize);
    void CreateBaseBufs(const int datasize, const int totalclassifier,
                        const int flags, const int outputsz);
    void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
                                 const double scaleFactor, const size_t localThreads[],
                                 CvSize minSize, CvSize maxSize);

    // Produces the final `faces` output from a rectangle list and its weights.
    void GenResult(CV_OUT std::vector<cv::Rect>& faces,
                   const std::vector<cv::Rect> &rectList,
                   const std::vector<int> &rweights);

    // Cached run parameters.
    int m_rows;
    int m_cols;
    int m_flags;
    int m_loopcount;
    int m_nodenum;
    bool findBiggestObject;
    bool initialized;       // false until set up; starts false in the constructor
    double m_scaleFactor;
    Size m_minSize;
    Size m_maxSize;

    // Per-scale data.
    vector<CvSize> sizev;
    vector<float> scalev;

    // Device-side matrices.
    oclMat gimg1, gsum, gsqsum;

    // Opaque pointer, NULL after construction; its concrete type is not
    // visible in this header.
    void * buffers;
};
/////////////////////////////// Pyramid /////////////////////////////////////
...
...
modules/ocl/src/haar.cpp
浏览文件 @
69a0b5dd
此差异已折叠。
点击以展开。
modules/ocl/src/opencl/haarobjectdetect.cl
浏览文件 @
69a0b5dd
...
...
@@ -112,7 +112,7 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade
}
GpuHidHaarClassifierCascade
;
__kernel
void
__attribute__
((
reqd_work_group_size
(
8
,
8
,
1
)))
gpuRunHaarClassifierCascade
(
//constant
GpuHidHaarClassifierCascade
*
cascade,
__kernel
void
__attribute__
((
reqd_work_group_size
(
8
,
8
,
1
)))
gpuRunHaarClassifierCascade
(
global
GpuHidHaarStageClassifier
*
stagecascadeptr,
global
int4
*
info,
global
GpuHidHaarTreeNode
*
nodeptr,
...
...
@@ -128,12 +128,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
const
int
splitnode,
const
int4
p,
const
int4
pq,
const
float
correction
//const
int
width,
//const
int
height,
//const
int
grpnumperline,
//const
int
totalgrp
)
const
float
correction
)
{
int
grpszx
=
get_local_size
(
0
)
;
int
grpszy
=
get_local_size
(
1
)
;
...
...
@@ -145,13 +140,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
lcl_sz
=
mul24
(
grpszx,grpszy
)
;
int
lcl_id
=
mad24
(
lclidy,grpszx,lclidx
)
;
//assume
lcl_sz
==
256
or
128
or
64
//int
lcl_sz_shift
=
(
lcl_sz
==
256
)
?
8
:
7
;
//lcl_sz_shift
=
(
lcl_sz
==
64
)
?
6
:
lcl_sz_shift
;
__local
int
lclshare[1024]
;
#
define
OFF
0
__local
int*
lcldata
=
lclshare
+
OFF
;//for save win data
__local
int*
lcldata
=
lclshare
;//for save win data
__local
int*
glboutindex
=
lcldata
+
28*28
;//for save global out index
__local
int*
lclcount
=
glboutindex
+
1
;//for save the numuber of temp pass pixel
__local
int*
lcloutindex
=
lclcount
+
1
;//for save info of temp pass pixel
...
...
@@ -181,7 +171,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
totalgrp
=
scaleinfo1.y
&
0xffff
;
int
imgoff
=
scaleinfo1.z
;
float
factor
=
as_float
(
scaleinfo1.w
)
;
//int
ystep
=1
;// factor > 2.0 ? 1 : 2;
__global
const
int
*
sum
=
sum1
+
imgoff
;
__global
const
float
*
sqsum
=
sqsum1
+
imgoff
;
...
...
@@ -191,8 +180,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
grpidx
=
grploop
-
mul24
(
grpidy,
grpnumperline
)
;
int
x
=
mad24
(
grpidx,grpszx,lclidx
)
;
int
y
=
mad24
(
grpidy,grpszy,lclidy
)
;
//candidate_result.x
=
convert_int_rtn
(
x*factor
)
;
//candidate_result.y
=
convert_int_rtn
(
y*factor
)
;
int
grpoffx
=
x-lclidx
;
int
grpoffy
=
y-lclidy
;
...
...
@@ -211,14 +198,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int4
data
=
*
(
__global
int4*
)
&sum[glb_off]
;
int
lcl_off
=
mad24
(
lcl_y,
readwidth,
lcl_x<<2
)
;
#
if
OFF
lcldata[lcl_off]
=
data.x
;
lcldata[lcl_off+1]
=
data.y
;
lcldata[lcl_off+2]
=
data.z
;
lcldata[lcl_off+3]
=
data.w
;
#
else
vstore4
(
data,
0
,
&lcldata[lcl_off]
)
;
#
endif
}
lcloutindex[lcl_id]
=
0
;
...
...
@@ -231,11 +211,8 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
lcl_off
=
mad24
(
lclidy,readwidth,lclidx
)
;
int4
cascadeinfo1,
cascadeinfo2
;
cascadeinfo1
=
p
;
cascadeinfo2
=
pq
;// + mad24(y, pixelstep, x);
cascadeinfo2
=
pq
;
//if
((
x
<
width
)
&&
(
y
<
height
))
{
cascadeinfo1.x
+=lcl_off
;
cascadeinfo1.z
+=lcl_off
;
mean
=
(
lcldata[mad24
(
cascadeinfo1.y,readwidth,cascadeinfo1.x
)
]
-
lcldata[mad24
(
cascadeinfo1.y,readwidth,cascadeinfo1.z
)
]
-
...
...
@@ -251,8 +228,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
variance_norm_factor
=
variance_norm_factor
*
correction
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
//if
(
cascade->is_stump_based
)
//{
for
(
int
stageloop
=
start_stage
; (stageloop < split_stage) && result; stageloop++ )
{
float
stage_sum
=
0.f
;
...
...
@@ -277,18 +253,14 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
float
classsum
=
(
lcldata[mad24
(
info1.y,readwidth,info1.x
)
]
-
lcldata[mad24
(
info1.y,readwidth,info1.z
)
]
-
lcldata[mad24
(
info1.w,readwidth,info1.x
)
]
+
lcldata[mad24
(
info1.w,readwidth,info1.z
)
]
)
*
w.x
;
classsum
+=
(
lcldata[mad24
(
info2.y,readwidth,info2.x
)
]
-
lcldata[mad24
(
info2.y,readwidth,info2.z
)
]
-
lcldata[mad24
(
info2.w,readwidth,info2.x
)
]
+
lcldata[mad24
(
info2.w,readwidth,info2.z
)
]
)
*
w.y
;
//if
((
info3.z
-
info3.x
)
&&
(
!stageinfo.z
))
//{
info3.x
+=lcl_off
;
info3.z
+=lcl_off
;
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
//}
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
nodecounter++
;
}
...
...
@@ -308,8 +280,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
nodecounter
=
splitnode
;
for
(
int
stageloop
=
split_stage
; stageloop< end_stage && queuecount>0; stageloop++)
{
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//if
(
lcl_id
==
0
)
lclcount[0]=0
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -322,14 +292,13 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
int
lcl_compute_win_id
=
(
lcl_id
>>
(
6-perfscale
))
;
int
lcl_loops
=
(
stageinfo.x
+
lcl_compute_win
-1
)
>>
(
6-perfscale
)
;
int
lcl_compute_id
=
lcl_id
-
(
lcl_compute_win_id
<<
(
6-perfscale
))
;
for
(
int
queueloop=0
; queueloop<queuecount_loop/* && lcl_compute_win_id < queuecount*/
; queueloop++)
for
(
int
queueloop=0
; queueloop<queuecount_loop
; queueloop++)
{
float
stage_sum
=
0.f
;
int
temp_coord
=
lcloutindex[lcl_compute_win_id<<1]
;
float
variance_norm_factor
=
as_float
(
lcloutindex[
(
lcl_compute_win_id<<1
)
+1]
)
;
int
queue_pixel
=
mad24
(((
temp_coord
&
(
int
)
0xffff0000
)
>>16
)
,
readwidth,temp_coord
&
0xffff
)
;
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lcl_compute_win_id
<
queuecount
)
{
...
...
@@ -357,13 +326,12 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
classsum
+=
(
lcldata[mad24
(
info2.y,readwidth,info2.x
)
]
-
lcldata[mad24
(
info2.y,readwidth,info2.z
)
]
-
lcldata[mad24
(
info2.w,readwidth,info2.x
)
]
+
lcldata[mad24
(
info2.w,readwidth,info2.z
)
]
)
*
w.y
;
//if
((
info3.z
-
info3.x
)
&&
(
!stageinfo.z
))
//{
info3.x
+=queue_pixel
;
info3.z
+=queue_pixel
;
classsum
+=
(
lcldata[mad24
(
info3.y,readwidth,info3.x
)
]
-
lcldata[mad24
(
info3.y,readwidth,info3.z
)
]
-
lcldata[mad24
(
info3.w,readwidth,info3.x
)
]
+
lcldata[mad24
(
info3.w,readwidth,info3.z
)
]
)
*
w.z
;
//}
part_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
tempnodecounter
+=lcl_compute_win
;
}//end
for
(
int
lcl_loop=0
;lcl_loop<lcl_loops;lcl_loop++)
...
...
@@ -386,12 +354,12 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}//end
for
(
int
queueloop=0
;queueloop<queuecount_loop;queueloop++)
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
queuecount
=
lclcount[0]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
nodecounter
+=
stageinfo.x
;
}//end
for
(
int
stageloop
=
splitstage
; stageloop< endstage && queuecount>0;stageloop++)
//barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lcl_id<queuecount
)
{
int
temp
=
lcloutindex[lcl_id<<1]
;
...
...
@@ -406,9 +374,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
candidate[outputoff+temp+lcl_id]
=
candidate_result
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}//end
if
((
x
<
width
)
&&
(
y
<
height
))
}//end
for
(
int
grploop=grpidx
;grploop<totalgrp;grploop+=grpnumx)
//outputoff
+=mul24
(
width,height
)
;
}//end
for
(
int
scalei
=
0
; scalei <loopcount; scalei++)
}
...
...
modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
浏览文件 @
69a0b5dd
...
...
@@ -16,6 +16,7 @@
//
//
@Authors
//
Wu
Xinglong,
wxl370@126.com
//
Sen
Liu,
swjtuls1987@126.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
...
...
@@ -113,15 +114,15 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
global
const
int
*restrict
sum,
global
const
float
*restrict
sqsum,
global
int4
*candidate,
const
int
rows,
const
int
cols,
const
int
step,
const
int
loopcount,
const
int
start_stage,
const
int
split_stage,
const
int
end_stage,
const
int
startnode,
const
int
splitnode,
global
int4
*p,
//const
int4
*
pq,
global
float
*correction,
const
int
nodecount
)
{
...
...
@@ -133,18 +134,16 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
int
lclidy
=
get_local_id
(
1
)
;
int
lcl_sz
=
mul24
(
grpszx,
grpszy
)
;
int
lcl_id
=
mad24
(
lclidy,
grpszx,
lclidx
)
;
__local
int
lclshare[1024]
;
__local
int
*glboutindex
=
lclshare
+
0
;
__local
int
*lclcount
=
glboutindex
+
1
;
__local
int
*lcloutindex
=
lclcount
+
1
;
__local
float
*partialsum
=
(
__local
float
*
)(
lcloutindex
+
(
lcl_sz
<<
1
))
;
__local
int
glboutindex[1]
;
__local
int
lclcount[1]
;
__local
int
lcloutindex[64]
;
glboutindex[0]
=
0
;
int
outputoff
=
mul24
(
grpidx,
256
)
;
candidate[outputoff
+
(
lcl_id
<<
2
)
]
=
(
int4
)
0
;
candidate[outputoff
+
(
lcl_id
<<
2
)
+
1]
=
(
int4
)
0
;
candidate[outputoff
+
(
lcl_id
<<
2
)
+
2]
=
(
int4
)
0
;
candidate[outputoff
+
(
lcl_id
<<
2
)
+
3]
=
(
int4
)
0
;
int
max_idx
=
rows
*
cols
-
1
;
for
(
int
scalei
=
0
; scalei < loopcount; scalei++)
{
int4
scaleinfo1
;
...
...
@@ -168,30 +167,28 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
int
y
=
iy
*
ystep
;
lcloutindex[lcl_id]
=
0
;
lclcount[0]
=
0
;
int
result
=
1
,
nodecounter
;
int
nodecounter
;
float
mean,
variance_norm_factor
;
//if
((
ix
<
width
)
&&
(
iy
<
height
))
{
const
int
p_offset
=
mad24
(
y,
step,
x
)
;
cascadeinfo.x
+=
p_offset
;
cascadeinfo.z
+=
p_offset
;
mean
=
(
sum[mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
]
-
sum[mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
]
-
sum[mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
]
+
sum[mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
]
)
mean
=
(
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
)
*
correction_t
;
variance_norm_factor
=
sqsum[mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
]
-
sqsum[mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
]
-
sqsum[mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
]
+
sqsum[mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
]
;
variance_norm_factor
=
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.y,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
-
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.x
)
,
0
,
max_idx
)
]
+
sqsum[clamp
(
mad24
(
cascadeinfo.w,
step,
cascadeinfo.z
)
,
0
,
max_idx
)
]
;
variance_norm_factor
=
variance_norm_factor
*
correction_t
-
mean
*
mean
;
variance_norm_factor
=
variance_norm_factor
>=
0.f
?
sqrt
(
variance_norm_factor
)
:
1.f
;
result
=
1
;
bool
result
=
true
;
nodecounter
=
startnode
+
nodecount
*
scalei
;
for
(
int
stageloop
=
start_stage
; stageloop < end_stage
&& result; stageloop++)
for
(
int
stageloop
=
start_stage
; (stageloop < end_stage)
&& result; stageloop++)
{
float
stage_sum
=
0.f
;
int4
stageinfo
=
*
(
global
int4
*
)(
stagecascadeptr
+
stageloop
)
;
float
stagethreshold
=
as_float
(
stageinfo.y
)
;
for
(
int
nodeloop
=
0
; nodeloop < stageinfo.x; nodeloop++)
int
stagecount
=
stagecascadeptr[stageloop].count
;
for
(
int
nodeloop
=
0
; nodeloop < stagecount; nodeloop++)
{
__global
GpuHidHaarTreeNode
*currentnodeptr
=
(
nodeptr
+
nodecounter
)
;
int4
info1
=
*
(
__global
int4
*
)(
&
(
currentnodeptr->p[0][0]
))
;
...
...
@@ -204,43 +201,41 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
info1.z
+=
p_offset
;
info2.x
+=
p_offset
;
info2.z
+=
p_offset
;
float
classsum
=
(
sum[mad24
(
info1.y,
step,
info1.x
)
]
-
sum[mad24
(
info1.y,
step,
info1.z
)
]
-
sum[mad24
(
info1.w,
step,
info1.x
)
]
+
sum[mad24
(
info1.w,
step,
info1.z
)
]
)
*
w.x
;
classsum
+=
(
sum[mad24
(
info2.y,
step,
info2.x
)
]
-
sum[mad24
(
info2.y,
step,
info2.z
)
]
-
sum[mad24
(
info2.w,
step,
info2.x
)
]
+
sum[mad24
(
info2.w,
step,
info2.z
)
]
)
*
w.y
;
float
classsum
=
(
sum[clamp
(
mad24
(
info1.y,
step,
info1.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.y,
step,
info1.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info1.w,
step,
info1.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info1.w,
step,
info1.z
)
,
0
,
max_idx
)
]
)
*
w.x
;
classsum
+=
(
sum[clamp
(
mad24
(
info2.y,
step,
info2.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.y,
step,
info2.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info2.w,
step,
info2.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info2.w,
step,
info2.z
)
,
0
,
max_idx
)
]
)
*
w.y
;
info3.x
+=
p_offset
;
info3.z
+=
p_offset
;
classsum
+=
(
sum[mad24
(
info3.y,
step,
info3.x
)
]
-
sum[mad24
(
info3.y,
step,
info3.z
)
]
-
sum[mad24
(
info3.w,
step,
info3.x
)
]
+
sum[mad24
(
info3.w,
step,
info3.z
)
]
)
*
w.z
;
classsum
+=
(
sum[clamp
(
mad24
(
info3.y,
step,
info3.x
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.y,
step,
info3.z
)
,
0
,
max_idx
)
]
-
sum[clamp
(
mad24
(
info3.w,
step,
info3.x
)
,
0
,
max_idx
)
]
+
sum[clamp
(
mad24
(
info3.w,
step,
info3.z
)
,
0
,
max_idx
)
]
)
*
w.z
;
stage_sum
+=
classsum
>=
nodethreshold
?
alpha2.y
:
alpha2.x
;
nodecounter++
;
}
result
=
(
stage_sum
>=
stagethreshold
)
;
result
=
(
bool
)(
stage_sum
>=
stagecascadeptr[stageloop].threshold
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
result
&&
(
ix
<
width
)
&&
(
iy
<
height
))
{
int
queueindex
=
atomic_inc
(
lclcount
)
;
lcloutindex[queueindex
<<
1]
=
(
y
<<
16
)
|
x
;
lcloutindex[
(
queueindex
<<
1
)
+
1]
=
as_int
(
variance_norm_factor
)
;
lcloutindex[queueindex]
=
(
y
<<
16
)
|
x
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
queuecount
=
lclcount[0]
;
nodecounter
=
splitnode
+
nodecount
*
scalei
;
if
(
lcl_id
<
queuecount
)
{
int
temp
=
lcloutindex[lcl_id
<<
1
]
;
int
temp
=
lcloutindex[lcl_id
]
;
int
x
=
temp
&
0xffff
;
int
y
=
(
temp
&
(
int
)
0xffff0000
)
>>
16
;
temp
=
glboutindex[0]
;
temp
=
atomic_inc
(
glboutindex
)
;
int4
candidate_result
;
candidate_result.zw
=
(
int2
)
convert_int_rtn
(
factor
*
20.f
)
;
candidate_result.x
=
x
;
candidate_result.y
=
y
;
atomic_inc
(
glboutindex
)
;
candidate[outputoff
+
temp
+
lcl_id]
=
candidate_result
;
}
...
...
@@ -283,3 +278,4 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
newnode[counter].alpha[0]
=
t1.alpha[0]
;
newnode[counter].alpha[1]
=
t1.alpha[1]
;
}
modules/ocl/test/test_haar.cpp
浏览文件 @
69a0b5dd
...
...
@@ -16,6 +16,7 @@
//
// @Authors
// Jia Haipeng, jiahaipeng95@gmail.com
// Sen Liu, swjutls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
...
...
@@ -61,40 +62,31 @@ struct getRect
}
};
PARAM_TEST_CASE
(
Haar
TestBase
,
int
,
int
)
PARAM_TEST_CASE
(
Haar
,
double
,
int
)
{
//std::vector<cv::ocl::Info> oclinfo;
cv
::
ocl
::
OclCascadeClassifier
cascade
,
nestedCascade
;
cv
::
ocl
::
OclCascadeClassifierBuf
cascadebuf
;
cv
::
CascadeClassifier
cpucascade
,
cpunestedCascade
;
// Mat img;
double
scale
;
int
index
;
int
flags
;
virtual
void
SetUp
()
{
scale
=
1.0
;
index
=
0
;
scale
=
GET_PARAM
(
0
)
;
flags
=
GET_PARAM
(
1
)
;
string
cascadeName
=
workdir
+
"../../data/haarcascades/haarcascade_frontalface_alt.xml"
;
if
(
(
!
cascade
.
load
(
cascadeName
))
||
(
!
cpucascade
.
load
(
cascadeName
)))
if
(
(
!
cascade
.
load
(
cascadeName
))
||
(
!
cpucascade
.
load
(
cascadeName
))
||
(
!
cascadebuf
.
load
(
cascadeName
))
)
{
cout
<<
"ERROR: Could not load classifier cascade"
<<
endl
;
return
;
}
//int devnums = getDevice(oclinfo);
//CV_Assert(devnums>0);
////if you want to use undefault device, set it here
////setDevice(oclinfo[0]);
//cv::ocl::setBinpath("E:\\");
}
};
////////////////////////////////faceDetect/////////////////////////////////////////////////
struct
Haar
:
HaarTestBase
{};
TEST_F
(
Haar
,
FaceDetect
)
TEST_P
(
Haar
,
FaceDetect
)
{
string
imgName
=
workdir
+
"lena.jpg"
;
Mat
img
=
imread
(
imgName
,
1
);
...
...
@@ -105,59 +97,65 @@ TEST_F(Haar, FaceDetect)
return
;
}
//int i = 0;
//double t = 0;
vector
<
Rect
>
faces
,
oclfaces
;
// const static Scalar colors[] = { CV_RGB(0, 0, 255),
// CV_RGB(0, 128, 255),
// CV_RGB(0, 255, 255),
// CV_RGB(0, 255, 0),
// CV_RGB(255, 128, 0),
// CV_RGB(255, 255, 0),
// CV_RGB(255, 0, 0),
// CV_RGB(255, 0, 255)
// } ;
Mat
gray
,
smallImg
(
cvRound
(
img
.
rows
/
scale
),
cvRound
(
img
.
cols
/
scale
),
CV_8UC1
);
MemStorage
storage
(
cvCreateMemStorage
(
0
));
cvtColor
(
img
,
gray
,
CV_BGR2GRAY
);
resize
(
gray
,
smallImg
,
smallImg
.
size
(),
0
,
0
,
INTER_LINEAR
);
equalizeHist
(
smallImg
,
smallImg
);
cv
::
ocl
::
oclMat
image
;
CvSeq
*
_objects
;
image
.
upload
(
smallImg
);
_objects
=
cascade
.
oclHaarDetectObjects
(
image
,
storage
,
1.1
,
3
,
0
|
CV_HAAR_SCALE_IMAGE
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
vector
<
CvAvgComp
>
vecAvgComp
;
Seq
<
CvAvgComp
>
(
_objects
).
copyTo
(
vecAvgComp
);
oclfaces
.
resize
(
vecAvgComp
.
size
());
std
::
transform
(
vecAvgComp
.
begin
(),
vecAvgComp
.
end
(),
oclfaces
.
begin
(),
getRect
());
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
0
|
CV_HAAR_SCALE_IMAGE
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
/* for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
}
TEST_P
(
Haar
,
FaceDetectUseBuf
)
{
string
imgName
=
workdir
+
"lena.jpg"
;
Mat
img
=
imread
(
imgName
,
1
);
if
(
img
.
empty
())
{
Mat smallImgROI;
Point center;
Scalar color = colors[i%8];
int radius;
center.x = cvRound((r->x + r->width*0.5)*scale);
center.y = cvRound((r->y + r->height*0.5)*scale);
radius = cvRound((r->width + r->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 );
} */
//namedWindow("result");
//imshow("result",img);
//waitKey(0);
//destroyAllWindows();
std
::
cout
<<
"Couldn't read "
<<
imgName
<<
std
::
endl
;
return
;
}
vector
<
Rect
>
faces
,
oclfaces
;
Mat
gray
,
smallImg
(
cvRound
(
img
.
rows
/
scale
),
cvRound
(
img
.
cols
/
scale
),
CV_8UC1
);
MemStorage
storage
(
cvCreateMemStorage
(
0
));
cvtColor
(
img
,
gray
,
CV_BGR2GRAY
);
resize
(
gray
,
smallImg
,
smallImg
.
size
(),
0
,
0
,
INTER_LINEAR
);
equalizeHist
(
smallImg
,
smallImg
);
cv
::
ocl
::
oclMat
image
;
image
.
upload
(
smallImg
);
cascadebuf
.
detectMultiScale
(
image
,
oclfaces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
cascadebuf
.
release
();
cpucascade
.
detectMultiScale
(
smallImg
,
faces
,
1.1
,
3
,
flags
,
Size
(
30
,
30
),
Size
(
0
,
0
)
);
EXPECT_EQ
(
faces
.
size
(),
oclfaces
.
size
());
}
// Instantiates the value-parameterized Haar tests under the prefix
// "FaceDetect". The parameter tuple is (scale, flags): scale takes the single
// value 1.0 and flags takes CV_HAAR_SCALE_IMAGE and 0, so each TEST_P(Haar, ...)
// runs once per flags value.
INSTANTIATE_TEST_CASE_P
(
FaceDetect
,
Haar
,
Combine
(
Values
(
1.0
),
Values
(
CV_HAAR_SCALE_IMAGE
,
0
)));
#endif // HAVE_OPENCL
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录