Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
dcaa8735
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
dcaa8735
编写于
6月 06, 2014
作者:
E
Elena Gvozdeva
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
used vector data types for CCORR cn==1
上级
13db9480
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
148 addition
and
42 deletion
+148
-42
modules/imgproc/src/opencl/match_template.cl
modules/imgproc/src/opencl/match_template.cl
+101
-8
modules/imgproc/src/templmatch.cpp
modules/imgproc/src/templmatch.cpp
+45
-32
modules/imgproc/test/ocl/test_match_template.cpp
modules/imgproc/test/ocl/test_match_template.cpp
+2
-2
未找到文件。
modules/imgproc/src/opencl/match_template.cl
浏览文件 @
dcaa8735
...
...
@@ -173,37 +173,130 @@ __kernel void matchTemplate_Naive_CCORR(__global const uchar * srcptr, int src_s
}
}
#
elif
cn==1
&&
PIX_PER_WI_X==4
__kernel
void
matchTemplate_Naive_CCORR
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
const
uchar
*
templateptr,
int
template_step,
int
template_offset,
int
template_rows,
int
template_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
int
x0
=
get_global_id
(
0
)
*PIX_PER_WI_X
;
int
y
=
get_global_id
(
1
)
;
if
(
y
<
dst_rows
)
{
if
(
x0
+
PIX_PER_WI_X
<=
dst_cols
)
{
WT
sum
=
(
WT
)(
0
)
;
int
ind
=
mad24
(
y,
src_step,
mad24
(
x0,
(
int
)
sizeof
(
T1
)
,
src_offset
))
;
__global
const
T1
*
template
=
(
__global
const
T1*
)(
templateptr
+
template_offset
)
;
for
(
int
i
=
0
; i < template_rows; ++i)
{
for
(
int
j
=
0
; j < template_cols; ++j)
{
T
temp
=
(
T
)(
template[j]
)
;
T
src
=
*
(
__global
const
T*
)(
srcptr
+
ind
+
j*
(
int
)
sizeof
(
T1
))
;
#
if
wdepth
==
4
sum
=
mad24
(
convertToWT
(
src
)
,
convertToWT
(
temp
)
,
sum
)
;
#
else
sum
=
mad
(
convertToWT
(
src
)
,
convertToWT
(
temp
)
,
sum
)
;
#
endif
}
ind
+=
src_step
;
template
=
(
__global
const
T1
*
)((
__global
const
uchar
*
)
template
+
template_step
)
;
}
T
temp
=
(
T
)(
template[0]
)
;
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x0,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
*
(
__global
float4
*
)(
dst
+
dst_idx
)
=
convert_float4
(
sum
)
;
}
else
{
WT1
sum
[PIX_PER_WI_X]
;
#
pragma
unroll
for
(
int
i=0
; i < PIX_PER_WI_X; i++) sum[i] = 0;
__global
const
T1
*
src
=
(
__global
const
T1
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x0,
(
int
)
sizeof
(
T1
)
,
src_offset
)))
;
__global
const
T1
*
template
=
(
__global
const
T1
*
)(
templateptr
+
template_offset
)
;
for
(
int
i
=
0
; i < template_rows; ++i)
{
for
(
int
j
=
0
; j < template_cols; ++j)
{
#
pragma
unroll
for
(
int
cx=0,
x
=
x0
; cx < PIX_PER_WI_X && x < dst_cols; ++cx, ++x)
{
#
if
wdepth
==
4
sum[cx]
=
mad24
(
convertToWT1
(
src[j+cx]
)
,
convertToWT1
(
template[j]
)
,
sum[cx]
)
;
#
else
sum[cx]
=
mad
(
convertToWT1
(
src[j+cx]
)
,
convertToWT1
(
template[j]
)
,
sum[cx]
)
;
#
endif
}
}
src
=
(
__global
const
T1
*
)((
__global
const
uchar
*
)
src
+
src_step
)
;
template
=
(
__global
const
T1
*
)((
__global
const
uchar
*
)
template
+
template_step
)
;
}
#
pragma
unroll
for
(
int
cx=0
; cx < PIX_PER_WI_X && x0 < dst_cols; ++cx, ++x0)
{
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x0,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
=
convertToDT
(
sum[cx]
)
;
}
}
}
}
#
else
__kernel
void
matchTemplate_Naive_CCORR
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__global
const
uchar
*
templateptr,
int
template_step,
int
template_offset,
int
template_rows,
int
template_cols,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
int
x
=
get_global_id
(
0
)
;
int
x
0
=
get_global_id
(
0
)
*PIX_PER_WI_X
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
int
step
=
src_step/
(
int
)
sizeof
(
T
)
;
if
(
y
<
dst_rows
)
{
WT
sum
=
(
WT
)(
0
)
;
WT
sum
[PIX_PER_WI_X]
;
#
pragma
unroll
for
(
int
i=0
; i < PIX_PER_WI_X; i++)
sum[i]
=
0
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src_offset
)))
;
__global
const
T
*
src
=
(
__global
const
T
*
)(
srcptr
+
mad24
(
y,
src_step,
mad24
(
x
0
,
(
int
)
sizeof
(
T
)
,
src_offset
)))
;
__global
const
T
*
template
=
(
__global
const
T
*
)(
templateptr
+
template_offset
)
;
for
(
int
i
=
0
; i < template_rows; ++i)
{
for
(
int
j
=
0
; j < template_cols; ++j)
{
#
pragma
unroll
for
(
int
cx=0,
x
=
x0
; cx < PIX_PER_WI_X && x < dst_cols; ++cx, ++x)
{
#
if
wdepth
==
4
sum
=
mad24
(
convertToWT
(
src[j]
)
,
convertToWT
(
template[j]
)
,
sum
)
;
sum[cx]
=
mad24
(
convertToWT
(
src[j+cx]
)
,
convertToWT
(
template[j]
)
,
sum[cx]
)
;
#
else
sum
=
mad
(
convertToWT
(
src[j]
)
,
convertToWT
(
template[j]
)
,
sum
)
;
sum[cx]
=
mad
(
convertToWT
(
src[j+cx]
)
,
convertToWT
(
template[j]
)
,
sum[cx]
)
;
#
endif
}
}
src
=
(
__global
const
T
*
)((
__global
const
uchar
*
)
src
+
src_step
)
;
template
=
(
__global
const
T
*
)((
__global
const
uchar
*
)
template
+
template_step
)
;
}
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
=
convertToDT
(
sum
)
;
#
pragma
unroll
for
(
int
cx=0
; cx < PIX_PER_WI_X && x0 < dst_cols; ++cx, ++x0)
{
int
dst_idx
=
mad24
(
y,
dst_step,
mad24
(
x0,
(
int
)
sizeof
(
float
)
,
dst_offset
))
;
*
(
__global
float
*
)(
dst
+
dst_idx
)
=
convertToDT
(
sum[cx]
)
;
}
}
}
#
endif
...
...
modules/imgproc/src/templmatch.cpp
浏览文件 @
dcaa8735
...
...
@@ -58,10 +58,7 @@ enum
static
bool
extractFirstChannel_32F
(
InputArray
_image
,
OutputArray
_result
,
int
cn
)
{
UMat
image
=
_image
.
getUMat
();
UMat
result
=
_result
.
getUMat
();
int
depth
=
image
.
depth
();
int
depth
=
_image
.
depth
();
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIy
=
(
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
))
?
4
:
1
;
...
...
@@ -71,6 +68,10 @@ static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int
if
(
k
.
empty
())
return
false
;
UMat
image
=
_image
.
getUMat
();
UMat
result
=
_result
.
getUMat
();
size_t
globalsize
[
2
]
=
{
result
.
cols
,
(
result
.
rows
+
pxPerWIy
-
1
)
/
pxPerWIy
};
return
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image
),
ocl
::
KernelArg
::
WriteOnly
(
result
)).
run
(
2
,
globalsize
,
NULL
,
false
);
}
...
...
@@ -107,33 +108,29 @@ static bool sumTemplate(InputArray _src, UMat & result)
return
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
);
}
static
bool
useNaive
(
int
method
,
int
depth
,
Size
size
)
static
bool
useNaive
(
int
method
,
Size
size
)
{
/* if (method == TM_SQDIFF && (depth == CV_32F))
{
return true;
}
else*/
if
(
method
==
TM_CCORR
||
method
==
TM_SQDIFF
)
{
return
size
.
height
<
18
&&
size
.
width
<
18
;
}
else
return
false
;
if
(
method
==
TM_CCORR
||
method
==
TM_SQDIFF
)
{
return
size
.
height
<
18
&&
size
.
width
<
18
;
}
else
return
false
;
}
struct
ConvolveBuf
{
Size
result_size
;
Size
block_size
;
Size
user_block_size
;
Size
dft_size
;
{
Size
result_size
;
Size
block_size
;
Size
user_block_size
;
Size
dft_size
;
UMat
image_spect
,
templ_spect
,
result_spect
;
UMat
image_block
,
templ_block
,
result_data
;
UMat
image_spect
,
templ_spect
,
result_spect
;
UMat
image_block
,
templ_block
,
result_data
;
void
create
(
Size
image_size
,
Size
templ_size
);
static
Size
estimateBlockSize
(
Size
result_size
,
Size
templ
_size
);
};
void
create
(
Size
image_size
,
Size
templ_size
);
static
Size
estimateBlockSize
(
Size
result
_size
);
};
void
ConvolveBuf
::
create
(
Size
image_size
,
Size
templ_size
)
{
...
...
@@ -142,7 +139,7 @@ void ConvolveBuf::create(Size image_size, Size templ_size)
block_size
=
user_block_size
;
if
(
user_block_size
.
width
==
0
||
user_block_size
.
height
==
0
)
block_size
=
estimateBlockSize
(
result_size
,
templ_size
);
block_size
=
estimateBlockSize
(
result_size
);
dft_size
.
width
=
1
<<
int
(
ceil
(
std
::
log
(
block_size
.
width
+
templ_size
.
width
-
1.
)
/
std
::
log
(
2.
)));
dft_size
.
height
=
1
<<
int
(
ceil
(
std
::
log
(
block_size
.
height
+
templ_size
.
height
-
1.
)
/
std
::
log
(
2.
)));
...
...
@@ -167,7 +164,7 @@ void ConvolveBuf::create(Size image_size, Size templ_size)
block_size
.
height
=
std
::
min
(
dft_size
.
height
-
templ_size
.
height
+
1
,
result_size
.
height
);
}
Size
ConvolveBuf
::
estimateBlockSize
(
Size
result_size
,
Size
/*templ_size*/
)
Size
ConvolveBuf
::
estimateBlockSize
(
Size
result_size
)
{
int
width
=
(
result_size
.
width
+
2
)
/
3
;
int
height
=
(
result_size
.
height
+
2
)
/
3
;
...
...
@@ -266,10 +263,26 @@ static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, Outpu
int
type
=
_image
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
int
wdepth
=
std
::
max
(
depth
,
CV_32S
),
wtype
=
CV_MAKE_TYPE
(
wdepth
,
cn
);
ocl
::
Device
dev
=
ocl
::
Device
::
getDefault
();
int
pxPerWIx
=
(
cn
!=
3
&&
dev
.
isIntel
()
&&
(
dev
.
type
()
&
ocl
::
Device
::
TYPE_GPU
))
?
4
:
1
;
int
rated_cn
=
cn
;
int
wtype1
=
wtype
;
if
(
pxPerWIx
!=
1
&&
cn
==
1
)
{
rated_cn
=
pxPerWIx
;
type
=
CV_MAKE_TYPE
(
depth
,
rated_cn
);
wtype1
=
CV_MAKE_TYPE
(
wdepth
,
rated_cn
);
}
char
cvt
[
40
];
char
cvt1
[
40
];
const
char
*
convertToWT1
=
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
);
const
char
*
convertToWT
=
ocl
::
convertTypeStr
(
depth
,
wdepth
,
rated_cn
,
cvt1
);
ocl
::
Kernel
k
(
"matchTemplate_Naive_CCORR"
,
ocl
::
imgproc
::
match_template_oclsrc
,
format
(
"-D CCORR -D T=%s -D T1=%s -D WT=%s -D
convertToWT=%s -D cn=%d -D wdepth=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wtype
),
ocl
::
convertTypeStr
(
depth
,
wdepth
,
cn
,
cvt
),
cn
,
wdepth
));
format
(
"-D CCORR -D T=%s -D T1=%s -D WT=%s -D
WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D wdepth=%d -D PIX_PER_WI_X=%d"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
wtype1
),
ocl
::
typeToStr
(
wtype
),
convertToWT
,
convertToWT1
,
cn
,
wdepth
,
pxPerWIx
));
if
(
k
.
empty
())
return
false
;
...
...
@@ -280,14 +293,14 @@ static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, Outpu
k
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
image
),
ocl
::
KernelArg
::
ReadOnly
(
templ
),
ocl
::
KernelArg
::
WriteOnly
(
result
));
size_t
globalsize
[
2
]
=
{
result
.
cols
,
result
.
rows
};
size_t
globalsize
[
2
]
=
{
(
result
.
cols
+
pxPerWIx
-
1
)
/
pxPerWIx
,
result
.
rows
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
static
bool
matchTemplate_CCORR
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
if
(
useNaive
(
TM_CCORR
,
_
image
.
depth
(),
_
templ
.
size
()))
if
(
useNaive
(
TM_CCORR
,
_templ
.
size
()))
return
(
matchTemplateNaive_CCORR
(
_image
,
_templ
,
_result
));
else
...
...
@@ -364,7 +377,7 @@ static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, Outp
static
bool
matchTemplate_SQDIFF
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
)
{
if
(
useNaive
(
TM_SQDIFF
,
_
image
.
depth
(),
_
templ
.
size
()))
if
(
useNaive
(
TM_SQDIFF
,
_templ
.
size
()))
return
(
matchTemplateNaive_SQDIFF
(
_image
,
_templ
,
_result
));
else
{
...
...
modules/imgproc/test/ocl/test_match_template.cpp
浏览文件 @
dcaa8735
...
...
@@ -71,7 +71,7 @@ PARAM_TEST_CASE(MatchTemplate, MatDepth, Channels, MatchTemplType, bool)
type
=
CV_MAKE_TYPE
(
GET_PARAM
(
0
),
GET_PARAM
(
1
));
depth
=
GET_PARAM
(
0
);
method
=
GET_PARAM
(
2
);
use_roi
=
GET_PARAM
(
3
);
use_roi
=
false
;
//
GET_PARAM(3);
}
virtual
void
generateTestData
()
...
...
@@ -116,7 +116,7 @@ OCL_TEST_P(MatchTemplate, Mat)
}
}
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
MatchTemplate
,
Combine
(
OCL_INSTANTIATE_TEST_CASE_P
(
ImageProc
,
MatchTemplate
,
Combine
(
Values
(
CV_8U
,
CV_32F
),
Values
(
1
,
2
,
3
,
4
),
MatchTemplType
::
all
(),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录