Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
1d1d28ba
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1d1d28ba
编写于
5月 14, 2013
作者:
Y
yao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix black screen when input Mat is large
上级
abe2ea59
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
98 additions
and
18 deletions
+98
-18
modules/ocl/src/moments.cpp
modules/ocl/src/moments.cpp
+1
-1
modules/ocl/src/opencl/moments.cl
modules/ocl/src/opencl/moments.cl
+97
-17
未找到文件。
modules/ocl/src/moments.cpp
浏览文件 @
1d1d28ba
...
...
@@ -327,7 +327,7 @@ static void ocl_cvMoments( const void* array, CvMoments* mom, int binary )
mom
->
m12
=
dstsum
[
8
];
mom
->
m03
=
dstsum
[
9
];
delete
[]
dstsum
;
openCLSafeCall
(
clReleaseMemObject
(
sum
));
icvCompleteMomentState
(
mom
);
}
...
...
modules/ocl/src/opencl/moments.cl
浏览文件 @
1d1d28ba
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Multicoreware,
Inc.,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Sen
Liu,
swjtuls1987@126.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistributions
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistributions
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
if
defined
(
DOUBLE_SUPPORT
)
#
ifdef
cl_khr_fp64
...
...
@@ -609,22 +654,33 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
int
y
=
wgidy*TILE_SIZE
; // real Y index of pixel
int
x
=
wgidx*TILE_SIZE
; // real X index of pixel
int
kcn
=
(
cn==2
)
?2:4
;
int
rstep
=
min
(
src_step/4,
TILE_SIZE
)
;
src_step
/=
sizeof
(
*src_data
)
;
int
rstep
=
min
(
src_step,
TILE_SIZE
)
;
tileSize_height
=
min
(
TILE_SIZE,
src_rows
-
y
)
;
tileSize_width
=
min
(
TILE_SIZE,
src_cols
-x
)
;
if
(
tileSize_width
<
TILE_SIZE
)
for
(
int
i
=
tileSize_width
; i < rstep; i++ )
*
((
__global
float*
)
src_data+
(
y+lidy
)
*src_step/4+x+i
)
=
0
;
int
maxIdx
=
mul24
(
src_rows,
src_cols
)
;
int
yOff
=
(
y+lidy
)
*src_step
;
int
index
;
if
(
tileSize_width
<
TILE_SIZE
&&
yOff
<
src_rows
)
for
(
int
i
=
tileSize_width
; i < rstep && (yOff+x+i) < maxIdx; i++ )
*
(
src_data+yOff+x+i
)
=
0
;
if
(
coi
>
0
)
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
{
#
pragma
unroll
for
(
int
j=0
; j<4; j++)
tmp_coi[j]
=
*
(
src_data+
(
y+lidy
)
*src_step/4+
(
x+i+j
)
*kcn+coi-1
)
;
{
index
=
yOff+
(
x+i+j
)
*kcn+coi-1
;
if
(
index
<
maxIdx
)
tmp_coi[j]
=
*
(
src_data+index
)
;
else
tmp_coi[j]
=
0
;
}
tmp[i/VLEN_F]
=
(
float4
)(
tmp_coi[0],tmp_coi[1],tmp_coi[2],tmp_coi[3]
)
;
}
else
for
(
int
i=0
; i < tileSize_width; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
float4
)(
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+1
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+2
)
,
*
(
src_data+
(
y+lidy
)
*src_step/4+x+i+3
))
;
for
(
int
i=0
; i < tileSize_width
&& (yOff+x+i) < maxIdx
; i+=VLEN_F)
tmp[i/VLEN_F]
=
(
*
(
__global
float4
*
)(
src_data+yOff+x+i
))
;
float4
zero
=
(
float4
)(
0
)
;
float4
full
=
(
float4
)(
255
)
;
if
(
binary
)
...
...
@@ -714,35 +770,59 @@ __kernel void CvMoments_D5( __global float* src_data, int src_rows, int src_cols
//
accumulate
moments
computed
in
each
tile
dst_step
/=
sizeof
(
F
)
;
int
dst_x_off
=
mad24
(
wgidy,
dst_cols,
wgidx
)
;
int
dst_off
=
0
;
int
max_dst_index
=
10
*
blocky
*
get_global_size
(
1
)
;
//
+
m00
(
=
m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_00
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[0]
;
dst_off
=
mad24
(
DST_ROW_00
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[0]
;
//
+
m10
(
=
m10
'
+
x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_10
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[1]
+
xm
;
dst_off
=
mad24
(
DST_ROW_10
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[1]
+
xm
;
//
+
m01
(
=
m01
'
+
y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_01
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[2]
+
ym
;
dst_off
=
mad24
(
DST_ROW_01
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[2]
+
ym
;
//
+
m20
(
=
m20
'
+
2*x*m10
'
+
x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_20
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
dst_off
=
mad24
(
DST_ROW_20
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[3]
+
x
*
(
mom[1]
*
2
+
xm
)
;
//
+
m11
(
=
m11
'
+
x*m01
'
+
y*m10
'
+
x*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_11
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
dst_off
=
mad24
(
DST_ROW_11
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[4]
+
x
*
(
mom[2]
+
ym
)
+
y
*
mom[1]
;
//
+
m02
(
=
m02
'
+
2*y*m01
'
+
y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_02
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
dst_off
=
mad24
(
DST_ROW_02
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[5]
+
y
*
(
mom[2]
*
2
+
ym
)
;
//
+
m30
(
=
m30
'
+
3*x*m20
'
+
3*x*x*m10
'
+
x*x*x*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_30
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
dst_off
=
mad24
(
DST_ROW_30
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[6]
+
x
*
(
3.
*
mom[3]
+
x
*
(
3.
*
mom[1]
+
xm
))
;
//
+
m21
(
=
m21
'
+
x*
(
2*m11
'
+
2*y*m10
'
+
x*m01
'
+
x*y*m00
'
)
+
y*m20
'
)
*
(
dst_m
+
mad24
(
DST_ROW_21
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
dst_off
=
mad24
(
DST_ROW_21
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[7]
+
x
*
(
2
*
(
mom[4]
+
y
*
mom[1]
)
+
x
*
(
mom[2]
+
ym
))
+
y
*
mom[3]
;
//
+
m12
(
=
m12
'
+
y*
(
2*m11
'
+
2*x*m01
'
+
y*m10
'
+
x*y*m00
'
)
+
x*m02
'
)
*
(
dst_m
+
mad24
(
DST_ROW_12
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
dst_off
=
mad24
(
DST_ROW_12
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[8]
+
y
*
(
2
*
(
mom[4]
+
x
*
mom[2]
)
+
y
*
(
mom[1]
+
xm
))
+
x
*
mom[5]
;
//
+
m03
(
=
m03
'
+
3*y*m02
'
+
3*y*y*m01
'
+
y*y*y*m00
'
)
*
(
dst_m
+
mad24
(
DST_ROW_03
*
blocky,
dst_step,
mad24
(
wgidy,
dst_cols,
wgidx
)))
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
dst_off
=
mad24
(
DST_ROW_03
*
blocky,
dst_step,
dst_x_off
)
;
if
(
dst_off
<
max_dst_index
)
*
(
dst_m
+
dst_off
)
=
mom[9]
+
y
*
(
3.
*
mom[5]
+
y
*
(
3.
*
mom[2]
+
ym
))
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录