Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
33173d90
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
33173d90
编写于
5月 21, 2014
作者:
I
Ilya Lavrenov
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimized cv::meanStdDev
上级
d940093f
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
205 addition
and
14 deletion
+205
-14
modules/core/src/ocl.cpp
modules/core/src/ocl.cpp
+7
-7
modules/core/src/opencl/meanstddev.cl
modules/core/src/opencl/meanstddev.cl
+129
-0
modules/core/src/stat.cpp
modules/core/src/stat.cpp
+69
-7
未找到文件。
modules/core/src/ocl.cpp
浏览文件 @
33173d90
...
...
@@ -4419,22 +4419,22 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src7
,
InputArray
src8
,
InputArray
src9
)
{
int
type
=
src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
=
CV_ELEM_SIZE
(
depth
);
int
type
=
src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz
1
=
CV_ELEM_SIZE1
(
depth
);
Size
ssize
=
src1
.
size
();
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
vectorWidths
[]
=
{
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthInt
(),
d
.
preferredVectorWidthFloat
(),
d
.
preferredVectorWidthDouble
(),
-
1
},
width
=
vectorWidths
[
depth
];
d
.
preferredVectorWidthDouble
(),
-
1
},
kercn
=
vectorWidths
[
depth
];
if
(
d
.
isIntel
())
{
// it's heuristic
int
vectorWidthsIntel
[]
=
{
16
,
16
,
8
,
8
,
1
,
1
,
1
,
-
1
};
width
=
vectorWidthsIntel
[
depth
];
kercn
=
vectorWidthsIntel
[
depth
];
}
if
(
ssize
.
width
*
cn
<
width
||
width
<=
0
)
if
(
ssize
.
width
*
cn
<
kercn
||
kercn
<=
0
)
return
1
;
std
::
vector
<
size_t
>
offsets
,
steps
,
cols
;
...
...
@@ -4449,7 +4449,7 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
PROCESS_SRC
(
src9
);
size_t
size
=
offsets
.
size
();
int
wsz
=
width
*
esz
;
int
wsz
=
kercn
*
esz1
;
std
::
vector
<
int
>
dividers
(
size
,
wsz
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
...
...
@@ -4460,14 +4460,14 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
if
(
dividers
[
i
]
!=
wsz
)
{
width
=
1
;
kercn
=
1
;
break
;
}
// another strategy
// width = *std::min_element(dividers.begin(), dividers.end());
return
width
;
return
kercn
;
}
#undef PROCESS_SRC
...
...
modules/core/src/opencl/meanstddev.cl
0 → 100644
浏览文件 @
33173d90
//
This
file
is
part
of
OpenCV
project.
//
It
is
subject
to
the
license
terms
in
the
LICENSE
file
found
in
the
top-level
directory
//
of
this
distribution
and
at
http://opencv.org/license.html.
//
Copyright
(
C
)
2014
,
Itseez,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
#
define
noconvert
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
storesqpix
(
val,
addr
)
*
(
__global
sqdstT
*
)(
addr
)
=
val
#
define
srcTSIZE
(
int
)
sizeof
(
srcT
)
#
define
dstTSIZE
(
int
)
sizeof
(
dstT
)
#
define
sqdstTSIZE
(
int
)
sizeof
(
sqdstT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
storesqpix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
sqdstT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
)
*3
)
#
define
dstTSIZE
((
int
)
sizeof
(
dstT1
)
*3
)
#
define
sqdstTSIZE
((
int
)
sizeof
(
sqdstT1
)
*3
)
#
endif
__kernel
void
meanStdDev
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
int
cols,
int
total,
int
groups,
__global
uchar
*
dstptr
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
__local
dstT
localMemSum[WGS2_ALIGNED]
;
__local
sqdstT
localMemSqSum[WGS2_ALIGNED]
;
#
ifdef
HAVE_MASK
__local
int
localMemNonZero[WGS2_ALIGNED]
;
#
endif
dstT
accSum
=
(
dstT
)(
0
)
;
sqdstT
accSqSum
=
(
sqdstT
)(
0
)
;
#
ifdef
HAVE_MASK
int
accNonZero
=
0
;
mask
+=
mask_offset
;
#
endif
srcptr
+=
src_offset
;
for
(
int
grain
=
groups
*
WGS
; id < total; id += grain)
{
#
ifdef
HAVE_MASK
#
ifdef
HAVE_SRC_CONT
int
mask_index
=
id
;
#
else
int
mask_index
=
mad24
(
id
/
cols,
mask_step,
id
%
cols
)
;
#
endif
if
(
mask[mask_index]
)
#
endif
{
#
ifdef
HAVE_SRC_CONT
int
src_index
=
mul24
(
id,
srcTSIZE
)
;
#
else
int
src_index
=
mad24
(
id
/
cols,
src_step,
mul24
(
id
%
cols,
srcTSIZE
))
;
#
endif
srcT
value
=
loadpix
(
srcptr
+
src_index
)
;
accSum
+=
convertToDT
(
value
)
;
sqdstT
dvalue
=
convertToSDT
(
value
)
;
accSqSum
=
fma
(
dvalue,
dvalue,
accSqSum
)
;
#
ifdef
HAVE_MASK
++accNonZero
;
#
endif
}
}
if
(
lid
<
WGS2_ALIGNED
)
{
localMemSum[lid]
=
accSum
;
localMemSqSum[lid]
=
accSqSum
;
#
ifdef
HAVE_MASK
localMemNonZero[lid]
=
accNonZero
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
>=
WGS2_ALIGNED
&&
total
>=
WGS2_ALIGNED
)
{
localMemSum[lid
-
WGS2_ALIGNED]
+=
accSum
;
localMemSqSum[lid
-
WGS2_ALIGNED]
+=
accSqSum
;
#
ifdef
HAVE_MASK
localMemNonZero[lid
-
WGS2_ALIGNED]
+=
accNonZero
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
WGS2_ALIGNED
>>
1
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localMemSum[lid]
+=
localMemSum[lid2]
;
localMemSqSum[lid]
+=
localMemSqSum[lid2]
;
#
ifdef
HAVE_MASK
localMemNonZero[lid]
+=
localMemNonZero[lid2]
;
#
endif
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
storepix
(
localMemSum[0],
dstptr
+
dstTSIZE
*
gid
)
;
storesqpix
(
localMemSqSum[0],
dstptr
+
mad24
(
dstTSIZE,
groups,
sqdstTSIZE
*
gid
))
;
#
ifdef
HAVE_MASK
*
(
__global
int
*
)(
dstptr
+
mad24
(
dstTSIZE
+
sqdstTSIZE,
groups,
(
int
)
sizeof
(
int
)
*
gid
))
=
localMemNonZero[0]
;
#
endif
}
}
modules/core/src/stat.cpp
浏览文件 @
33173d90
...
...
@@ -878,14 +878,76 @@ namespace cv {
static
bool
ocl_meanStdDev
(
InputArray
_src
,
OutputArray
_mean
,
OutputArray
_sdv
,
InputArray
_mask
)
{
bool
haveMask
=
_mask
.
kind
()
!=
_InputArray
::
NONE
;
int
nz
=
haveMask
?
-
1
:
(
int
)
_src
.
total
();
Scalar
mean
,
stddev
;
if
(
!
ocl_sum
(
_src
,
mean
,
OCL_OP_SUM
,
_mask
))
return
false
;
if
(
!
ocl_sum
(
_src
,
stddev
,
OCL_OP_SUM_SQR
,
_mask
))
return
false
;
int
nz
=
haveMask
?
countNonZero
(
_mask
)
:
(
int
)
_src
.
total
();
{
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
isContinuous
=
_src
.
isContinuous
();
int
groups
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
size_t
wgs
=
ocl
::
Device
::
getDefault
().
maxWorkGroupSize
();
int
ddepth
=
std
::
max
(
CV_32S
,
depth
),
sqddepth
=
std
::
max
(
CV_32F
,
depth
),
dtype
=
CV_MAKE_TYPE
(
ddepth
,
cn
),
sqdtype
=
CV_MAKETYPE
(
sqddepth
,
cn
);
CV_Assert
(
!
haveMask
||
_mask
.
type
()
==
CV_8UC1
);
int
wgs2_aligned
=
1
;
while
(
wgs2_aligned
<
(
int
)
wgs
)
wgs2_aligned
<<=
1
;
wgs2_aligned
>>=
1
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
char
cvt
[
2
][
40
];
String
opts
=
format
(
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s"
" -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
sqddepth
,
ocl
::
typeToStr
(
sqdtype
),
ocl
::
typeToStr
(
sqddepth
),
ocl
::
convertTypeStr
(
depth
,
sqddepth
,
cn
,
cvt
[
0
]),
cn
,
isContinuous
?
" -D HAVE_SRC_CONT"
:
""
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
[
1
]),
(
int
)
wgs
,
wgs2_aligned
,
haveMask
?
" -D HAVE_MASK"
:
""
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"meanStdDev"
,
ocl
::
core
::
meanstddev_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
int
dbsize
=
groups
*
((
haveMask
?
CV_ELEM_SIZE1
(
CV_32S
)
:
0
)
+
CV_ELEM_SIZE
(
sqdtype
)
+
CV_ELEM_SIZE
(
dtype
));
UMat
src
=
_src
.
getUMat
(),
db
(
1
,
dbsize
,
CV_8UC1
),
mask
=
_mask
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dbarg
=
ocl
::
KernelArg
::
PtrWriteOnly
(
db
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
if
(
haveMask
)
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
groups
,
dbarg
,
maskarg
);
else
k
.
args
(
srcarg
,
src
.
cols
,
(
int
)
src
.
total
(),
groups
,
dbarg
);
size_t
globalsize
=
groups
*
wgs
;
if
(
!
k
.
run
(
1
,
&
globalsize
,
&
wgs
,
false
))
return
false
;
typedef
Scalar
(
*
part_sum
)(
Mat
m
);
part_sum
funcs
[
3
]
=
{
ocl_part_sum
<
int
>
,
ocl_part_sum
<
float
>
,
ocl_part_sum
<
double
>
};
Mat
dbm
=
db
.
getMat
(
ACCESS_READ
);
mean
=
funcs
[
ddepth
-
CV_32S
](
Mat
(
1
,
groups
,
dtype
,
dbm
.
data
));
stddev
=
funcs
[
sqddepth
-
CV_32S
](
Mat
(
1
,
groups
,
sqdtype
,
dbm
.
data
+
groups
*
CV_ELEM_SIZE
(
dtype
)));
if
(
haveMask
)
nz
=
saturate_cast
<
int
>
(
funcs
[
0
](
Mat
(
1
,
groups
,
CV_32SC1
,
dbm
.
data
+
groups
*
(
CV_ELEM_SIZE
(
dtype
)
+
CV_ELEM_SIZE
(
sqdtype
))))[
0
]);
}
double
total
=
nz
!=
0
?
1.0
/
nz
:
0
;
int
k
,
j
,
cn
=
_src
.
channels
();
for
(
int
i
=
0
;
i
<
cn
;
++
i
)
...
...
@@ -927,7 +989,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
ocl_meanStdDev
(
_src
,
_mean
,
_sdv
,
_mask
))
Mat
src
=
_src
.
getMat
(),
mask
=
_mask
.
getMat
();
CV_Assert
(
mask
.
empty
()
||
mask
.
type
()
==
CV_8U
);
CV_Assert
(
mask
.
empty
()
||
mask
.
type
()
==
CV_8U
C1
);
int
k
,
cn
=
src
.
channels
(),
depth
=
src
.
depth
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录