Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
c5bf1c10
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c5bf1c10
编写于
3月 13, 2014
作者:
A
Andrey Pavlenko
提交者:
OpenCV Buildbot
3月 13, 2014
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2470 from ilya-lavrenov:tapi_reduction
上级
42335396
9932cf41
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
47 addition
and
30 deletion
+47
-30
modules/core/perf/opencl/perf_arithm.cpp
modules/core/perf/opencl/perf_arithm.cpp
+12
-8
modules/core/src/opencl/reduce.cl
modules/core/src/opencl/reduce.cl
+29
-19
modules/core/src/stat.cpp
modules/core/src/stat.cpp
+6
-3
未找到文件。
modules/core/perf/opencl/perf_arithm.cpp
浏览文件 @
c5bf1c10
...
...
@@ -344,7 +344,7 @@ OCL_PERF_TEST_P(FlipFixture, Flip,
typedef
Size_MatType
MinMaxLocFixture
;
OCL_PERF_TEST_P
(
MinMaxLocFixture
,
MinMaxLoc
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
))
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -380,7 +380,7 @@ typedef Size_MatType SumFixture;
OCL_PERF_TEST_P
(
SumFixture
,
Sum
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
))
OCL_TEST_TYPES
_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -447,7 +447,7 @@ OCL_PERF_TEST_P(PhaseFixture, Phase, ::testing::Combine(
SANITY_CHECK
(
dst
,
1e-2
);
}
///////////// bitwise_and////////////////////////
///////////// bitwise_and
////////////////////////
typedef
Size_MatType
BitwiseAndFixture
;
...
...
@@ -531,7 +531,7 @@ OCL_PERF_TEST_P(BitwiseNotFixture, Bitwise_not,
SANITY_CHECK
(
dst
);
}
///////////// compare////////////////////////
///////////// compare
////////////////////////
CV_ENUM
(
CmpCode
,
CMP_LT
,
CMP_LE
,
CMP_EQ
,
CMP_NE
,
CMP_GE
,
CMP_GT
)
...
...
@@ -652,7 +652,8 @@ OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity,
typedef
Size_MatType
MeanStdDevFixture
;
OCL_PERF_TEST_P
(
MeanStdDevFixture
,
MeanStdDev
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES
))
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -688,7 +689,8 @@ typedef std::tr1::tuple<Size, MatType, NormType> NormParams;
typedef
TestBaseWithParam
<
NormParams
>
NormFixture
;
OCL_PERF_TEST_P
(
NormFixture
,
Norm
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES
,
NormType
::
all
()))
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES_134
,
NormType
::
all
()))
{
const
NormParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -711,7 +713,8 @@ OCL_PERF_TEST_P(NormFixture, Norm,
typedef
Size_MatType
UMatDotFixture
;
OCL_PERF_TEST_P
(
UMatDotFixture
,
UMatDot
,
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES
))
::
testing
::
Combine
(
OCL_PERF_ENUM
(
OCL_SIZE_1
,
OCL_SIZE_2
,
OCL_SIZE_3
),
OCL_TEST_TYPES_134
))
{
const
Size_MatType_t
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
@@ -820,7 +823,8 @@ typedef tuple<Size, MatType, NormalizeModes> NormalizeParams;
typedef
TestBaseWithParam
<
NormalizeParams
>
NormalizeFixture
;
OCL_PERF_TEST_P
(
NormalizeFixture
,
Normalize
,
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES
,
NormalizeModes
::
all
()))
::
testing
::
Combine
(
OCL_TEST_SIZES
,
OCL_TEST_TYPES_134
,
NormalizeModes
::
all
()))
{
const
NormalizeParams
params
=
GetParam
();
const
Size
srcSize
=
get
<
0
>
(
params
);
...
...
modules/core/src/opencl/reduce.cl
浏览文件 @
c5bf1c10
...
...
@@ -52,6 +52,18 @@
#
define
noconvert
#
if
cn
!=
3
#
define
loadpix
(
addr
)
*
(
__global
const
srcT
*
)(
addr
)
#
define
storepix
(
val,
addr
)
*
(
__global
dstT
*
)(
addr
)
=
val
#
define
srcTSIZE
(
int
)
sizeof
(
srcT
)
#
define
dstTSIZE
(
int
)
sizeof
(
dstT
)
#
else
#
define
loadpix
(
addr
)
vload3
(
0
,
(
__global
const
srcT1
*
)(
addr
))
#
define
storepix
(
val,
addr
)
vstore3
(
val,
0
,
(
__global
dstT1
*
)(
addr
))
#
define
srcTSIZE
((
int
)
sizeof
(
srcT1
)
*3
)
#
define
dstTSIZE
((
int
)
sizeof
(
dstT1
)
*3
)
#
endif
#
ifdef
HAVE_MASK
#
define
EXTRA_PARAMS
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
else
...
...
@@ -88,19 +100,20 @@
#ifdef HAVE_MASK
#define REDUCE_GLOBAL \
dstT temp = convertToDT(src[0]); \
int mask_index = mad24(id / cols, mask_step, mask_offset + (id % cols)); \
if (mask[mask_index]) \
FUNC(accumulator, temp)
{ \
dstT temp = convertToDT(loadpix(srcptr + src_index)); \
FUNC(accumulator, temp); \
}
#elif defined OP_DOT
#define REDUCE_GLOBAL \
int src2_index = mad24(id / cols, src2_step, mad24(id % cols, (int)sizeof(srcT), src2_offset)); \
__global const srcT * src2 = (__global const srcT *)(src2ptr + src2_index); \
dstT temp = convertToDT(src[0]), temp2 = convertToDT(src2[0]); \
int src2_index = mad24(id / cols, src2_step, mad24(id % cols, srcTSIZE, src2_offset)); \
dstT temp = convertToDT(loadpix(srcptr + src_index)), temp2 = convertToDT(loadpix(src2ptr + src2_index)); \
FUNC(accumulator, temp, temp2)
#else
#define REDUCE_GLOBAL \
dstT temp = convertToDT(
src[0]
); \
dstT temp = convertToDT(
loadpix(srcptr + src_index)
); \
FUNC(accumulator, temp)
#endif
...
...
@@ -111,8 +124,7 @@
#define REDUCE_LOCAL_2 \
localmem[lid] += localmem[lid2]
#define CALC_RESULT \
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
storepix(localmem[0], dstptr + dstTSIZE * gid)
// countNonZero stuff
#elif defined OP_COUNT_NON_ZERO
...
...
@@ -123,7 +135,7 @@
dstT accumulator = (dstT)(0); \
srcT zero = (srcT)(0), one = (srcT)(1)
#define REDUCE_GLOBAL \
accumulator +=
src[0]
== zero ? zero : one
accumulator +=
loadpix(srcptr + src_index)
== zero ? zero : one
#define SET_LOCAL_1 \
localmem[lid] = accumulator
#define REDUCE_LOCAL_1 \
...
...
@@ -131,8 +143,7 @@
#define REDUCE_LOCAL_2 \
localmem[lid] += localmem[lid2]
#define CALC_RESULT \
__global dstT * dst = (__global dstT *)(dstptr + (int)sizeof(dstT) * gid); \
dst[0] = localmem[0]
storepix(localmem[0], dstptr + dstTSIZE * gid)
// minMaxLoc stuff
#elif defined OP_MIN_MAX_LOC |
|
defined
OP_MIN_MAX_LOC_MASK
...
...
@@ -167,6 +178,8 @@
#
define
MAX_VAL
DBL_MAX
#
endif
#
define
dstT
srcT
#
define
DECLARE_LOCAL_MEM
\
__local
srcT
localmem_min[WGS2_ALIGNED]
; \
__local
srcT
localmem_max[WGS2_ALIGNED]
; \
...
...
@@ -181,7 +194,7 @@
srcT
temp
; \
int
temploc
#
define
REDUCE_GLOBAL
\
temp
=
src[0]
; \
temp
=
loadpix
(
srcptr
+
src_index
)
; \
temploc
=
id
; \
srcT
temp_minval
=
minval,
temp_maxval
=
maxval
; \
minval
=
min
(
minval,
temp
)
; \
...
...
@@ -217,10 +230,8 @@
localmem_maxloc[lid]
:
(
max1
==
max2
)
?
(
max1
==
oldmax
)
?
min
(
localmem_maxloc[lid2],localmem_maxloc[lid]
)
:
\
localmem_maxloc[lid2]
:
localmem_maxloc[lid]
#
define
CALC_RESULT
\
__global
srcT
*
dstminval
=
(
__global
srcT
*
)(
dstptr
+
(
int
)
sizeof
(
srcT
)
*
gid
)
; \
__global
srcT
*
dstmaxval
=
(
__global
srcT
*
)(
dstptr2
+
(
int
)
sizeof
(
srcT
)
*
gid
)
; \
dstminval[0]
=
localmem_min[0]
; \
dstmaxval[0]
=
localmem_max[0]
; \
storepix
(
localmem_min[0],
dstptr
+
dstTSIZE
*
gid
)
; \
storepix
(
localmem_max[0],
dstptr2
+
dstTSIZE
*
gid
)
; \
dstlocptr[gid]
=
localmem_minloc[0]
; \
dstlocptr2[gid]
=
localmem_maxloc[0]
...
...
@@ -236,7 +247,7 @@
int
temploc
#
undef
REDUCE_GLOBAL
#
define
REDUCE_GLOBAL
\
temp
=
src[0]
; \
temp
=
loadpix
(
srcptr
+
src_index
)
; \
temploc
=
id
; \
int
mask_index
=
mad24
(
id
/
cols,
mask_step,
mask_offset
+
(
id
%
cols
)
*
(
int
)
sizeof
(
uchar
))
; \
__global
const
uchar
*
mask
=
(
__global
const
uchar
*
)(
maskptr
+
mask_index
)
; \
...
...
@@ -278,8 +289,7 @@ __kernel void reduce(__global const uchar * srcptr, int src_step, int src_offset
for
(
int
grain
=
groupnum
*
WGS
; id < total; id += grain)
{
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
(
int
)
sizeof
(
srcT
)
,
src_offset
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
int
src_index
=
mad24
(
id
/
cols,
src_step,
mad24
(
id
%
cols,
srcTSIZE,
src_offset
))
;
REDUCE_GLOBAL
;
}
...
...
modules/core/src/stat.cpp
浏览文件 @
c5bf1c10
...
...
@@ -475,7 +475,7 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
;
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
||
cn
==
3
)
if
(
(
!
doubleSupport
&&
depth
==
CV_64F
)
||
cn
>
4
)
return
false
;
int
dbsize
=
ocl
::
Device
::
getDefault
().
maxComputeUnits
();
...
...
@@ -494,8 +494,11 @@ static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask
static
const
char
*
const
opMap
[
3
]
=
{
"OP_SUM"
,
"OP_SUM_ABS"
,
"OP_SUM_SQR"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"reduce"
,
ocl
::
core
::
reduce_oclsrc
,
format
(
"-D srcT=%s -D dstT=%s -D ddepth=%d -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
dtype
),
ddepth
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
format
(
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
" -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s"
,
ocl
::
typeToStr
(
type
),
ocl
::
typeToStr
(
depth
),
ocl
::
typeToStr
(
dtype
),
ocl
::
typeToStr
(
ddepth
),
ddepth
,
cn
,
ocl
::
convertTypeStr
(
depth
,
ddepth
,
cn
,
cvt
),
opMap
[
sum_op
],
(
int
)
wgs
,
wgs2_aligned
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveMask
?
" -D HAVE_MASK"
:
""
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录