Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
b332152b
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b332152b
编写于
8月 28, 2014
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2956 from ilya-lavrenov:tapi_accumulate
上级
d6681597
7017b125
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
99 addition
and
52 deletion
+99
-52
modules/core/include/opencv2/core/ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+21
-1
modules/core/src/ocl.cpp
modules/core/src/ocl.cpp
+26
-25
modules/imgproc/src/accum.cpp
modules/imgproc/src/accum.cpp
+11
-10
modules/imgproc/src/opencl/accumulate.cl
modules/imgproc/src/opencl/accumulate.cl
+41
-16
未找到文件。
modules/core/include/opencv2/core/ocl.hpp
浏览文件 @
b332152b
...
@@ -598,7 +598,27 @@ CV_EXPORTS const char* typeToStr(int t);
...
@@ -598,7 +598,27 @@ CV_EXPORTS const char* typeToStr(int t);
CV_EXPORTS
const
char
*
memopTypeToStr
(
int
t
);
CV_EXPORTS
const
char
*
memopTypeToStr
(
int
t
);
CV_EXPORTS
String
kernelToStr
(
InputArray
_kernel
,
int
ddepth
=
-
1
,
const
char
*
name
=
NULL
);
CV_EXPORTS
String
kernelToStr
(
InputArray
_kernel
,
int
ddepth
=
-
1
,
const
char
*
name
=
NULL
);
CV_EXPORTS
void
getPlatfomsInfo
(
std
::
vector
<
PlatformInfo
>&
platform_info
);
CV_EXPORTS
void
getPlatfomsInfo
(
std
::
vector
<
PlatformInfo
>&
platform_info
);
enum
OclVectorStrategy
{
// all matrices have its own vector width
OCL_VECTOR_OWN
=
0
,
// all matrices have maximal vector width among all matrices
// (useful for cases when matrices have different data types)
OCL_VECTOR_MAX
=
1
,
// default strategy
OCL_VECTOR_DEFAULT
=
OCL_VECTOR_OWN
};
CV_EXPORTS
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
=
noArray
(),
InputArray
src3
=
noArray
(),
CV_EXPORTS
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
=
noArray
(),
InputArray
src3
=
noArray
(),
InputArray
src4
=
noArray
(),
InputArray
src5
=
noArray
(),
InputArray
src6
=
noArray
(),
InputArray
src7
=
noArray
(),
InputArray
src8
=
noArray
(),
InputArray
src9
=
noArray
(),
OclVectorStrategy
strat
=
OCL_VECTOR_DEFAULT
);
// with OCL_VECTOR_MAX strategy
CV_EXPORTS
int
predictOptimalVectorWidthMax
(
InputArray
src1
,
InputArray
src2
=
noArray
(),
InputArray
src3
=
noArray
(),
InputArray
src4
=
noArray
(),
InputArray
src5
=
noArray
(),
InputArray
src6
=
noArray
(),
InputArray
src4
=
noArray
(),
InputArray
src5
=
noArray
(),
InputArray
src6
=
noArray
(),
InputArray
src7
=
noArray
(),
InputArray
src8
=
noArray
(),
InputArray
src9
=
noArray
());
InputArray
src7
=
noArray
(),
InputArray
src8
=
noArray
(),
InputArray
src9
=
noArray
());
...
...
modules/core/src/ocl.cpp
浏览文件 @
b332152b
...
@@ -4451,42 +4451,46 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
...
@@ -4451,42 +4451,46 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
if (!src.empty()) \
if (!src.empty()) \
{ \
{ \
CV_Assert(src.isMat() || src.isUMat()); \
CV_Assert(src.isMat() || src.isUMat()); \
int ctype = src.type(), ccn = CV_MAT_CN(ctype); \
Size csize = src.size(); \
Size csize = src.size(); \
cols.push_back(ccn * csize.width); \
int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
if (ctype != type) \
ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
if (cwidth < ckercn || ckercn <= 0) \
return 1; \
cols.push_back(cwidth); \
if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
return 1; \
return 1; \
offsets.push_back(src.offset()); \
offsets.push_back(src.offset()); \
steps.push_back(src.step()); \
steps.push_back(src.step()); \
dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
kercns.push_back(ckercn); \
} \
} \
} \
} \
while ((void)0, 0)
while ((void)0, 0)
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
,
InputArray
src3
,
int
predictOptimalVectorWidth
(
InputArray
src1
,
InputArray
src2
,
InputArray
src3
,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src7
,
InputArray
src8
,
InputArray
src9
)
InputArray
src7
,
InputArray
src8
,
InputArray
src9
,
OclVectorStrategy
strat
)
{
{
int
type
=
src1
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
),
esz1
=
CV_ELEM_SIZE1
(
depth
);
Size
ssize
=
src1
.
size
();
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
ref_type
=
src1
.
type
();
int
vectorWidths
[]
=
{
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthChar
(),
int
vectorWidths
[]
=
{
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthChar
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthShort
(),
d
.
preferredVectorWidthInt
(),
d
.
preferredVectorWidthFloat
(),
d
.
preferredVectorWidthInt
(),
d
.
preferredVectorWidthFloat
(),
d
.
preferredVectorWidthDouble
(),
-
1
}
,
kercn
=
vectorWidths
[
depth
]
;
d
.
preferredVectorWidthDouble
(),
-
1
};
// if the device says don't use vectors
// if the device says don't use vectors
if
(
vectorWidths
[
0
]
==
1
)
if
(
vectorWidths
[
0
]
==
1
)
{
{
// it's heuristic
// it's heuristic
int
vectorWidthsOthers
[]
=
{
16
,
16
,
8
,
8
,
1
,
1
,
1
,
-
1
};
vectorWidths
[
CV_8U
]
=
vectorWidths
[
CV_8S
]
=
16
;
kercn
=
vectorWidthsOthers
[
depth
];
vectorWidths
[
CV_16U
]
=
vectorWidths
[
CV_16S
]
=
8
;
vectorWidths
[
CV_32S
]
=
vectorWidths
[
CV_32F
]
=
vectorWidths
[
CV_64F
]
=
1
;
}
}
if
(
ssize
.
width
*
cn
<
kercn
||
kercn
<=
0
)
return
1
;
std
::
vector
<
size_t
>
offsets
,
steps
,
cols
;
std
::
vector
<
size_t
>
offsets
,
steps
,
cols
;
std
::
vector
<
int
>
dividers
,
kercns
;
PROCESS_SRC
(
src1
);
PROCESS_SRC
(
src1
);
PROCESS_SRC
(
src2
);
PROCESS_SRC
(
src2
);
PROCESS_SRC
(
src3
);
PROCESS_SRC
(
src3
);
...
@@ -4498,27 +4502,24 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
...
@@ -4498,27 +4502,24 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
PROCESS_SRC
(
src9
);
PROCESS_SRC
(
src9
);
size_t
size
=
offsets
.
size
();
size_t
size
=
offsets
.
size
();
int
wsz
=
kercn
*
esz1
;
std
::
vector
<
int
>
dividers
(
size
,
wsz
);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
while
(
offsets
[
i
]
%
dividers
[
i
]
!=
0
||
steps
[
i
]
%
dividers
[
i
]
!=
0
||
cols
[
i
]
%
divider
s
[
i
]
!=
0
)
while
(
offsets
[
i
]
%
dividers
[
i
]
!=
0
||
steps
[
i
]
%
dividers
[
i
]
!=
0
||
cols
[
i
]
%
kercn
s
[
i
]
!=
0
)
dividers
[
i
]
>>=
1
;
dividers
[
i
]
>>=
1
,
kercns
[
i
]
>>=
1
;
// default strategy
// default strategy
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
int
kercn
=
*
std
::
min_element
(
kercns
.
begin
(),
kercns
.
end
());
if
(
dividers
[
i
]
!=
wsz
)
{
kercn
=
1
;
break
;
}
// another strategy
// width = *std::min_element(dividers.begin(), dividers.end());
return
kercn
;
return
kercn
;
}
}
int
predictOptimalVectorWidthMax
(
InputArray
src1
,
InputArray
src2
,
InputArray
src3
,
InputArray
src4
,
InputArray
src5
,
InputArray
src6
,
InputArray
src7
,
InputArray
src8
,
InputArray
src9
)
{
return
predictOptimalVectorWidth
(
src1
,
src2
,
src3
,
src4
,
src5
,
src6
,
src7
,
src8
,
src9
,
OCL_VECTOR_MAX
);
}
#undef PROCESS_SRC
#undef PROCESS_SRC
...
...
modules/imgproc/src/accum.cpp
浏览文件 @
b332152b
...
@@ -369,11 +369,10 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -369,11 +369,10 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
CV_Assert
(
op_type
==
ACCUMULATE
||
op_type
==
ACCUMULATE_SQUARE
||
CV_Assert
(
op_type
==
ACCUMULATE
||
op_type
==
ACCUMULATE_SQUARE
||
op_type
==
ACCUMULATE_PRODUCT
||
op_type
==
ACCUMULATE_WEIGHTED
);
op_type
==
ACCUMULATE_PRODUCT
||
op_type
==
ACCUMULATE_WEIGHTED
);
int
stype
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
stype
);
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
sdepth
=
CV_MAT_DEPTH
(
stype
),
ddepth
=
_dst
.
depth
();
bool
haveMask
=
!
_mask
.
empty
(),
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
int
stype
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
ddepth
=
_dst
.
depth
();
bool
doubleSupport
=
ocl
::
Device
::
getDefault
().
doubleFPConfig
()
>
0
,
int
kercn
=
haveMask
?
cn
:
ocl
::
predictOptimalVectorWidthMax
(
_src
,
_src2
,
_dst
),
rowsPerWI
=
dev
.
isIntel
()
?
4
:
1
;
haveMask
=
!
_mask
.
empty
();
if
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
if
(
!
doubleSupport
&&
(
sdepth
==
CV_64F
||
ddepth
==
CV_64F
))
return
false
;
return
false
;
...
@@ -381,11 +380,13 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -381,11 +380,13 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
const
char
*
const
opMap
[
4
]
=
{
"ACCUMULATE"
,
"ACCUMULATE_SQUARE"
,
"ACCUMULATE_PRODUCT"
,
const
char
*
const
opMap
[
4
]
=
{
"ACCUMULATE"
,
"ACCUMULATE_SQUARE"
,
"ACCUMULATE_PRODUCT"
,
"ACCUMULATE_WEIGHTED"
};
"ACCUMULATE_WEIGHTED"
};
char
cvt
[
40
];
ocl
::
Kernel
k
(
"accumulate"
,
ocl
::
imgproc
::
accumulate_oclsrc
,
ocl
::
Kernel
k
(
"accumulate"
,
ocl
::
imgproc
::
accumulate_oclsrc
,
format
(
"-D %s%s -D srcT
=%s -D cn=%d -D dstT=%s
%s"
,
format
(
"-D %s%s -D srcT
1=%s -D cn=%d -D dstT1=%s%s -D rowsPerWI=%d -D convertToDT=
%s"
,
opMap
[
op_type
],
haveMask
?
" -D HAVE_MASK"
:
""
,
opMap
[
op_type
],
haveMask
?
" -D HAVE_MASK"
:
""
,
ocl
::
typeToStr
(
sdepth
),
cn
,
ocl
::
typeToStr
(
ddepth
),
ocl
::
typeToStr
(
sdepth
),
kercn
,
ocl
::
typeToStr
(
ddepth
),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
));
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
rowsPerWI
,
ocl
::
convertTypeStr
(
sdepth
,
ddepth
,
1
,
cvt
)));
if
(
k
.
empty
())
if
(
k
.
empty
())
return
false
;
return
false
;
...
@@ -393,7 +394,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -393,7 +394,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
src2arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
src2arg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src2
),
dstarg
=
ocl
::
KernelArg
::
ReadWrite
(
dst
),
dstarg
=
ocl
::
KernelArg
::
ReadWrite
(
dst
,
cn
,
kercn
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
);
int
argidx
=
k
.
set
(
0
,
srcarg
);
int
argidx
=
k
.
set
(
0
,
srcarg
);
...
@@ -410,7 +411,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
...
@@ -410,7 +411,7 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
if
(
haveMask
)
if
(
haveMask
)
k
.
set
(
argidx
,
maskarg
);
k
.
set
(
argidx
,
maskarg
);
size_t
globalsize
[
2
]
=
{
src
.
cols
,
src
.
rows
};
size_t
globalsize
[
2
]
=
{
src
.
cols
*
cn
/
kercn
,
(
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
}
...
...
modules/imgproc/src/opencl/accumulate.cl
浏览文件 @
b332152b
...
@@ -13,13 +13,18 @@
...
@@ -13,13 +13,18 @@
#
endif
#
endif
#
endif
#
endif
#
define
SRC_TSIZE
cn
*
(
int
)
sizeof
(
srcT1
)
#
define
DST_TSIZE
cn
*
(
int
)
sizeof
(
dstT1
)
#
define
noconvert
__kernel
void
accumulate
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
__kernel
void
accumulate
(
__global
const
uchar
*
srcptr,
int
src_step,
int
src_offset,
#
ifdef
ACCUMULATE_PRODUCT
#
ifdef
ACCUMULATE_PRODUCT
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
__global
const
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
#
endif
#
endif
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
#
ifdef
ACCUMULATE_WEIGHTED
#
ifdef
ACCUMULATE_WEIGHTED
,
dstT
alpha
,
dstT
1
alpha
#
endif
#
endif
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
...
@@ -27,39 +32,59 @@ __kernel void accumulate(__global const uchar * srcptr, int src_step, int src_of
...
@@ -27,39 +32,59 @@ __kernel void accumulate(__global const uchar * srcptr, int src_step, int src_of
)
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
*
rowsPerWI
;
if
(
x
<
dst_cols
&&
y
<
dst_rows
)
if
(
x
<
dst_cols
)
{
{
int
src_index
=
mad24
(
y,
src_step,
src_offset
+
x
*
cn
*
(
int
)
sizeof
(
srcT
))
;
int
src_index
=
mad24
(
y,
src_step,
mad24
(
x,
SRC_TSIZE,
src_offset
))
;
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
mask
+=
mask_index
;
mask
+=
mask_index
;
#
endif
#
endif
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
*
cn
*
(
int
)
sizeof
(
dstT
))
;
__global
const
srcT
*
src
=
(
__global
const
srcT
*
)(
srcptr
+
src_index
)
;
#
ifdef
ACCUMULATE_PRODUCT
#
ifdef
ACCUMULATE_PRODUCT
int
src2_index
=
mad24
(
y,
src2_step,
src2_offset
+
x
*
cn
*
(
int
)
sizeof
(
srcT
))
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
SRC_TSIZE,
src2_offset
))
;
__global
const
srcT
*
src2
=
(
__global
const
srcT
*
)(
src2ptr
+
src2_index
)
;
#
endif
#
endif
__global
dstT
*
dst
=
(
__global
dstT
*
)(
dstptr
+
dst_index
)
;
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
DST_TSIZE,
dst_offset
)
)
;
#
pragma
unroll
#
pragma
unroll
for
(
int
c
=
0
; c < cn; ++c)
for
(
int
i
=
0
; i < rowsPerWI; ++i)
if
(
y
<
dst_rows
)
{
__global
const
srcT1
*
src
=
(
__global
const
srcT1
*
)(
srcptr
+
src_index
)
;
#
ifdef
ACCUMULATE_PRODUCT
__global
const
srcT1
*
src2
=
(
__global
const
srcT1
*
)(
src2ptr
+
src2_index
)
;
#
endif
__global
dstT1
*
dst
=
(
__global
dstT1
*
)(
dstptr
+
dst_index
)
;
#
ifdef
HAVE_MASK
#
ifdef
HAVE_MASK
if
(
mask[0]
)
if
(
mask[0]
)
#
endif
#
endif
#
pragma
unroll
for
(
int
c
=
0
; c < cn; ++c)
{
#
ifdef
ACCUMULATE
#
ifdef
ACCUMULATE
dst[c]
+=
src[c]
;
dst[c]
+=
convertToDT
(
src[c]
)
;
#
elif
defined
ACCUMULATE_SQUARE
#
elif
defined
ACCUMULATE_SQUARE
dst[c]
+=
src[c]
*
src[c]
;
dstT1
val
=
convertToDT
(
src[c]
)
;
dst[c]
=
fma
(
val,
val,
dst[c]
)
;
#
elif
defined
ACCUMULATE_PRODUCT
#
elif
defined
ACCUMULATE_PRODUCT
dst[c]
+=
src[c]
*
src2[c]
;
dst[c]
=
fma
(
convertToDT
(
src[c]
)
,
convertToDT
(
src2[c]
)
,
dst[c]
)
;
#
elif
defined
ACCUMULATE_WEIGHTED
#
elif
defined
ACCUMULATE_WEIGHTED
dst[c]
=
(
1
-
alpha
)
*
dst[c]
+
src[c]
*
alpha
;
dst[c]
=
fma
(
1
-
alpha,
dst[c],
src[c]
*
alpha
)
;
#
else
#
else
#
error
"Unknown accumulation type"
#
error
"Unknown accumulation type"
#
endif
#
endif
}
}
src_index
+=
src_step
;
#
ifdef
ACCUMULATE_PRODUCT
src2_index
+=
src2_step
;
#
endif
#
ifdef
HAVE_MASK
mask
+=
mask_step
;
#
endif
dst_index
+=
dst_step
;
++y
;
}
}
}
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录