Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
5726e80f
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5726e80f
编写于
3月 14, 2014
作者:
A
Andrey Pavlenko
提交者:
OpenCV Buildbot
3月 14, 2014
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2475 from ilya-lavrenov:ocl_2.4_fix
上级
836635d2
61c347fb
变更
13
展开全部
隐藏空白更改
内联
并排
Showing
13 changed file
with
199 addition
and
794 deletion
+199
-794
modules/nonfree/perf/perf_main.cpp
modules/nonfree/perf/perf_main.cpp
+1
-1
modules/nonfree/perf/perf_surf_ocl.cpp
modules/nonfree/perf/perf_surf_ocl.cpp
+2
-2
modules/ocl/src/arithm.cpp
modules/ocl/src/arithm.cpp
+127
-166
modules/ocl/src/gftt.cpp
modules/ocl/src/gftt.cpp
+14
-15
modules/ocl/src/opencl/arithm_bitwise.cl
modules/ocl/src/opencl/arithm_bitwise.cl
+31
-20
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
+0
-88
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
+0
-82
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+0
-86
modules/ocl/src/opencl/arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+0
-253
modules/ocl/src/opencl/arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+14
-64
modules/ocl/src/opencl/arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+4
-9
modules/ocl/src/opencl/arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+4
-6
modules/ocl/test/test_arithm.cpp
modules/ocl/test/test_arithm.cpp
+2
-2
未找到文件。
modules/nonfree/perf/perf_main.cpp
浏览文件 @
5726e80f
...
...
@@ -5,7 +5,7 @@ static const char * impls[] = {
#ifdef HAVE_CUDA
"cuda"
,
#endif
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENC
V_OC
L
"ocl"
,
#endif
"plain"
...
...
modules/nonfree/perf/perf_surf_ocl.cpp
浏览文件 @
5726e80f
...
...
@@ -59,7 +59,7 @@ typedef perf::TestBaseWithParam<std::string> OCL_SURF;
#define OCL_TEST_CYCLE() for( ; startTimer(), next(); cv::ocl::finish(), stopTimer())
PERF_TEST_P
(
OCL_SURF
,
with_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
PERF_TEST_P
(
OCL_SURF
,
DISABLED_
with_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
{
string
filename
=
getDataPath
(
GetParam
());
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
...
...
@@ -94,7 +94,7 @@ PERF_TEST_P(OCL_SURF, with_data_transfer, testing::Values(SURF_IMAGES))
SANITY_CHECK_NOTHING
();
}
PERF_TEST_P
(
OCL_SURF
,
without_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
PERF_TEST_P
(
OCL_SURF
,
DISABLED_
without_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
{
string
filename
=
getDataPath
(
GetParam
());
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
...
...
modules/ocl/src/arithm.cpp
浏览文件 @
5726e80f
此差异已折叠。
点击以展开。
modules/ocl/src/gftt.cpp
浏览文件 @
5726e80f
...
...
@@ -146,34 +146,33 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
dst
);
cl_mem
dst_data
=
reinterpret_cast
<
cl_mem
>
(
dst
.
data
);
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;
int
offset
=
src
.
offset
/
src
.
elemSize
();
int
vElemSize
=
src
.
elemSize1
();
int
src_step
=
src
.
step
/
vElemSize
,
src_offset
=
src
.
offset
/
vElemSize
;
int
total
=
src
.
size
().
area
();
{
// first parallel pass
{
// first parallel pass
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_col
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
row
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
"arithm_op_minMax"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5"
);
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5
-D vlen=1
"
);
}
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
...
...
modules/ocl/src/opencl/arithm_bitwise
_binary
.cl
→
modules/ocl/src/opencl/arithm_bitwise.cl
浏览文件 @
5726e80f
...
...
@@ -48,35 +48,46 @@
///////////////////////////////////////////
bitwise_binary
//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
__kernel
void
arithm_bitwise
(
__global
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
#
ifdef
OP_BINARY
__global
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
#
elif
defined
HAVE_SCALAR
T
scalar,
#
endif
#
ifdef
HAVE_MASK
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
#
endif
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
dst_cols
&&
y
<
dst_
rows
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
ifdef
HAVE_MASK
mask
+=
mad24
(
y,
mask_step,
x
+
mask_offset
)
;
if
(
mask[0]
)
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src1_offset
))
;
#
ifdef
OP_BINARY
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src2_offset
))
;
#
endif
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst_offset
))
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
#
ifdef
OP_BINARY
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
#
endif
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
#
ifdef
OP_BINARY
dst[0]
=
src1[0]
Operation
src2[0]
;
#
elif
defined
HAVE_SCALAR
dst[0]
=
src1[0]
Operation
scalar
;
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index
]
;
dst[0]
=
Operation
src1[0
]
;
#
endif
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
已删除
100644 → 0
浏览文件 @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Applies the binary operator `Operation` to src1 and src2 and writes the
// result to dst, but only for elements whose mask byte is non-zero.
//
// One work-item handles the element at (x, y) = (get_global_id(0),
// get_global_id(1)); work-items with x >= cols1 or y >= rows do nothing.
// The *_step / *_offset arguments are added directly to uchar pointers, i.e.
// they are used as byte strides and byte offsets. The mask is addressed with
// the unscaled column, while src1, src2 and dst are addressed with the column
// multiplied by elemSize when elemSize > 1.
//
// NOTE(review): Operation, elemSize, vlen, ucharv, vloadn and vstoren are not
// defined in this block; presumably they are injected as build options by the
// host code -- confirm.
__kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, int src1_offset,
                                         __global uchar * src2, int src2_step, int src2_offset,
                                         __global uchar * mask, int mask_step, int mask_offset,
                                         __global uchar * dst, int dst_step, int dst_offset,
                                         int cols1, int rows)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Nothing to do outside the cols1 x rows range.
    if (col >= cols1 || row >= rows)
        return;

    // Elements with a zero mask byte are left untouched in dst.
    if (!mask[mad24(row, mask_step, mask_offset + col)])
        return;

    // Convert the element column into a byte column for multi-byte elements.
#if elemSize > 1
    const int byte_col = col * elemSize;
#else
    const int byte_col = col;
#endif

    __global uchar * const lhs = src1 + mad24(row, src1_step, byte_col + src1_offset);
    __global uchar * const rhs = src2 + mad24(row, src2_step, byte_col + src2_offset);
    __global uchar * const out = dst + mad24(row, dst_step, byte_col + dst_offset);

#if elemSize > 1
    // Process the element in chunks of vlen bytes.
    #pragma unroll
    for (int k = 0; k < elemSize; k += vlen)
    {
        ucharv a = vloadn(0, lhs + k);
        ucharv b = vloadn(0, rhs + k);
        ucharv r = a Operation b;

        vstoren(r, 0, out + k);
    }
#else
    out[0] = lhs[0] Operation rhs[0];
#endif
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
已删除
100644 → 0
浏览文件 @
836635d2
////////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
///////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary/////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Applies the binary operator `Operation` between each element of src1 and a
// single operand stored at the start of src2, writing the result to dst.
//
// One work-item handles the element at (x, y) = (get_global_id(0),
// get_global_id(1)); work-items with x >= cols or y >= rows do nothing.
// src1_step/src1_offset and dst_step/dst_offset are added directly to uchar
// pointers, i.e. they are used as byte strides and byte offsets. src2 is
// always read from its beginning (bytes [0, elemSize) or byte 0), independent
// of the work-item position.
//
// NOTE(review): Operation, elemSize, vlen, ucharv, vloadn and vstoren are not
// defined in this block; presumably they are injected as build options by the
// host code -- confirm.
__kernel void arithm_bitwise_binary_scalar(__global uchar *src1, int src1_step, int src1_offset,
                                           __global uchar *src2,
                                           __global uchar *dst, int dst_step, int dst_offset,
                                           int cols, int rows)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Nothing to do outside the cols x rows range.
    if (col >= cols || row >= rows)
        return;

    // Convert the element column into a byte column for multi-byte elements.
#if elemSize > 1
    const int byte_col = col * elemSize;
#else
    const int byte_col = col;
#endif

    __global uchar * const in  = src1 + mad24(row, src1_step, src1_offset + byte_col);
    __global uchar * const out = dst + mad24(row, dst_step, dst_offset + byte_col);

#if elemSize > 1
    // Process the element in chunks of vlen bytes; the scalar operand is
    // re-read from src2 at the matching byte position.
    #pragma unroll
    for (int k = 0; k < elemSize; k += vlen)
    {
        ucharv a = vloadn(0, in + k);
        ucharv s = vloadn(0, src2 + k);
        ucharv r = a Operation s;

        vstoren(r, 0, out + k);
    }
#else
    out[0] = in[0] Operation src2[0];
#endif
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
已删除
100644 → 0
浏览文件 @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Masked variant of the scalar bitwise operation: for every element whose
// mask byte is non-zero, combines the src1 element with the operand stored at
// the start of src2 using `Operation` and writes the result to dst.
//
// One work-item handles the element at (x, y) = (get_global_id(0),
// get_global_id(1)); work-items with x >= cols or y >= rows do nothing.
// The *_step / *_offset arguments are added directly to uchar pointers, i.e.
// they are used as byte strides and byte offsets. The mask is addressed with
// the unscaled column, while src1 and dst use the column multiplied by
// elemSize when elemSize > 1.
//
// NOTE(review): Operation, elemSize, vlen, ucharv, vloadn and vstoren are not
// defined in this block; presumably they are injected as build options by the
// host code -- confirm.
__kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_step, int src1_offset,
                                                __global uchar *src2,
                                                __global uchar *mask, int mask_step, int mask_offset,
                                                __global uchar *dst, int dst_step, int dst_offset,
                                                int cols, int rows)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Nothing to do outside the cols x rows range.
    if (col >= cols || row >= rows)
        return;

    // Elements with a zero mask byte are left untouched in dst.
    if (!mask[mad24(row, mask_step, col + mask_offset)])
        return;

    // Convert the element column into a byte column for multi-byte elements.
#if elemSize > 1
    const int byte_col = col * elemSize;
#else
    const int byte_col = col;
#endif

    __global uchar * const in  = src1 + mad24(row, src1_step, byte_col + src1_offset);
    __global uchar * const out = dst + mad24(row, dst_step, byte_col + dst_offset);

#if elemSize > 1
    // Process the element in chunks of vlen bytes; the scalar operand is
    // re-read from src2 at the matching byte position.
    #pragma unroll
    for (int k = 0; k < elemSize; k += vlen)
    {
        ucharv a = vloadn(0, in + k);
        ucharv s = vloadn(0, src2 + k);
        ucharv r = a Operation s;

        vstoren(r, 0, out + k);
    }
#else
    out[0] = in[0] Operation src2[0];
#endif
}
modules/ocl/src/opencl/arithm_bitwise_not.cl
已删除
100644 → 0
浏览文件 @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Bitwise NOT on uchar data, four bytes per work-item.
//
// Work-item (x, y) handles the four consecutive bytes starting at byte column
// 4 * x of row y; work-items with x >= cols or y >= rows do nothing. A lane is
// replaced by the inverted source byte only if its byte index lies below
// y * dst_step + dst_offset + dst_step1; the remaining lanes are written back
// with the value previously loaded from dst.
//
// NOTE(review): dst_step1 is presumably the row length in bytes, and cols the
// number of 4-byte groups per row -- confirm against the host code.
__kernel void arithm_bitwise_not_D0(__global uchar *src1, int src1_step, int src1_offset,
                                    __global uchar *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    const int byte_col = gx << 2;

    const int src_pos = mad24(gy, src1_step, byte_col + src1_offset);
    const int row_end = mad24(gy, dst_step, dst_offset + dst_step1);
    const int dst_pos = mad24(gy, dst_step, dst_offset + byte_col);

    const uchar4 inverted = ~vload4(0, src1 + src_pos);
    uchar4 result = vload4(0, dst + dst_pos);

    // Only lanes that fall before the end of the row take the new value.
    if (dst_pos + 0 < row_end) result.x = inverted.x;
    if (dst_pos + 1 < row_end) result.y = inverted.y;
    if (dst_pos + 2 < row_end) result.z = inverted.z;
    if (dst_pos + 3 < row_end) result.w = inverted.w;

    vstore4(result, 0, dst + dst_pos);
}
// Bitwise NOT on char data, four bytes per work-item.
//
// Work-item (x, y) handles the four consecutive bytes starting at byte column
// 4 * x of row y; work-items with x >= cols or y >= rows do nothing. A lane is
// replaced by the inverted source byte only if its byte index lies below
// y * dst_step + dst_offset + dst_step1; the remaining lanes are written back
// with the value previously loaded from dst.
//
// NOTE(review): dst_step1 is presumably the row length in bytes, and cols the
// number of 4-byte groups per row -- confirm against the host code.
__kernel void arithm_bitwise_not_D1(__global char *src1, int src1_step, int src1_offset,
                                    __global char *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    const int byte_col = gx << 2;

    const int src_pos = mad24(gy, src1_step, byte_col + src1_offset);
    const int row_end = mad24(gy, dst_step, dst_offset + dst_step1);
    const int dst_pos = mad24(gy, dst_step, dst_offset + byte_col);

    const char4 inverted = ~vload4(0, src1 + src_pos);
    char4 result = vload4(0, dst + dst_pos);

    // Only lanes that fall before the end of the row take the new value.
    if (dst_pos + 0 < row_end) result.x = inverted.x;
    if (dst_pos + 1 < row_end) result.y = inverted.y;
    if (dst_pos + 2 < row_end) result.z = inverted.z;
    if (dst_pos + 3 < row_end) result.w = inverted.w;

    vstore4(result, 0, dst + dst_pos);
}
// Bitwise NOT on ushort data, four 16-bit elements per work-item.
//
// Work-item (x, y) with x < cols and y < rows loads four ushort values from
// src1, inverts them, and merges them into one ushort4 that is read from and
// written back to dst through a ushort4 pointer. The column term of the dst
// byte index is rounded down to a multiple of 8 before the row term is added,
// and the source index is moved back by dst_align elements to compensate.
// A lane is replaced only if its byte index lies inside
// [y * dst_step + dst_offset, y * dst_step + dst_offset + dst_step1).
//
// NOTE(review): all offsets/steps are applied after casting to char pointers,
// so they are byte quantities; dst_step1 is presumably the row length in
// bytes -- confirm against the host code.
__kernel void arithm_bitwise_not_D2(__global ushort *src1, int src1_step, int src1_offset,
                                    __global ushort *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    // First element column handled by this work-item.
    const int elem_col = gx << 2;

    // dst_align: position (0..3) of the element at dst_offset within a group
    // of four 16-bit elements.
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)

    const int src_pos   = mad24(gy, src1_step, (elem_col << 1) + src1_offset - (dst_align << 1));
    const int row_begin = mad24(gy, dst_step, dst_offset);
    const int row_end   = mad24(gy, dst_step, dst_offset + dst_step1);
    const int dst_pos   = mad24(gy, dst_step, (dst_offset + (elem_col << 1)) & (int)0xfffffff8);

    __global ushort4 * const dst_vec = (__global ushort4 *)((__global char *)dst + dst_pos);

    const ushort4 inverted = ~vload4(0, (__global ushort *)((__global char *)src1 + src_pos));
    ushort4 merged = *dst_vec;

    // Lanes sit at byte positions dst_pos + 0/2/4/6; keep the old dst value
    // for any lane outside the current row's range.
    if ((dst_pos + 0 >= row_begin) && (dst_pos + 0 < row_end)) merged.x = inverted.x;
    if ((dst_pos + 2 >= row_begin) && (dst_pos + 2 < row_end)) merged.y = inverted.y;
    if ((dst_pos + 4 >= row_begin) && (dst_pos + 4 < row_end)) merged.z = inverted.z;
    if ((dst_pos + 6 >= row_begin) && (dst_pos + 6 < row_end)) merged.w = inverted.w;

    *dst_vec = merged;
}
// Bitwise NOT on short data, four 16-bit elements per work-item.
//
// Work-item (x, y) with x < cols and y < rows loads four short values from
// src1, inverts them, and merges them into one short4 that is read from and
// written back to dst through a short4 pointer. The column term of the dst
// byte index is rounded down to a multiple of 8 before the row term is added,
// and the source index is moved back by dst_align elements to compensate.
// A lane is replaced only if its byte index lies inside
// [y * dst_step + dst_offset, y * dst_step + dst_offset + dst_step1).
//
// NOTE(review): all offsets/steps are applied after casting to char pointers,
// so they are byte quantities; dst_step1 is presumably the row length in
// bytes -- confirm against the host code.
__kernel void arithm_bitwise_not_D3(__global short *src1, int src1_step, int src1_offset,
                                    __global short *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    // First element column handled by this work-item.
    const int elem_col = gx << 2;

    // dst_align: position (0..3) of the element at dst_offset within a group
    // of four 16-bit elements.
#ifdef dst_align
#undef dst_align
#endif
#define dst_align ((dst_offset >> 1) & 3)

    const int src_pos   = mad24(gy, src1_step, (elem_col << 1) + src1_offset - (dst_align << 1));
    const int row_begin = mad24(gy, dst_step, dst_offset);
    const int row_end   = mad24(gy, dst_step, dst_offset + dst_step1);
    const int dst_pos   = mad24(gy, dst_step, (dst_offset + (elem_col << 1)) & (int)0xfffffff8);

    __global short4 * const dst_vec = (__global short4 *)((__global char *)dst + dst_pos);

    const short4 inverted = ~vload4(0, (__global short *)((__global char *)src1 + src_pos));
    short4 merged = *dst_vec;

    // Lanes sit at byte positions dst_pos + 0/2/4/6; keep the old dst value
    // for any lane outside the current row's range.
    if ((dst_pos + 0 >= row_begin) && (dst_pos + 0 < row_end)) merged.x = inverted.x;
    if ((dst_pos + 2 >= row_begin) && (dst_pos + 2 < row_end)) merged.y = inverted.y;
    if ((dst_pos + 4 >= row_begin) && (dst_pos + 4 < row_end)) merged.z = inverted.z;
    if ((dst_pos + 6 >= row_begin) && (dst_pos + 6 < row_end)) merged.w = inverted.w;

    *dst_vec = merged;
}
// Bitwise NOT on int data, one 32-bit element per work-item.
//
// Work-item (x, y) with x < cols and y < rows reads the int located
// y * src1_step + 4 * x + src1_offset bytes past src1, inverts it, and stores
// it y * dst_step + 4 * x + dst_offset bytes past dst. dst_step1 is not used
// by this kernel.
__kernel void arithm_bitwise_not_D4(__global int *src1, int src1_step, int src1_offset,
                                    __global int *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    // Byte positions of the element in the source and destination buffers.
    const int src_pos = mad24(gy, src1_step, (gx << 2) + src1_offset);
    const int dst_pos = mad24(gy, dst_step, (gx << 2) + dst_offset);

    __global const int * const in = (__global int *)((__global char *)src1 + src_pos);
    __global int * const out = (__global int *)((__global char *)dst + dst_pos);

    *out = ~(*in);
}
// Bitwise NOT on 4-byte elements, handled as raw bytes (char4).
//
// Work-item (x, y) with x < cols and y < rows reads the four bytes located
// y * src_step + 4 * x + src_offset bytes past src, inverts every bit, and
// stores them y * dst_step + 4 * x + dst_offset bytes past dst. dst_step1 is
// not used by this kernel.
//
// NOTE(review): the D5 suffix presumably denotes the 32-bit float depth,
// which would explain operating on bytes rather than on a numeric type --
// confirm against the host code.
__kernel void arithm_bitwise_not_D5(__global char *src, int src_step, int src_offset,
                                    __global char *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    // Byte positions of the element in the source and destination buffers.
    const int src_pos = mad24(gy, src_step, (gx << 2) + src_offset);
    const int dst_pos = mad24(gy, dst_step, (gx << 2) + dst_offset);

    __global const char4 * const in = (__global char4 *)((__global char *)src + src_pos);
    __global char4 * const out = (__global char4 *)((__global char *)dst + dst_pos);

    const char4 bits = *in;
    *out = ~bits;
}
#if defined (DOUBLE_SUPPORT)
// Bitwise NOT on 8-byte elements, handled as raw bytes (char8). Only compiled
// when DOUBLE_SUPPORT is defined.
//
// Work-item (x, y) with x < cols and y < rows reads the eight bytes located
// y * src_step + 8 * x + src_offset bytes past src, inverts every bit, and
// stores them y * dst_step + 8 * x + dst_offset bytes past dst. dst_step1 is
// not used by this kernel.
//
// NOTE(review): the D6 suffix presumably denotes the 64-bit double depth --
// confirm against the host code.
__kernel void arithm_bitwise_not_D6(__global char *src, int src_step, int src_offset,
                                    __global char *dst, int dst_step, int dst_offset,
                                    int rows, int cols, int dst_step1)
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);

    if (gx >= cols || gy >= rows)
        return;

    // Byte positions of the element in the source and destination buffers.
    const int src_pos = mad24(gy, src_step, (gx << 3) + src_offset);
    const int dst_pos = mad24(gy, dst_step, (gx << 3) + dst_offset);

    __global const char8 * const in = (__global char8 *)((__global char *)src + src_pos);
    __global char8 * const out = (__global char8 *)((__global char *)dst + dst_pos);

    const char8 bits = *in;
    *out = ~bits;
}
#endif
modules/ocl/src/opencl/arithm_minMax.cl
浏览文件 @
5726e80f
...
...
@@ -63,81 +63,31 @@
/**************************************Array
minMax**************************************/
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
if
(
lid
>
127
)
{
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
{
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
dst[gid]
=
localmem_min[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
}
}
__kernel
void
arithm_op_minMax_mask
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
const
__global
uchar
*
mask,
int
minvalid_cols,
int
moffset
)
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
int
total,
int
groupnum,
__global
T
*
dst
#
ifdef
WITH_MASK
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
#
endif
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
int
y,
x
;
for
(
int
grainSize
=
groupnum
<<
8
; id <
elemnum
; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id <
total
; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid
_cols
;
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid
_cols
;
y
=
id
/
src
_cols
;
x
=
id
%
src
_cols
;
if
(
mask[midx]
)
#
ifdef
WITH_MASK
if
(
mask[mad24
(
y,
mask_step,
x
+
mask_offset
)
]
)
#
endif
{
temp
=
src[
idx
]
;
temp
=
src[
mad24
(
y,
src_step,
x
+
src_offset
)
]
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
浏览文件 @
5726e80f
...
...
@@ -52,23 +52,18 @@
/**************************************Count
NonZero**************************************/
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
,
__global
srcT
*src,
__global
dstT
*
dst
)
__kernel
void
arithm_op_nonzero
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_nonzero[128]
;
dstT
nonzero
=
(
dstT
)(
0
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
for
(
int
grain
=
groupnum
<<
8
; id < elemnum; id += grain)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
nonzero
+=
src[idx]
==
zero
?
zero
:
one
;
}
for
(
int
grain
=
groupnum
<<
8
; id < total; id += grain)
nonzero
+=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
==
(
srcT
)(
0
))
?
(
dstT
)(
0
)
:
(
dstT
)(
1
)
;
if
(
lid
>
127
)
localmem_nonzero[lid
-
128]
=
nonzero
;
...
...
modules/ocl/src/opencl/arithm_sum.cl
浏览文件 @
5726e80f
...
...
@@ -63,21 +63,19 @@
/**************************************Array
buffer
SUM**************************************/
__kernel
void
arithm_op_sum
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum
,
__global
srcT
*src,
__global
dstT
*
dst
)
__kernel
void
arithm_op_sum
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_sum[128]
;
dstT
sum
=
(
dstT
)(
0
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id <
elemnum
; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id <
total
; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
convertToDstT
(
src[idx]
)
;
temp
=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
)
;
FUNC
(
temp,
sum
)
;
}
...
...
modules/ocl/test/test_arithm.cpp
浏览文件 @
5726e80f
...
...
@@ -198,7 +198,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
Size
roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Border
src1Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
2
,
11
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
-
11
,
11
);
Border
src2Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src2
,
src2_roi
,
roiSize
,
src2Border
,
type
,
-
1540
,
1740
);
...
...
@@ -1163,7 +1163,7 @@ OCL_TEST_P(CountNonZero, MAT)
int
cpures
=
cv
::
countNonZero
(
src1_roi
);
int
gpures
=
cv
::
ocl
::
countNonZero
(
gsrc1_roi
);
EXPECT_
DOUBLE_EQ
((
double
)
cpures
,
(
double
)
gpures
);
EXPECT_
EQ
(
cpures
,
gpures
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录