Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
5726e80f
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
5726e80f
编写于
3月 14, 2014
作者:
A
Andrey Pavlenko
提交者:
OpenCV Buildbot
3月 14, 2014
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #2475 from ilya-lavrenov:ocl_2.4_fix
上级
836635d2
61c347fb
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
199 addition
and
794 deletion
+199
-794
modules/nonfree/perf/perf_main.cpp
modules/nonfree/perf/perf_main.cpp
+1
-1
modules/nonfree/perf/perf_surf_ocl.cpp
modules/nonfree/perf/perf_surf_ocl.cpp
+2
-2
modules/ocl/src/arithm.cpp
modules/ocl/src/arithm.cpp
+127
-166
modules/ocl/src/gftt.cpp
modules/ocl/src/gftt.cpp
+14
-15
modules/ocl/src/opencl/arithm_bitwise.cl
modules/ocl/src/opencl/arithm_bitwise.cl
+31
-20
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
+0
-88
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
+0
-82
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+0
-86
modules/ocl/src/opencl/arithm_bitwise_not.cl
modules/ocl/src/opencl/arithm_bitwise_not.cl
+0
-253
modules/ocl/src/opencl/arithm_minMax.cl
modules/ocl/src/opencl/arithm_minMax.cl
+14
-64
modules/ocl/src/opencl/arithm_nonzero.cl
modules/ocl/src/opencl/arithm_nonzero.cl
+4
-9
modules/ocl/src/opencl/arithm_sum.cl
modules/ocl/src/opencl/arithm_sum.cl
+4
-6
modules/ocl/test/test_arithm.cpp
modules/ocl/test/test_arithm.cpp
+2
-2
未找到文件。
modules/nonfree/perf/perf_main.cpp
浏览文件 @
5726e80f
...
@@ -5,7 +5,7 @@ static const char * impls[] = {
...
@@ -5,7 +5,7 @@ static const char * impls[] = {
#ifdef HAVE_CUDA
#ifdef HAVE_CUDA
"cuda"
,
"cuda"
,
#endif
#endif
#ifdef HAVE_OPENCL
#ifdef HAVE_OPENC
V_OC
L
"ocl"
,
"ocl"
,
#endif
#endif
"plain"
"plain"
...
...
modules/nonfree/perf/perf_surf_ocl.cpp
浏览文件 @
5726e80f
...
@@ -59,7 +59,7 @@ typedef perf::TestBaseWithParam<std::string> OCL_SURF;
...
@@ -59,7 +59,7 @@ typedef perf::TestBaseWithParam<std::string> OCL_SURF;
#define OCL_TEST_CYCLE() for( ; startTimer(), next(); cv::ocl::finish(), stopTimer())
#define OCL_TEST_CYCLE() for( ; startTimer(), next(); cv::ocl::finish(), stopTimer())
PERF_TEST_P
(
OCL_SURF
,
with_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
PERF_TEST_P
(
OCL_SURF
,
DISABLED_
with_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
{
{
string
filename
=
getDataPath
(
GetParam
());
string
filename
=
getDataPath
(
GetParam
());
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
...
@@ -94,7 +94,7 @@ PERF_TEST_P(OCL_SURF, with_data_transfer, testing::Values(SURF_IMAGES))
...
@@ -94,7 +94,7 @@ PERF_TEST_P(OCL_SURF, with_data_transfer, testing::Values(SURF_IMAGES))
SANITY_CHECK_NOTHING
();
SANITY_CHECK_NOTHING
();
}
}
PERF_TEST_P
(
OCL_SURF
,
without_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
PERF_TEST_P
(
OCL_SURF
,
DISABLED_
without_data_transfer
,
testing
::
Values
(
SURF_IMAGES
))
{
{
string
filename
=
getDataPath
(
GetParam
());
string
filename
=
getDataPath
(
GetParam
());
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
Mat
src
=
imread
(
filename
,
IMREAD_GRAYSCALE
);
...
...
modules/ocl/src/arithm.cpp
浏览文件 @
5726e80f
...
@@ -313,32 +313,28 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int
...
@@ -313,32 +313,28 @@ void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int
enum
{
SUM
=
0
,
ABS_SUM
,
SQR_SUM
};
enum
{
SUM
=
0
,
ABS_SUM
,
SQR_SUM
};
static
void
arithmetic_sum_buffer_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
int
type
,
int
ddepth
)
static
void
arithmetic_sum_buffer_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
int
type
,
int
ddepth
,
int
vlen
)
{
{
int
ochannels
=
src
.
oclchannels
();
int
vElemSize
=
vlen
*
src
.
elemSize
();
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
src_offset
=
src
.
offset
/
vElemSize
,
src_step
=
src
.
step
/
vElemSize
;
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_cols
=
src
.
cols
/
vlen
,
total
=
src
.
size
().
area
()
/
vlen
;
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
vlen
*=
src
.
oclchannels
();
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;;
int
offset
=
src
.
offset
/
src
.
elemSize
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
funcMap
[]
=
{
"FUNC_SUM"
,
"FUNC_ABS_SUM"
,
"FUNC_SQR_SUM"
};
const
char
*
const
funcMap
[]
=
{
"FUNC_SUM"
,
"FUNC_ABS_SUM"
,
"FUNC_SQR_SUM"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
,
""
,
""
,
""
,
"8"
};
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s"
,
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=%s%s -D convertToDstT=convert_%s%s -D %s"
,
typeMap
[
src
.
depth
()],
channelMap
[
ochannels
],
typeMap
[
src
.
depth
()],
channelMap
[
vlen
],
typeMap
[
ddepth
],
typeMap
[
ddepth
],
channelMap
[
ochannels
],
channelMap
[
vlen
],
typeMap
[
ddepth
],
channelMap
[
vlen
],
funcMap
[
type
]);
typeMap
[
ddepth
],
channelMap
[
ochannels
],
funcMap
[
type
]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
...
@@ -360,7 +356,11 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
...
@@ -360,7 +356,11 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
CV_Assert
(
groupnum
!=
0
);
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
*
src
.
oclchannels
();
int
vlen
=
8
/
src
.
channels
(),
vElemSize
=
vlen
*
src
.
elemSize1
();
while
(
src
.
offset
%
vElemSize
!=
0
||
src
.
step
%
vElemSize
!=
0
||
src
.
cols
%
vlen
!=
0
)
vlen
>>=
1
,
vElemSize
>>=
1
;
int
dbsize
=
groupnum
*
src
.
oclchannels
()
*
vlen
;
Context
*
clCxt
=
src
.
clCxt
;
Context
*
clCxt
=
src
.
clCxt
;
AutoBuffer
<
T
>
_buf
(
dbsize
);
AutoBuffer
<
T
>
_buf
(
dbsize
);
...
@@ -368,12 +368,12 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
...
@@ -368,12 +368,12 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
memset
(
p
,
0
,
dbsize
*
sizeof
(
T
));
memset
(
p
,
0
,
dbsize
*
sizeof
(
T
));
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
T
));
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
T
));
arithmetic_sum_buffer_run
(
src
,
dstBuffer
,
groupnum
,
type
,
ddepth
);
arithmetic_sum_buffer_run
(
src
,
dstBuffer
,
groupnum
,
type
,
ddepth
,
vlen
);
openCLReadBuffer
(
clCxt
,
dstBuffer
,
(
void
*
)
p
,
dbsize
*
sizeof
(
T
));
openCLReadBuffer
(
clCxt
,
dstBuffer
,
(
void
*
)
p
,
dbsize
*
sizeof
(
T
));
openCLFree
(
dstBuffer
);
openCLFree
(
dstBuffer
);
Scalar
s
=
Scalar
::
all
(
0.0
);
Scalar
s
=
Scalar
::
all
(
0.0
);
for
(
int
i
=
0
;
i
<
dbsize
;)
for
(
int
i
=
0
;
i
<
dbsize
;
)
for
(
int
j
=
0
;
j
<
src
.
oclchannels
();
j
++
,
i
++
)
for
(
int
j
=
0
;
j
<
src
.
oclchannels
();
j
++
,
i
++
)
s
.
val
[
j
]
+=
p
[
i
];
s
.
val
[
j
]
+=
p
[
i
];
...
@@ -473,20 +473,13 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
...
@@ -473,20 +473,13 @@ void cv::ocl::meanStdDev(const oclMat &src, Scalar &mean, Scalar &stddev)
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
template
<
typename
T
,
typename
WT
>
template
<
typename
T
,
typename
WT
>
static
void
arithmetic_minMax_run
(
const
oclMat
&
src
,
const
oclMat
&
mask
,
cl_mem
&
dst
,
int
groupnum
,
string
kernelName
)
static
void
arithmetic_minMax_run
(
const
oclMat
&
src
,
const
oclMat
&
mask
,
cl_mem
&
dst
,
int
vlen
,
int
groupnum
)
{
{
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;
int
offset
=
src
.
offset
/
src
.
elemSize
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
,
""
,
""
,
""
,
"8"
};
ostringstream
stream
;
ostringstream
stream
;
stream
<<
"-D T="
<<
typeMap
[
src
.
depth
()]
<<
channelMap
[
src
.
channels
()
];
stream
<<
"-D T="
<<
typeMap
[
src
.
depth
()]
<<
channelMap
[
vlen
];
if
(
numeric_limits
<
T
>::
is_integer
)
if
(
numeric_limits
<
T
>::
is_integer
)
{
{
stream
<<
" -D MAX_VAL="
<<
(
WT
)
numeric_limits
<
T
>::
max
();
stream
<<
" -D MAX_VAL="
<<
(
WT
)
numeric_limits
<
T
>::
max
();
...
@@ -494,38 +487,38 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem
...
@@ -494,38 +487,38 @@ static void arithmetic_minMax_run(const oclMat &src, const oclMat & mask, cl_mem
}
}
else
else
stream
<<
" -D DEPTH_"
<<
src
.
depth
();
stream
<<
" -D DEPTH_"
<<
src
.
depth
();
stream
<<
" -D vlen="
<<
vlen
;
std
::
string
buildOptions
=
stream
.
str
();
std
::
string
buildOptions
=
stream
.
str
();
int
vElemSize
=
src
.
elemSize1
()
*
vlen
,
src_cols
=
src
.
cols
/
vlen
;
int
src_step
=
src
.
step
/
vElemSize
,
src_offset
=
src
.
offset
/
vElemSize
;
int
mask_step
=
mask
.
step
/
vlen
,
mask_offset
=
mask
.
offset
/
vlen
;
int
total
=
src
.
size
().
area
()
/
vlen
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
mem
)
,
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_col
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
row
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
int
minvalid_cols
=
0
,
moffset
=
0
;
if
(
!
mask
.
empty
())
if
(
!
mask
.
empty
())
{
{
int
mall_cols
=
mask
.
step
/
mask
.
elemSize
();
int
mpre_cols
=
(
mask
.
offset
%
mask
.
step
)
/
mask
.
elemSize
();
int
msec_cols
=
mall_cols
-
(
mask
.
offset
%
mask
.
step
+
mask
.
cols
*
mask
.
elemSize
()
-
1
)
/
mask
.
elemSize
()
-
1
;
minvalid_cols
=
mpre_cols
+
msec_cols
;
moffset
=
mask
.
offset
/
mask
.
elemSize
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
m
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
m
ask_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
moffset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
m
ask_
offset
));
kernelName
+=
"_mask
"
;
buildOptions
+=
" -D WITH_MASK
"
;
}
}
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
// kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
// kernel use fixed grid size, replace lt on NULL is impos
s
ible without kernel changes
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
kernelName
,
globalThreads
,
localThreads
,
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
"arithm_op_minMax"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
}
}
...
@@ -535,25 +528,33 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const
...
@@ -535,25 +528,33 @@ void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal, const
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
CV_Assert
(
groupnum
!=
0
);
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
int
vlen
=
mask
.
empty
()
?
8
:
1
,
vElemSize
=
vlen
*
src
.
elemSize1
();
while
(
src
.
offset
%
vElemSize
!=
0
||
src
.
step
%
vElemSize
!=
0
||
src
.
cols
%
vlen
!=
0
)
{
vlen
>>=
1
;
vElemSize
>>=
1
;
}
int
dbsize
=
groupnum
*
2
*
vElemSize
;
oclMat
buf
;
oclMat
buf
;
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
buf
);
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
buf
);
cl_mem
buf_data
=
reinterpret_cast
<
cl_mem
>
(
buf
.
data
);
cl_mem
buf_data
=
reinterpret_cast
<
cl_mem
>
(
buf
.
data
);
arithmetic_minMax_run
<
T
,
WT
>
(
src
,
mask
,
buf_data
,
groupnum
,
"arithm_op_minMax"
);
arithmetic_minMax_run
<
T
,
WT
>
(
src
,
mask
,
buf_data
,
vlen
,
groupnum
);
Mat
matbuf
=
Mat
(
buf
);
Mat
matbuf
=
Mat
(
buf
);
T
*
p
=
matbuf
.
ptr
<
T
>
();
T
*
p
=
matbuf
.
ptr
<
T
>
();
if
(
minVal
!=
NULL
)
if
(
minVal
!=
NULL
)
{
{
*
minVal
=
std
::
numeric_limits
<
double
>::
max
();
*
minVal
=
std
::
numeric_limits
<
double
>::
max
();
for
(
int
i
=
0
,
end
=
src
.
oclchannels
()
*
(
int
)
groupnum
;
i
<
end
;
i
++
)
for
(
int
i
=
0
,
end
=
vlen
*
(
int
)
groupnum
;
i
<
end
;
i
++
)
*
minVal
=
*
minVal
<
p
[
i
]
?
*
minVal
:
p
[
i
];
*
minVal
=
*
minVal
<
p
[
i
]
?
*
minVal
:
p
[
i
];
}
}
if
(
maxVal
!=
NULL
)
if
(
maxVal
!=
NULL
)
{
{
*
maxVal
=
-
std
::
numeric_limits
<
double
>::
max
();
*
maxVal
=
-
std
::
numeric_limits
<
double
>::
max
();
for
(
int
i
=
src
.
oclchannels
()
*
(
int
)
groupnum
,
end
=
i
<<
1
;
i
<
end
;
i
++
)
for
(
int
i
=
vlen
*
(
int
)
groupnum
,
end
=
i
<<
1
;
i
<
end
;
i
++
)
*
maxVal
=
*
maxVal
>
p
[
i
]
?
*
maxVal
:
p
[
i
];
*
maxVal
=
*
maxVal
>
p
[
i
]
?
*
maxVal
:
p
[
i
];
}
}
}
}
...
@@ -564,7 +565,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
...
@@ -564,7 +565,7 @@ void cv::ocl::minMax(const oclMat &src, double *minVal, double *maxVal, const oc
{
{
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
src
.
channels
()
==
1
);
CV_Assert
(
src
.
size
()
==
mask
.
size
()
||
mask
.
empty
());
CV_Assert
(
src
.
size
()
==
mask
.
size
()
||
mask
.
empty
());
CV_Assert
(
src
.
step
%
src
.
elemSize
()
==
0
);
CV_Assert
(
src
.
step
%
src
.
elemSize
1
()
==
0
);
if
(
minVal
==
NULL
&&
maxVal
==
NULL
)
if
(
minVal
==
NULL
&&
maxVal
==
NULL
)
return
;
return
;
...
@@ -1139,7 +1140,7 @@ static void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen ,
...
@@ -1139,7 +1140,7 @@ static void arithmetic_minMaxLoc_run(const oclMat &src, cl_mem &dst, int vlen ,
sprintf
(
build_options
,
"-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d"
,
src
.
depth
(),
repeat_s
,
repeat_e
);
sprintf
(
build_options
,
"-D DEPTH_%d -D REPEAT_S%d -D REPEAT_E%d"
,
src
.
depth
(),
repeat_s
,
repeat_e
);
size_t
gt
[
3
]
=
{
groupnum
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
size_t
gt
[
3
]
=
{
groupnum
*
256
,
1
,
1
},
lt
[
3
]
=
{
256
,
1
,
1
};
// kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
// kernel use fixed grid size, replace lt on NULL is impos
s
ible without kernel changes
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMaxLoc
,
"arithm_op_minMaxLoc"
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMaxLoc
,
"arithm_op_minMaxLoc"
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
}
}
...
@@ -1169,7 +1170,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
...
@@ -1169,7 +1170,7 @@ static void arithmetic_minMaxLoc_mask_run(const oclMat &src, const oclMat &mask,
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
// kernel use fixed grid size, replace lt on NULL is imposible without kernel changes
// kernel use fixed grid size, replace lt on NULL is impos
s
ible without kernel changes
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMaxLoc_mask
,
"arithm_op_minMaxLoc_mask"
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMaxLoc_mask
,
"arithm_op_minMaxLoc_mask"
,
gt
,
lt
,
args
,
-
1
,
-
1
,
build_options
);
}
}
}
}
...
@@ -1262,38 +1263,35 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
...
@@ -1262,38 +1263,35 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
///////////////////////////// countNonZero ///////////////////////////////////
///////////////////////////// countNonZero ///////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
arithmetic_countNonZero_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
string
kernelName
)
static
void
arithmetic_countNonZero_run
(
const
oclMat
&
src
,
cl_mem
&
dst
,
int
groupnum
,
int
vlen
)
{
{
int
ochannels
=
src
.
oclchannels
();
int
vElemSize
=
vlen
*
src
.
elemSize1
();
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
src_step
=
src
.
step
/
vElemSize
,
src_offset
=
src
.
offset
/
vElemSize
;
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_cols
=
src
.
cols
/
vlen
,
total
=
src
.
size
().
area
()
/
vlen
;
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;;
int
offset
=
src
.
offset
/
src
.
elemSize
();
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"char"
,
"ushort"
,
"short"
,
"int"
,
"float"
,
"double"
};
const
char
*
const
channelMap
[]
=
{
" "
,
" "
,
"2"
,
"4"
,
"4"
};
const
char
*
const
channelMap
[]
=
{
""
,
""
,
"2"
,
"4"
,
"4"
,
""
,
""
,
""
,
"8"
};
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=int%s"
,
typeMap
[
src
.
depth
()],
channelMap
[
ochannels
],
string
buildOptions
=
format
(
"-D srcT=%s%s -D dstT=int%s -D convertToDstT=convert_int%s"
,
channelMap
[
ochannels
]);
typeMap
[
src
.
depth
()],
channelMap
[
vlen
],
channelMap
[
vlen
],
channelMap
[
vlen
]);
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
#ifdef ANDROID
#ifdef ANDROID
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
kernelName
,
globalThreads
,
NULL
,
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
"arithm_op_nonzero"
,
globalThreads
,
NULL
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
#else
#else
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
kernelName
,
globalThreads
,
localThreads
,
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_nonzero
,
"arithm_op_nonzero"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
#endif
#endif
}
}
...
@@ -1310,18 +1308,20 @@ int cv::ocl::countNonZero(const oclMat &src)
...
@@ -1310,18 +1308,20 @@ int cv::ocl::countNonZero(const oclMat &src)
return
-
1
;
return
-
1
;
}
}
int
vlen
=
8
,
vElemSize
=
src
.
elemSize1
()
*
vlen
;
while
(
src
.
offset
%
vElemSize
!=
0
||
src
.
step
%
vElemSize
!=
0
||
src
.
cols
%
vlen
!=
0
)
vlen
>>=
1
,
vElemSize
>>=
1
;
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
CV_Assert
(
groupnum
!=
0
);
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
;
int
dbsize
=
groupnum
*
vlen
;
string
kernelName
=
"arithm_op_nonzero"
;
AutoBuffer
<
int
>
_buf
(
dbsize
);
AutoBuffer
<
int
>
_buf
(
dbsize
);
int
*
p
=
(
int
*
)
_buf
,
nonzero
=
0
;
int
*
p
=
(
int
*
)
_buf
,
nonzero
=
0
;
memset
(
p
,
0
,
dbsize
*
sizeof
(
int
));
memset
(
p
,
0
,
dbsize
*
sizeof
(
int
));
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
int
));
cl_mem
dstBuffer
=
openCLCreateBuffer
(
clCxt
,
CL_MEM_WRITE_ONLY
,
dbsize
*
sizeof
(
int
));
arithmetic_countNonZero_run
(
src
,
dstBuffer
,
groupnum
,
kernelName
);
arithmetic_countNonZero_run
(
src
,
dstBuffer
,
groupnum
,
vlen
);
openCLReadBuffer
(
clCxt
,
dstBuffer
,
(
void
*
)
p
,
dbsize
*
sizeof
(
int
));
openCLReadBuffer
(
clCxt
,
dstBuffer
,
(
void
*
)
p
,
dbsize
*
sizeof
(
int
));
for
(
int
i
=
0
;
i
<
dbsize
;
i
++
)
for
(
int
i
=
0
;
i
<
dbsize
;
i
++
)
...
@@ -1336,157 +1336,118 @@ int cv::ocl::countNonZero(const oclMat &src)
...
@@ -1336,157 +1336,118 @@ int cv::ocl::countNonZero(const oclMat &src)
////////////////////////////////bitwise_op////////////////////////////////////
////////////////////////////////bitwise_op////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
static
void
bitwise_unary_run
(
const
oclMat
&
src1
,
oclMat
&
dst
,
string
kernelName
,
const
cv
::
ocl
::
ProgramEntry
*
source
)
enum
{
AND
=
0
,
OR
,
XOR
,
NOT
};
{
dst
.
create
(
src1
.
size
(),
src1
.
type
());
int
channels
=
dst
.
oclchannels
();
int
depth
=
dst
.
depth
();
int
vector_lengths
[
4
][
7
]
=
{{
4
,
4
,
4
,
4
,
1
,
1
,
1
},
{
4
,
4
,
4
,
4
,
1
,
1
,
1
},
{
4
,
4
,
4
,
4
,
1
,
1
,
1
},
{
4
,
4
,
4
,
4
,
1
,
1
,
1
}
};
size_t
vector_length
=
vector_lengths
[
channels
-
1
][
depth
];
int
offset_cols
=
(
dst
.
offset
/
dst
.
elemSize1
())
&
(
vector_length
-
1
);
int
cols
=
divUp
(
dst
.
cols
*
channels
+
offset_cols
,
vector_length
);
#ifdef ANDROID
static
void
bitwise_run
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
const
Scalar
&
src3
,
const
oclMat
&
mask
,
size_t
localThreads
[
3
]
=
{
64
,
2
,
1
};
oclMat
&
dst
,
int
operationType
)
#else
size_t
localThreads
[
3
]
=
{
64
,
4
,
1
};
#endif
size_t
globalThreads
[
3
]
=
{
cols
,
dst
.
rows
,
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
openCLExecuteKernel
(
src1
.
clCxt
,
source
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
}
enum
{
AND
=
0
,
OR
,
XOR
};
static
void
bitwise_binary_run
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
const
Scalar
&
src3
,
const
oclMat
&
mask
,
oclMat
&
dst
,
int
operationType
)
{
{
CV_Assert
(
operationType
>=
AND
&&
operationType
<=
XOR
);
CV_Assert
(
operationType
>=
AND
&&
operationType
<=
NOT
);
CV_Assert
(
src2
.
empty
()
||
(
!
src2
.
empty
()
&&
src1
.
type
()
==
src2
.
type
()
&&
src1
.
size
()
==
src2
.
size
()));
CV_Assert
(
src2
.
empty
()
||
(
src1
.
type
()
==
src2
.
type
()
&&
src1
.
size
()
==
src2
.
size
()));
CV_Assert
(
mask
.
empty
()
||
(
!
mask
.
empty
()
&&
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
src1
.
size
()));
CV_Assert
(
mask
.
empty
()
||
(
mask
.
type
()
==
CV_8UC1
&&
mask
.
size
()
==
src1
.
size
()));
dst
.
create
(
src1
.
size
(),
src1
.
type
());
dst
.
create
(
src1
.
size
(),
src1
.
type
());
oclMat
m
;
double
scalar
[
4
]
;
const
char
operationMap
[]
=
{
'&'
,
'|'
,
'^'
};
bool
haveScalar
=
src2
.
empty
()
&&
operationType
!=
NOT
,
haveMask
=
!
mask
.
empty
();
std
::
string
kernelName
(
"arithm_bitwise_binary"
);
int
ocn
=
dst
.
oclchannels
(),
depth
=
dst
.
depth
();
const
char
operationMap
[]
=
{
'&'
,
'|'
,
'^'
,
'~'
};
const
char
*
const
typeMap
[]
=
{
"uchar"
,
"uchar"
,
"ushort"
,
"ushort"
,
"int"
,
"int"
,
"ulong"
};
const
char
*
const
channelMap
[]
=
{
""
,
""
,
"2"
,
"4"
,
"4"
,
""
,
""
,
""
,
"8"
,
""
,
""
,
""
,
""
,
""
,
""
,
""
,
"16"
};
const
int
preferredVectorWidth
[]
=
{
4
,
4
,
2
,
2
,
1
,
1
,
1
};
int
kercn
=
haveMask
||
haveScalar
?
ocn
:
preferredVectorWidth
[
depth
];
int
vlen
=
std
::
min
<
int
>
(
8
,
src1
.
elemSize1
()
*
src1
.
oclchannels
());
if
(
!
haveScalar
&&
!
haveMask
)
std
::
string
vlenstr
=
vlen
>
1
?
format
(
"%d"
,
vlen
)
:
""
;
{
std
::
string
buildOptions
=
format
(
"-D Operation=%c -D vloadn=vload%s -D vstoren=vstore%s -D elemSize=%d -D vlen=%d"
int
velemsize
=
dst
.
elemSize1
()
*
kercn
;
" -D ucharv=uchar%s"
,
while
(
src1
.
offset
%
velemsize
!=
0
||
src1
.
step
%
velemsize
!=
0
||
src1
.
cols
*
ocn
%
kercn
!=
0
||
operationMap
[
operationType
],
vlenstr
.
c_str
(),
vlenstr
.
c_str
(),
src2
.
offset
%
velemsize
!=
0
||
src2
.
step
%
velemsize
!=
0
||
src2
.
cols
*
ocn
%
kercn
!=
0
||
(
int
)
src1
.
elemSize
(),
vlen
,
vlenstr
.
c_str
());
dst
.
offset
%
velemsize
!=
0
||
dst
.
step
%
velemsize
!=
0
||
dst
.
cols
*
ocn
%
kercn
!=
0
)
kercn
>>=
1
,
velemsize
>>=
1
;
}
#ifdef ANDROID
int
cols
=
dst
.
cols
*
ocn
/
kercn
;
size_t
localThreads
[
3
]
=
{
16
,
10
,
1
};
#else
std
::
string
buildOptions
=
format
(
"-D Operation=%c -D T=%s%s"
,
operationMap
[
operationType
],
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
typeMap
[
depth
],
channelMap
[
kercn
]);
#endif
size_t
globalThreads
[
3
]
=
{
dst
.
cols
,
dst
.
rows
,
1
};
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
if
(
src2
.
empty
()
)
if
(
haveScalar
)
{
{
m
.
create
(
1
,
1
,
dst
.
type
()
);
int
sctype
=
CV_MAKE_TYPE
(
dst
.
depth
(),
ocn
);
m
.
setTo
(
src3
);
cv
::
scalarToRawData
(
src3
,
scalar
,
sctype
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
m
.
data
));
args
.
push_back
(
make_pair
(
CV_ELEM_SIZE
(
sctype
),
(
void
*
)
scalar
));
kernelName
+=
"_scalar
"
;
buildOptions
+=
" -D HAVE_SCALAR
"
;
}
}
else
else
if
(
operationType
!=
NOT
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
buildOptions
+=
" -D OP_BINARY"
;
}
}
if
(
!
mask
.
empty
()
)
if
(
haveMask
)
{
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask
.
offset
));
kernelName
+=
"_mask
"
;
buildOptions
+=
" -D HAVE_MASK
"
;
}
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
col
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
row
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
row
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
col
s
));
openCLExecuteKernel
(
src1
.
clCxt
,
mask
.
empty
()
?
(
!
src2
.
empty
()
?
&
arithm_bitwise_binary
:
&
arithm_bitwise_binary_scalar
)
:
size_t
globalsize
[
3
]
=
{
dst
.
cols
*
ocn
/
kercn
,
dst
.
rows
,
1
};
(
!
src2
.
empty
()
?
&
arithm_bitwise_binary_mask
:
&
arithm_bitwise_binary_scalar_mask
),
globalsize
[
0
]
=
divUp
(
globalsize
[
0
],
256
)
*
256
;
kernelName
,
globalThreads
,
localThreads
,
openCLExecuteKernel
(
src1
.
clCxt
,
&
arithm_bitwise
,
"arithm_bitwise"
,
globalsize
,
NULL
,
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
args
,
-
1
,
-
1
,
buildOptions
.
c_str
());
}
}
void
cv
::
ocl
::
bitwise_not
(
const
oclMat
&
src
,
oclMat
&
dst
)
void
cv
::
ocl
::
bitwise_not
(
const
oclMat
&
src
,
oclMat
&
dst
)
{
{
if
(
!
src
.
clCxt
->
supportsFeature
(
FEATURE_CL_DOUBLE
)
&&
src
.
depth
()
==
CV_64F
)
bitwise_run
(
src
,
oclMat
(),
Scalar
(),
oclMat
(),
dst
,
NOT
);
{
CV_Error
(
CV_OpenCLDoubleNotSupported
,
"Selected device doesn't support double"
);
return
;
}
dst
.
create
(
src
.
size
(),
src
.
type
());
bitwise_unary_run
(
src
,
dst
,
"arithm_bitwise_not"
,
&
arithm_bitwise_not
);
}
}
void
cv
::
ocl
::
bitwise_or
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_or
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
{
{
bitwise_
binary_
run
(
src1
,
src2
,
Scalar
(),
mask
,
dst
,
OR
);
bitwise_run
(
src1
,
src2
,
Scalar
(),
mask
,
dst
,
OR
);
}
}
void
cv
::
ocl
::
bitwise_or
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_or
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
{
{
bitwise_
binary_
run
(
src1
,
oclMat
(),
src2
,
mask
,
dst
,
OR
);
bitwise_run
(
src1
,
oclMat
(),
src2
,
mask
,
dst
,
OR
);
}
}
void
cv
::
ocl
::
bitwise_and
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_and
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
{
{
bitwise_
binary_
run
(
src1
,
src2
,
Scalar
(),
mask
,
dst
,
AND
);
bitwise_run
(
src1
,
src2
,
Scalar
(),
mask
,
dst
,
AND
);
}
}
void
cv
::
ocl
::
bitwise_and
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_and
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
{
{
bitwise_
binary_
run
(
src1
,
oclMat
(),
src2
,
mask
,
dst
,
AND
);
bitwise_run
(
src1
,
oclMat
(),
src2
,
mask
,
dst
,
AND
);
}
}
void
cv
::
ocl
::
bitwise_xor
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_xor
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
{
{
bitwise_
binary_
run
(
src1
,
src2
,
Scalar
(),
mask
,
dst
,
XOR
);
bitwise_run
(
src1
,
src2
,
Scalar
(),
mask
,
dst
,
XOR
);
}
}
void
cv
::
ocl
::
bitwise_xor
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
void
cv
::
ocl
::
bitwise_xor
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
)
{
{
bitwise_
binary_
run
(
src1
,
oclMat
(),
src2
,
mask
,
dst
,
XOR
);
bitwise_run
(
src1
,
oclMat
(),
src2
,
mask
,
dst
,
XOR
);
}
}
oclMat
cv
::
ocl
::
operator
~
(
const
oclMat
&
src
)
oclMat
cv
::
ocl
::
operator
~
(
const
oclMat
&
src
)
...
...
modules/ocl/src/gftt.cpp
浏览文件 @
5726e80f
...
@@ -146,34 +146,33 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
...
@@ -146,34 +146,33 @@ static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
CV_Assert
(
groupnum
!=
0
);
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
dst
);
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
dst
);
cl_mem
dst_data
=
reinterpret_cast
<
cl_mem
>
(
dst
.
data
);
cl_mem
dst_data
=
reinterpret_cast
<
cl_mem
>
(
dst
.
data
);
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
vElemSize
=
src
.
elemSize1
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
src_step
=
src
.
step
/
vElemSize
,
src_offset
=
src
.
offset
/
vElemSize
;
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
total
=
src
.
size
().
area
();
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;
int
offset
=
src
.
offset
/
src
.
elemSize
();
{
// first parallel pass
{
// first parallel pass
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_
int
)
,
(
void
*
)
&
src_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src_offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_col
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
row
s
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
total
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
"arithm_op_minMax"
,
globalThreads
,
localThreads
,
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
"arithm_op_minMax"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5"
);
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5
-D vlen=1
"
);
}
}
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
...
...
modules/ocl/src/opencl/arithm_bitwise
_binary
.cl
→
modules/ocl/src/opencl/arithm_bitwise.cl
浏览文件 @
5726e80f
...
@@ -48,35 +48,46 @@
...
@@ -48,35 +48,46 @@
///////////////////////////////////////////
bitwise_binary
//////////////////////////////////////////
///////////////////////////////////////////
bitwise_binary
//////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__kernel
void
arithm_bitwise
(
__global
uchar
*
src1ptr,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
#
ifdef
OP_BINARY
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
__global
uchar
*
src2ptr,
int
src2_step,
int
src2_offset,
int
cols,
int
rows
)
#
elif
defined
HAVE_SCALAR
T
scalar,
#
endif
#
ifdef
HAVE_MASK
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
#
endif
__global
uchar
*
dstptr,
int
dst_step,
int
dst_offset,
int
dst_rows,
int
dst_cols
)
{
{
int
x
=
get_global_id
(
0
)
;
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
if
(
x
<
dst_cols
&&
y
<
dst_
rows
)
{
{
#
if
elemSize
>
1
#
ifdef
HAVE_MASK
x
*=
elemSize
;
mask
+=
mad24
(
y,
mask_step,
x
+
mask_offset
)
;
if
(
mask[0]
)
#
endif
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
int
src1_index
=
mad24
(
y,
src1_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src1_offset
))
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
#
ifdef
OP_BINARY
ucharv
t2
=
t0
Operation
t1
;
int
src2_index
=
mad24
(
y,
src2_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
src2_offset
))
;
#
endif
int
dst_index
=
mad24
(
y,
dst_step,
mad24
(
x,
(
int
)
sizeof
(
T
)
,
dst_offset
))
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
__global
const
T
*
src1
=
(
__global
const
T
*
)(
src1ptr
+
src1_index
)
;
}
#
ifdef
OP_BINARY
__global
const
T
*
src2
=
(
__global
const
T
*
)(
src2ptr
+
src2_index
)
;
#
endif
__global
T
*
dst
=
(
__global
T
*
)(
dstptr
+
dst_index
)
;
#
ifdef
OP_BINARY
dst[0]
=
src1[0]
Operation
src2[0]
;
#
elif
defined
HAVE_SCALAR
dst[0]
=
src1[0]
Operation
scalar
;
#
else
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index
]
;
dst[0]
=
Operation
src1[0
]
;
#
endif
#
endif
}
}
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_mask.cl
已删除
100644 → 0
浏览文件 @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_mask
(
__global
uchar
*
src1,
int
src1_step,
int
src1_offset,
__global
uchar
*
src2,
int
src2_step,
int
src2_offset,
__global
uchar
*
mask,
int
mask_step,
int
mask_offset,
__global
uchar
*
dst,
int
dst_step,
int
dst_offset,
int
cols1,
int
rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols1
&&
y
<
rows
)
{
int
mask_index
=
mad24
(
y,
mask_step,
mask_offset
+
x
)
;
if
(
mask[mask_index]
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
src2_index
=
mad24
(
y,
src2_step,
x
+
src2_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
src2_index
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[src2_index]
;
#
endif
}
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar.cl
已删除
100644 → 0
浏览文件 @
836635d2
////////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
Peng
Xiao,
pengxiao@outlook.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
///////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary/////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_scalar
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
src1_offset
+
x
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[0]
;
#
endif
}
}
modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
已删除
100644 → 0
浏览文件 @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
//////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_binary_scalar_mask
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
__global
uchar
*mask,
int
mask_step,
int
mask_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
cols,
int
rows
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
mask_index
=
mad24
(
y,
mask_step,
x
+
mask_offset
)
;
if
(
mask[mask_index]
)
{
#
if
elemSize
>
1
x
*=
elemSize
;
#
endif
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
x
+
dst_offset
)
;
#
if
elemSize
>
1
#
pragma
unroll
for
(
int
i
=
0
; i < elemSize; i += vlen)
{
ucharv
t0
=
vloadn
(
0
,
src1
+
src1_index
+
i
)
;
ucharv
t1
=
vloadn
(
0
,
src2
+
i
)
;
ucharv
t2
=
t0
Operation
t1
;
vstoren
(
t2,
0
,
dst
+
dst_index
+
i
)
;
}
#
else
dst[dst_index]
=
src1[src1_index]
Operation
src2[0]
;
#
endif
}
}
}
modules/ocl/src/opencl/arithm_bitwise_not.cl
已删除
100644 → 0
浏览文件 @
836635d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
IMPORTANT:
READ
BEFORE
DOWNLOADING,
COPYING,
INSTALLING
OR
USING.
//
//
By
downloading,
copying,
installing
or
using
the
software
you
agree
to
this
license.
//
If
you
do
not
agree
to
this
license,
do
not
download,
install,
//
copy
or
use
the
software.
//
//
//
License
Agreement
//
For
Open
Source
Computer
Vision
Library
//
//
Copyright
(
C
)
2010-2012,
Institute
Of
Software
Chinese
Academy
Of
Science,
all
rights
reserved.
//
Copyright
(
C
)
2010-2012,
Advanced
Micro
Devices,
Inc.,
all
rights
reserved.
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Jiang
Liyuan,
jlyuan001.good@163.com
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
//
*
Redistribution
's
of
source
code
must
retain
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer.
//
//
*
Redistribution
's
in
binary
form
must
reproduce
the
above
copyright
notice,
//
this
list
of
conditions
and
the
following
disclaimer
in
the
documentation
//
and/or
other
materials
provided
with
the
distribution.
//
//
*
The
name
of
the
copyright
holders
may
not
be
used
to
endorse
or
promote
products
//
derived
from
this
software
without
specific
prior
written
permission.
//
//
This
software
is
provided
by
the
copyright
holders
and
contributors
as
is
and
//
any
express
or
implied
warranties,
including,
but
not
limited
to,
the
implied
//
warranties
of
merchantability
and
fitness
for
a
particular
purpose
are
disclaimed.
//
In
no
event
shall
the
Intel
Corporation
or
contributors
be
liable
for
any
direct,
//
indirect,
incidental,
special,
exemplary,
or
consequential
damages
//
(
including,
but
not
limited
to,
procurement
of
substitute
goods
or
services
;
//
loss
of
use,
data,
or
profits
; or business interruption) however caused
//
and
on
any
theory
of
liability,
whether
in
contract,
strict
liability,
//
or
tort
(
including
negligence
or
otherwise
)
arising
in
any
way
out
of
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//M*/
#
ifdef
DOUBLE_SUPPORT
#
ifdef
cl_amd_fp64
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
cl_khr_fp64
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
#
endif
///////////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////BITWISE_NOT////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
arithm_bitwise_not_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
uchar4
src1_data
=
vload4
(
0
,
src1
+
src1_index
)
;
uchar4
dst_data
=
vload4
(
0
,
dst
+
dst_index
)
;
uchar4
tmp_data
=
~src1_data
;
dst_data.x
=
dst_index
+
0
<
dst_end
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
dst_index
+
1
<
dst_end
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
dst_index
+
2
<
dst_end
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
dst_index
+
3
<
dst_end
?
tmp_data.w
:
dst_data.w
;
vstore4
(
dst_data,
0
,
dst
+
dst_index
)
;
}
}
__kernel
void
arithm_bitwise_not_D1
(
__global
char
*src1,
int
src1_step,
int
src1_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
int
src1_index
=
mad24
(
y,
src1_step,
x
+
src1_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
x
)
;
char4
src1_data
=
vload4
(
0
,
src1
+
src1_index
)
;
char4
dst_data
=
vload4
(
0
,
dst
+
dst_index
)
;
char4
tmp_data
=
~src1_data
;
dst_data.x
=
dst_index
+
0
<
dst_end
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
dst_index
+
1
<
dst_end
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
dst_index
+
2
<
dst_end
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
dst_index
+
3
<
dst_end
?
tmp_data.w
:
dst_data.w
;
vstore4
(
dst_data,
0
,
dst
+
dst_index
)
;
}
}
__kernel
void
arithm_bitwise_not_D2
(
__global
ushort
*src1,
int
src1_step,
int
src1_offset,
__global
ushort
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
(
x
<<
1
)
&
(
int
)
0xfffffff8
)
;
ushort4
src1_data
=
vload4
(
0
,
(
__global
ushort
*
)((
__global
char
*
)
src1
+
src1_index
))
;
ushort4
dst_data
=
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
;
ushort4
tmp_data
=
~
src1_data
;
dst_data.x
=
((
dst_index
+
0
>=
dst_start
)
&&
(
dst_index
+
0
<
dst_end
))
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
((
dst_index
+
2
>=
dst_start
)
&&
(
dst_index
+
2
<
dst_end
))
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
((
dst_index
+
4
>=
dst_start
)
&&
(
dst_index
+
4
<
dst_end
))
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
((
dst_index
+
6
>=
dst_start
)
&&
(
dst_index
+
6
<
dst_end
))
?
tmp_data.w
:
dst_data.w
;
*
((
__global
ushort4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
dst_data
;
}
}
__kernel
void
arithm_bitwise_not_D3
(
__global
short
*src1,
int
src1_step,
int
src1_offset,
__global
short
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
x
=
x
<<
2
;
#
ifdef
dst_align
#
undef
dst_align
#
endif
#
define
dst_align
((
dst_offset
>>
1
)
&
3
)
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
1
)
+
src1_offset
-
(
dst_align
<<
1
))
;
int
dst_start
=
mad24
(
y,
dst_step,
dst_offset
)
;
int
dst_end
=
mad24
(
y,
dst_step,
dst_offset
+
dst_step1
)
;
int
dst_index
=
mad24
(
y,
dst_step,
dst_offset
+
(
x
<<
1
)
&
(
int
)
0xfffffff8
)
;
short4
src1_data
=
vload4
(
0
,
(
__global
short
*
)((
__global
char
*
)
src1
+
src1_index
))
;
short4
dst_data
=
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
;
short4
tmp_data
=
~
src1_data
;
dst_data.x
=
((
dst_index
+
0
>=
dst_start
)
&&
(
dst_index
+
0
<
dst_end
))
?
tmp_data.x
:
dst_data.x
;
dst_data.y
=
((
dst_index
+
2
>=
dst_start
)
&&
(
dst_index
+
2
<
dst_end
))
?
tmp_data.y
:
dst_data.y
;
dst_data.z
=
((
dst_index
+
4
>=
dst_start
)
&&
(
dst_index
+
4
<
dst_end
))
?
tmp_data.z
:
dst_data.z
;
dst_data.w
=
((
dst_index
+
6
>=
dst_start
)
&&
(
dst_index
+
6
<
dst_end
))
?
tmp_data.w
:
dst_data.w
;
*
((
__global
short4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
dst_data
;
}
}
__kernel
void
arithm_bitwise_not_D4
(
__global
int
*src1,
int
src1_step,
int
src1_offset,
__global
int
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src1_index
=
mad24
(
y,
src1_step,
(
x
<<
2
)
+
src1_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
2
)
+
dst_offset
)
;
int
data1
=
*
((
__global
int
*
)((
__global
char
*
)
src1
+
src1_index
))
;
int
tmp
=
~
data1
;
*
((
__global
int
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp
;
}
}
__kernel
void
arithm_bitwise_not_D5
(
__global
char
*src,
int
src_step,
int
src_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
(
x
<<
2
)
+
src_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
2
)
+
dst_offset
)
;
char4
data
;
data
=
*
((
__global
char4
*
)((
__global
char
*
)
src
+
src_index
))
;
data
=
~
data
;
*
((
__global
char4
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_bitwise_not_D6
(
__global
char
*src,
int
src_step,
int
src_offset,
__global
char
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
if
(
x
<
cols
&&
y
<
rows
)
{
int
src_index
=
mad24
(
y,
src_step,
(
x
<<
3
)
+
src_offset
)
;
int
dst_index
=
mad24
(
y,
dst_step,
(
x
<<
3
)
+
dst_offset
)
;
char8
data
;
data
=
*
((
__global
char8
*
)((
__global
char
*
)
src
+
src_index
))
;
data
=
~
data
;
*
((
__global
char8
*
)((
__global
char
*
)
dst
+
dst_index
))
=
data
;
}
}
#
endif
modules/ocl/src/opencl/arithm_minMax.cl
浏览文件 @
5726e80f
...
@@ -63,81 +63,31 @@
...
@@ -63,81 +63,31 @@
/**************************************Array
minMax**************************************/
/**************************************Array
minMax**************************************/
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
__global
T
*
dst,
__kernel
void
arithm_op_minMax
(
__global
const
T
*
src,
int
src_step,
int
src_offset,
int
src_rows,
int
src_cols,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
)
int
total,
int
groupnum,
__global
T
*
dst
{
#
ifdef
WITH_MASK
int
lid
=
get_local_id
(
0
)
;
,
__global
const
uchar
*
mask,
int
mask_step,
int
mask_offset
int
gid
=
get_group_id
(
0
)
;
#
endif
int
id
=
get_global_id
(
0
)
;
)
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id < elemnum; id += grainSize)
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
src[idx]
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
if
(
lid
>
127
)
{
localmem_min[lid
-
128]
=
minval
;
localmem_max[lid
-
128]
=
maxval
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
lid
<
128
)
{
localmem_min[lid]
=
min
(
minval,
localmem_min[lid]
)
;
localmem_max[lid]
=
max
(
maxval,
localmem_max[lid]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
lsize
=
64
; lsize > 0; lsize >>= 1)
{
if
(
lid
<
lsize
)
{
int
lid2
=
lsize
+
lid
;
localmem_min[lid]
=
min
(
localmem_min[lid],
localmem_min[lid2]
)
;
localmem_max[lid]
=
max
(
localmem_max[lid],
localmem_max[lid2]
)
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
lid
==
0
)
{
dst[gid]
=
localmem_min[0]
;
dst[gid
+
groupnum]
=
localmem_max[0]
;
}
}
__kernel
void
arithm_op_minMax_mask
(
__global
const
T
*
src,
__global
T
*
dst,
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum,
const
__global
uchar
*
mask,
int
minvalid_cols,
int
moffset
)
{
{
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
int
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid_cols
;
__local
T
localmem_max[128],
localmem_min[128]
;
__local
T
localmem_max[128],
localmem_min[128]
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
T
minval
=
(
T
)(
MAX_VAL
)
,
maxval
=
(
T
)(
MIN_VAL
)
,
temp
;
int
y,
x
;
for
(
int
grainSize
=
groupnum
<<
8
; id <
elemnum
; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id <
total
; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid
_cols
;
y
=
id
/
src
_cols
;
midx
=
moffset
+
id
+
(
id
/
cols
)
*
minvalid
_cols
;
x
=
id
%
src
_cols
;
if
(
mask[midx]
)
#
ifdef
WITH_MASK
if
(
mask[mad24
(
y,
mask_step,
x
+
mask_offset
)
]
)
#
endif
{
{
temp
=
src[
idx
]
;
temp
=
src[
mad24
(
y,
src_step,
x
+
src_offset
)
]
;
minval
=
min
(
minval,
temp
)
;
minval
=
min
(
minval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
maxval
=
max
(
maxval,
temp
)
;
}
}
...
...
modules/ocl/src/opencl/arithm_nonzero.cl
浏览文件 @
5726e80f
...
@@ -52,23 +52,18 @@
...
@@ -52,23 +52,18 @@
/**************************************Count
NonZero**************************************/
/**************************************Count
NonZero**************************************/
__kernel
void
arithm_op_nonzero
(
int
cols,
int
invalid_cols,
int
offset,
int
elemnum,
int
groupnum
,
__kernel
void
arithm_op_nonzero
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
__global
srcT
*src,
__global
dstT
*
dst
)
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
{
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_nonzero[128]
;
__local
dstT
localmem_nonzero[128]
;
dstT
nonzero
=
(
dstT
)(
0
)
;
dstT
nonzero
=
(
dstT
)(
0
)
;
srcT
zero
=
(
srcT
)(
0
)
,
one
=
(
srcT
)(
1
)
;
for
(
int
grain
=
groupnum
<<
8
; id < elemnum; id += grain)
for
(
int
grain
=
groupnum
<<
8
; id < total; id += grain)
{
nonzero
+=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
==
(
srcT
)(
0
))
?
(
dstT
)(
0
)
:
(
dstT
)(
1
)
;
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
nonzero
+=
src[idx]
==
zero
?
zero
:
one
;
}
if
(
lid
>
127
)
if
(
lid
>
127
)
localmem_nonzero[lid
-
128]
=
nonzero
;
localmem_nonzero[lid
-
128]
=
nonzero
;
...
...
modules/ocl/src/opencl/arithm_sum.cl
浏览文件 @
5726e80f
...
@@ -63,21 +63,19 @@
...
@@ -63,21 +63,19 @@
/**************************************Array
buffer
SUM**************************************/
/**************************************Array
buffer
SUM**************************************/
__kernel
void
arithm_op_sum
(
int
cols,int
invalid_cols,int
offset,int
elemnum,int
groupnum
,
__kernel
void
arithm_op_sum
(
__global
srcT
*
src,
int
src_step,
int
src_offset,
int
src_cols
,
__global
srcT
*src,
__global
dstT
*
dst
)
int
total,
int
groupnum,
__global
dstT
*
dst
)
{
{
int
lid
=
get_local_id
(
0
)
;
int
lid
=
get_local_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
gid
=
get_group_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
;
int
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
__local
dstT
localmem_sum[128]
;
__local
dstT
localmem_sum[128]
;
dstT
sum
=
(
dstT
)(
0
)
,
temp
;
dstT
sum
=
(
dstT
)(
0
)
,
temp
;
for
(
int
grainSize
=
groupnum
<<
8
; id <
elemnum
; id += grainSize)
for
(
int
grainSize
=
groupnum
<<
8
; id <
total
; id += grainSize)
{
{
idx
=
offset
+
id
+
(
id
/
cols
)
*
invalid_cols
;
temp
=
convertToDstT
(
src[mad24
(
id
/
src_cols,
src_step,
id
%
src_cols
+
src_offset
)
]
)
;
temp
=
convertToDstT
(
src[idx]
)
;
FUNC
(
temp,
sum
)
;
FUNC
(
temp,
sum
)
;
}
}
...
...
modules/ocl/test/test_arithm.cpp
浏览文件 @
5726e80f
...
@@ -198,7 +198,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
...
@@ -198,7 +198,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
Size
roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Size
roiSize
=
randomSize
(
1
,
MAX_VALUE
);
Border
src1Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
Border
src1Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
2
,
11
);
randomSubMat
(
src1
,
src1_roi
,
roiSize
,
src1Border
,
type
,
-
11
,
11
);
Border
src2Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
Border
src2Border
=
randomBorder
(
0
,
use_roi
?
MAX_VALUE
:
0
);
randomSubMat
(
src2
,
src2_roi
,
roiSize
,
src2Border
,
type
,
-
1540
,
1740
);
randomSubMat
(
src2
,
src2_roi
,
roiSize
,
src2Border
,
type
,
-
1540
,
1740
);
...
@@ -1163,7 +1163,7 @@ OCL_TEST_P(CountNonZero, MAT)
...
@@ -1163,7 +1163,7 @@ OCL_TEST_P(CountNonZero, MAT)
int
cpures
=
cv
::
countNonZero
(
src1_roi
);
int
cpures
=
cv
::
countNonZero
(
src1_roi
);
int
gpures
=
cv
::
ocl
::
countNonZero
(
gsrc1_roi
);
int
gpures
=
cv
::
ocl
::
countNonZero
(
gsrc1_roi
);
EXPECT_
DOUBLE_EQ
((
double
)
cpures
,
(
double
)
gpures
);
EXPECT_
EQ
(
cpures
,
gpures
);
}
}
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录