Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
687d7639
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
687d7639
编写于
8月 03, 2012
作者:
Y
yao
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of
git://code.opencv.org/opencv
上级
1eedcea5
3180bbe3
变更
26
展开全部
隐藏空白更改
内联
并排
Showing
26 changed file
with
2170 addition
and
1095 deletion
+2170
-1095
modules/core/include/opencv2/core/parallel_tool.hpp
modules/core/include/opencv2/core/parallel_tool.hpp
+108
-0
modules/core/src/parallel_tool.cpp
modules/core/src/parallel_tool.cpp
+112
-0
modules/core/src/precomp.hpp
modules/core/src/precomp.hpp
+1
-0
modules/gpu/src/cascadeclassifier.cpp
modules/gpu/src/cascadeclassifier.cpp
+469
-467
modules/imgproc/perf/perf_bilateral.cpp
modules/imgproc/perf/perf_bilateral.cpp
+38
-0
modules/imgproc/src/precomp.hpp
modules/imgproc/src/precomp.hpp
+1
-0
modules/imgproc/src/smooth.cpp
modules/imgproc/src/smooth.cpp
+156
-98
modules/imgproc/test/test_bilateral_filter.cpp
modules/imgproc/test/test_bilateral_filter.cpp
+290
-0
modules/ocl/include/opencv2/ocl/matrix_operations.hpp
modules/ocl/include/opencv2/ocl/matrix_operations.hpp
+1
-1
modules/ocl/include/opencv2/ocl/ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+1
-0
modules/ocl/src/arithm.cpp
modules/ocl/src/arithm.cpp
+51
-51
modules/ocl/src/imgproc.cpp
modules/ocl/src/imgproc.cpp
+23
-11
modules/ocl/src/initialization.cpp
modules/ocl/src/initialization.cpp
+21
-5
modules/ocl/src/kernels/arithm_addWeighted.cl
modules/ocl/src/kernels/arithm_addWeighted.cl
+18
-18
modules/ocl/src/kernels/arithm_cartToPolar.cl
modules/ocl/src/kernels/arithm_cartToPolar.cl
+4
-0
modules/ocl/src/kernels/arithm_div.cl
modules/ocl/src/kernels/arithm_div.cl
+72
-59
modules/ocl/src/kernels/arithm_exp.cl
modules/ocl/src/kernels/arithm_exp.cl
+4
-0
modules/ocl/src/kernels/arithm_log.cl
modules/ocl/src/kernels/arithm_log.cl
+2
-2
modules/ocl/src/kernels/convertC3C4.cl
modules/ocl/src/kernels/convertC3C4.cl
+86
-85
modules/ocl/src/kernels/imgproc_resize.cl
modules/ocl/src/kernels/imgproc_resize.cl
+132
-77
modules/ocl/src/kernels/operator_setTo.cl
modules/ocl/src/kernels/operator_setTo.cl
+5
-55
modules/ocl/src/kernels/operator_setToM.cl
modules/ocl/src/kernels/operator_setToM.cl
+10
-104
modules/ocl/src/matrix_operations.cpp
modules/ocl/src/matrix_operations.cpp
+462
-57
modules/ocl/src/precomp.hpp
modules/ocl/src/precomp.hpp
+3
-3
modules/ocl/test/test_imgproc.cpp
modules/ocl/test/test_imgproc.cpp
+1
-1
modules/ocl/test/test_matrix_operation.cpp
modules/ocl/test/test_matrix_operation.cpp
+99
-1
未找到文件。
modules/core/include/opencv2/core/parallel_tool.hpp
0 → 100644
浏览文件 @
687d7639
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PARALLEL_TOOL_HPP__
#define __OPENCV_PARALLEL_TOOL_HPP__
#ifdef HAVE_CVCONFIG_H
# include <cvconfig.h>
#endif // HAVE_CVCONFIG_H
/*
HAVE_TBB - using TBB
HAVE_GCD - using GCD
HAVE_OPENMP - using OpenMP
HAVE_CONCURRENCY - using visual studio 2010 concurrency
*/
#ifdef HAVE_TBB
# include "tbb/tbb_stddef.h"
# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
# include "tbb/tbb.h"
# include "tbb/task.h"
# undef min
# undef max
# else
# undef HAVE_TBB
# endif // end TBB version
#endif // HAVE_TBB
#ifdef __cplusplus
namespace
cv
{
// a base body class
class
CV_EXPORTS
ParallelLoopBody
{
public:
virtual
void
operator
()
(
const
Range
&
range
)
const
=
0
;
virtual
~
ParallelLoopBody
();
};
CV_EXPORTS
void
parallel_for_
(
const
Range
&
range
,
const
ParallelLoopBody
&
body
);
template
<
typename
Iterator
,
typename
Body
>
inline
CV_EXPORTS
void
parallel_do_
(
Iterator
first
,
Iterator
last
,
const
Body
&
body
)
{
#ifdef HAVE_TBB
tbb
::
parallel_do
(
first
,
last
,
body
);
#else
for
(
;
first
!=
last
;
++
first
)
body
(
*
first
);
#endif // HAVE_TBB
}
template
<
typename
Body
>
inline
CV_EXPORTS
void
parallel_reduce_
(
const
Range
&
range
,
Body
&
body
)
{
#ifdef HAVE_TBB
tbb
::
parallel_reduce
(
tbb
::
blocked_range
<
int
>
(
range
.
start
,
range
.
end
),
body
);
#else
body
(
range
);
#endif // end HAVE_TBB
}
}
// namespace cv
#endif // __cplusplus
#endif // __OPENCV_PARALLEL_TOOL_HPP__
modules/core/src/parallel_tool.cpp
0 → 100644
浏览文件 @
687d7639
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#ifdef HAVE_CONCURRENCY
# include <ppl.h>
#elif defined HAVE_OPENMP
# include <omp.h>
#elif defined HAVE_GCD
# include <dispatch/dispatch.h>
#endif // HAVE_CONCURRENCY
namespace
cv
{
ParallelLoopBody
::~
ParallelLoopBody
()
{
}
#ifdef HAVE_TBB
class
TbbProxyLoopBody
{
public:
TbbProxyLoopBody
(
const
ParallelLoopBody
&
_body
)
:
body
(
&
_body
)
{
}
void
operator
()(
const
tbb
::
blocked_range
<
int
>&
range
)
const
{
body
->
operator
()(
Range
(
range
.
begin
(),
range
.
end
()));
}
private:
const
ParallelLoopBody
*
body
;
};
#endif // end HAVE_TBB
#ifdef HAVE_GCD
static
void
block_function
(
void
*
context
,
size_t
index
)
{
ParallelLoopBody
*
ptr_body
=
static_cast
<
ParallelLoopBody
*>
(
context
);
ptr_body
->
operator
()(
Range
(
index
,
index
+
1
));
}
#endif // HAVE_GCD
void
parallel_for_
(
const
Range
&
range
,
const
ParallelLoopBody
&
body
)
{
#ifdef HAVE_TBB
tbb
::
parallel_for
(
tbb
::
blocked_range
<
int
>
(
range
.
start
,
range
.
end
),
TbbProxyLoopBody
(
body
));
#elif defined HAVE_CONCURRENCY
Concurrency
::
parallel_for
(
range
.
start
,
range
.
end
,
body
);
#elif defined HAVE_OPENMP
#pragma omp parallel for schedule(dynamic)
for
(
int
i
=
range
.
start
;
i
<
range
.
end
;
++
i
)
body
(
Range
(
i
,
i
+
1
));
#elif defined (HAVE_GCD)
dispatch_queue_t
concurrent_queue
=
dispatch_get_global_queue
(
DISPATCH_QUEUE_PRIORITY_DEFAULT
,
0
);
dispatch_apply_f
(
range
.
end
-
range
.
start
,
concurrent_queue
,
&
const_cast
<
ParallelLoopBody
&>
(
body
),
block_function
);
#else
body
(
range
);
#endif // end HAVE_TBB
}
}
// namespace cv
modules/core/src/precomp.hpp
浏览文件 @
687d7639
...
...
@@ -50,6 +50,7 @@
#include "opencv2/core/core.hpp"
#include "opencv2/core/core_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/parallel_tool.hpp"
#include <assert.h>
#include <ctype.h>
...
...
modules/gpu/src/cascadeclassifier.cpp
浏览文件 @
687d7639
此差异已折叠。
点击以展开。
modules/imgproc/perf/perf_bilateral.cpp
0 → 100644
浏览文件 @
687d7639
#include "perf_precomp.hpp"
using
namespace
std
;
using
namespace
cv
;
using
namespace
perf
;
using
namespace
testing
;
using
std
::
tr1
::
make_tuple
;
using
std
::
tr1
::
get
;
CV_ENUM
(
Mat_Type
,
CV_8UC1
,
CV_8UC3
,
CV_32FC1
,
CV_32FC3
)
typedef
TestBaseWithParam
<
tr1
::
tuple
<
Size
,
int
,
Mat_Type
>
>
TestBilateralFilter
;
PERF_TEST_P
(
TestBilateralFilter
,
BilateralFilter
,
Combine
(
Values
(
szVGA
,
sz1080p
),
// image size
Values
(
3
,
5
),
// d
ValuesIn
(
Mat_Type
::
all
()
)
// image type
)
)
{
Size
sz
;
int
d
,
type
;
const
double
sigmaColor
=
1.
,
sigmaSpace
=
1.
;
sz
=
get
<
0
>
(
GetParam
());
d
=
get
<
1
>
(
GetParam
());
type
=
get
<
2
>
(
GetParam
());
Mat
src
(
sz
,
type
);
Mat
dst
(
sz
,
type
);
declare
.
in
(
src
,
WARMUP_RNG
).
out
(
dst
).
time
(
20
);
TEST_CYCLE
()
bilateralFilter
(
src
,
dst
,
d
,
sigmaColor
,
sigmaSpace
,
BORDER_DEFAULT
);
SANITY_CHECK
(
dst
);
}
modules/imgproc/src/precomp.hpp
浏览文件 @
687d7639
...
...
@@ -50,6 +50,7 @@
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/internal.hpp"
#include "opencv2/core/parallel_tool.hpp"
#include <math.h>
#include <assert.h>
#include <string.h>
...
...
modules/imgproc/src/smooth.cpp
浏览文件 @
687d7639
...
...
@@ -1288,48 +1288,119 @@ void cv::medianBlur( InputArray _src0, OutputArray _dst, int ksize )
namespace
cv
{
class
BilateralFilter_8u_Invoker
:
public
ParallelLoopBody
{
public:
BilateralFilter_8u_Invoker
(
const
Mat
&
_src
,
Mat
&
_dst
,
Mat
_temp
,
int
_radius
,
int
_maxk
,
int
*
_space_ofs
,
float
*
_space_weight
,
float
*
_color_weight
)
:
ParallelLoopBody
(),
src
(
_src
),
dst
(
_dst
),
temp
(
_temp
),
radius
(
_radius
),
maxk
(
_maxk
),
space_ofs
(
_space_ofs
),
space_weight
(
_space_weight
),
color_weight
(
_color_weight
)
{
}
virtual
void
operator
()
(
const
Range
&
range
)
const
{
int
i
,
j
,
cn
=
src
.
channels
(),
k
;
Size
size
=
src
.
size
();
for
(
i
=
range
.
start
;
i
<
range
.
end
;
i
++
)
{
const
uchar
*
sptr
=
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
+
radius
*
cn
;
uchar
*
dptr
=
dst
.
data
+
i
*
dst
.
step
;
if
(
cn
==
1
)
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
float
sum
=
0
,
wsum
=
0
;
int
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
int
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
val
-
val0
)];
sum
+=
val
*
w
;
wsum
+=
w
;
}
// overflow is not possible here => there is no need to use CV_CAST_8U
dptr
[
j
]
=
(
uchar
)
cvRound
(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
int
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
uchar
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
int
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
)];
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
cvRound
(
sum_b
*
wsum
);
g0
=
cvRound
(
sum_g
*
wsum
);
r0
=
cvRound
(
sum_r
*
wsum
);
dptr
[
j
]
=
(
uchar
)
b0
;
dptr
[
j
+
1
]
=
(
uchar
)
g0
;
dptr
[
j
+
2
]
=
(
uchar
)
r0
;
}
}
}
}
private:
const
Mat
&
src
;
Mat
&
dst
,
temp
;
int
radius
,
maxk
,
*
space_ofs
;
float
*
space_weight
,
*
color_weight
;
};
static
void
bilateralFilter_8u
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
double
sigma_color
,
double
sigma_space
,
int
borderType
)
double
sigma_color
,
double
sigma_space
,
int
borderType
)
{
int
cn
=
src
.
channels
();
int
i
,
j
,
k
,
maxk
,
radius
;
int
i
,
j
,
maxk
,
radius
;
Size
size
=
src
.
size
();
CV_Assert
(
(
src
.
type
()
==
CV_8UC1
||
src
.
type
()
==
CV_8UC3
)
&&
src
.
type
()
==
dst
.
type
()
&&
src
.
size
()
==
dst
.
size
()
&&
src
.
data
!=
dst
.
data
);
src
.
type
()
==
dst
.
type
()
&&
src
.
size
()
==
dst
.
size
()
&&
src
.
data
!=
dst
.
data
);
if
(
sigma_color
<=
0
)
sigma_color
=
1
;
if
(
sigma_space
<=
0
)
sigma_space
=
1
;
double
gauss_color_coeff
=
-
0.5
/
(
sigma_color
*
sigma_color
);
double
gauss_space_coeff
=
-
0.5
/
(
sigma_space
*
sigma_space
);
if
(
d
<=
0
)
radius
=
cvRound
(
sigma_space
*
1.5
);
else
radius
=
d
/
2
;
radius
=
MAX
(
radius
,
1
);
d
=
radius
*
2
+
1
;
Mat
temp
;
copyMakeBorder
(
src
,
temp
,
radius
,
radius
,
radius
,
radius
,
borderType
);
vector
<
float
>
_color_weight
(
cn
*
256
);
vector
<
float
>
_space_weight
(
d
*
d
);
vector
<
int
>
_space_ofs
(
d
*
d
);
float
*
color_weight
=
&
_color_weight
[
0
];
float
*
space_weight
=
&
_space_weight
[
0
];
int
*
space_ofs
=
&
_space_ofs
[
0
];
// initialize color-related bilateral filter coefficients
for
(
i
=
0
;
i
<
256
*
cn
;
i
++
)
color_weight
[
i
]
=
(
float
)
std
::
exp
(
i
*
i
*
gauss_color_coeff
);
// initialize space-related bilateral filter coefficients
for
(
i
=
-
radius
,
maxk
=
0
;
i
<=
radius
;
i
++
)
for
(
j
=
-
radius
;
j
<=
radius
;
j
++
)
...
...
@@ -1340,55 +1411,89 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
space_weight
[
maxk
]
=
(
float
)
std
::
exp
(
r
*
r
*
gauss_space_coeff
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
temp
.
step
+
j
*
cn
);
}
BilateralFilter_8u_Invoker
body
(
src
,
dst
,
temp
,
radius
,
maxk
,
space_ofs
,
space_weight
,
color_weight
);
parallel_for_
(
Range
(
0
,
size
.
height
),
body
);
}
for
(
i
=
0
;
i
<
size
.
height
;
i
++
)
class
BilateralFilter_32f_Invoker
:
public
ParallelLoopBody
{
public:
BilateralFilter_32f_Invoker
(
int
_cn
,
int
_radius
,
int
_maxk
,
int
*
_space_ofs
,
Mat
_temp
,
Mat
*
_dest
,
Size
_size
,
float
_scale_index
,
float
*
_space_weight
,
float
*
_expLUT
)
:
ParallelLoopBody
(),
cn
(
_cn
),
radius
(
_radius
),
maxk
(
_maxk
),
space_ofs
(
_space_ofs
),
temp
(
_temp
),
dest
(
_dest
),
size
(
_size
),
scale_index
(
_scale_index
),
space_weight
(
_space_weight
),
expLUT
(
_expLUT
)
{
const
uchar
*
sptr
=
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
+
radius
*
cn
;
uchar
*
dptr
=
dst
.
data
+
i
*
dst
.
step
;
}
if
(
cn
==
1
)
virtual
void
operator
()
(
const
Range
&
range
)
const
{
Mat
&
dst
=
*
dest
;
int
i
,
j
,
k
;
for
(
i
=
range
.
start
;
i
<
range
.
end
;
i
++
)
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
const
float
*
sptr
=
(
const
float
*
)(
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
)
+
radius
*
cn
;
float
*
dptr
=
(
float
*
)(
dst
.
data
+
i
*
dst
.
step
);
if
(
cn
==
1
)
{
float
sum
=
0
,
wsum
=
0
;
int
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
int
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
val
-
val0
)];
sum
+=
val
*
w
;
wsum
+=
w
;
float
sum
=
0
,
wsum
=
0
;
float
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
float
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
alpha
=
(
float
)(
std
::
abs
(
val
-
val0
)
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum
+=
val
*
w
;
wsum
+=
w
;
}
dptr
[
j
]
=
(
float
)(
sum
/
wsum
);
}
// overflow is not possible here => there is no need to use CV_CAST_8U
dptr
[
j
]
=
(
uchar
)
cvRound
(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
else
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
int
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
const
uchar
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
int
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
w
=
space_weight
[
k
]
*
color_weight
[
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
)];
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
float
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
float
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
float
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
alpha
=
(
float
)((
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
))
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
sum_b
*
wsum
;
g0
=
sum_g
*
wsum
;
r0
=
sum_r
*
wsum
;
dptr
[
j
]
=
b0
;
dptr
[
j
+
1
]
=
g0
;
dptr
[
j
+
2
]
=
r0
;
}
wsum
=
1.
f
/
wsum
;
b0
=
cvRound
(
sum_b
*
wsum
);
g0
=
cvRound
(
sum_g
*
wsum
);
r0
=
cvRound
(
sum_r
*
wsum
);
dptr
[
j
]
=
(
uchar
)
b0
;
dptr
[
j
+
1
]
=
(
uchar
)
g0
;
dptr
[
j
+
2
]
=
(
uchar
)
r0
;
}
}
}
}
private:
int
cn
,
radius
,
maxk
,
*
space_ofs
;
Mat
temp
,
*
dest
;
Size
size
;
float
scale_index
,
*
space_weight
,
*
expLUT
;
};
static
void
bilateralFilter_32f
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
...
...
@@ -1396,7 +1501,7 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
int
borderType
)
{
int
cn
=
src
.
channels
();
int
i
,
j
,
k
,
maxk
,
radius
;
int
i
,
j
,
maxk
,
radius
;
double
minValSrc
=-
1
,
maxValSrc
=
1
;
const
int
kExpNumBinsPerChannel
=
1
<<
12
;
int
kExpNumBins
=
0
;
...
...
@@ -1474,57 +1579,10 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
(
temp
.
step
/
sizeof
(
float
))
+
j
*
cn
);
}
for
(
i
=
0
;
i
<
size
.
height
;
i
++
)
{
const
float
*
sptr
=
(
const
float
*
)(
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
)
+
radius
*
cn
;
float
*
dptr
=
(
float
*
)(
dst
.
data
+
i
*
dst
.
step
);
// parallel_for usage
if
(
cn
==
1
)
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
float
sum
=
0
,
wsum
=
0
;
float
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
float
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
alpha
=
(
float
)(
std
::
abs
(
val
-
val0
)
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum
+=
val
*
w
;
wsum
+=
w
;
}
dptr
[
j
]
=
(
float
)(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
float
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
float
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
float
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
alpha
=
(
float
)((
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
))
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
sum_b
*
wsum
;
g0
=
sum_g
*
wsum
;
r0
=
sum_r
*
wsum
;
dptr
[
j
]
=
b0
;
dptr
[
j
+
1
]
=
g0
;
dptr
[
j
+
2
]
=
r0
;
}
}
}
BilateralFilter_32f_Invoker
body
(
cn
,
radius
,
maxk
,
space_ofs
,
temp
,
&
dst
,
size
,
scale_index
,
space_weight
,
expLUT
);
parallel_for_
(
Range
(
0
,
size
.
height
),
body
);
}
}
...
...
modules/imgproc/test/test_bilateral_filter.cpp
0 → 100644
浏览文件 @
687d7639
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
using
namespace
cv
;
namespace
cvtest
{
class
CV_BilateralFilterTest
:
public
cvtest
::
BaseTest
{
public:
enum
{
MAX_WIDTH
=
1920
,
MIN_WIDTH
=
1
,
MAX_HEIGHT
=
1080
,
MIN_HEIGHT
=
1
};
CV_BilateralFilterTest
();
~
CV_BilateralFilterTest
();
protected:
virtual
void
run_func
();
virtual
int
prepare_test_case
(
int
test_case_index
);
virtual
int
validate_test_results
(
int
test_case_index
);
private:
void
reference_bilateral_filter
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
double
sigma_color
,
double
sigma_space
,
int
borderType
=
BORDER_DEFAULT
);
int
getRandInt
(
RNG
&
rng
,
int
min_value
,
int
max_value
)
const
;
double
_sigma_color
;
double
_sigma_space
;
Mat
_src
;
Mat
_parallel_dst
;
int
_d
;
};
CV_BilateralFilterTest
::
CV_BilateralFilterTest
()
:
cvtest
::
BaseTest
(),
_src
(),
_parallel_dst
(),
_d
()
{
test_case_count
=
1000
;
}
CV_BilateralFilterTest
::~
CV_BilateralFilterTest
()
{
}
int
CV_BilateralFilterTest
::
getRandInt
(
RNG
&
rng
,
int
min_value
,
int
max_value
)
const
{
double
rand_value
=
rng
.
uniform
(
log
(
min_value
),
log
(
max_value
+
1
));
return
cvRound
(
exp
(
rand_value
));
}
void
CV_BilateralFilterTest
::
reference_bilateral_filter
(
const
Mat
&
src
,
Mat
&
dst
,
int
d
,
double
sigma_color
,
double
sigma_space
,
int
borderType
)
{
int
cn
=
src
.
channels
();
int
i
,
j
,
k
,
maxk
,
radius
;
double
minValSrc
=
-
1
,
maxValSrc
=
1
;
const
int
kExpNumBinsPerChannel
=
1
<<
12
;
int
kExpNumBins
=
0
;
float
lastExpVal
=
1.
f
;
float
len
,
scale_index
;
Size
size
=
src
.
size
();
dst
.
create
(
size
,
src
.
type
());
CV_Assert
(
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC3
)
&&
src
.
type
()
==
dst
.
type
()
&&
src
.
size
()
==
dst
.
size
()
&&
src
.
data
!=
dst
.
data
);
if
(
sigma_color
<=
0
)
sigma_color
=
1
;
if
(
sigma_space
<=
0
)
sigma_space
=
1
;
double
gauss_color_coeff
=
-
0.5
/
(
sigma_color
*
sigma_color
);
double
gauss_space_coeff
=
-
0.5
/
(
sigma_space
*
sigma_space
);
if
(
d
<=
0
)
radius
=
cvRound
(
sigma_space
*
1.5
);
else
radius
=
d
/
2
;
radius
=
MAX
(
radius
,
1
);
d
=
radius
*
2
+
1
;
// compute the min/max range for the input image (even if multichannel)
minMaxLoc
(
src
.
reshape
(
1
),
&
minValSrc
,
&
maxValSrc
);
if
(
std
::
abs
(
minValSrc
-
maxValSrc
)
<
FLT_EPSILON
)
{
src
.
copyTo
(
dst
);
return
;
}
// temporary copy of the image with borders for easy processing
Mat
temp
;
copyMakeBorder
(
src
,
temp
,
radius
,
radius
,
radius
,
radius
,
borderType
);
patchNaNs
(
temp
);
// allocate lookup tables
vector
<
float
>
_space_weight
(
d
*
d
);
vector
<
int
>
_space_ofs
(
d
*
d
);
float
*
space_weight
=
&
_space_weight
[
0
];
int
*
space_ofs
=
&
_space_ofs
[
0
];
// assign a length which is slightly more than needed
len
=
(
float
)(
maxValSrc
-
minValSrc
)
*
cn
;
kExpNumBins
=
kExpNumBinsPerChannel
*
cn
;
vector
<
float
>
_expLUT
(
kExpNumBins
+
2
);
float
*
expLUT
=
&
_expLUT
[
0
];
scale_index
=
kExpNumBins
/
len
;
// initialize the exp LUT
for
(
i
=
0
;
i
<
kExpNumBins
+
2
;
i
++
)
{
if
(
lastExpVal
>
0.
f
)
{
double
val
=
i
/
scale_index
;
expLUT
[
i
]
=
(
float
)
std
::
exp
(
val
*
val
*
gauss_color_coeff
);
lastExpVal
=
expLUT
[
i
];
}
else
expLUT
[
i
]
=
0.
f
;
}
// initialize space-related bilateral filter coefficients
for
(
i
=
-
radius
,
maxk
=
0
;
i
<=
radius
;
i
++
)
for
(
j
=
-
radius
;
j
<=
radius
;
j
++
)
{
double
r
=
std
::
sqrt
((
double
)
i
*
i
+
(
double
)
j
*
j
);
if
(
r
>
radius
)
continue
;
space_weight
[
maxk
]
=
(
float
)
std
::
exp
(
r
*
r
*
gauss_space_coeff
);
space_ofs
[
maxk
++
]
=
(
int
)(
i
*
(
temp
.
step
/
sizeof
(
float
))
+
j
*
cn
);
}
for
(
i
=
0
;
i
<
size
.
height
;
i
++
)
{
const
float
*
sptr
=
(
const
float
*
)(
temp
.
data
+
(
i
+
radius
)
*
temp
.
step
)
+
radius
*
cn
;
float
*
dptr
=
(
float
*
)(
dst
.
data
+
i
*
dst
.
step
);
if
(
cn
==
1
)
{
for
(
j
=
0
;
j
<
size
.
width
;
j
++
)
{
float
sum
=
0
,
wsum
=
0
;
float
val0
=
sptr
[
j
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
float
val
=
sptr
[
j
+
space_ofs
[
k
]];
float
alpha
=
(
float
)(
std
::
abs
(
val
-
val0
)
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum
+=
val
*
w
;
wsum
+=
w
;
}
dptr
[
j
]
=
(
float
)(
sum
/
wsum
);
}
}
else
{
assert
(
cn
==
3
);
for
(
j
=
0
;
j
<
size
.
width
*
3
;
j
+=
3
)
{
float
sum_b
=
0
,
sum_g
=
0
,
sum_r
=
0
,
wsum
=
0
;
float
b0
=
sptr
[
j
],
g0
=
sptr
[
j
+
1
],
r0
=
sptr
[
j
+
2
];
for
(
k
=
0
;
k
<
maxk
;
k
++
)
{
const
float
*
sptr_k
=
sptr
+
j
+
space_ofs
[
k
];
float
b
=
sptr_k
[
0
],
g
=
sptr_k
[
1
],
r
=
sptr_k
[
2
];
float
alpha
=
(
float
)((
std
::
abs
(
b
-
b0
)
+
std
::
abs
(
g
-
g0
)
+
std
::
abs
(
r
-
r0
))
*
scale_index
);
int
idx
=
cvFloor
(
alpha
);
alpha
-=
idx
;
float
w
=
space_weight
[
k
]
*
(
expLUT
[
idx
]
+
alpha
*
(
expLUT
[
idx
+
1
]
-
expLUT
[
idx
]));
sum_b
+=
b
*
w
;
sum_g
+=
g
*
w
;
sum_r
+=
r
*
w
;
wsum
+=
w
;
}
wsum
=
1.
f
/
wsum
;
b0
=
sum_b
*
wsum
;
g0
=
sum_g
*
wsum
;
r0
=
sum_r
*
wsum
;
dptr
[
j
]
=
b0
;
dptr
[
j
+
1
]
=
g0
;
dptr
[
j
+
2
]
=
r0
;
}
}
}
}
int
CV_BilateralFilterTest
::
prepare_test_case
(
int
/* test_case_index */
)
{
const
static
int
types
[]
=
{
CV_32FC1
,
CV_32FC3
,
CV_8UC1
,
CV_8UC3
};
RNG
&
rng
=
ts
->
get_rng
();
Size
size
(
getRandInt
(
rng
,
MIN_WIDTH
,
MAX_WIDTH
),
getRandInt
(
rng
,
MIN_HEIGHT
,
MAX_HEIGHT
));
int
type
=
types
[
rng
(
sizeof
(
types
)
/
sizeof
(
types
[
0
]))];
_d
=
rng
.
uniform
(
0.
,
1.
)
>
0.5
?
5
:
3
;
_src
.
create
(
size
,
type
);
rng
.
fill
(
_src
,
RNG
::
UNIFORM
,
0
,
256
);
_sigma_color
=
_sigma_space
=
1.
;
return
1
;
}
int
CV_BilateralFilterTest
::
validate_test_results
(
int
test_case_index
)
{
static
const
double
eps
=
1
;
Mat
reference_dst
,
reference_src
;
if
(
_src
.
depth
()
==
CV_32F
)
reference_bilateral_filter
(
_src
,
reference_dst
,
_d
,
_sigma_color
,
_sigma_space
);
else
{
int
type
=
_src
.
type
();
_src
.
convertTo
(
reference_src
,
CV_32F
);
reference_bilateral_filter
(
reference_src
,
reference_dst
,
_d
,
_sigma_color
,
_sigma_space
);
reference_dst
.
convertTo
(
reference_dst
,
type
);
}
double
e
=
norm
(
reference_dst
,
_parallel_dst
);
if
(
e
>
eps
)
{
ts
->
printf
(
cvtest
::
TS
::
CONSOLE
,
"actual error: %g, expected: %g"
,
e
,
eps
);
ts
->
set_failed_test_info
(
cvtest
::
TS
::
FAIL_BAD_ACCURACY
);
}
else
ts
->
set_failed_test_info
(
cvtest
::
TS
::
OK
);
return
BaseTest
::
validate_test_results
(
test_case_index
);
}
void
CV_BilateralFilterTest
::
run_func
()
{
bilateralFilter
(
_src
,
_parallel_dst
,
_d
,
_sigma_color
,
_sigma_space
);
}
TEST
(
Imgproc_BilateralFilter
,
accuracy
)
{
CV_BilateralFilterTest
test
;
test
.
safe_run
();
}
}
// end of namespace cvtest
modules/ocl/include/opencv2/ocl/matrix_operations.hpp
浏览文件 @
687d7639
...
...
@@ -49,7 +49,7 @@ namespace cv
namespace
ocl
{
////////////////////////////////////OpenCL kernel strings//////////////////////////
extern
const
char
*
convertC3C4
;
//
extern const char *convertC3C4;
////////////////////////////////////////////////////////////////////////
//////////////////////////////// oclMat ////////////////////////////////
...
...
modules/ocl/include/opencv2/ocl/ocl.hpp
浏览文件 @
687d7639
...
...
@@ -49,6 +49,7 @@
#include "opencv2/core/core.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/features2d/features2d.hpp"
namespace
cv
{
...
...
modules/ocl/src/arithm.cpp
浏览文件 @
687d7639
...
...
@@ -455,13 +455,12 @@ void cv::ocl::multiply(const oclMat &src1, const oclMat &src2, oclMat &dst, doub
}
void
cv
::
ocl
::
divide
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
,
double
scalar
)
{
if
(
src1
.
clCxt
->
impl
->
double_support
==
0
)
{
CV_Error
(
-
217
,
"Selected device don't support double
\r\n
"
);
return
;
}
arithmetic_run
<
double
>
(
src1
,
src2
,
dst
,
"arithm_div"
,
&
arithm_div
,
(
void
*
)(
&
scalar
));
if
(
src1
.
clCxt
->
impl
->
double_support
!=
0
)
arithmetic_run
<
double
>
(
src1
,
src2
,
dst
,
"arithm_div"
,
&
arithm_div
,
(
void
*
)(
&
scalar
));
else
arithmetic_run
<
float
>
(
src1
,
src2
,
dst
,
"arithm_div"
,
&
arithm_div
,
(
void
*
)(
&
scalar
));
}
template
<
typename
WT
,
typename
CL_WT
>
void
arithmetic_scalar_run
(
const
oclMat
&
src1
,
const
Scalar
&
src2
,
oclMat
&
dst
,
const
oclMat
&
mask
,
string
kernelName
,
const
char
**
kernelString
,
int
isMatSubScalar
)
...
...
@@ -579,7 +578,14 @@ void arithmetic_scalar_run(const oclMat &src, oclMat &dst, string kernelName, co
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
scalar
));
if
(
src
.
clCxt
->
impl
->
double_support
!=
0
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
scalar
));
else
{
float
f_scalar
=
(
float
)
scalar
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
f_scalar
));
}
openCLExecuteKernel
(
clCxt
,
kernelString
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
}
...
...
@@ -670,9 +676,9 @@ void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string ker
int
cols
=
divUp
(
dst
.
cols
+
offset_cols
,
vector_length
);
size_t
localThreads
[
3
]
=
{
64
,
4
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
...
...
@@ -1253,7 +1259,11 @@ void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, c
CV_Assert
(
src
.
type
()
==
CV_32F
||
src
.
type
()
==
CV_64F
);
Context
*
clCxt
=
src
.
clCxt
;
if
(
clCxt
->
impl
->
double_support
==
0
&&
src
.
type
()
==
CV_64F
)
{
CV_Error
(
-
217
,
"Selected device don't support double
\r\n
"
);
return
;
}
//int channels = dst.channels();
int
depth
=
dst
.
depth
();
...
...
@@ -2193,56 +2203,46 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
if
(
sizeof
(
double
)
==
8
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
if
(
src1
.
clCxt
->
impl
->
double_support
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
alpha
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
beta
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
gama
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src1
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
alpha
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src2
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
beta
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src2
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
gama
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
}
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src1
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst_step1
));
openCLExecuteKernel
(
clCxt
,
&
arithm_addWeighted
,
"addWeighted"
,
globalThreads
,
localThreads
,
args
,
-
1
,
depth
);
}
void
cv
::
ocl
::
magnitudeSqr
(
const
oclMat
&
src1
,
const
oclMat
&
src2
,
oclMat
&
dst
)
{
CV_Assert
(
src1
.
type
()
==
src2
.
type
()
&&
src1
.
size
()
==
src2
.
size
()
&&
(
src1
.
depth
()
==
CV_32F
));
(
src1
.
depth
()
==
CV_32F
));
dst
.
create
(
src1
.
size
(),
src1
.
type
());
...
...
@@ -2265,9 +2265,9 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst)
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -2313,9 +2313,9 @@ void cv::ocl::magnitudeSqr(const oclMat &src1, oclMat &dst)
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
divUp
(
dst
.
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
@@ -2348,9 +2348,9 @@ void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernel
size_t
localThreads
[
3
]
=
{
64
,
4
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
cols
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
divUp
(
rows
,
localThreads
[
1
])
*
localThreads
[
1
],
1
};
int
dst_step1
=
dst
.
cols
*
dst
.
elemSize
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
...
...
modules/ocl/src/imgproc.cpp
浏览文件 @
687d7639
...
...
@@ -410,7 +410,11 @@ namespace cv
float
ify
=
1.
/
fy
;
double
ifx_d
=
1.
/
fx
;
double
ify_d
=
1.
/
fy
;
int
srcStep_in_pixel
=
src
.
step1
()
/
src
.
channels
();
int
srcoffset_in_pixel
=
src
.
offset
/
src
.
elemSize
();
int
dstStep_in_pixel
=
dst
.
step1
()
/
dst
.
channels
();
int
dstoffset_in_pixel
=
dst
.
offset
/
dst
.
elemSize
();
//printf("%d %d\n",src.step1() , dst.elemSize());
string
kernelName
;
if
(
interpolation
==
INTER_LINEAR
)
kernelName
=
"resizeLN"
;
...
...
@@ -438,25 +442,33 @@ namespace cv
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
if
(
src
.
clCxt
->
impl
->
double_support
!=
0
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ifx_d
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_double
),
(
void
*
)
&
ify_d
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ifx
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
ify
));
}
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
dst
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
offset_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
Step_in_pixel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
dst
.
cols
));
...
...
modules/ocl/src/initialization.cpp
浏览文件 @
687d7639
...
...
@@ -378,20 +378,36 @@ namespace cv
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
)
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
,
int
channels
)
{
size_t
buffer_origin
[
3
]
=
{
0
,
0
,
0
};
size_t
host_origin
[
3
]
=
{
0
,
0
,
0
};
size_t
region
[
3
]
=
{
width
,
height
,
1
};
if
(
kind
==
clMemcpyHostToDevice
)
{
openCLSafeCall
(
clEnqueueWriteBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
dst
,
CL_TRUE
,
buffer_origin
,
host_origin
,
region
,
dpitch
,
0
,
spitch
,
0
,
src
,
0
,
0
,
0
));
if
(
dpitch
==
width
||
channels
==
3
)
{
openCLSafeCall
(
clEnqueueWriteBuffer
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
dst
,
CL_TRUE
,
0
,
width
*
height
,
src
,
0
,
NULL
,
NULL
));
}
else
{
openCLSafeCall
(
clEnqueueWriteBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
dst
,
CL_TRUE
,
buffer_origin
,
host_origin
,
region
,
dpitch
,
0
,
spitch
,
0
,
src
,
0
,
0
,
0
));
}
}
else
if
(
kind
==
clMemcpyDeviceToHost
)
{
openCLSafeCall
(
clEnqueueReadBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
src
,
CL_TRUE
,
buffer_origin
,
host_origin
,
region
,
spitch
,
0
,
dpitch
,
0
,
dst
,
0
,
0
,
0
));
if
(
spitch
==
width
||
channels
==
3
)
{
openCLSafeCall
(
clEnqueueReadBuffer
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
src
,
CL_TRUE
,
0
,
width
*
height
,
dst
,
0
,
NULL
,
NULL
));
}
else
{
openCLSafeCall
(
clEnqueueReadBufferRect
(
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
src
,
CL_TRUE
,
buffer_origin
,
host_origin
,
region
,
spitch
,
0
,
dpitch
,
0
,
dst
,
0
,
0
,
0
));
}
}
}
...
...
modules/ocl/src/kernels/arithm_addWeighted.cl
浏览文件 @
687d7639
...
...
@@ -51,9 +51,9 @@ typedef float F;
//////////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////addWeighted//////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////////////
__kernel
void
addWeighted_D0
(
__global
uchar
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__global
uchar
*src2,
F
beta,
int
src2_step,int
src2_offset,
F
gama,
__kernel
void
addWeighted_D0
(
__global
uchar
*src1,int
src1_step,int
src1_offset,
__global
uchar
*src2,
int
src2_step,int
src2_offset,
F
alpha,F
beta,F
gama,
__global
uchar
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
{
...
...
@@ -99,9 +99,9 @@ __kernel void addWeighted_D0 (__global uchar *src1, F alpha,int src1_step,int sr
__kernel
void
addWeighted_D2
(
__global
ushort
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__global
ushort
*src2,
F
beta,
int
src2_step,int
src2_offset,
F
gama,
__kernel
void
addWeighted_D2
(
__global
ushort
*src1,
int
src1_step,int
src1_offset,
__global
ushort
*src2,
int
src2_step,int
src2_offset,
F
alpha,F
beta,F
gama,
__global
ushort
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
{
...
...
@@ -145,9 +145,9 @@ __kernel void addWeighted_D2 (__global ushort *src1, F alpha,int src1_step,int s
}
__kernel
void
addWeighted_D3
(
__global
short
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__global
short
*src2,
F
beta,
int
src2_step,int
src2_offset,
F
gama,
__kernel
void
addWeighted_D3
(
__global
short
*src1,
int
src1_step,int
src1_offset,
__global
short
*src2,
int
src2_step,int
src2_offset,
F
alpha,F
beta,F
gama,
__global
short
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
{
...
...
@@ -190,9 +190,9 @@ __kernel void addWeighted_D3 (__global short *src1, F alpha,int src1_step,int sr
}
__kernel
void
addWeighted_D4
(
__global
int
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__global
int
*src2,
F
beta,
int
src2_step,int
src2_offset,
F
gama,
__kernel
void
addWeighted_D4
(
__global
int
*src1,
int
src1_step,int
src1_offset,
__global
int
*src2,
int
src2_step,int
src2_offset,
F
alpha,F
beta,
F
gama,
__global
int
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
{
...
...
@@ -238,9 +238,9 @@ __kernel void addWeighted_D4 (__global int *src1, F alpha,int src1_step,int src1
}
__kernel
void
addWeighted_D5
(
__global
float
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__global
float
*src2,
F
beta,
int
src2_step,int
src2_offset,
F
gama,
__kernel
void
addWeighted_D5
(
__global
float
*src1,int
src1_step,int
src1_offset,
__global
float
*src2,
int
src2_step,int
src2_offset,
F
alpha,F
beta,
F
gama,
__global
float
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
{
...
...
@@ -286,9 +286,9 @@ __kernel void addWeighted_D5 (__global float *src1, F alpha,int src1_step,int sr
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
addWeighted_D6
(
__global
double
*src1,
F
alpha,
int
src1_step,int
src1_offset,
__global
double
*src2,
F
beta,
int
src2_step,int
src2_offset,
F
gama,
__kernel
void
addWeighted_D6
(
__global
double
*src1,
int
src1_step,int
src1_offset,
__global
double
*src2,
int
src2_step,int
src2_offset,
F
alpha,F
beta,
F
gama,
__global
double
*dst,
int
dst_step,int
dst_offset,
int
rows,
int
cols,int
dst_step1
)
{
...
...
modules/ocl/src/kernels/arithm_cartToPolar.cl
浏览文件 @
687d7639
...
...
@@ -49,6 +49,10 @@
#
define
CV_PI
3.1415926535897932384626433832795
#
ifndef
DBL_EPSILON
#
define
DBL_EPSILON
0x1.0p-52
#
endif
__kernel
void
arithm_cartToPolar_D5
(
__global
float
*src1,
int
src1_step,
int
src1_offset,
__global
float
*src2,
int
src2_step,
int
src2_offset,
__global
float
*dst1,
int
dst1_step,
int
dst1_offset,
//magnitude
...
...
modules/ocl/src/kernels/arithm_div.cl
浏览文件 @
687d7639
...
...
@@ -45,36 +45,45 @@
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
typedef
double4
F4
;
#
define
convert_F4
convert_double4
#
define
convert_F
convert_double
#
else
typedef
float
F
;
typedef
float4
F4
;
#
define
convert_F4
convert_float4
#
define
convert_F
convert_float
#
endif
uchar
round2_uchar
(
double
v
)
{
uchar
round2_uchar
(
F
v
)
{
uchar
v1
=
convert_uchar_sat
(
v
)
;
uchar
v2
=
convert_uchar_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
uchar
v1
=
convert_uchar_sat
(
round
(
v
)
)
;
//
uchar
v2
=
convert_uchar_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
ushort
round2_ushort
(
double
v
)
{
ushort
round2_ushort
(
F
v
)
{
ushort
v1
=
convert_ushort_sat
(
v
)
;
ushort
v2
=
convert_ushort_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
ushort
v1
=
convert_ushort_sat
(
round
(
v
)
)
;
//
ushort
v2
=
convert_ushort_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
short
round2_short
(
double
v
)
{
short
round2_short
(
F
v
)
{
short
v1
=
convert_short_sat
(
v
)
;
short
v2
=
convert_short_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
short
v1
=
convert_short_sat
(
round
(
v
)
)
;
//
short
v2
=
convert_short_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
int
round2_int
(
double
v
)
{
int
round2_int
(
F
v
)
{
int
v1
=
convert_int_sat
(
v
)
;
int
v2
=
convert_int_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
int
v1
=
convert_int_sat
(
round
(
v
)
)
;
//
int
v2
=
convert_int_sat
(
v+
(
v>=0
?
0.5
:
-0.5
))
;
return
(((
v-v1
)
==0.5
)
&&
(
v1%2==0
))
?
v1
:
v2
;
return
v1
;//
(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2;
}
///////////////////////////////////////////////////////////////////////////////////////
////////////////////////////divide///////////////////////////////////////////////////
...
...
@@ -83,7 +92,7 @@ int round2_int(double v){
__kernel
void
arithm_div_D0
(
__global
uchar
*src1,
int
src1_step,
int
src1_offset,
__global
uchar
*src2,
int
src2_step,
int
src2_offset,
__global
uchar
*dst,
int
dst_step,
int
dst_offset,
int
rows,
int
cols,
int
dst_step1,
double
scalar
)
int
rows,
int
cols,
int
dst_step1,
F
scalar
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
...
...
@@ -104,13 +113,13 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
uchar4
src2_data
=
vload4
(
0
,
src2
+
src2_index
)
;
uchar4
dst_data
=
*
((
__global
uchar4
*
)(
dst
+
dst_index
))
;
double4
tmp
=
convert_double
4
(
src1_data
)
*
scalar
;
F4
tmp
=
convert_F
4
(
src1_data
)
*
scalar
;
uchar4
tmp_data
;
tmp_data.x
=
((
tmp.x
==
0
)
|
| (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (
double
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (
double
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (
double
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (
double
)src2_data.w);
tmp_data.x
=
((
tmp.x
==
0
)
|
| (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (
F
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (
F
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (
F
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (
F
)src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
...
...
@@ -124,7 +133,7 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse
__kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *src2, int src2_step, int src2_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -145,13 +154,13 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
double4 tmp = convert_double
4(src1_data) * scalar;
F4 tmp = convert_F
4(src1_data) * scalar;
ushort4 tmp_data;
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_ushort(tmp.x / (
double
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_ushort(tmp.y / (
double
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_ushort(tmp.z / (
double
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_ushort(tmp.w / (
double
)src2_data.w);
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_ushort(tmp.x / (
F
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_ushort(tmp.y / (
F
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_ushort(tmp.z / (
F
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_ushort(tmp.w / (
F
)src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
...
...
@@ -164,7 +173,7 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs
__kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *src2, int src2_step, int src2_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -185,13 +194,13 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
double4 tmp = convert_double
4(src1_data) * scalar;
F4 tmp = convert_F
4(src1_data) * scalar;
short4 tmp_data;
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_short(tmp.x / (
double
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_short(tmp.y / (
double
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_short(tmp.z / (
double
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_short(tmp.w / (
double
)src2_data.w);
tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_short(tmp.x / (
F
)src2_data.x);
tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_short(tmp.y / (
F
)src2_data.y);
tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_short(tmp.z / (
F
)src2_data.z);
tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_short(tmp.w / (
F
)src2_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
...
...
@@ -206,7 +215,7 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse
__kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *src2, int src2_step, int src2_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -220,8 +229,8 @@ __kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int data2 = *((__global int *)((__global char *)src2 + src2_index));
double tmp = convert_double
(data1) * scalar;
int tmp_data = (tmp == 0 || data2 == 0) ? 0 : round2_int(tmp / (convert_
double
)(data2));
F tmp = convert_F
(data1) * scalar;
int tmp_data = (tmp == 0 || data2 == 0) ? 0 : round2_int(tmp / (convert_
F
)(data2));
*((__global int *)((__global char *)dst + dst_index)) =tmp_data;
}
...
...
@@ -230,7 +239,7 @@ __kernel void arithm_div_D4 (__global int *src1, int src1_step, int src1_offset,
__kernel void arithm_div_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -244,13 +253,14 @@ __kernel void arithm_div_D5 (__global float *src1, int src1_step, int src1_offse
float data1 = *((__global float *)((__global char *)src1 + src1_index));
float data2 = *((__global float *)((__global char *)src2 + src2_index));
double tmp = convert_double
(data1) * scalar;
float tmp_data = (tmp == 0 || data2 == 0) ? 0 : convert_float(tmp / (convert_
double
)(data2));
F tmp = convert_F
(data1) * scalar;
float tmp_data = (tmp == 0 || data2 == 0) ? 0 : convert_float(tmp / (convert_
F
)(data2));
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_div_D6 (__global double *src1, int src1_step, int src1_offset,
__global double *src2, int src2_step, int src2_offset,
__global double *dst, int dst_step, int dst_offset,
...
...
@@ -274,10 +284,11 @@ __kernel void arithm_div_D6 (__global double *src1, int src1_step, int src1_offs
*((__global double *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#endif
/************************************div with scalar************************************/
__kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -297,10 +308,10 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
uchar4 dst_data = *((__global uchar4 *)(dst + dst_index));
uchar4 tmp_data;
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_uchar(scalar / (
double
)src_data.w);
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_uchar(scalar / (
F
)src_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y;
...
...
@@ -313,7 +324,7 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset
__kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -333,10 +344,10 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data;
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_ushort(scalar / (
double
)src_data.w);
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_ushort(scalar / (
F
)src_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
...
...
@@ -348,7 +359,7 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse
}
__kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -368,10 +379,10 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 tmp_data;
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_short(scalar / (
double
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_short(scalar / (
double
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_short(scalar / (
double
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_short(scalar / (
double
)src_data.w);
tmp_data.x = ((scalar == 0) || (src_data.x == 0)) ? 0 : round2_short(scalar / (
F
)src_data.x);
tmp_data.y = ((scalar == 0) || (src_data.y == 0)) ? 0 : round2_short(scalar / (
F
)src_data.y);
tmp_data.z = ((scalar == 0) || (src_data.z == 0)) ? 0 : round2_short(scalar / (
F
)src_data.z);
tmp_data.w = ((scalar == 0) || (src_data.w == 0)) ? 0 : round2_short(scalar / (
F
)src_data.w);
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
...
...
@@ -385,7 +396,7 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset
__kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -397,7 +408,7 @@ __kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
int data = *((__global int *)((__global char *)src + src_index));
int tmp_data = (scalar == 0 || data == 0) ? 0 : round2_int(scalar / (convert_
double
)(data));
int tmp_data = (scalar == 0 || data == 0) ? 0 : round2_int(scalar / (convert_
F
)(data));
*((__global int *)((__global char *)dst + dst_index)) =tmp_data;
}
...
...
@@ -405,7 +416,7 @@ __kernel void arithm_s_div_D4 (__global int *src, int src_step, int src_offset,
__kernel void arithm_s_div_D5 (__global float *src, int src_step, int src_offset,
__global float *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1,
double
scalar)
int rows, int cols, int dst_step1,
F
scalar)
{
int x = get_global_id(0);
int y = get_global_id(1);
...
...
@@ -417,12 +428,13 @@ __kernel void arithm_s_div_D5 (__global float *src, int src_step, int src_offset
float data = *((__global float *)((__global char *)src + src_index));
float tmp_data = (scalar == 0 || data == 0) ? 0 : convert_float(scalar / (convert_
double
)(data));
float tmp_data = (scalar == 0 || data == 0) ? 0 : convert_float(scalar / (convert_
F
)(data));
*((__global float *)((__global char *)dst + dst_index)) = tmp_data;
}
}
#if defined (DOUBLE_SUPPORT)
__kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offset,
__global double *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1, double scalar)
...
...
@@ -442,5 +454,6 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse
*
((
__global
double
*
)((
__global
char
*
)
dst
+
dst_index
))
=
tmp_data
;
}
}
#
endif
modules/ocl/src/kernels/arithm_exp.cl
浏览文件 @
687d7639
...
...
@@ -70,6 +70,8 @@ __kernel void arithm_exp_D5(int rows, int cols, int srcStep, int dstStep, int sr
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_exp_D6
(
int
rows,
int
cols,
int
srcStep,
int
dstStep,
int
srcOffset,
int
dstOffset,
__global
double
*src,
__global
double
*dst
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -87,3 +89,5 @@ __kernel void arithm_exp_D6(int rows, int cols, int srcStep, int dstStep, int sr
//
dst[dstIdx]
=
exp
(
src[srcIdx]
)
;
}
}
#
endif
modules/ocl/src/kernels/arithm_log.cl
浏览文件 @
687d7639
...
...
@@ -73,7 +73,7 @@ __kernel void arithm_log_D5(int rows, int cols, int srcStep, int dstStep, int sr
}
}
#
if
defined
(
DOUBLE_SUPPORT
)
__kernel
void
arithm_log_D6
(
int
rows,
int
cols,
int
srcStep,
int
dstStep,
int
srcOffset,
int
dstOffset,
__global
double
*src,
__global
double
*dst
)
{
int
x
=
get_global_id
(
0
)
;
...
...
@@ -91,4 +91,4 @@ __kernel void arithm_log_D6(int rows, int cols, int srcStep, int dstStep, int sr
}
}
#
endif
modules/ocl/src/kernels/convertC3C4.cl
浏览文件 @
687d7639
...
...
@@ -6,7 +6,7 @@
//
Third
party
copyrights
are
property
of
their
respective
owners.
//
//
@Authors
//
Zero
Lin,
zero.lin@amd
.com
//
Niko
Li,
newlife20080214@gmail
.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
...
...
@@ -32,106 +32,107 @@
//
the
use
of
this
software,
even
if
advised
of
the
possibility
of
such
damage.
//
//
__kernel
void
convertC3C4
_D0
(
__global
const
char4
*
restrict
src,
__global
char
4
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
__kernel
void
convertC3C4
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE
4
*dst,
int
cols,
int
rows,
int
dstStep_in_piexl,int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
//int
pixel_end
=
mul24
(
cols
-1
,
rows
-1
)
;
int3
pixelid
=
(
int3
)(
mul24
(
id,3
)
,
mad24
(
id,3,1
)
,
mad24
(
id,3,2
))
;
pixelid
=
clamp
(
pixelid,0,pixel_end
)
;
GENTYPE4
pixel0,
pixel1,
pixel2,
outpix0,outpix1,outpix2,outpix3
;
pixel0
=
src[pixelid.x]
;
pixel1
=
src[pixelid.y]
;
pixel2
=
src[pixelid.z]
;
int
d
=
y
*
srcStep
+
x
*
3
;
char8
data
=
(
char8
)(
src[d>>2],
src[
(
d>>2
)
+
1]
)
;
char
temp[8]
=
{data.s0,
data.s1,
data.s2,
data.s3,
data.s4,
data.s5,
data.s6,
data.s7}
;
int
start
=
d
&
3
;
char4
ndata
=
(
char4
)(
temp[start],
temp[start
+
1],
temp[start
+
2],
0
)
;
if
(
y
<
rows
)
dst[y
*
dstStep
+
x]
=
ndata
;
}
__kernel
void
convertC3C4_D1
(
__global
const
short*
restrict
src,
__global
short4
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
outpix0
=
(
GENTYPE4
)(
pixel0.x,pixel0.y,pixel0.z,0
)
;
outpix1
=
(
GENTYPE4
)(
pixel0.w,pixel1.x,pixel1.y,0
)
;
outpix2
=
(
GENTYPE4
)(
pixel1.z,pixel1.w,pixel2.x,0
)
;
outpix3
=
(
GENTYPE4
)(
pixel2.y,pixel2.z,pixel2.w,0
)
;
int
d
=
(
y
*
srcStep
+
x
*
6
)
>>1
;
short4
data
=
*
(
__global
short4
*
)(
src
+
((
d>>1
)
<<1
))
;
short
temp[4]
=
{data.s0,
data.s1,
data.s2,
data.s3}
;
int
start
=
d
&
1
;
short4
ndata
=
(
short4
)(
temp[start],
temp[start
+
1],
temp[start
+
2],
0
)
;
if
(
y
<
rows
)
dst[y
*
dstStep
+
x]
=
ndata
;
int4
outy
=
(
id<<2
)
/cols
;
int4
outx
=
(
id<<2
)
%cols
;
outx.y++
;
outx.z+=2
;
outx.w+=3
;
outy
=
select
(
outy,outy+1,outx>=cols
)
;
outx
=
select
(
outx,outx-cols,outx>=cols
)
;
//outpix3
=
select
(
outpix3,
outpix0,
(
uchar4
)(
outy.w>=rows
))
;
//outpix2
=
select
(
outpix2,
outpix0,
(
uchar4
)(
outy.z>=rows
))
;
//outpix1
=
select
(
outpix1,
outpix0,
(
uchar4
)(
outy.y>=rows
))
;
//outx
=
select
(
outx,
(
int4
)
outx.x,outy>=rows
)
;
//outy
=
select
(
outy,
(
int4
)
outy.x,outy>=rows
)
;
int4
addr
=
mad24
(
outy,dstStep_in_piexl,outx
)
;
if
(
outx.w<cols
&&
outy.w<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.y]
=
outpix1
;
dst[addr.z]
=
outpix2
;
dst[addr.w]
=
outpix3
;
}
else
if
(
outx.z<cols
&&
outy.z<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.y]
=
outpix1
;
dst[addr.z]
=
outpix2
;
}
else
if
(
outx.y<cols
&&
outy.y<rows
)
{
dst[addr.x]
=
outpix0
;
dst[addr.y]
=
outpix1
;
}
else
if
(
outx.x<cols
&&
outy.x<rows
)
{
dst[addr.x]
=
outpix0
;
}
}
__kernel
void
convertC3C4_D2
(
__global
const
int
*
restrict
src,
__global
int4
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
int
d
=
(
y
*
srcStep
+
x
*
12
)
>>2
;
int4
data
=
*
(
__global
int4
*
)(
src
+
d
)
;
data.z
=
0
;
if
(
y
<
rows
)
dst[y
*
dstStep
+
x]
=
data
;
}
__kernel
void
convertC4C3_D2
(
__global
const
int4
*
restrict
src,
__global
int
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
int4
data
=
src[y
*
srcStep
+
x]
;
if
(
y
<
rows
)
{
int
d
=
y
*
dstStep
+
x
*
3
;
dst[d]
=
data.x
;
dst[d
+
1]
=
data.y
;
dst[d
+
2]
=
data.z
;
}
}
__kernel
void
convertC4C3
_D1
(
__global
const
short4
*
restrict
src,
__global
short
*dst,
int
cols,
int
rows,
int
srcStep
,
int
dstStep
)
__kernel
void
convertC4C3
(
__global
const
GENTYPE4
*
restrict
src,
__global
GENTYPE4
*dst,
int
cols,
int
rows,
int
srcStep
_in_pixel,int
pixel_end
)
{
int
id
=
get_global_id
(
0
)
;
int
id
=
get_global_id
(
0
)
<<2
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
int4
x4
=
(
int4
)(
x,x+1,x+2,x+3
)
;
int4
y4
=
select
((
int4
)
y,
(
int4
)(
y+1
)
,
x4>=
(
int4
)
cols
)
;
x4
=
select
(
x4,x4-
(
int4
)
cols,x4>=
(
int4
)
cols
)
;
int4
addr
=
mad24
(
y4,
(
int4
)
srcStep_in_pixel,x4
)
;
GENTYPE4
pixel0,pixel1,pixel2,pixel3,
outpixel1,
outpixel2
;
pixel0
=
src[addr.x]
;
pixel1
=
src[addr.y]
;
pixel2
=
src[addr.z]
;
pixel3
=
src[addr.w]
;
short4
data
=
src[y
*
srcStep
+
x]
;
if
(
y
<
rows
)
pixel0.w
=
pixel1.x
;
outpixel1.x
=
pixel1.y
;
outpixel1.y
=
pixel1.z
;
outpixel1.z
=
pixel2.x
;
outpixel1.w
=
pixel2.y
;
outpixel2.x
=
pixel2.z
;
outpixel2.y
=
pixel3.x
;
outpixel2.z
=
pixel3.y
;
outpixel2.w
=
pixel3.z
;
int4
outaddr
=
mul24
(
id>>2
,
3
)
;
outaddr.y++
;
outaddr.z+=2
;
//printf
(
"%d "
,
outaddr.z
)
;
if
(
outaddr.z
<=
pixel_end
)
{
int
d
=
y
*
dstStep
+
x
*
3
;
dst[d]
=
data.x
;
dst[d
+
1]
=
data.y
;
dst[d
+
2]
=
data.z
;
dst[outaddr.x]
=
pixel0
;
dst[outaddr.y]
=
outpixel1
;
dst[outaddr.z]
=
outpixel2
;
}
}
__kernel
void
convertC4C3_D0
(
__global
const
char4
*
restrict
src,
__global
char
*dst,
int
cols,
int
rows,
int
srcStep,
int
dstStep
)
{
int
id
=
get_global_id
(
0
)
;
int
y
=
id
/
cols
;
int
x
=
id
%
cols
;
char4
data
=
src[y
*
srcStep
+
x]
;
if
(
y
<
rows
)
else
if
(
outaddr.y
<=
pixel_end
)
{
int
d
=
y
*
dstStep
+
x
*
3
;
dst[d]
=
data.x
;
dst[d
+
1]
=
data.y
;
dst[d
+
2]
=
data.z
;
dst[outaddr.x]
=
pixel0
;
dst[outaddr.y]
=
outpixel1
;
}
else
if
(
outaddr.x
<=
pixel_end
)
{
dst[outaddr.x]
=
pixel0
;
}
}
modules/ocl/src/kernels/imgproc_resize.cl
浏览文件 @
687d7639
...
...
@@ -16,7 +16,7 @@
//
//
@Authors
//
Zhang
Ying,
zhangying913@gmail.com
//
//
Niko
Li,
newlife20080214@gmail.com
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
...
...
@@ -50,21 +50,11 @@
#
if
defined
DOUBLE_SUPPORT
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
typedef
double
F
;
#
define
F
double
#
else
typedef
float
F
;
#
define
F
float
#
endif
inline
uint4
getPoint_8uc4
(
__global
uchar4
*
data,
int
offset,
int
x,
int
y,
int
step
)
{
return
convert_uint4
(
data[
(
offset>>2
)
+
y
*
(
step>>2
)
+
x]
)
;
}
inline
float
getPoint_32fc1
(
__global
float
*
data,
int
offset,
int
x,
int
y,
int
step
)
{
return
data[
(
offset>>2
)
+
y
*
(
step>>2
)
+
x]
;
}
#
define
INTER_RESIZE_COEF_BITS
11
#
define
INTER_RESIZE_COEF_SCALE
(
1
<<
INTER_RESIZE_COEF_BITS
)
...
...
@@ -72,8 +62,8 @@ inline float getPoint_32fc1(__global float * data, int offset, int x, int y, int
#
define
CAST_SCALE
(
1.0f/
(
1<<CAST_BITS
))
#
define
INC
(
x,l
)
((
x+1
)
>=
(
l
)
?
(
x
)
:
((
x
)
+1
))
__kernel
void
resizeLN_C1_D0
(
__global
u
nsigned
char
*
dst,
__global
unsigned
char
const
*
restrict
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
__kernel
void
resizeLN_C1_D0
(
__global
u
char
*
dst,
__global
u
char
const
*
restrict
src,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
gx
=
get_global_id
(
0
)
;
...
...
@@ -81,7 +71,7 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
float4
sx,
u,
xf
;
int4
x,
DX
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
DX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
sx
=
(
convert_float4
(
DX
)
+
0.5f
)
*
ifx
-
0.5f
;
xf
=
floor
(
sx
)
;
...
...
@@ -119,10 +109,10 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
int4
val1,
val2,
val
;
int4
sdata1,
sdata2,
sdata3,
sdata4
;
int4
pos1
=
src_offset
+
y
*
src_step
+
x
;
int4
pos2
=
src_offset
+
y
*
src_step
+
x_
;
int4
pos3
=
src_offset
+
y_
*
src_step
+
x
;
int4
pos4
=
src_offset
+
y_
*
src_step
+
x_
;
int4
pos1
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
int4
pos2
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
int4
pos3
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
int4
pos4
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
sdata1.s0
=
src[pos1.s0]
;
sdata1.s1
=
src[pos1.s1]
;
...
...
@@ -144,20 +134,44 @@ __kernel void resizeLN_C1_D0(__global unsigned char * dst, __global unsigned cha
sdata4.s2
=
src[pos4.s2]
;
sdata4.s3
=
src[pos4.s3]
;
val1
=
U1
*
sdata1
+
U
*
sdata2
;
val2
=
U1
*
sdata3
+
U
*
sdata4
;
val
=
V1
*
val1
+
V
*
val2
;
val1
=
mul24
(
U1
,
sdata1
)
+
mul24
(
U
,
sdata2
)
;
val2
=
mul24
(
U1
,
sdata3
)
+
mul24
(
U
,
sdata4
)
;
val
=
mul24
(
V1
,
val1
)
+
mul24
(
V
,
val2
)
;
__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dst_offset
+
dy
*
dst_step
+
gx
)
;
uchar4
dVal
=
*d
;
int4
con
=
(
DX
>=
0
&&
DX
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
//
uchar4
dVal
=
*d
;
//
int4
con
=
(
DX
>=
0
&&
DX
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
val
=
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>
CAST_BITS
)
;
*d
=
convert_uchar4
(
con
!=
0
)
?
convert_uchar4_sat
(
val
)
:
dVal
;
//*d
=
convert_uchar4
(
con
!=
0
)
?
convert_uchar4_sat
(
val
)
:
dVal
;
pos4
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
pos4.y++
;
pos4.z+=2
;
uchar4
uval
=
convert_uchar4_sat
(
val
)
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos4.x
)
=uval
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos4.x]=uval.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos4.y]=uval.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos4.z]=uval.z
;
}
}
}
__kernel
void
resizeLN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -182,18 +196,25 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
uint4
val
=
U1*
V1
*
getPoint_8uc4
(
src,src_offset,x,y,src_step
)
+
U1*
V
*
getPoint_8uc4
(
src,src_offset,x,y_,src_step
)
+
U
*
V1
*
getPoint_8uc4
(
src,src_offset,x_,y,src_step
)
+
U
*
V
*
getPoint_8uc4
(
src,src_offset,x_,y_,src_step
)
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
int4
data0
=
convert_int4
(
src[srcpos.x]
)
;
int4
data1
=
convert_int4
(
src[srcpos.y]
)
;
int4
data2
=
convert_int4
(
src[srcpos.z]
)
;
int4
data3
=
convert_int4
(
src[srcpos.w]
)
;
int4
val
=
mul24
(
mul24
(
U1,
V1
)
,
data0
)
+
mul24
(
mul24
(
U,
V1
)
,
data1
)
+mul24
(
mul24
(
U1,
V
)
,
data2
)
+mul24
(
mul24
(
U,
V
)
,
data3
)
;
int
dstpos
=
mad24
(
dy,
dststep_in_pixel,
dx+dstoffset_in_pixel
)
;
uchar4
uval
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx]
=
convert_uchar4
((
val
+
(
1<<
(
CAST_BITS-1
)))
>>CAST_BITS
)
;
dst[
dstpos]
=
uval
;
}
__kernel
void
resizeLN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -210,19 +231,29 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
float
val1
=
(
1.0f-u
)
*
getPoint_32fc1
(
src,src_offset,x,y,src_step
)
+
u
*
getPoint_32fc1
(
src,src_offset,x_,y,src_step
)
;
float
val2
=
(
1.0f-u
)
*
getPoint_32fc1
(
src,src_offset,x,y_,src_step
)
+
u
*
getPoint_32fc1
(
src,src_offset,x_,y_,src_step
)
;
float
val
=
(
1.0f-v
)
*
val1
+
v
*
val2
;
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
float
data0
=
src[srcpos.x]
;
float
data1
=
src[srcpos.y]
;
float
data2
=
src[srcpos.z]
;
float
data3
=
src[srcpos.w]
;
float
val1
=
u1
*
data0
+
u
*
data1
;
float
val2
=
u1
*
data2
+
u
*
data3
;
float
val
=
v1
*
val1
+
v
*
val2
;
int
dstpos
=
mad24
(
dy,
dststep_in_pixel,
dx+dstoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx
]
=
val
;
dst[
dstpos
]
=
val
;
}
__kernel
void
resizeLN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
float
ifx,
float
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -239,31 +270,35 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
int
y_
=
INC
(
y,src_rows
)
;
int
x_
=
INC
(
x,src_cols
)
;
float
u1
=
1.f-u
;
float
v1
=
1.f-v
;
int4
srcpos
;
srcpos.x
=
mad24
(
y,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.y
=
mad24
(
y,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
srcpos.z
=
mad24
(
y_,
srcstep_in_pixel,
x+srcoffset_in_pixel
)
;
srcpos.w
=
mad24
(
y_,
srcstep_in_pixel,
x_+srcoffset_in_pixel
)
;
float4
s_data1,
s_data2,
s_data3,
s_data4
;
src_offset
=
(
src_offset
>>
4
)
;
src_step
=
(
src_step
>>
4
)
;
s_data1
=
src[src_offset
+
y*src_step
+
x]
;
s_data2
=
src[src_offset
+
y*src_step
+
x_]
;
s_data3
=
src[src_offset
+
y_*src_step
+
x]
;
s_data4
=
src[src_offset
+
y_*src_step
+
x_]
;
s_data1
=
(
1.0f-u
)
*
s_data1
+
u
*
s_data2
;
s_data2
=
(
1.0f-u
)
*
s_data3
+
u
*
s_data4
;
s_data3
=
(
1.0f-v
)
*
s_data1
+
v
*
s_data2
;
s_data1
=
src[srcpos.x]
;
s_data2
=
src[srcpos.y]
;
s_data3
=
src[srcpos.z]
;
s_data4
=
src[srcpos.w]
;
float4
val
=
u1
*
v1
*
s_data1
+
u
*
v1
*
s_data2
+u1
*
v
*s_data3
+
u
*
v
*s_data4
;
int
dstpos
=
mad24
(
dy,
dststep_in_pixel,
dx+dstoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[
(
dst_offset>>4
)
+
dy
*
(
dst_step>>4
)
+
dx]
=
s_data3
;
dst[
dstpos]
=
val
;
}
__kernel
void
resizeNN_C1_D0
(
__global
uchar
*
dst,
__global
uchar
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
gx
=
get_global_id
(
0
)
;
int
dy
=
get_global_id
(
1
)
;
gx
=
(
gx<<2
)
-
(
dst
_offset
&3
)
;
int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
gx
=
(
gx<<2
)
-
(
dst
offset_in_pixel
&3
)
;
//
int4
GX
=
(
int4
)(
gx,
gx+1,
gx+2,
gx+3
)
;
int4
sx
;
int
sy
;
...
...
@@ -279,22 +314,42 @@ __kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
sy
=
min
((
int
)
floor
(
s5
)
,
src_rows-1
)
;
uchar4
val
;
int4
pos
=
src_offset
+
sy
*
src_step
+
sx
;
int4
pos
=
mad24
(
sy,
srcstep_in_pixel,
sx+srcoffset_in_pixel
)
;
val.s0
=
src[pos.s0]
;
val.s1
=
src[pos.s1]
;
val.s2
=
src[pos.s2]
;
val.s3
=
src[pos.s3]
;
__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dst_offset
+
dy
*
dst_step
+
gx
)
;
uchar4
dVal
=
*d
;
int4
con
=
(
GX
>=
0
&&
GX
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
val
=
convert_uchar4
(
con
!=
0
)
?
val
:
dVal
;
*d
=
val
;
//__global
uchar4*
d
=
(
__global
uchar4*
)(
dst
+
dstoffset_in_pixel
+
dy
*
dststep_in_pixel
+
gx
)
;
//uchar4
dVal
=
*d
;
pos
=
mad24
(
dy,
dststep_in_pixel,
gx+dstoffset_in_pixel
)
;
pos.y++
;
pos.z+=2
;
int
con
=
(
gx
>=
0
&&
gx+3
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
;
if
(
con
)
{
*
(
__global
uchar4*
)(
dst
+
pos.x
)
=val
;
}
else
{
if
(
gx
>=
0
&&
gx
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.x]=val.x
;
}
if
(
gx+1
>=
0
&&
gx+1
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.y]=val.y
;
}
if
(
gx+2
>=
0
&&
gx+2
<
dst_cols
&&
dy
>=
0
&&
dy
<
dst_rows
)
{
dst[pos.z]=val.z
;
}
}
}
__kernel
void
resizeNN_C4_D0
(
__global
uchar4
*
dst,
__global
uchar4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -304,8 +359,8 @@ __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx
;
int
spos
=
(
src_offset>>2
)
+
sy
*
(
src_step>>2
)
+
sx
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
...
...
@@ -313,7 +368,7 @@ __kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
}
__kernel
void
resizeNN_C1_D5
(
__global
float
*
dst,
__global
float
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -323,16 +378,16 @@ __kernel void resizeNN_C1_D5(__global float * dst, __global float * src,
F
s2
=
dy*ify
;
int
sx
=
fmin
((
float
)
floor
(
s1
)
,
(
float
)
src_cols-1
)
;
int
sy
=
fmin
((
float
)
floor
(
s2
)
,
(
float
)
src_rows-1
)
;
int
dpos
=
(
dst_offset>>2
)
+
dy
*
(
dst_step>>2
)
+
dx
;
int
spos
=
(
src_offset>>2
)
+
sy
*
(
src_step>>2
)
+
sx
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
}
__kernel
void
resizeNN_C4_D5
(
__global
float4
*
dst,
__global
float4
*
src,
int
dst
_offset,
int
src_offset,int
dst_step,
int
src_step
,
int
dst
offset_in_pixel,
int
srcoffset_in_pixel,int
dststep_in_pixel,
int
srcstep_in_pixel
,
int
src_cols,
int
src_rows,
int
dst_cols,
int
dst_rows,
F
ifx,
F
ify
)
{
int
dx
=
get_global_id
(
0
)
;
...
...
@@ -343,8 +398,8 @@ __kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
int
s_row
=
floor
(
s2
)
;
int
sx
=
min
(
s_col,
src_cols-1
)
;
int
sy
=
min
(
s_row,
src_rows-1
)
;
int
dpos
=
(
dst_offset>>4
)
+
dy
*
(
dst_step>>4
)
+
dx
;
int
spos
=
(
src_offset>>4
)
+
sy
*
(
src_step>>4
)
+
sx
;
int
dpos
=
mad24
(
dy,
dststep_in_pixel,
dx
+
dstoffset_in_pixel
)
;
int
spos
=
mad24
(
sy,
srcstep_in_pixel,
sx
+
srcoffset_in_pixel
)
;
if
(
dx>=0
&&
dx<dst_cols
&&
dy>=0
&&
dy<dst_rows
)
dst[dpos]
=
src[spos]
;
...
...
modules/ocl/src/kernels/operator_setTo.cl
浏览文件 @
687d7639
...
...
@@ -34,13 +34,8 @@
//
//
/*
#
if
defined
(
DOUBLE_SUPPORT
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
*/
__kernel
void
set_to_without_mask_C1_D0
(
float4
scalar,__global
uchar
*
dstMat,
__kernel
void
set_to_without_mask_C1_D0
(
uchar
scalar,__global
uchar
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
<<2
;
...
...
@@ -49,7 +44,8 @@ __kernel void set_to_without_mask_C1_D0(float4 scalar,__global uchar * dstMat,
int
addr_end
=
mad24
(
y,dstStep_in_pixel,cols+offset_in_pixel
)
;
int
idx
=
mad24
(
y,dstStep_in_pixel,
(
int
)(
x+
offset_in_pixel
&
(
int
)
0xfffffffc
))
;
uchar4
out
;
out.x
=
out.y
=
out.z
=
out.w
=
convert_uchar_sat
(
scalar.x
)
;
out.x
=
out.y
=
out.z
=
out.w
=
scalar
;
if
(
(
idx>=addr_start
)
&
(
idx+3
<
addr_end
)
&
(
y
<
rows
))
{
*
(
__global
uchar4*
)(
dstMat+idx
)
=
out
;
...
...
@@ -65,7 +61,7 @@ __kernel void set_to_without_mask_C1_D0(float4 scalar,__global uchar * dstMat,
}
}
__kernel
void
set_to_without_mask
_C4_D0
(
float4
scalar,__global
uchar4
*
dstMat,
__kernel
void
set_to_without_mask
(
GENTYPE
scalar,__global
GENTYPE
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
...
...
@@ -73,52 +69,6 @@ __kernel void set_to_without_mask_C4_D0(float4 scalar,__global uchar4 * dstMat,
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
convert_uchar4_sat
(
scalar
)
;
dstMat[idx]
=
scalar
;
}
}
__kernel
void
set_to_without_mask_C1_D4
(
float4
scalar,__global
int
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,
dstStep_in_pixel,
x+offset_in_pixel
)
;
dstMat[idx]
=
convert_int_sat
(
scalar.x
)
;
}
}
__kernel
void
set_to_without_mask_C4_D4
(
float4
scalar,__global
int4
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
convert_int4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_without_mask_C1_D5
(
float4
scalar,__global
float
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
scalar.x
;
}
}
__kernel
void
set_to_without_mask_C4_D5
(
float4
scalar,__global
float4
*
dstMat,
int
cols,int
rows,int
dstStep_in_pixel,int
offset_in_pixel
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
if
(
(
x
<
cols
)
&
(
y
<
rows
))
{
int
idx
=
mad24
(
y,dstStep_in_pixel,x+
offset_in_pixel
)
;
dstMat[idx]
=
scalar
;
}
}
modules/ocl/src/kernels/operator_setToM.cl
浏览文件 @
687d7639
...
...
@@ -35,12 +35,6 @@
//
/*#if
defined
(
__ATI__
)
#
pragma
OPENCL
EXTENSION
cl_amd_fp64:enable
#
elif
defined
(
__NVIDIA__
)
#
pragma
OPENCL
EXTENSION
cl_khr_fp64:enable
#
endif
*/
/*
__kernel
void
set_to_with_mask_C1_D0
(
float4
scalar,
...
...
@@ -67,7 +61,7 @@ __kernel void set_to_with_mask_C1_D0(
*/
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
__kernel
void
set_to_with_mask_C1_D0
(
float4
scalar,
uchar
scalar,
__global
uchar*
dstMat,
int
cols,
int
rows,
...
...
@@ -85,7 +79,7 @@ __kernel void set_to_with_mask_C1_D0(
int
mask_addr_start
=
mad24
(
y,maskStep,maskoffset
)
;
int
mask_addr_end
=
mad24
(
y,maskStep,cols+maskoffset
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
&
(
int
)
0xfffffffc
)
;
uchar
out
=
convert_uchar_sat
(
scalar.x
)
;
int
off_mask
=
(
maskoffset
&
3
)
-
(
dstoffset_in_pixel
&
3
)
+3
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
)
...
...
@@ -107,104 +101,16 @@ __kernel void set_to_with_mask_C1_D0(
temp_mask2.z
=
(
maskidx+6
>=mask_addr_start
)
&
(
maskidx+6
<
mask_addr_end
)
?
temp_mask2.z
:
0
;
temp_mask2.w
=
(
maskidx+7
>=mask_addr_start
)
&
(
maskidx+7
<
mask_addr_end
)
?
temp_mask2.w
:
0
;
uchar
trans_mask[10]
=
{temp_mask1.y,temp_mask1.z,temp_mask1.w,temp_mask.x,temp_mask.y,temp_mask.z,temp_mask.w,temp_mask2.x,temp_mask2.y,temp_mask2.z}
;
temp_dst.x
=
(
dstidx>=dst_addr_start
)
&
(
dstidx<dst_addr_end
)
&
trans_mask[off_mask]
?
out
:
temp_dst.x
;
temp_dst.y
=
(
dstidx+1>=dst_addr_start
)
&
(
dstidx+1<dst_addr_end
)
&
trans_mask[off_mask+1]
?
out
:
temp_dst.y
;
temp_dst.z
=
(
dstidx+2>=dst_addr_start
)
&
(
dstidx+2<dst_addr_end
)
&
trans_mask[off_mask+2]
?
out
:
temp_dst.z
;
temp_dst.w
=
(
dstidx+3>=dst_addr_start
)
&
(
dstidx+3<dst_addr_end
)
&
trans_mask[off_mask+3]
?
out
:
temp_dst.w
;
temp_dst.x
=
(
dstidx>=dst_addr_start
)
&
(
dstidx<dst_addr_end
)
&
trans_mask[off_mask]
?
scalar
:
temp_dst.x
;
temp_dst.y
=
(
dstidx+1>=dst_addr_start
)
&
(
dstidx+1<dst_addr_end
)
&
trans_mask[off_mask+1]
?
scalar
:
temp_dst.y
;
temp_dst.z
=
(
dstidx+2>=dst_addr_start
)
&
(
dstidx+2<dst_addr_end
)
&
trans_mask[off_mask+2]
?
scalar
:
temp_dst.z
;
temp_dst.w
=
(
dstidx+3>=dst_addr_start
)
&
(
dstidx+3<dst_addr_end
)
&
trans_mask[off_mask+3]
?
scalar
:
temp_dst.w
;
*
(
__global
uchar4*
)(
dstMat+dstidx
)
=
temp_dst
;
}
}
__kernel
void
set_to_with_mask_C4_D0
(
float4
scalar,
__global
uchar4
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
convert_uchar4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_with_mask_C1_D4
(
float4
scalar,
__global
int
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
convert_int_sat
(
scalar.x
)
;
}
}
__kernel
void
set_to_with_mask_C4_D4
(
float4
scalar,
__global
int4
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
convert_int4_sat
(
scalar
)
;
}
}
__kernel
void
set_to_with_mask_C1_D5
(
float4
scalar,
__global
float
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
int
dstoffset_in_pixel,
__global
const
uchar
*
restrict
maskMat,
int
maskStep,
int
maskoffset
)
{
int
x=get_global_id
(
0
)
;
int
y=get_global_id
(
1
)
;
int
dstidx
=
mad24
(
y,dstStep_in_pixel,x+
dstoffset_in_pixel
)
;
int
maskidx
=
mad24
(
y,maskStep,x+
maskoffset
)
;
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
scalar.x
;
}
}
__kernel
void
set_to_with_mask_C4_D5
(
float4
scalar,
__global
float4
*
dstMat,
__kernel
void
set_to_with_mask
(
GENTYPE
scalar,
__global
GENTYPE
*
dstMat,
int
cols,
int
rows,
int
dstStep_in_pixel,
...
...
@@ -220,7 +126,7 @@ __kernel void set_to_with_mask_C4_D5(
uchar
mask
=
maskMat[maskidx]
;
if
(
(
x
<
cols
)
&
(
y
<
rows
)
&
mask
)
{
dstMat[dstidx]
=
scalar
;
dstMat[dstidx]
=
scalar
;
}
}
...
...
modules/ocl/src/matrix_operations.cpp
浏览文件 @
687d7639
此差异已折叠。
点击以展开。
modules/ocl/src/precomp.hpp
浏览文件 @
687d7639
...
...
@@ -97,7 +97,7 @@ namespace cv
size_t
widthInBytes
,
size_t
height
);
void
openCLMemcpy2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
);
size_t
width
,
size_t
height
,
enum
openCLMemcpyKind
kind
,
int
channels
=-
1
);
void
openCLCopyBuffer2D
(
Context
*
clCxt
,
void
*
dst
,
size_t
dpitch
,
int
dst_offset
,
const
void
*
src
,
size_t
spitch
,
size_t
width
,
size_t
height
,
int
src_offset
,
enum
openCLMemcpyKind
kind
);
...
...
@@ -126,8 +126,8 @@ namespace cv
cl_mem
openCLMalloc
(
cl_context
clCxt
,
size_t
size
,
cl_mem_flags
flags
,
void
*
host_ptr
);
void
openCLMemcpy2DWithNoPadding
(
cl_command_queue
command_queue
,
cl_mem
buffer
,
size_t
size
,
size_t
offset
,
void
*
ptr
,
enum
openCLMemcpyKind
kind
,
cl_bool
blocking_write
);
//
void openCLMemcpy2DWithNoPadding(cl_command_queue command_queue, cl_mem buffer, size_t size, size_t offset, void *ptr,
//
enum openCLMemcpyKind kind, cl_bool blocking_write);
int
savetofile
(
const
Context
*
clcxt
,
cl_program
&
program
,
const
char
*
fileName
);
struct
Context
::
Impl
{
...
...
modules/ocl/test/test_imgproc.cpp
浏览文件 @
687d7639
...
...
@@ -958,7 +958,7 @@ TEST_P(Remap, Mat)
if
((
interpolation
==
1
&&
map1Type
==
CV_16SC2
)
||
(
interpolation
==
1
&&
map1Type
==
CV_16SC1
&&
map2Type
==
CV_16SC1
))
{
cout
<<
"LINEAR don't support the map1Type and map2Type"
<<
endl
;
return
;
return
;
}
int
bordertype
[]
=
{
cv
::
BORDER_CONSTANT
,
cv
::
BORDER_REPLICATE
/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/
};
const
char
*
borderstr
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/
};
...
...
modules/ocl/test/test_matrix_operation.cpp
浏览文件 @
687d7639
...
...
@@ -396,6 +396,101 @@ TEST_P(SetTo, With_mask)
}
}
//convertC3C4
PARAM_TEST_CASE
(
convertC3C4
,
MatType
,
cv
::
Size
)
{
int
type
;
cv
::
Size
ksize
;
//src mat
cv
::
Mat
mat1
;
cv
::
Mat
dst
;
// set up roi
int
roicols
;
int
roirows
;
int
src1x
;
int
src1y
;
int
dstx
;
int
dsty
;
//src mat with roi
cv
::
Mat
mat1_roi
;
cv
::
Mat
dst_roi
;
std
::
vector
<
cv
::
ocl
::
Info
>
oclinfo
;
//ocl dst mat for testing
cv
::
ocl
::
oclMat
gdst_whole
;
//ocl mat with roi
cv
::
ocl
::
oclMat
gmat1
;
cv
::
ocl
::
oclMat
gdst
;
virtual
void
SetUp
()
{
type
=
GET_PARAM
(
0
);
ksize
=
GET_PARAM
(
1
);
//dst = randomMat(rng, size, type, 5, 16, false);
int
devnums
=
getDevice
(
oclinfo
);
CV_Assert
(
devnums
>
0
);
//if you want to use undefault device, set it here
//setDevice(oclinfo[1]);
}
void
random_roi
()
{
#ifdef RANDOMROI
//randomize ROI
cv
::
RNG
&
rng
=
TS
::
ptr
()
->
get_rng
();
roicols
=
rng
.
uniform
(
2
,
mat1
.
cols
);
roirows
=
rng
.
uniform
(
2
,
mat1
.
rows
);
src1x
=
rng
.
uniform
(
0
,
mat1
.
cols
-
roicols
);
src1y
=
rng
.
uniform
(
0
,
mat1
.
rows
-
roirows
);
dstx
=
rng
.
uniform
(
0
,
dst
.
cols
-
roicols
);
dsty
=
rng
.
uniform
(
0
,
dst
.
rows
-
roirows
);
#else
roicols
=
mat1
.
cols
;
roirows
=
mat1
.
rows
;
src1x
=
0
;
src1y
=
0
;
dstx
=
0
;
dsty
=
0
;
#endif
mat1_roi
=
mat1
(
Rect
(
src1x
,
src1y
,
roicols
,
roirows
));
dst_roi
=
dst
(
Rect
(
dstx
,
dsty
,
roicols
,
roirows
));
gdst_whole
=
dst
;
gdst
=
gdst_whole
(
Rect
(
dstx
,
dsty
,
roicols
,
roirows
));
gmat1
=
mat1_roi
;
}
};
TEST_P
(
convertC3C4
,
Accuracy
)
{
cv
::
RNG
&
rng
=
TS
::
ptr
()
->
get_rng
();
for
(
int
j
=
0
;
j
<
LOOP_TIMES
;
j
++
)
{
//random_roi();
int
width
=
rng
.
uniform
(
2
,
MWIDTH
);
int
height
=
rng
.
uniform
(
2
,
MHEIGHT
);
cv
::
Size
size
(
width
,
height
);
mat1
=
randomMat
(
rng
,
size
,
type
,
0
,
40
,
false
);
gmat1
=
mat1
;
cv
::
Mat
cpu_dst
;
gmat1
.
download
(
cpu_dst
);
char
sss
[
1024
];
sprintf
(
sss
,
"cols=%d,rows=%d"
,
mat1
.
cols
,
mat1
.
rows
);
EXPECT_MAT_NEAR
(
mat1
,
cpu_dst
,
0.0
,
sss
);
}
}
INSTANTIATE_TEST_CASE_P
(
MatrixOperation
,
ConvertTo
,
Combine
(
Values
(
CV_8UC1
,
CV_8UC4
,
CV_32SC1
,
CV_32SC4
,
CV_32FC1
,
CV_32FC4
),
...
...
@@ -408,5 +503,8 @@ INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine(
INSTANTIATE_TEST_CASE_P
(
MatrixOperation
,
SetTo
,
Combine
(
Values
(
CV_8UC1
,
CV_8UC4
,
CV_32SC1
,
CV_32SC4
,
CV_32FC1
,
CV_32FC4
),
Values
(
false
)));
// Values(false) is the reserved parameter
INSTANTIATE_TEST_CASE_P
(
MatrixOperation
,
convertC3C4
,
Combine
(
Values
(
CV_8UC3
,
CV_32SC3
,
CV_32FC3
),
Values
(
cv
::
Size
())));
#endif
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录