Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
2e28ff78
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
2e28ff78
编写于
2月 12, 2019
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #13780 from alalek:core_dispatch_convertTo
上级
5f578425
d32d576d
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
568 addition
and
384 deletion
+568
-384
modules/core/CMakeLists.txt
modules/core/CMakeLists.txt
+2
-0
modules/core/src/convert.dispatch.cpp
modules/core/src/convert.dispatch.cpp
+272
-0
modules/core/src/convert.simd.hpp
modules/core/src/convert.simd.hpp
+24
-146
modules/core/src/convert_scale.dispatch.cpp
modules/core/src/convert_scale.dispatch.cpp
+259
-0
modules/core/src/convert_scale.simd.hpp
modules/core/src/convert_scale.simd.hpp
+11
-238
未找到文件。
modules/core/CMakeLists.txt
浏览文件 @
2e28ff78
...
...
@@ -3,6 +3,8 @@ set(the_description "The Core Functionality")
ocv_add_dispatched_file
(
mathfuncs_core SSE2 AVX AVX2
)
ocv_add_dispatched_file
(
stat SSE4_2 AVX2
)
ocv_add_dispatched_file
(
arithm SSE2 SSE4_1 AVX2 VSX3
)
ocv_add_dispatched_file
(
convert SSE2 AVX2
)
ocv_add_dispatched_file
(
convert_scale SSE2 AVX2
)
# dispatching for accuracy tests
ocv_add_dispatched_file_force_all
(
test_intrin128 TEST SSE2 SSE3 SSSE3 SSE4_1 SSE4_2 AVX FP16 AVX2
)
...
...
modules/core/src/convert.dispatch.cpp
0 → 100644
浏览文件 @
2e28ff78
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.simd.hpp"
#include "convert.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
/* [TODO] Recover IPP calls
#if defined(HAVE_IPP)
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height)) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
CV_IPP_RUN(src && dst, CV_INSTRUMENT_FUN_IPP(ippiConvert_##ippFavor, src, (int)sstep, dst, (int)dstep, ippiSize(size.width, size.height), ippRndFinancial, 0) >= 0) \
cvt_(src, sstep, dst, dstep, size); \
}
#else
#define DEF_CVT_FUNC_F(suffix, stype, dtype, ippFavor) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CVT_FUNC_F2 DEF_CVT_FUNC_F
#endif
#define DEF_CVT_FUNC(suffix, stype, dtype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
dtype* dst, size_t dstep, Size size, double*) \
{ \
cvt_(src, sstep, dst, dstep, size); \
}
#define DEF_CPY_FUNC(suffix, stype) \
static void cvt##suffix( const stype* src, size_t sstep, const uchar*, size_t, \
stype* dst, size_t dstep, Size size, double*) \
{ \
cpy_(src, sstep, dst, dstep, size); \
}
DEF_CPY_FUNC(8u, uchar)
DEF_CVT_FUNC_F(8s8u, schar, uchar, 8s8u_C1Rs)
DEF_CVT_FUNC_F(16u8u, ushort, uchar, 16u8u_C1R)
DEF_CVT_FUNC_F(16s8u, short, uchar, 16s8u_C1R)
DEF_CVT_FUNC_F(32s8u, int, uchar, 32s8u_C1R)
DEF_CVT_FUNC_F2(32f8u, float, uchar, 32f8u_C1RSfs)
DEF_CVT_FUNC(64f8u, double, uchar)
DEF_CVT_FUNC_F2(8u8s, uchar, schar, 8u8s_C1RSfs)
DEF_CVT_FUNC_F2(16u8s, ushort, schar, 16u8s_C1RSfs)
DEF_CVT_FUNC_F2(16s8s, short, schar, 16s8s_C1RSfs)
DEF_CVT_FUNC_F(32s8s, int, schar, 32s8s_C1R)
DEF_CVT_FUNC_F2(32f8s, float, schar, 32f8s_C1RSfs)
DEF_CVT_FUNC(64f8s, double, schar)
DEF_CVT_FUNC_F(8u16u, uchar, ushort, 8u16u_C1R)
DEF_CVT_FUNC_F(8s16u, schar, ushort, 8s16u_C1Rs)
DEF_CPY_FUNC(16u, ushort)
DEF_CVT_FUNC_F(16s16u, short, ushort, 16s16u_C1Rs)
DEF_CVT_FUNC_F2(32s16u, int, ushort, 32s16u_C1RSfs)
DEF_CVT_FUNC_F2(32f16u, float, ushort, 32f16u_C1RSfs)
DEF_CVT_FUNC(64f16u, double, ushort)
DEF_CVT_FUNC_F(8u16s, uchar, short, 8u16s_C1R)
DEF_CVT_FUNC_F(8s16s, schar, short, 8s16s_C1R)
DEF_CVT_FUNC_F2(16u16s, ushort, short, 16u16s_C1RSfs)
DEF_CVT_FUNC_F2(32s16s, int, short, 32s16s_C1RSfs)
DEF_CVT_FUNC(32f16s, float, short)
DEF_CVT_FUNC(64f16s, double, short)
DEF_CVT_FUNC_F(8u32s, uchar, int, 8u32s_C1R)
DEF_CVT_FUNC_F(8s32s, schar, int, 8s32s_C1R)
DEF_CVT_FUNC_F(16u32s, ushort, int, 16u32s_C1R)
DEF_CVT_FUNC_F(16s32s, short, int, 16s32s_C1R)
DEF_CPY_FUNC(32s, int)
DEF_CVT_FUNC_F2(32f32s, float, int, 32f32s_C1RSfs)
DEF_CVT_FUNC(64f32s, double, int)
DEF_CVT_FUNC_F(8u32f, uchar, float, 8u32f_C1R)
DEF_CVT_FUNC_F(8s32f, schar, float, 8s32f_C1R)
DEF_CVT_FUNC_F(16u32f, ushort, float, 16u32f_C1R)
DEF_CVT_FUNC_F(16s32f, short, float, 16s32f_C1R)
DEF_CVT_FUNC_F(32s32f, int, float, 32s32f_C1R)
DEF_CVT_FUNC(64f32f, double, float)
DEF_CVT_FUNC(8u64f, uchar, double)
DEF_CVT_FUNC(8s64f, schar, double)
DEF_CVT_FUNC(16u64f, ushort, double)
DEF_CVT_FUNC(16s64f, short, double)
DEF_CVT_FUNC(32s64f, int, double)
DEF_CVT_FUNC(32f64f, float, double)
DEF_CPY_FUNC(64s, int64)
*/
BinaryFunc
getConvertFunc
(
int
sdepth
,
int
ddepth
)
{
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getConvertFunc
,
(
sdepth
,
ddepth
),
CV_CPU_DISPATCH_MODES_ALL
);
}
static
BinaryFunc
get_cvt32f16f
()
{
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
get_cvt32f16f
,
(),
CV_CPU_DISPATCH_MODES_ALL
);
}
static
BinaryFunc
get_cvt16f32f
()
{
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
get_cvt16f32f
,
(),
CV_CPU_DISPATCH_MODES_ALL
);
}
#ifdef HAVE_OPENCL
static
bool
ocl_convertFp16
(
InputArray
_src
,
OutputArray
_dst
,
int
sdepth
,
int
ddepth
)
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
_dst
.
createSameSize
(
_src
,
CV_MAKETYPE
(
ddepth
,
cn
)
);
int
kercn
=
1
;
int
rowsPerWI
=
1
;
String
build_opt
=
format
(
"-D HALF_SUPPORT -D srcT=%s -D dstT=%s -D rowsPerWI=%d%s"
,
sdepth
==
CV_32F
?
"float"
:
"half"
,
sdepth
==
CV_32F
?
"half"
:
"float"
,
rowsPerWI
,
sdepth
==
CV_32F
?
" -D FLOAT_TO_HALF "
:
""
);
ocl
::
Kernel
k
(
"convertFp16"
,
ocl
::
core
::
halfconvert_oclsrc
,
build_opt
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
,
kercn
);
k
.
args
(
srcarg
,
dstarg
);
size_t
globalsize
[
2
]
=
{
(
size_t
)
src
.
cols
*
cn
/
kercn
,
((
size_t
)
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
#endif
void
Mat
::
convertTo
(
OutputArray
_dst
,
int
_type
,
double
alpha
,
double
beta
)
const
{
CV_INSTRUMENT_REGION
();
if
(
empty
()
)
{
_dst
.
release
();
return
;
}
bool
noScale
=
fabs
(
alpha
-
1
)
<
DBL_EPSILON
&&
fabs
(
beta
)
<
DBL_EPSILON
;
if
(
_type
<
0
)
_type
=
_dst
.
fixedType
()
?
_dst
.
type
()
:
type
();
else
_type
=
CV_MAKETYPE
(
CV_MAT_DEPTH
(
_type
),
channels
());
int
sdepth
=
depth
(),
ddepth
=
CV_MAT_DEPTH
(
_type
);
if
(
sdepth
==
ddepth
&&
noScale
)
{
copyTo
(
_dst
);
return
;
}
Mat
src
=
*
this
;
if
(
dims
<=
2
)
_dst
.
create
(
size
(),
_type
);
else
_dst
.
create
(
dims
,
size
,
_type
);
Mat
dst
=
_dst
.
getMat
();
BinaryFunc
func
=
noScale
?
getConvertFunc
(
sdepth
,
ddepth
)
:
getConvertScaleFunc
(
sdepth
,
ddepth
);
double
scale
[]
=
{
alpha
,
beta
};
int
cn
=
channels
();
CV_Assert
(
func
!=
0
);
if
(
dims
<=
2
)
{
Size
sz
=
getContinuousSize2D
(
src
,
dst
,
cn
);
func
(
src
.
data
,
src
.
step
,
0
,
0
,
dst
.
data
,
dst
.
step
,
sz
,
scale
);
}
else
{
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
Size
sz
((
int
)(
it
.
size
*
cn
),
1
);
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
1
,
0
,
0
,
ptrs
[
1
],
1
,
sz
,
scale
);
}
}
//==================================================================================================
void
convertFp16
(
InputArray
_src
,
OutputArray
_dst
)
{
CV_INSTRUMENT_REGION
();
int
sdepth
=
_src
.
depth
(),
ddepth
=
0
;
BinaryFunc
func
=
0
;
switch
(
sdepth
)
{
case
CV_32F
:
if
(
_dst
.
fixedType
())
{
ddepth
=
_dst
.
depth
();
CV_Assert
(
ddepth
==
CV_16S
/*|| ddepth == CV_16F*/
);
CV_Assert
(
_dst
.
channels
()
==
_src
.
channels
());
}
else
ddepth
=
CV_16S
;
func
=
(
BinaryFunc
)
get_cvt32f16f
();
break
;
case
CV_16S
:
//case CV_16F:
ddepth
=
CV_32F
;
func
=
(
BinaryFunc
)
get_cvt16f32f
();
break
;
default:
CV_Error
(
Error
::
StsUnsupportedFormat
,
"Unsupported input depth"
);
return
;
}
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
_dst
.
isUMat
(),
ocl_convertFp16
(
_src
,
_dst
,
sdepth
,
ddepth
))
Mat
src
=
_src
.
getMat
();
int
type
=
CV_MAKETYPE
(
ddepth
,
src
.
channels
());
_dst
.
create
(
src
.
dims
,
src
.
size
,
type
);
Mat
dst
=
_dst
.
getMat
();
int
cn
=
src
.
channels
();
CV_Assert
(
func
!=
0
);
if
(
src
.
dims
<=
2
)
{
Size
sz
=
getContinuousSize2D
(
src
,
dst
,
cn
);
func
(
src
.
data
,
src
.
step
,
0
,
0
,
dst
.
data
,
dst
.
step
,
sz
,
0
);
}
else
{
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
Size
sz
((
int
)(
it
.
size
*
cn
),
1
);
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
0
,
0
,
0
,
ptrs
[
1
],
0
,
sz
,
0
);
}
}
}
// namespace cv
modules/core/src/convert.
c
pp
→
modules/core/src/convert.
simd.h
pp
浏览文件 @
2e28ff78
...
...
@@ -3,10 +3,16 @@
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.hpp"
namespace
cv
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
BinaryFunc
getConvertFunc
(
int
sdepth
,
int
ddepth
);
BinaryFunc
get_cvt32f16f
();
BinaryFunc
get_cvt16f32f
();
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
/*namespace hal {
...
...
@@ -66,7 +72,7 @@ void addRNGBias64f( double* arr, const double* scaleBiasPairs, int len )
}*/
template
<
typename
_Ts
,
typename
_Td
,
typename
_Twvec
>
inline
void
template
<
typename
_Ts
,
typename
_Td
,
typename
_Twvec
>
static
inline
void
cvt_
(
const
_Ts
*
src
,
size_t
sstep
,
_Td
*
dst
,
size_t
dstep
,
Size
size
)
{
sstep
/=
sizeof
(
src
[
0
]);
...
...
@@ -97,7 +103,7 @@ cvt_( const _Ts* src, size_t sstep, _Td* dst, size_t dstep, Size size )
// in order to reduce the code size, for (16f <-> ...) conversions
// we add a conversion function without loop unrolling
template
<
typename
_Ts
,
typename
_Td
,
typename
_Twvec
>
inline
void
template
<
typename
_Ts
,
typename
_Td
,
typename
_Twvec
>
static
inline
void
cvt1_
(
const
_Ts
*
src
,
size_t
sstep
,
_Td
*
dst
,
size_t
dstep
,
Size
size
)
{
sstep
/=
sizeof
(
src
[
0
]);
...
...
@@ -140,7 +146,10 @@ static void cvtCopy( const uchar* src, size_t sstep,
#define DEF_CVT_FUNC(suffix, cvtfunc, _Ts, _Td, _Twvec) \
static void cvt##suffix(const _Ts* src, size_t sstep, uchar*, size_t, \
_Td* dst, size_t dstep, Size size, void*) \
{ cvtfunc<_Ts, _Td, _Twvec>(src, sstep, dst, dstep, size); }
{ \
CV_INSTRUMENT_REGION(); \
cvtfunc<_Ts, _Td, _Twvec>(src, sstep, dst, dstep, size); \
}
////////////////////// 8u -> ... ////////////////////////
...
...
@@ -225,16 +234,16 @@ DEF_CVT_FUNC(16f32f, cvt1_, float16_t, float, v_float32)
///////////// "conversion" w/o conversion ///////////////
static
void
cvt8u
(
const
uchar
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
uchar
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
(
src
,
sstep
,
dst
,
dstep
,
size
,
1
);
}
{
CV_INSTRUMENT_REGION
();
cvtCopy
(
src
,
sstep
,
dst
,
dstep
,
size
,
1
);
}
static
void
cvt16u
(
const
ushort
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
ushort
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
2
);
}
{
CV_INSTRUMENT_REGION
();
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
2
);
}
static
void
cvt32s
(
const
int
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
int
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
4
);
}
{
CV_INSTRUMENT_REGION
();
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
4
);
}
static
void
cvt64s
(
const
int64
*
src
,
size_t
sstep
,
uchar
*
,
size_t
,
int64
*
dst
,
size_t
dstep
,
Size
size
,
void
*
)
{
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
8
);
}
{
CV_INSTRUMENT_REGION
();
cvtCopy
((
const
uchar
*
)
src
,
sstep
,
(
uchar
*
)
dst
,
dstep
,
size
,
8
);
}
/* [TODO] Recover IPP calls
...
...
@@ -380,148 +389,17 @@ BinaryFunc getConvertFunc(int sdepth, int ddepth)
return
cvtTab
[
CV_MAT_DEPTH
(
ddepth
)][
CV_MAT_DEPTH
(
sdepth
)];
}
#ifdef HAVE_OPENCL
static
bool
ocl_convertFp16
(
InputArray
_src
,
OutputArray
_dst
,
int
sdepth
,
int
ddepth
)
BinaryFunc
get_cvt32f16f
()
{
int
type
=
_src
.
type
(),
cn
=
CV_MAT_CN
(
type
);
_dst
.
createSameSize
(
_src
,
CV_MAKETYPE
(
ddepth
,
cn
)
);
int
kercn
=
1
;
int
rowsPerWI
=
1
;
String
build_opt
=
format
(
"-D HALF_SUPPORT -D srcT=%s -D dstT=%s -D rowsPerWI=%d%s"
,
sdepth
==
CV_32F
?
"float"
:
"half"
,
sdepth
==
CV_32F
?
"half"
:
"float"
,
rowsPerWI
,
sdepth
==
CV_32F
?
" -D FLOAT_TO_HALF "
:
""
);
ocl
::
Kernel
k
(
"convertFp16"
,
ocl
::
core
::
halfconvert_oclsrc
,
build_opt
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
,
kercn
);
k
.
args
(
srcarg
,
dstarg
);
size_t
globalsize
[
2
]
=
{
(
size_t
)
src
.
cols
*
cn
/
kercn
,
((
size_t
)
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
return
(
BinaryFunc
)
cvt32f16f
;
}
#endif
}
// cv::
void
cv
::
Mat
::
convertTo
(
OutputArray
_dst
,
int
_type
,
double
alpha
,
double
beta
)
const
BinaryFunc
get_cvt16f32f
()
{
CV_INSTRUMENT_REGION
();
if
(
empty
()
)
{
_dst
.
release
();
return
;
}
bool
noScale
=
fabs
(
alpha
-
1
)
<
DBL_EPSILON
&&
fabs
(
beta
)
<
DBL_EPSILON
;
if
(
_type
<
0
)
_type
=
_dst
.
fixedType
()
?
_dst
.
type
()
:
type
();
else
_type
=
CV_MAKETYPE
(
CV_MAT_DEPTH
(
_type
),
channels
());
int
sdepth
=
depth
(),
ddepth
=
CV_MAT_DEPTH
(
_type
);
if
(
sdepth
==
ddepth
&&
noScale
)
{
copyTo
(
_dst
);
return
;
}
Mat
src
=
*
this
;
if
(
dims
<=
2
)
_dst
.
create
(
size
(),
_type
);
else
_dst
.
create
(
dims
,
size
,
_type
);
Mat
dst
=
_dst
.
getMat
();
BinaryFunc
func
=
noScale
?
getConvertFunc
(
sdepth
,
ddepth
)
:
getConvertScaleFunc
(
sdepth
,
ddepth
);
double
scale
[]
=
{
alpha
,
beta
};
int
cn
=
channels
();
CV_Assert
(
func
!=
0
);
if
(
dims
<=
2
)
{
Size
sz
=
getContinuousSize2D
(
src
,
dst
,
cn
);
func
(
src
.
data
,
src
.
step
,
0
,
0
,
dst
.
data
,
dst
.
step
,
sz
,
scale
);
}
else
{
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
Size
sz
((
int
)(
it
.
size
*
cn
),
1
);
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
1
,
0
,
0
,
ptrs
[
1
],
1
,
sz
,
scale
);
}
return
(
BinaryFunc
)
cvt16f32f
;
}
//==================================================================================================
void
cv
::
convertFp16
(
InputArray
_src
,
OutputArray
_dst
)
{
CV_INSTRUMENT_REGION
();
int
sdepth
=
_src
.
depth
(),
ddepth
=
0
;
BinaryFunc
func
=
0
;
switch
(
sdepth
)
{
case
CV_32F
:
if
(
_dst
.
fixedType
())
{
ddepth
=
_dst
.
depth
();
CV_Assert
(
ddepth
==
CV_16S
/*|| ddepth == CV_16F*/
);
CV_Assert
(
_dst
.
channels
()
==
_src
.
channels
());
}
else
ddepth
=
CV_16S
;
func
=
(
BinaryFunc
)
cvt32f16f
;
break
;
case
CV_16S
:
//case CV_16F:
ddepth
=
CV_32F
;
func
=
(
BinaryFunc
)
cvt16f32f
;
break
;
default:
CV_Error
(
Error
::
StsUnsupportedFormat
,
"Unsupported input depth"
);
return
;
}
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
_dst
.
isUMat
(),
ocl_convertFp16
(
_src
,
_dst
,
sdepth
,
ddepth
))
Mat
src
=
_src
.
getMat
();
int
type
=
CV_MAKETYPE
(
ddepth
,
src
.
channels
());
_dst
.
create
(
src
.
dims
,
src
.
size
,
type
);
Mat
dst
=
_dst
.
getMat
();
int
cn
=
src
.
channels
();
CV_Assert
(
func
!=
0
);
if
(
src
.
dims
<=
2
)
{
Size
sz
=
getContinuousSize2D
(
src
,
dst
,
cn
);
func
(
src
.
data
,
src
.
step
,
0
,
0
,
dst
.
data
,
dst
.
step
,
sz
,
0
);
}
else
{
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
Size
sz
((
int
)(
it
.
size
*
cn
),
1
);
#endif
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
0
,
0
,
0
,
ptrs
[
1
],
0
,
sz
,
0
);
}
}
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
// namespace
modules/core/src/convert_scale.dispatch.cpp
0 → 100644
浏览文件 @
2e28ff78
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert_scale.simd.hpp"
#include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
static
BinaryFunc
getCvtScaleAbsFunc
(
int
depth
)
{
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getCvtScaleAbsFunc
,
(
depth
),
CV_CPU_DISPATCH_MODES_ALL
);
}
BinaryFunc
getConvertScaleFunc
(
int
sdepth
,
int
ddepth
)
{
CV_INSTRUMENT_REGION
();
CV_CPU_DISPATCH
(
getConvertScaleFunc
,
(
sdepth
,
ddepth
),
CV_CPU_DISPATCH_MODES_ALL
);
}
#ifdef HAVE_OPENCL
static
bool
ocl_convertScaleAbs
(
InputArray
_src
,
OutputArray
_dst
,
double
alpha
,
double
beta
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
depth
==
CV_64F
)
return
false
;
_dst
.
create
(
_src
.
size
(),
CV_8UC
(
cn
));
int
kercn
=
1
;
if
(
d
.
isIntel
())
{
static
const
int
vectorWidths
[]
=
{
4
,
4
,
4
,
4
,
4
,
4
,
4
,
-
1
};
kercn
=
ocl
::
checkOptimalVectorWidth
(
vectorWidths
,
_src
,
_dst
,
noArray
(),
noArray
(),
noArray
(),
noArray
(),
noArray
(),
noArray
(),
noArray
(),
ocl
::
OCL_VECTOR_MAX
);
}
else
kercn
=
ocl
::
predictOptimalVectorWidthMax
(
_src
,
_dst
);
int
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
char
cvt
[
2
][
50
];
int
wdepth
=
std
::
max
(
depth
,
CV_32F
);
String
build_opt
=
format
(
"-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D DEPTH_dst=%d -D srcT1=%s"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s"
" -D workT1=%s -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
CV_8UC
(
kercn
)),
CV_8U
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
kercn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
CV_8U
,
kercn
,
cvt
[
1
]),
ocl
::
typeToStr
(
wdepth
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
build_opt
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
,
kercn
);
if
(
wdepth
==
CV_32F
)
k
.
args
(
srcarg
,
dstarg
,
(
float
)
alpha
,
(
float
)
beta
);
else
if
(
wdepth
==
CV_64F
)
k
.
args
(
srcarg
,
dstarg
,
alpha
,
beta
);
size_t
globalsize
[
2
]
=
{
(
size_t
)
src
.
cols
*
cn
/
kercn
,
((
size_t
)
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
#endif
void
convertScaleAbs
(
InputArray
_src
,
OutputArray
_dst
,
double
alpha
,
double
beta
)
{
CV_INSTRUMENT_REGION
();
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
_dst
.
isUMat
(),
ocl_convertScaleAbs
(
_src
,
_dst
,
alpha
,
beta
))
Mat
src
=
_src
.
getMat
();
int
cn
=
src
.
channels
();
double
scale
[]
=
{
alpha
,
beta
};
_dst
.
create
(
src
.
dims
,
src
.
size
,
CV_8UC
(
cn
)
);
Mat
dst
=
_dst
.
getMat
();
BinaryFunc
func
=
getCvtScaleAbsFunc
(
src
.
depth
());
CV_Assert
(
func
!=
0
);
if
(
src
.
dims
<=
2
)
{
Size
sz
=
getContinuousSize2D
(
src
,
dst
,
cn
);
func
(
src
.
ptr
(),
src
.
step
,
0
,
0
,
dst
.
ptr
(),
dst
.
step
,
sz
,
scale
);
}
else
{
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
Size
sz
((
int
)
it
.
size
*
cn
,
1
);
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
0
,
0
,
0
,
ptrs
[
1
],
0
,
sz
,
scale
);
}
}
//==================================================================================================
#ifdef HAVE_OPENCL
static
bool
ocl_normalize
(
InputArray
_src
,
InputOutputArray
_dst
,
InputArray
_mask
,
int
dtype
,
double
scale
,
double
delta
)
{
UMat
src
=
_src
.
getUMat
();
if
(
_mask
.
empty
()
)
src
.
convertTo
(
_dst
,
dtype
,
scale
,
delta
);
else
if
(
src
.
channels
()
<=
4
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
stype
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
ddepth
=
CV_MAT_DEPTH
(
dtype
),
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
sdepth
,
ddepth
)),
rowsPerWI
=
dev
.
isIntel
()
?
4
:
1
;
float
fscale
=
static_cast
<
float
>
(
scale
),
fdelta
=
static_cast
<
float
>
(
delta
);
bool
haveScale
=
std
::
fabs
(
scale
-
1
)
>
DBL_EPSILON
,
haveZeroScale
=
!
(
std
::
fabs
(
scale
)
>
DBL_EPSILON
),
haveDelta
=
std
::
fabs
(
delta
)
>
DBL_EPSILON
,
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
!
haveScale
&&
!
haveDelta
&&
stype
==
dtype
)
{
_src
.
copyTo
(
_dst
,
_mask
);
return
true
;
}
if
(
haveZeroScale
)
{
_dst
.
setTo
(
Scalar
(
delta
),
_mask
);
return
true
;
}
if
((
sdepth
==
CV_64F
||
ddepth
==
CV_64F
)
&&
!
doubleSupport
)
return
false
;
char
cvt
[
2
][
40
];
String
opts
=
format
(
"-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
" -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s"
,
ocl
::
typeToStr
(
stype
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
cn
,
rowsPerWI
,
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveScale
?
" -D HAVE_SCALE"
:
""
,
haveDelta
?
" -D HAVE_DELTA"
:
""
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
));
ocl
::
Kernel
k
(
"normalizek"
,
ocl
::
core
::
normalize_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
mask
=
_mask
.
getUMat
(),
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
),
dstarg
=
ocl
::
KernelArg
::
ReadWrite
(
dst
);
if
(
haveScale
)
{
if
(
haveDelta
)
k
.
args
(
srcarg
,
maskarg
,
dstarg
,
fscale
,
fdelta
);
else
k
.
args
(
srcarg
,
maskarg
,
dstarg
,
fscale
);
}
else
{
if
(
haveDelta
)
k
.
args
(
srcarg
,
maskarg
,
dstarg
,
fdelta
);
else
k
.
args
(
srcarg
,
maskarg
,
dstarg
);
}
size_t
globalsize
[
2
]
=
{
(
size_t
)
src
.
cols
,
((
size_t
)
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
else
{
UMat
temp
;
src
.
convertTo
(
temp
,
dtype
,
scale
,
delta
);
temp
.
copyTo
(
_dst
,
_mask
);
}
return
true
;
}
#endif
void
normalize
(
InputArray
_src
,
InputOutputArray
_dst
,
double
a
,
double
b
,
int
norm_type
,
int
rtype
,
InputArray
_mask
)
{
CV_INSTRUMENT_REGION
();
double
scale
=
1
,
shift
=
0
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
if
(
rtype
<
0
)
rtype
=
_dst
.
fixedType
()
?
_dst
.
depth
()
:
depth
;
if
(
norm_type
==
CV_MINMAX
)
{
double
smin
=
0
,
smax
=
0
;
double
dmin
=
MIN
(
a
,
b
),
dmax
=
MAX
(
a
,
b
);
minMaxIdx
(
_src
,
&
smin
,
&
smax
,
0
,
0
,
_mask
);
scale
=
(
dmax
-
dmin
)
*
(
smax
-
smin
>
DBL_EPSILON
?
1.
/
(
smax
-
smin
)
:
0
);
if
(
rtype
==
CV_32F
)
{
scale
=
(
float
)
scale
;
shift
=
(
float
)
dmin
-
(
float
)(
smin
*
scale
);
}
else
shift
=
dmin
-
smin
*
scale
;
}
else
if
(
norm_type
==
CV_L2
||
norm_type
==
CV_L1
||
norm_type
==
CV_C
)
{
scale
=
norm
(
_src
,
norm_type
,
_mask
);
scale
=
scale
>
DBL_EPSILON
?
a
/
scale
:
0.
;
shift
=
0
;
}
else
CV_Error
(
CV_StsBadArg
,
"Unknown/unsupported norm type"
);
CV_OCL_RUN
(
_dst
.
isUMat
(),
ocl_normalize
(
_src
,
_dst
,
_mask
,
rtype
,
scale
,
shift
))
Mat
src
=
_src
.
getMat
();
if
(
_mask
.
empty
()
)
src
.
convertTo
(
_dst
,
rtype
,
scale
,
shift
);
else
{
Mat
temp
;
src
.
convertTo
(
temp
,
rtype
,
scale
,
shift
);
temp
.
copyTo
(
_dst
,
_mask
);
}
}
}
// namespace
modules/core/src/convert_scale.
c
pp
→
modules/core/src/convert_scale.
simd.h
pp
浏览文件 @
2e28ff78
...
...
@@ -4,16 +4,20 @@
#include "precomp.hpp"
#include "opencl_kernels_core.hpp"
#include "convert.hpp"
namespace
cv
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
BinaryFunc
getCvtScaleAbsFunc
(
int
depth
);
BinaryFunc
getConvertScaleFunc
(
int
sdepth
,
int
ddepth
);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
/****************************************************************************************\
* convertScale[Abs] *
\****************************************************************************************/
namespace
cv
{
template
<
typename
_Ts
,
typename
_Td
>
inline
void
cvtabs_32f
(
const
_Ts
*
src
,
size_t
sstep
,
_Td
*
dst
,
size_t
dstep
,
Size
size
,
float
a
,
float
b
)
...
...
@@ -287,7 +291,7 @@ DEF_CVT_SCALE_FUNC(32f16f, cvt1_32f, float, float16_t, float)
DEF_CVT_SCALE_FUNC(64f16f, cvt_64f, double, float16_t, double)
DEF_CVT_SCALE_FUNC(16f, cvt1_32f, float16_t, float16_t, float)*/
static
BinaryFunc
getCvtScaleAbsFunc
(
int
depth
)
BinaryFunc
getCvtScaleAbsFunc
(
int
depth
)
{
static
BinaryFunc
cvtScaleAbsTab
[]
=
{
...
...
@@ -349,238 +353,7 @@ BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
return
cvtScaleTab
[
CV_MAT_DEPTH
(
ddepth
)][
CV_MAT_DEPTH
(
sdepth
)];
}
#ifdef HAVE_OPENCL
static
bool
ocl_convertScaleAbs
(
InputArray
_src
,
OutputArray
_dst
,
double
alpha
,
double
beta
)
{
const
ocl
::
Device
&
d
=
ocl
::
Device
::
getDefault
();
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
),
cn
=
CV_MAT_CN
(
type
);
bool
doubleSupport
=
d
.
doubleFPConfig
()
>
0
;
if
(
!
doubleSupport
&&
depth
==
CV_64F
)
return
false
;
_dst
.
create
(
_src
.
size
(),
CV_8UC
(
cn
));
int
kercn
=
1
;
if
(
d
.
isIntel
())
{
static
const
int
vectorWidths
[]
=
{
4
,
4
,
4
,
4
,
4
,
4
,
4
,
-
1
};
kercn
=
ocl
::
checkOptimalVectorWidth
(
vectorWidths
,
_src
,
_dst
,
noArray
(),
noArray
(),
noArray
(),
noArray
(),
noArray
(),
noArray
(),
noArray
(),
ocl
::
OCL_VECTOR_MAX
);
}
else
kercn
=
ocl
::
predictOptimalVectorWidthMax
(
_src
,
_dst
);
int
rowsPerWI
=
d
.
isIntel
()
?
4
:
1
;
char
cvt
[
2
][
50
];
int
wdepth
=
std
::
max
(
depth
,
CV_32F
);
String
build_opt
=
format
(
"-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D DEPTH_dst=%d -D srcT1=%s"
" -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s"
" -D workT1=%s -D rowsPerWI=%d%s"
,
ocl
::
typeToStr
(
CV_8UC
(
kercn
)),
CV_8U
,
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
depth
,
kercn
)),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
kercn
)),
wdepth
,
ocl
::
convertTypeStr
(
depth
,
wdepth
,
kercn
,
cvt
[
0
]),
ocl
::
convertTypeStr
(
wdepth
,
CV_8U
,
kercn
,
cvt
[
1
]),
ocl
::
typeToStr
(
wdepth
),
rowsPerWI
,
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
);
ocl
::
Kernel
k
(
"KF"
,
ocl
::
core
::
arithm_oclsrc
,
build_opt
);
if
(
k
.
empty
())
return
false
;
UMat
src
=
_src
.
getUMat
();
UMat
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
dstarg
=
ocl
::
KernelArg
::
WriteOnly
(
dst
,
cn
,
kercn
);
if
(
wdepth
==
CV_32F
)
k
.
args
(
srcarg
,
dstarg
,
(
float
)
alpha
,
(
float
)
beta
);
else
if
(
wdepth
==
CV_64F
)
k
.
args
(
srcarg
,
dstarg
,
alpha
,
beta
);
size_t
globalsize
[
2
]
=
{
(
size_t
)
src
.
cols
*
cn
/
kercn
,
((
size_t
)
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
#endif
}
//cv::
void
cv
::
convertScaleAbs
(
InputArray
_src
,
OutputArray
_dst
,
double
alpha
,
double
beta
)
{
CV_INSTRUMENT_REGION
();
CV_OCL_RUN
(
_src
.
dims
()
<=
2
&&
_dst
.
isUMat
(),
ocl_convertScaleAbs
(
_src
,
_dst
,
alpha
,
beta
))
Mat
src
=
_src
.
getMat
();
int
cn
=
src
.
channels
();
double
scale
[]
=
{
alpha
,
beta
};
_dst
.
create
(
src
.
dims
,
src
.
size
,
CV_8UC
(
cn
)
);
Mat
dst
=
_dst
.
getMat
();
BinaryFunc
func
=
getCvtScaleAbsFunc
(
src
.
depth
());
CV_Assert
(
func
!=
0
);
if
(
src
.
dims
<=
2
)
{
Size
sz
=
getContinuousSize2D
(
src
,
dst
,
cn
);
func
(
src
.
ptr
(),
src
.
step
,
0
,
0
,
dst
.
ptr
(),
dst
.
step
,
sz
,
scale
);
}
else
{
const
Mat
*
arrays
[]
=
{
&
src
,
&
dst
,
0
};
uchar
*
ptrs
[
2
]
=
{};
NAryMatIterator
it
(
arrays
,
ptrs
);
Size
sz
((
int
)
it
.
size
*
cn
,
1
);
for
(
size_t
i
=
0
;
i
<
it
.
nplanes
;
i
++
,
++
it
)
func
(
ptrs
[
0
],
0
,
0
,
0
,
ptrs
[
1
],
0
,
sz
,
scale
);
}
}
//==================================================================================================
namespace
cv
{
#ifdef HAVE_OPENCL
static
bool
ocl_normalize
(
InputArray
_src
,
InputOutputArray
_dst
,
InputArray
_mask
,
int
dtype
,
double
scale
,
double
delta
)
{
UMat
src
=
_src
.
getUMat
();
if
(
_mask
.
empty
()
)
src
.
convertTo
(
_dst
,
dtype
,
scale
,
delta
);
else
if
(
src
.
channels
()
<=
4
)
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
int
stype
=
_src
.
type
(),
sdepth
=
CV_MAT_DEPTH
(
stype
),
cn
=
CV_MAT_CN
(
stype
),
ddepth
=
CV_MAT_DEPTH
(
dtype
),
wdepth
=
std
::
max
(
CV_32F
,
std
::
max
(
sdepth
,
ddepth
)),
rowsPerWI
=
dev
.
isIntel
()
?
4
:
1
;
float
fscale
=
static_cast
<
float
>
(
scale
),
fdelta
=
static_cast
<
float
>
(
delta
);
bool
haveScale
=
std
::
fabs
(
scale
-
1
)
>
DBL_EPSILON
,
haveZeroScale
=
!
(
std
::
fabs
(
scale
)
>
DBL_EPSILON
),
haveDelta
=
std
::
fabs
(
delta
)
>
DBL_EPSILON
,
doubleSupport
=
dev
.
doubleFPConfig
()
>
0
;
if
(
!
haveScale
&&
!
haveDelta
&&
stype
==
dtype
)
{
_src
.
copyTo
(
_dst
,
_mask
);
return
true
;
}
if
(
haveZeroScale
)
{
_dst
.
setTo
(
Scalar
(
delta
),
_mask
);
return
true
;
}
if
((
sdepth
==
CV_64F
||
ddepth
==
CV_64F
)
&&
!
doubleSupport
)
return
false
;
char
cvt
[
2
][
40
];
String
opts
=
format
(
"-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
" -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s"
,
ocl
::
typeToStr
(
stype
),
ocl
::
typeToStr
(
dtype
),
ocl
::
convertTypeStr
(
sdepth
,
wdepth
,
cn
,
cvt
[
0
]),
cn
,
rowsPerWI
,
ocl
::
convertTypeStr
(
wdepth
,
ddepth
,
cn
,
cvt
[
1
]),
ocl
::
typeToStr
(
CV_MAKE_TYPE
(
wdepth
,
cn
)),
doubleSupport
?
" -D DOUBLE_SUPPORT"
:
""
,
haveScale
?
" -D HAVE_SCALE"
:
""
,
haveDelta
?
" -D HAVE_DELTA"
:
""
,
ocl
::
typeToStr
(
sdepth
),
ocl
::
typeToStr
(
ddepth
));
ocl
::
Kernel
k
(
"normalizek"
,
ocl
::
core
::
normalize_oclsrc
,
opts
);
if
(
k
.
empty
())
return
false
;
UMat
mask
=
_mask
.
getUMat
(),
dst
=
_dst
.
getUMat
();
ocl
::
KernelArg
srcarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
src
),
maskarg
=
ocl
::
KernelArg
::
ReadOnlyNoSize
(
mask
),
dstarg
=
ocl
::
KernelArg
::
ReadWrite
(
dst
);
if
(
haveScale
)
{
if
(
haveDelta
)
k
.
args
(
srcarg
,
maskarg
,
dstarg
,
fscale
,
fdelta
);
else
k
.
args
(
srcarg
,
maskarg
,
dstarg
,
fscale
);
}
else
{
if
(
haveDelta
)
k
.
args
(
srcarg
,
maskarg
,
dstarg
,
fdelta
);
else
k
.
args
(
srcarg
,
maskarg
,
dstarg
);
}
size_t
globalsize
[
2
]
=
{
(
size_t
)
src
.
cols
,
((
size_t
)
src
.
rows
+
rowsPerWI
-
1
)
/
rowsPerWI
};
return
k
.
run
(
2
,
globalsize
,
NULL
,
false
);
}
else
{
UMat
temp
;
src
.
convertTo
(
temp
,
dtype
,
scale
,
delta
);
temp
.
copyTo
(
_dst
,
_mask
);
}
return
true
;
}
#endif
}
// cv::
void
cv
::
normalize
(
InputArray
_src
,
InputOutputArray
_dst
,
double
a
,
double
b
,
int
norm_type
,
int
rtype
,
InputArray
_mask
)
{
CV_INSTRUMENT_REGION
();
double
scale
=
1
,
shift
=
0
;
int
type
=
_src
.
type
(),
depth
=
CV_MAT_DEPTH
(
type
);
if
(
rtype
<
0
)
rtype
=
_dst
.
fixedType
()
?
_dst
.
depth
()
:
depth
;
if
(
norm_type
==
CV_MINMAX
)
{
double
smin
=
0
,
smax
=
0
;
double
dmin
=
MIN
(
a
,
b
),
dmax
=
MAX
(
a
,
b
);
minMaxIdx
(
_src
,
&
smin
,
&
smax
,
0
,
0
,
_mask
);
scale
=
(
dmax
-
dmin
)
*
(
smax
-
smin
>
DBL_EPSILON
?
1.
/
(
smax
-
smin
)
:
0
);
if
(
rtype
==
CV_32F
)
{
scale
=
(
float
)
scale
;
shift
=
(
float
)
dmin
-
(
float
)(
smin
*
scale
);
}
else
shift
=
dmin
-
smin
*
scale
;
}
else
if
(
norm_type
==
CV_L2
||
norm_type
==
CV_L1
||
norm_type
==
CV_C
)
{
scale
=
norm
(
_src
,
norm_type
,
_mask
);
scale
=
scale
>
DBL_EPSILON
?
a
/
scale
:
0.
;
shift
=
0
;
}
else
CV_Error
(
CV_StsBadArg
,
"Unknown/unsupported norm type"
);
CV_OCL_RUN
(
_dst
.
isUMat
(),
ocl_normalize
(
_src
,
_dst
,
_mask
,
rtype
,
scale
,
shift
))
Mat
src
=
_src
.
getMat
();
if
(
_mask
.
empty
()
)
src
.
convertTo
(
_dst
,
rtype
,
scale
,
shift
);
else
{
Mat
temp
;
src
.
convertTo
(
temp
,
rtype
,
scale
,
shift
);
temp
.
copyTo
(
_dst
,
_mask
);
}
}
CV_CPU_OPTIMIZATION_NAMESPACE_END
}
// namespace
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录