Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
1e6ce1d2
O
Opencv
项目概览
Greenplum
/
Opencv
11 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1e6ce1d2
编写于
3月 23, 2017
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
core(mathfuncs_core): cpu optimization dispatched code
上级
17e5e4cd
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
252 addition
and
157 deletion
+252
-157
modules/core/CMakeLists.txt
modules/core/CMakeLists.txt
+3
-0
modules/core/src/mathfuncs_core.dispatch.cpp
modules/core/src/mathfuncs_core.dispatch.cpp
+215
-0
modules/core/src/mathfuncs_core.simd.hpp
modules/core/src/mathfuncs_core.simd.hpp
+34
-157
未找到文件。
modules/core/CMakeLists.txt
浏览文件 @
1e6ce1d2
set
(
the_description
"The Core Functionality"
)
ocv_add_dispatched_file
(
mathfuncs_core SSE2 AVX AVX2
)
ocv_add_module
(
core
"
${
OPENCV_HAL_LINKER_LIBS
}
"
OPTIONAL opencv_cudev
...
...
modules/core/src/mathfuncs_core.dispatch.cpp
0 → 100644
浏览文件 @
1e6ce1d2
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "mathfuncs_core.simd.hpp"
#include "mathfuncs_core.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
// Public entry point for the single-precision array atan2 routine.
// Steps, in order: open an instrumentation region, offer the call to the
// cv_hal_fastAtan32f hook through CALL_HAL, then hand the same arguments to
// the CPU-dispatched fastAtan32f kernel via CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_CPU_DISPATCH are presumably early-returning
// macros; confirm against their definitions.
void fastAtan32f(const float* Y, const float* X, float* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(fastAtan32f, cv_hal_fastAtan32f, Y, X, angle, len, angleInDegrees);

    CV_CPU_DISPATCH(fastAtan32f, (Y, X, angle, len, angleInDegrees),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the double-precision array atan2 routine.
// Steps, in order: open an instrumentation region, offer the call to the
// cv_hal_fastAtan64f hook through CALL_HAL, then hand the same arguments to
// the CPU-dispatched fastAtan64f kernel via CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL and CV_CPU_DISPATCH are presumably early-returning
// macros; confirm against their definitions.
void fastAtan64f(const double* Y, const double* X, double* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(fastAtan64f, cv_hal_fastAtan64f, Y, X, angle, len, angleInDegrees);

    CV_CPU_DISPATCH(fastAtan64f, (Y, X, angle, len, angleInDegrees),
        CV_CPU_DISPATCH_MODES_ALL);
}
// deprecated
// Legacy array overload kept under its old name: it opens its own
// instrumentation region and forwards every argument unchanged to fastAtan32f.
void fastAtan2(const float* Y, const float* X, float* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    fastAtan32f(Y, X, angle, len, angleInDegrees);
}
// Public entry point for the single-precision magnitude routine.
// Backends are tried in this order:
//   1. the cv_hal_magnitude32f hook (CALL_HAL),
//   2. IPP's ippsMagnitude_32f, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched magnitude32f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void magnitude32f(const float* x, const float* y, float* mag, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);

    CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the double-precision magnitude routine.
// Backends are tried in this order:
//   1. the cv_hal_magnitude64f hook (CALL_HAL),
//   2. IPP's ippsMagnitude_64f, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched magnitude64f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void magnitude64f(const double* x, const double* y, double* mag, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);

    CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the single-precision inverse-square-root routine.
// Backends are tried in this order:
//   1. the cv_hal_invSqrt32f hook (CALL_HAL),
//   2. IPP's ippsInvSqrt_32f_A21, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched invSqrt32f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void invSqrt32f(const float* src, float* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_32f_A21, src, dst, len) >= 0);

    CV_CPU_DISPATCH(invSqrt32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the double-precision inverse-square-root routine.
// Backends are tried in this order:
//   1. the cv_hal_invSqrt64f hook (CALL_HAL),
//   2. IPP's ippsInvSqrt_64f_A50, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched invSqrt64f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void invSqrt64f(const double* src, double* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_64f_A50, src, dst, len) >= 0);

    CV_CPU_DISPATCH(invSqrt64f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the single-precision square-root routine.
// Backends are tried in this order:
//   1. the cv_hal_sqrt32f hook (CALL_HAL),
//   2. IPP's ippsSqrt_32f_A21, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched sqrt32f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void sqrt32f(const float* src, float* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);

    CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the double-precision square-root routine.
// Backends are tried in this order:
//   1. the cv_hal_sqrt64f hook (CALL_HAL),
//   2. IPP's ippsSqrt_64f_A50, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched sqrt64f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void sqrt64f(const double* src, double* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);

    CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the single-precision exponent routine.
// Backends are tried in this order:
//   1. the cv_hal_exp32f hook (CALL_HAL),
//   2. IPP's ippsExp_32f_A21, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched exp32f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void exp32f(const float* src, float* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(exp32f, cv_hal_exp32f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, src, dst, n) >= 0);

    CV_CPU_DISPATCH(exp32f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the double-precision exponent routine.
// Backends are tried in this order:
//   1. the cv_hal_exp64f hook (CALL_HAL),
//   2. IPP's ippsExp_64f_A50, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched exp64f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void exp64f(const double* src, double* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(exp64f, cv_hal_exp64f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, src, dst, n) >= 0);

    CV_CPU_DISPATCH(exp64f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the single-precision natural-log routine.
// Backends are tried in this order:
//   1. the cv_hal_log32f hook (CALL_HAL),
//   2. IPP's ippsLn_32f_A21, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched log32f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void log32f(const float* src, float* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(log32f, cv_hal_log32f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, src, dst, n) >= 0);

    CV_CPU_DISPATCH(log32f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Public entry point for the double-precision natural-log routine.
// Backends are tried in this order:
//   1. the cv_hal_log64f hook (CALL_HAL),
//   2. IPP's ippsLn_64f_A50, accepted when its status is >= 0 (CV_IPP_RUN_FAST),
//   3. the CPU-dispatched log64f kernel (CV_CPU_DISPATCH).
// NOTE(review): each macro presumably returns from this function when its
// backend handles the call; confirm against the macro definitions.
void log64f(const double* src, double* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(log64f, cv_hal_log64f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, src, dst, n) >= 0);

    CV_CPU_DISPATCH(log64f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
//=============================================================================
// for compatibility with 3.0
// 3.0-compatibility alias (float): passes its arguments straight to exp32f.
void exp(const float* src, float* dst, int n)
{
    exp32f(src, dst, n);
}
// 3.0-compatibility alias (double): passes its arguments straight to exp64f.
void exp(const double* src, double* dst, int n)
{
    exp64f(src, dst, n);
}
// 3.0-compatibility alias (float): passes its arguments straight to log32f.
void log(const float* src, float* dst, int n)
{
    log32f(src, dst, n);
}
// 3.0-compatibility alias (double): passes its arguments straight to log64f.
void log(const double* src, double* dst, int n)
{
    log64f(src, dst, n);
}
// 3.0-compatibility alias (float): passes its arguments straight to magnitude32f.
void magnitude(const float* x, const float* y, float* dst, int n)
{
    magnitude32f(x, y, dst, n);
}
// 3.0-compatibility alias (double): passes its arguments straight to magnitude64f.
void magnitude(const double* x, const double* y, double* dst, int n)
{
    magnitude64f(x, y, dst, n);
}
// 3.0-compatibility alias (float): passes its arguments straight to sqrt32f.
void sqrt(const float* src, float* dst, int len)
{
    sqrt32f(src, dst, len);
}
// 3.0-compatibility alias (double): passes its arguments straight to sqrt64f.
void sqrt(const double* src, double* dst, int len)
{
    sqrt64f(src, dst, len);
}
// 3.0-compatibility alias (float): passes its arguments straight to invSqrt32f.
void invSqrt(const float* src, float* dst, int len)
{
    invSqrt32f(src, dst, len);
}
// 3.0-compatibility alias (double): passes its arguments straight to invSqrt64f.
void invSqrt(const double* src, double* dst, int len)
{
    invSqrt64f(src, dst, len);
}
}}
// namespace cv::hal::
// Scalar overload of fastAtan2. Brings cv::hal into scope and routes the call
// through CV_CPU_CALL_BASELINE rather than CV_CPU_DISPATCH.
// NOTE(review): the macro is expected to supply the `return` for this
// non-void function; confirm against its definition.
float cv::fastAtan2(float y, float x)
{
    using namespace cv::hal;

    CV_CPU_CALL_BASELINE(fastAtan2, (y, x));
}
modules/core/src/mathfuncs_core.
c
pp
→
modules/core/src/mathfuncs_core.
simd.h
pp
浏览文件 @
1e6ce1d2
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// forward declarations
// Array kernels taking two inputs (Y, X) plus an output and a degrees/radians flag.
void fastAtan32f(const float* Y, const float* X, float* angle, int len, bool angleInDegrees);
void fastAtan64f(const double* Y, const double* X, double* angle, int len, bool angleInDegrees);
void fastAtan2(const float* Y, const float* X, float* angle, int len, bool angleInDegrees);

// Array kernels taking two inputs (x, y) and one output.
void magnitude32f(const float* x, const float* y, float* mag, int len);
void magnitude64f(const double* x, const double* y, double* mag, int len);

// Array kernels taking one input and one output of `len` elements.
void invSqrt32f(const float* src, float* dst, int len);
void invSqrt64f(const double* src, double* dst, int len);
void sqrt32f(const float* src, float* dst, int len);
void sqrt64f(const double* src, double* dst, int len);

// Array kernels taking one input and one output of `n` elements.
void exp32f(const float* src, float* dst, int n);
void exp64f(const double* src, double* dst, int n);
void log32f(const float* src, float* dst, int n);
void log64f(const double* src, double* dst, int n);

// Scalar overload of fastAtan2.
float fastAtan2(float y, float x);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
using
namespace
std
;
...
...
@@ -197,23 +180,17 @@ static inline void atanImpl(const T *Y, const T *X, T *angle, int len, bool angl
}
// anonymous::
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
void
fastAtan32f
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
fastAtan32f
,
cv_hal_fastAtan32f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
float
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
void
fastAtan64f
(
const
double
*
Y
,
const
double
*
X
,
double
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
fastAtan64f
,
cv_hal_fastAtan64f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
double
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
...
...
@@ -221,7 +198,6 @@ void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool
void
fastAtan2
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
fastAtan32f
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
...
...
@@ -229,9 +205,6 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
magnitude32f
,
cv_hal_magnitude32f
,
x
,
y
,
mag
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsMagnitude_32f
,
x
,
y
,
mag
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -257,9 +230,6 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
magnitude64f
,
cv_hal_magnitude64f
,
x
,
y
,
mag
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsMagnitude_64f
,
x
,
y
,
mag
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128_64F
...
...
@@ -286,9 +256,6 @@ void invSqrt32f(const float* src, float* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
invSqrt32f
,
cv_hal_invSqrt32f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsInvSqrt_32f_A21
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -310,9 +277,6 @@ void invSqrt64f(const double* src, double* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
invSqrt64f
,
cv_hal_invSqrt64f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsInvSqrt_64f_A50
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SSE2
...
...
@@ -330,9 +294,6 @@ void sqrt32f(const float* src, float* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
sqrt32f
,
cv_hal_sqrt32f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsSqrt_32f_A21
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -354,9 +315,6 @@ void sqrt64f(const double* src, double* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
sqrt64f
,
cv_hal_sqrt64f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsSqrt_64f_A50
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128_64F
...
...
@@ -381,9 +339,6 @@ void exp32f(const float *src, float *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_32f_A21
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
exp
(
src
[
i
]);
...
...
@@ -394,9 +349,6 @@ void exp64f(const double *src, double *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_64f_A50
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
exp
(
src
[
i
]);
...
...
@@ -407,9 +359,6 @@ void log32f(const float *src, float *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log32f
,
cv_hal_log32f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_32f_A21
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
log
(
src
[
i
]);
...
...
@@ -419,9 +368,6 @@ void log64f(const double *src, double *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log64f
,
cv_hal_log64f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_64f_A50
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
log
(
src
[
i
]);
...
...
@@ -534,9 +480,6 @@ void exp32f( const float *_x, float *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_32f_A21
,
_x
,
y
,
n
)
>=
0
);
static
const
float
A4
=
(
float
)(
1.000000000000002438532970795181890933776
/
EXPPOLY_32F_A0
),
A3
=
(
float
)(
.6931471805521448196800669615864773144641
/
EXPPOLY_32F_A0
),
...
...
@@ -569,7 +512,6 @@ void exp32f( const float *_x, float *y, int n )
for
(
;
i
<=
n
-
8
;
i
+=
8
)
{
__m256
xf
;
__m128i
xi0
,
xi1
;
__m256d
xd0
=
_mm256_cvtps_pd
(
_mm_min_ps
(
_mm_max_ps
(
_mm_loadu_ps
(
&
x
[
i
].
f
),
minval4
),
maxval4
));
...
...
@@ -586,8 +528,7 @@ void exp32f( const float *_x, float *y, int n )
// gcc does not support _mm256_set_m128
//xf = _mm256_set_m128(_mm256_cvtpd_ps(xd1), _mm256_cvtpd_ps(xd0));
xf
=
_mm256_insertf128_ps
(
xf
,
_mm256_cvtpd_ps
(
xd0
),
0
);
xf
=
_mm256_insertf128_ps
(
xf
,
_mm256_cvtpd_ps
(
xd1
),
1
);
__m256
xf
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
_mm256_cvtpd_ps
(
xd0
)),
_mm256_cvtpd_ps
(
xd1
),
1
);
xf
=
_mm256_mul_ps
(
xf
,
postscale8
);
...
...
@@ -606,14 +547,10 @@ void exp32f( const float *_x, float *y, int n )
// gcc does not support _mm256_set_m128
//__m256 yf = _mm256_set_m128(_mm256_cvtpd_ps(yd1), _mm256_cvtpd_ps(yd0));
__m256
yf
;
yf
=
_mm256_insertf128_ps
(
yf
,
_mm256_cvtpd_ps
(
yd0
),
0
);
yf
=
_mm256_insertf128_ps
(
yf
,
_mm256_cvtpd_ps
(
yd1
),
1
);
__m256
yf
=
_mm256_insertf128_ps
(
_mm256_castps128_ps256
(
_mm256_cvtpd_ps
(
yd0
)),
_mm256_cvtpd_ps
(
yd1
),
1
);
//_mm256_set_m128i(xi1, xi0)
__m256i
temp
;
temp
=
_mm256_inserti128_si256
(
temp
,
xi0
,
0
);
temp
=
_mm256_inserti128_si256
(
temp
,
xi1
,
1
);
__m256i
temp
=
(
__m256i
)
_mm256_insertf128_ps
(
_mm256_castps128_ps256
((
__m128
)
xi0
),
(
__m128
)
xi1
,
1
);
yf
=
_mm256_mul_ps
(
yf
,
_mm256_castsi256_ps
(
_mm256_slli_epi32
(
temp
,
23
)));
...
...
@@ -827,9 +764,6 @@ void exp64f( const double *_x, double *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_64f_A50
,
_x
,
y
,
n
)
>=
0
);
static
const
double
A5
=
.99999999999999999998285227504999
/
EXPPOLY_32F_A0
,
A4
=
.69314718055994546743029643825322
/
EXPPOLY_32F_A0
,
...
...
@@ -1276,9 +1210,6 @@ void log32f( const float *_x, float *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log32f
,
cv_hal_log32f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_32f_A21
,
_x
,
y
,
n
)
>=
0
);
static
const
float
shift
[]
=
{
0
,
-
1.
f
/
512
};
static
const
float
A0
=
0.3333333333333333333333333
f
,
...
...
@@ -1425,9 +1356,6 @@ void log64f( const double *x, double *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log64f
,
cv_hal_log64f
,
x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_64f_A50
,
x
,
y
,
n
)
>=
0
);
static
const
double
shift
[]
=
{
0
,
-
1.
/
512
};
static
const
double
A7
=
1.0
,
...
...
@@ -1613,64 +1541,13 @@ void log64f( const double *x, double *y, int n )
#endif // issue 7795
//=============================================================================
// for compatibility with 3.0
void
exp
(
const
float
*
src
,
float
*
dst
,
int
n
)
{
exp32f
(
src
,
dst
,
n
);
}
void
exp
(
const
double
*
src
,
double
*
dst
,
int
n
)
float
fastAtan2
(
float
y
,
float
x
)
{
exp64f
(
src
,
dst
,
n
);
}
void
log
(
const
float
*
src
,
float
*
dst
,
int
n
)
{
log32f
(
src
,
dst
,
n
);
}
void
log
(
const
double
*
src
,
double
*
dst
,
int
n
)
{
log64f
(
src
,
dst
,
n
);
}
void
magnitude
(
const
float
*
x
,
const
float
*
y
,
float
*
dst
,
int
n
)
{
magnitude32f
(
x
,
y
,
dst
,
n
);
}
void
magnitude
(
const
double
*
x
,
const
double
*
y
,
double
*
dst
,
int
n
)
{
magnitude64f
(
x
,
y
,
dst
,
n
);
}
void
sqrt
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
sqrt32f
(
src
,
dst
,
len
);
}
void
sqrt
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
sqrt64f
(
src
,
dst
,
len
);
}
void
invSqrt
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
invSqrt32f
(
src
,
dst
,
len
);
}
void
invSqrt
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
invSqrt64f
(
src
,
dst
,
len
);
return
atanImpl
<
float
>
(
y
,
x
);
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
}
// cv::hal::
}
// cv::
CV_CPU_OPTIMIZATION_NAMESPACE_END
float
cv
::
fastAtan2
(
float
y
,
float
x
)
{
return
atanImpl
<
float
>
(
y
,
x
);
}
}}
// namespace cv::hal
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录