Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
自由之枫~
opencv
提交
83ce1de8
O
opencv
项目概览
自由之枫~
/
opencv
与 Fork 源项目一致
Fork自
OpenCV / opencv
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
83ce1de8
编写于
1月 26, 2022
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #21506 from alalek:core_fp_denormals
上级
906f5f7e
b1d484f8
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
256 additions
and
0 deletions
+256
-0
modules/core/include/opencv2/core/utils/fp_control.private.hpp
...es/core/include/opencv2/core/utils/fp_control.private.hpp
+29
-0
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
+69
-0
modules/core/src/parallel.cpp
modules/core/src/parallel.cpp
+14
-0
modules/core/src/system.cpp
modules/core/src/system.cpp
+79
-0
modules/core/test/test_misc.cpp
modules/core/test/test_misc.cpp
+65
-0
未找到文件。
modules/core/include/opencv2/core/utils/fp_control.private.hpp
0 → 100644
浏览文件 @
83ce1de8
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
#define OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
#include "fp_control_utils.hpp"
// Select the implementation of the FP denormals hint for the current build.
// After this section both OPENCV_IMPL_FP_HINTS and OPENCV_IMPL_FP_HINTS_X86
// are always defined, each as either 1 or 0.
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT == 0
// disabled: the hint is not supported for this target, so nothing is selected
// here and both macros receive their default value 0 below
#elif defined(OPENCV_IMPL_FP_HINTS)
// custom: OPENCV_IMPL_FP_HINTS was already defined externally, keep that value
#elif defined(OPENCV_IMPL_FP_HINTS_X86)
// custom: OPENCV_IMPL_FP_HINTS_X86 was already defined externally, keep that value
// (OPENCV_IMPL_FP_HINTS itself still defaults to 0 below in this case)
#elif defined(__SSE__) || defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
// SSE is enabled at compile time: pull in the SSE intrinsics header and
// enable the x86 implementation
#include <xmmintrin.h>
#define OPENCV_IMPL_FP_HINTS_X86 1
#define OPENCV_IMPL_FP_HINTS 1
#endif
// Defaults: whatever was not selected above is explicitly switched off, so the
// macros can be tested with plain #if
#ifndef OPENCV_IMPL_FP_HINTS
#define OPENCV_IMPL_FP_HINTS 0
#endif
#ifndef OPENCV_IMPL_FP_HINTS_X86
#define OPENCV_IMPL_FP_HINTS_X86 0
#endif
#endif // OPENCV_CORE_FP_CONTROL_UTILS_PRIVATE_HPP
modules/core/include/opencv2/core/utils/fp_control_utils.hpp
0 → 100644
浏览文件 @
83ce1de8
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#ifndef OPENCV_CORE_FP_CONTROL_UTILS_HPP
#define OPENCV_CORE_FP_CONTROL_UTILS_HPP
namespace
cv
{
namespace
details
{
/**
 * @brief Opaque storage for a saved floating-point denormals mode.
 *
 * The contents of @c reserved are written by setFPDenormalsIgnoreHint() /
 * saveFPDenormalsState() and read back by restoreFPDenormalsState(); which
 * entries are used is an implementation detail of those functions.
 */
struct FPDenormalsModeState
{
    uint32_t reserved[16];  // 16 x 32-bit words = 64 bytes
};  // FPDenormalsModeState
/**
 * @brief Turns the "ignore denormals" floating-point hint on or off.
 *
 * @param ignore  true to request that denormals are ignored, false to clear the hint
 * @param state   receives the mode that was active before the call, suitable for
 *                restoreFPDenormalsState(); left untouched when the build has no
 *                implementation of the hint
 */
CV_EXPORTS void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state);

/**
 * @brief Captures the current denormals mode without changing it.
 *
 * @param state  receives the current mode
 * @return number of leading @c state.reserved entries that were filled in
 *         (2 in the x86 implementation); 0 when the build has no implementation
 *         and @p state was left untouched
 */
CV_EXPORTS int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state);

/**
 * @brief Re-applies a denormals mode captured earlier.
 *
 * @param state  mode previously filled in by setFPDenormalsIgnoreHint() or
 *               saveFPDenormalsState()
 * @return true when the mode was applied, false when the build has no implementation
 */
CV_EXPORTS bool restoreFPDenormalsState(const FPDenormalsModeState& state);
/**
 * @brief Scope object that changes the FP denormals mode on construction and
 *        puts the previous mode back on destruction.
 */
class FPDenormalsIgnoreHintScope
{
public:
    /// Applies the "ignore denormals" hint (or clears it when @p ignore is false)
    /// and remembers the mode that was active before.
    explicit FPDenormalsIgnoreHintScope(bool ignore = true)
    {
        details::setFPDenormalsIgnoreHint(ignore, saved_state);
    }

    /// Remembers the currently active mode, then switches to the mode stored in @p state.
    explicit FPDenormalsIgnoreHintScope(const FPDenormalsModeState& state)
    {
        details::saveFPDenormalsState(saved_state);
        details::restoreFPDenormalsState(state);
    }

    /// Switches back to the mode remembered by the constructor.
    ~FPDenormalsIgnoreHintScope()
    {
        details::restoreFPDenormalsState(saved_state);
    }

protected:
    FPDenormalsModeState saved_state;  ///< mode captured at construction time
};  // FPDenormalsIgnoreHintScope
/**
 * @brief Drop-in replacement for FPDenormalsIgnoreHintScope that does nothing.
 *
 * Offers the same constructor arguments so calling code can be written once;
 * no FP mode is read or modified.
 *
 * NOTE(review): unlike FPDenormalsIgnoreHintScope, these constructors are not
 * declared explicit.
 */
class FPDenormalsIgnoreHintScopeNOOP
{
public:
    /// Accepts and discards the hint flag.
    FPDenormalsIgnoreHintScopeNOOP(bool ignore = true)
    {
        CV_UNUSED(ignore);
    }

    /// Accepts and discards a previously saved mode.
    FPDenormalsIgnoreHintScopeNOOP(const FPDenormalsModeState& state)
    {
        CV_UNUSED(state);
    }

    /// User-provided empty destructor: nothing to restore.
    ~FPDenormalsIgnoreHintScopeNOOP()
    {
    }
};  // FPDenormalsIgnoreHintScopeNOOP
}
// namespace details
// Should depend on target compilation architecture only
// Note: previously added archs should NOT be removed to preserve ABI compatibility
// Public alias selection: cv::FPDenormalsIgnoreHintScope refers to the working
// scope class on x86 targets and to the no-op class on every other target;
// OPENCV_SUPPORTS_FP_DENORMALS_HINT is defined as 1 or 0 to match.
#if defined(OPENCV_SUPPORTS_FP_DENORMALS_HINT)
// preserve configuration overloading through ports
// NOTE(review): no FPDenormalsIgnoreHintScope typedef is emitted in this branch -
// presumably whoever pre-defines the macro also provides the alias; confirm.
#elif defined(__i386__) || defined(__x86_64__) || defined(_M_X64) || defined(_X86_)
// x86 / x86-64 target: expose the real implementation
typedef
details
::
FPDenormalsIgnoreHintScope
FPDenormalsIgnoreHintScope
;
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 1
#else
// any other target: expose the no-op variant
#define OPENCV_SUPPORTS_FP_DENORMALS_HINT 0
typedef
details
::
FPDenormalsIgnoreHintScopeNOOP
FPDenormalsIgnoreHintScope
;
#endif
}
// namespace cv
#endif // OPENCV_CORE_FP_CONTROL_UTILS_HPP
modules/core/src/parallel.cpp
浏览文件 @
83ce1de8
...
...
@@ -142,6 +142,9 @@
#include "opencv2/core/detail/exception_ptr.hpp" // CV__EXCEPTION_PTR = 1 if std::exception_ptr is available
#include <opencv2/core/utils/fp_control_utils.hpp>
#include <opencv2/core/utils/fp_control.private.hpp>
using
namespace
cv
;
namespace
cv
{
...
...
@@ -191,6 +194,9 @@ namespace {
// propagate main thread state
rng
=
cv
::
theRNG
();
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
details
::
saveFPDenormalsState
(
fp_denormals_base_state
);
#endif
#ifdef OPENCV_TRACE
traceRootRegion
=
CV_TRACE_NS
::
details
::
getCurrentRegion
();
...
...
@@ -271,6 +277,11 @@ namespace {
}
}
}
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
details
::
FPDenormalsModeState
fp_denormals_base_state
;
#endif
private:
ParallelLoopBodyWrapperContext
(
const
ParallelLoopBodyWrapperContext
&
);
// disabled
ParallelLoopBodyWrapperContext
&
operator
=
(
const
ParallelLoopBodyWrapperContext
&
);
// disabled
...
...
@@ -307,6 +318,9 @@ namespace {
// propagate main thread state
cv
::
theRNG
()
=
ctx
.
rng
;
#if OPENCV_SUPPORTS_FP_DENORMALS_HINT && OPENCV_IMPL_FP_HINTS
FPDenormalsIgnoreHintScope
fp_denormals_scope
(
ctx
.
fp_denormals_base_state
);
#endif
cv
::
Range
r
;
cv
::
Range
wholeRange
=
ctx
.
wholeRange
;
...
...
modules/core/src/system.cpp
浏览文件 @
83ce1de8
...
...
@@ -53,6 +53,9 @@
#include <opencv2/core/utils/tls.hpp>
#include <opencv2/core/utils/instrumentation.hpp>
#include <opencv2/core/utils/fp_control_utils.hpp>
#include <opencv2/core/utils/fp_control.private.hpp>
#ifndef OPENCV_WITH_THREAD_SANITIZER
#if defined(__clang__) && defined(__has_feature)
#if __has_feature(thread_sanitizer)
...
...
@@ -2733,6 +2736,82 @@ void setUseIPP_NE(bool flag)
}
// namespace ipp
namespace
details
{
// Fallback definitions for the x86 implementation: supply the denormals-are-zero
// constants (both 0x0040) when the included intrinsics headers did not define them.
// They are combined with the _MM_FLUSH_ZERO_* constants in the functions below.
#if OPENCV_IMPL_FP_HINTS_X86
#ifndef _MM_DENORMALS_ZERO_ON // requires pmmintrin.h (SSE3)
#define _MM_DENORMALS_ZERO_ON 0x0040
#endif
#ifndef _MM_DENORMALS_ZERO_MASK // requires pmmintrin.h (SSE3)
#define _MM_DENORMALS_ZERO_MASK 0x0040
#endif
#endif
// Sets or clears the "ignore denormals" bits in the SSE control/status register
// and reports the previous values of those bits through `state`.
//
// state.reserved[0] receives the mask of the bits this call manages and
// state.reserved[1] the values those bits had before the call.
// Without the x86 implementation the function does nothing and leaves `state` untouched.
void setFPDenormalsIgnoreHint(bool ignore, CV_OUT FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    // flush-to-zero is always managed
    unsigned managed_bits = _MM_FLUSH_ZERO_MASK;
    unsigned requested_bits = 0;
    if (ignore)
        requested_bits |= _MM_FLUSH_ZERO_ON;

    // denormals-are-zero is managed only when SSE3 is reported as enabled
    if (featuresEnabled.have[CPU_SSE3])
    {
        managed_bits |= _MM_DENORMALS_ZERO_MASK;
        if (ignore)
            requested_bits |= _MM_DENORMALS_ZERO_ON;
    }

    const unsigned csr_before = _mm_getcsr();
    const unsigned csr_after = (csr_before & ~managed_bits) | requested_bits;
    CV_LOG_DEBUG(NULL, "core: update FP mxcsr flags = " << cv::format("0x%08x", csr_after));

    // remember what has to be put back on restore
    state.reserved[0] = static_cast<uint32_t>(managed_bits);
    state.reserved[1] = static_cast<uint32_t>(csr_before & managed_bits);

    _mm_setcsr(csr_after);
#else
    CV_UNUSED(ignore);
    CV_UNUSED(state);
#endif
}
// Captures the current values of the denormals-related bits of the SSE
// control/status register without modifying the register.
//
// state.reserved[0] receives the mask of the tracked bits, state.reserved[1]
// their current values. Returns 2 in that case; without the x86 implementation
// `state` is left untouched and 0 is returned.
int saveFPDenormalsState(CV_OUT FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    // flush-to-zero is always tracked, denormals-are-zero only when SSE3 is reported as enabled
    const unsigned tracked_bits = featuresEnabled.have[CPU_SSE3]
            ? (_MM_FLUSH_ZERO_MASK | _MM_DENORMALS_ZERO_MASK)
            : _MM_FLUSH_ZERO_MASK;

    const unsigned csr_now = _mm_getcsr();

    state.reserved[0] = static_cast<uint32_t>(tracked_bits);
    state.reserved[1] = static_cast<uint32_t>(csr_now & tracked_bits);
    return 2;
#else
    CV_UNUSED(state);
    return 0;
#endif
}
// Writes previously captured denormals-related bits back into the SSE
// control/status register; all other bits of the register keep their current value.
//
// Expects state.reserved[0] to hold the bit mask and state.reserved[1] the bit
// values to apply. Returns true after updating the register, or false when the
// x86 implementation is not available (nothing is done in that case).
bool restoreFPDenormalsState(const FPDenormalsModeState& state)
{
#if OPENCV_IMPL_FP_HINTS_X86
    const unsigned mask = static_cast<unsigned>(state.reserved[0]);
    // a zero mask means the state was never filled in by a save/set call
    CV_DbgAssert(mask != 0);

    const unsigned value = static_cast<unsigned>(state.reserved[1]);
    // the stored value must not carry bits outside of the stored mask
    CV_DbgCheck((int)value, value == (value & mask), "invalid SSE FP state");

    const unsigned csr_before = _mm_getcsr();
    const unsigned csr_after = (csr_before & ~mask) | value;
    CV_LOG_DEBUG(NULL, "core: restore FP mxcsr flags = " << cv::format("0x%08x", csr_after));
    _mm_setcsr(csr_after);
    return true;
#else
    CV_UNUSED(state);
    return false;
#endif
}
}
// namespace details
}
// namespace cv
#ifdef HAVE_TEGRA_OPTIMIZATION
...
...
modules/core/test/test_misc.cpp
浏览文件 @
83ce1de8
...
...
@@ -3,6 +3,15 @@
// of this distribution and at http://opencv.org/license.html.
#include "test_precomp.hpp"
#include "opencv2/core/utils/logger.hpp"
#include <opencv2/core/utils/fp_control_utils.hpp>
#ifdef CV_CXX11
#include <chrono>
#include <thread>
#endif
namespace
opencv_test
{
namespace
{
TEST
(
Core_OutputArrayCreate
,
_1997
)
...
...
@@ -242,6 +251,62 @@ TEST(Core_Parallel, propagate_exceptions)
},
cv
::
Exception
);
}
// Parallel loop body that checks whether the FP denormals state seen while
// executing a range matches the state captured when the body was constructed.
//
// NOTE(review): isOK is written from operator() without synchronization;
// presumably operator() may run on several threads at once - every writer
// stores the same value (false).
class FPDenormalsHintCheckerParallelLoopBody : public cv::ParallelLoopBody
{
public:
    // Captures the reference state in the constructing thread.
    FPDenormalsHintCheckerParallelLoopBody()
        : isOK(true)
    {
        state_values_to_check = cv::details::saveFPDenormalsState(base_state);
    }

    ~FPDenormalsHintCheckerParallelLoopBody() {}

    // Compares the state visible here against the reference state. On a mismatch
    // the error is logged, isOK is cleared and the reference state is re-applied.
    void operator()(const cv::Range& r) const
    {
        CV_UNUSED(r);

        cv::details::FPDenormalsModeState thread_state;
        const int captured = cv::details::saveFPDenormalsState(thread_state);
        if (captured != 0)
        {
            for (int idx = 0; idx < state_values_to_check; ++idx)
            {
                if (base_state.reserved[idx] == thread_state.reserved[idx])
                    continue;

                CV_LOG_ERROR(NULL, cv::format("FP state[%d] mismatch: base=0x%08x thread=0x%08x",
                                              idx, base_state.reserved[idx], thread_state.reserved[idx]));
                isOK = false;
                cv::details::restoreFPDenormalsState(base_state);
            }
        }
        // otherwise: FP state is not supported, no checks

#ifdef CV_CXX11
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
#endif
    }

    cv::details::FPDenormalsModeState base_state;  // reference state captured in the constructor
    int state_values_to_check;                     // value returned by saveFPDenormalsState() for base_state
    mutable bool isOK;                             // cleared by operator() on the first detected mismatch
};
// Runs four cases, alternating the "ignore denormals" hint off/on, and in each
// case lets a parallel job compare the FP state it observes with the state
// captured when the job was created.
TEST(Core_Parallel, propagate_fp_denormals_ignore_hint)
{
    // three range items per reported thread, at least three in total
    const int rangeSize = std::max(1, cv::getNumThreads()) * 3;

    for (int caseId = 0; caseId < 4; ++caseId)
    {
        // odd cases enable the hint, even cases disable it
        const bool ignoreDenormals = (caseId & 1) != 0;
        SCOPED_TRACE(cv::format("Case=%d: FP denormals ignore hint: %s\n", caseId, ignoreDenormals ? "enable" : "disable"));

        FPDenormalsIgnoreHintScope fp_denormals_scope(ignoreDenormals);

        // created after the scope object, so it captures the mode selected for this case
        FPDenormalsHintCheckerParallelLoopBody job;
        ASSERT_NO_THROW({
            parallel_for_(cv::Range(0, rangeSize), job);
        });
        EXPECT_TRUE(job.isOK);
    }
}
TEST
(
Core_Version
,
consistency
)
{
// this test verifies that OpenCV version loaded in runtime
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录