Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
297ba853
O
Opencv
项目概览
Greenplum
/
Opencv
10 个月 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
297ba853
编写于
4月 03, 2017
作者:
A
Alexander Alekhin
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #8441 from alalek:dispatch_mathfuncs_core
上级
36e80175
1e6ce1d2
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
617 addition
and
207 deletion
+617
-207
cmake/OpenCVCompilerOptimizations.cmake
cmake/OpenCVCompilerOptimizations.cmake
+92
-11
cmake/OpenCVModule.cmake
cmake/OpenCVModule.cmake
+11
-2
cmake/OpenCVPCHSupport.cmake
cmake/OpenCVPCHSupport.cmake
+6
-4
modules/core/CMakeLists.txt
modules/core/CMakeLists.txt
+3
-0
modules/core/include/opencv2/core/cv_cpu_dispatch.h
modules/core/include/opencv2/core/cv_cpu_dispatch.h
+27
-0
modules/core/include/opencv2/core/cv_cpu_helper.h
modules/core/include/opencv2/core/cv_cpu_helper.h
+51
-36
modules/core/include/opencv2/core/cvdef.h
modules/core/include/opencv2/core/cvdef.h
+11
-0
modules/core/include/opencv2/core/hal/intrin.hpp
modules/core/include/opencv2/core/hal/intrin.hpp
+30
-0
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
+7
-1
modules/core/include/opencv2/core/hal/intrin_neon.hpp
modules/core/include/opencv2/core/hal/intrin_neon.hpp
+5
-1
modules/core/include/opencv2/core/hal/intrin_sse.hpp
modules/core/include/opencv2/core/hal/intrin_sse.hpp
+5
-1
modules/core/include/opencv2/core/private.hpp
modules/core/include/opencv2/core/private.hpp
+8
-2
modules/core/include/opencv2/core/private/cv_cpu_include_simd_declarations.hpp
...opencv2/core/private/cv_cpu_include_simd_declarations.hpp
+30
-0
modules/core/src/mathfuncs_core.dispatch.cpp
modules/core/src/mathfuncs_core.dispatch.cpp
+215
-0
modules/core/src/mathfuncs_core.simd.hpp
modules/core/src/mathfuncs_core.simd.hpp
+115
-149
modules/world/CMakeLists.txt
modules/world/CMakeLists.txt
+1
-0
未找到文件。
cmake/OpenCVCompilerOptimizations.cmake
浏览文件 @
297ba853
...
...
@@ -275,6 +275,11 @@ set(CPU_BASELINE_FLAGS "")
# Result lists start out empty.
set(CPU_BASELINE_FINAL "")
set(CPU_DISPATCH_FINAL "")

# With optimizations switched off nothing may be dispatched: clear both the
# requested and the required dispatch lists.
if(CV_DISABLE_OPTIMIZATION)
  set(CPU_DISPATCH "")
  set(CPU_DISPATCH_REQUIRE "")
endif()
macro
(
ocv_check_compiler_optimization OPT
)
if
(
NOT DEFINED CPU_
${
OPT
}
_SUPPORTED
)
if
((
DEFINED CPU_
${
OPT
}
_FLAGS_ON AND NOT
"x
${
CPU_
${
OPT
}
_FLAGS_ON
}
"
STREQUAL
"x"
)
OR CPU_
${
OPT
}
_TEST_FILE
)
...
...
@@ -319,7 +324,7 @@ macro(ocv_check_compiler_optimization OPT)
endmacro
()
foreach
(
OPT
${
CPU_KNOWN_OPTIMIZATIONS
}
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT 0 CACHE INTERNAL
""
FORCE
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT 0 CACHE INTERNAL
""
)
if
(
NOT DEFINED CPU_
${
OPT
}
_FORCE
)
set
(
CPU_
${
OPT
}
_FORCE
"
${
CPU_
${
OPT
}
_IMPLIES
}
"
)
endif
()
...
...
@@ -515,15 +520,27 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
endforeach
()
foreach
(
fname
${${
SOURCES_VAR_NAME
}}
)
string
(
TOLOWER
"
${
fname
}
"
fname_LOWER
)
if
(
fname_LOWER MATCHES
"[.]opt_.*[.]cpp$"
)
if
(
CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS
)
message
(
STATUS
"Excluding from source files list:
${
fname
}
"
)
if
(
fname_LOWER MATCHES
"
\\
.(.*)
\\
.cpp$"
)
string
(
TOUPPER
"
${
CMAKE_MATCH_1
}
"
OPT_
)
if
(
OPT_ MATCHES
"(CUDA.*|DISPATCH.*|OCL)"
)
# don't touch files like filename.cuda.cpp
list
(
APPEND __result
"
${
fname
}
"
)
#continue()
elseif
(
CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS
)
message
(
STATUS
"Excluding from source files list (optimization is disabled):
${
fname
}
"
)
#continue()
else
()
get_source_file_property
(
__definitions
"
${
fname
}
"
COMPILE_DEFINITIONS
)
if
(
__definitions
)
list
(
APPEND __definitions
"CV_CPU_DISPATCH_MODE=
${
OPT_
}
"
)
else
()
set
(
__definitions
"CV_CPU_DISPATCH_MODE=
${
OPT_
}
"
)
endif
()
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_DEFINITIONS
"
${
__definitions
}
"
)
set
(
__opt_found 0
)
foreach
(
OPT
${
CPU_BASELINE_FINAL
}
)
string
(
TOLOWER
"
${
OPT
}
"
OPT_LOWER
)
if
(
fname_LOWER MATCHES
"
_
${
OPT_LOWER
}
[.]
cpp$"
)
if
(
fname_LOWER MATCHES
"
\\
.
${
OPT_LOWER
}
\\
.
cpp$"
)
#message("${fname} BASELINE-${OPT}")
set
(
__opt_found 1
)
list
(
APPEND __result
"
${
fname
}
"
)
...
...
@@ -533,11 +550,11 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
foreach
(
OPT
${
CPU_DISPATCH_FINAL
}
)
foreach
(
OPT2
${
CPU_DISPATCH_
${
OPT
}
_FORCED
}
)
string
(
TOLOWER
"
${
OPT2
}
"
OPT2_LOWER
)
if
(
fname_LOWER MATCHES
"
_
${
OPT2_LOWER
}
[.]
cpp$"
)
if
(
fname_LOWER MATCHES
"
\\
.
${
OPT2_LOWER
}
\\
.
cpp$"
)
list
(
APPEND __result_
${
OPT
}
"
${
fname
}
"
)
math
(
EXPR CPU_
${
OPT
}
_USAGE_COUNT
"
${
CPU_
${
OPT
}
_USAGE_COUNT
}
+1"
)
set
(
CPU_
${
OPT
}
_USAGE_COUNT
"
${
CPU_
${
OPT
}
_USAGE_COUNT
}
"
CACHE INTERNAL
""
FORCE
)
#message("${fname} ${OPT}")
#message("
(${CPU_${OPT}_USAGE_COUNT})
${fname} ${OPT}")
#message(" ${CPU_DISPATCH_${OPT}_INCLUDED}")
#message(" ${CPU_DISPATCH_DEFINITIONS_${OPT}}")
#message(" ${CPU_DISPATCH_FLAGS_${OPT}}")
...
...
@@ -573,7 +590,13 @@ macro(ocv_compiler_optimization_process_sources SOURCES_VAR_NAME LIBS_VAR_NAME T
list
(
APPEND __result
"$<TARGET_OBJECTS:
${
TARGET_BASE_NAME
}
_
${
OPT
}
>"
)
else
()
foreach
(
fname
${
__result_
${
OPT
}}
)
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_DEFINITIONS
"
${
CPU_DISPATCH_DEFINITIONS_
${
OPT
}}
"
)
get_source_file_property
(
__definitions
"
${
fname
}
"
COMPILE_DEFINITIONS
)
if
(
__definitions
)
list
(
APPEND __definitions
"
${
CPU_DISPATCH_DEFINITIONS_
${
OPT
}}
"
)
else
()
set
(
__definitions
"
${
CPU_DISPATCH_DEFINITIONS_
${
OPT
}}
"
)
endif
()
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_DEFINITIONS
"
${
__definitions
}
"
)
set_source_files_properties
(
"
${
fname
}
"
PROPERTIES COMPILE_FLAGS
"
${
CPU_DISPATCH_FLAGS_
${
OPT
}}
"
)
endforeach
()
list
(
APPEND __result
${
__result_
${
OPT
}}
)
...
...
@@ -620,18 +643,25 @@ macro(ocv_compiler_optimization_fill_cpu_config)
set
(
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
"
${
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
}
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_
${
OPT
}
# define CV_CPU_HAS_SUPPORT_
${
OPT
}
1
# define CV_CPU_CALL_
${
OPT
}
(
...) return __VA_ARGS__
# define CV_CPU_CALL_
${
OPT
}
(
fn, args) return (opt_
${
OPT
}
::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_
${
OPT
}
# define CV_CPU_HAS_SUPPORT_
${
OPT
}
(cv::checkHardwareSupport(CV_CPU_
${
OPT
}
))
# define CV_CPU_CALL_
${
OPT
}
(
...) if (CV_CPU_HAS_SUPPORT_
${
OPT
}
) return __VA_ARGS__
# define CV_CPU_CALL_
${
OPT
}
(
fn, args) if (CV_CPU_HAS_SUPPORT_
${
OPT
}
) return (opt_
${
OPT
}
::fn args)
#else
# define CV_CPU_HAS_SUPPORT_
${
OPT
}
0
# define CV_CPU_CALL_
${
OPT
}
(
...
)
# define CV_CPU_CALL_
${
OPT
}
(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_
${
OPT
}
(fn, args, mode, ...) CV_CPU_CALL_
${
OPT
}
(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
"
)
endif
()
endforeach
()
set
(
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
"
${
OPENCV_CPU_CONTROL_DEFINITIONS_CONFIGMAKE
}
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args) /* last in sequence */
"
)
set
(
__file
"
${
CMAKE_SOURCE_DIR
}
/modules/core/include/opencv2/core/cv_cpu_helper.h"
)
if
(
EXISTS
"
${
__file
}
"
)
file
(
READ
"
${
__file
}
"
__content
)
...
...
@@ -644,6 +674,57 @@ macro(ocv_compiler_optimization_fill_cpu_config)
endif
()
endmacro
()
# ocv_add_dispatched_file(<filename> [<OPT>...])
#
# Generates, inside CMAKE_CURRENT_BINARY_DIR, the glue files for a CPU-dispatched
# implementation header "<filename>.simd.hpp":
#
#   * "<filename>.<opt>.cpp" for every requested optimization <OPT> (lower-cased
#     in the file name). Each file only includes "precomp.hpp" and
#     "<filename>.simd.hpp". Its path is appended to
#     OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED.
#   * "<filename>.simd_declarations.hpp", which defines CV_CPU_SIMD_FILENAME,
#     includes cv_cpu_include_simd_declarations.hpp once per <OPT>, and defines
#     CV_CPU_DISPATCH_MODES_ALL. Every <OPT> is prepended to that list, so the
#     last optimization passed comes first and BASELINE is always last
#     (e.g. "SSE2 AVX AVX2" -> "AVX2, AVX, SSE2, BASELINE").
#
# Nothing is done while OPENCV_INITIAL_PASS is true. If CV_DISABLE_OPTIMIZATION
# is set or CV_ENABLE_INTRINSICS is not, the optimization list is ignored and
# only the declarations header (BASELINE only) is produced.
#
# Files are rewritten only when their content changes, so unchanged
# configurations do not touch timestamps and trigger rebuilds.
#
# This is a macro: `the_module` is read from the caller, and the `__*` helper
# variables are visible to the caller afterwards.
macro(ocv_add_dispatched_file filename)
  if(NOT OPENCV_INITIAL_PASS)
    set(__codestr "
#include \"precomp.hpp\"
#include \"${filename}.simd.hpp\"
")

    set(__declarations_str "#define CV_CPU_SIMD_FILENAME \"${filename}.simd.hpp\"")
    set(__dispatch_modes "BASELINE")

    set(__optimizations "${ARGN}")
    if(CV_DISABLE_OPTIMIZATION OR NOT CV_ENABLE_INTRINSICS)
      set(__optimizations "")
    endif()

    foreach(OPT ${__optimizations})
      string(TOLOWER "${OPT}" OPT_LOWER)
      set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.${OPT_LOWER}.cpp")

      # Always start from a known value: macros share the caller's scope, so
      # without the reset a stale __content (previous iteration, previous call)
      # could compare equal and a missing file would never be created.
      if(EXISTS "${__file}")
        file(READ "${__file}" __content)
      else()
        set(__content "")
      endif()
      if(NOT __content STREQUAL __codestr)
        file(WRITE "${__file}" "${__codestr}")
      endif()
      list(APPEND OPENCV_MODULE_${the_module}_SOURCES_DISPATCHED "${__file}")

      set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODE ${OPT}
#include \"opencv2/core/private/cv_cpu_include_simd_declarations.hpp\"
")
      # Prepend, keeping BASELINE as the final fallback of the dispatch chain.
      set(__dispatch_modes "${OPT}, ${__dispatch_modes}")
    endforeach()

    set(__declarations_str "${__declarations_str}
#define CV_CPU_DISPATCH_MODES_ALL ${__dispatch_modes}
")

    set(__file "${CMAKE_CURRENT_BINARY_DIR}/${filename}.simd_declarations.hpp")
    # Same stale-value guard as for the per-optimization sources above.
    if(EXISTS "${__file}")
      file(READ "${__file}" __content)
    else()
      set(__content "")
    endif()
    if(NOT __content STREQUAL __declarations_str)
      file(WRITE "${__file}" "${__declarations_str}")
    endif()
  endif()
endmacro()
if
(
CV_DISABLE_OPTIMIZATION OR CV_ICC
)
ocv_update
(
CV_ENABLE_UNROLLED 0
)
else
()
...
...
cmake/OpenCVModule.cmake
浏览文件 @
297ba853
...
...
@@ -314,6 +314,7 @@ macro(ocv_glob_modules)
set
(
OPENCV_INITIAL_PASS OFF
)
if
(
${
BUILD_opencv_world
}
)
foreach
(
m
${
OPENCV_MODULES_BUILD
}
)
set
(
the_module
"
${
m
}
"
)
if
(
"
${
m
}
"
STREQUAL opencv_world
)
add_subdirectory
(
"
${
OPENCV_MODULE_opencv_world_LOCATION
}
"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/world"
)
elseif
(
NOT OPENCV_MODULE_
${
m
}
_IS_PART_OF_WORLD AND NOT
${
m
}
STREQUAL opencv_world
)
...
...
@@ -329,6 +330,7 @@ macro(ocv_glob_modules)
endforeach
()
else
()
foreach
(
m
${
OPENCV_MODULES_BUILD
}
)
set
(
the_module
"
${
m
}
"
)
if
(
m MATCHES
"^opencv_"
)
string
(
REGEX REPLACE
"^opencv_"
""
__shortname
"
${
m
}
"
)
add_subdirectory
(
"
${
OPENCV_MODULE_
${
m
}
_LOCATION
}
"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
__shortname
}
"
)
...
...
@@ -646,11 +648,13 @@ macro(ocv_set_module_sources)
ocv_get_module_external_sources
()
endif
()
if
(
OPENCV_MODULE_
${
the_module
}
_SOURCES_DISPATCHED
)
list
(
APPEND OPENCV_MODULE_
${
the_module
}
_SOURCES
${
OPENCV_MODULE_
${
the_module
}
_SOURCES_DISPATCHED
}
)
endif
()
# use full paths for module to be independent from the module location
ocv_convert_to_full_paths
(
OPENCV_MODULE_
${
the_module
}
_HEADERS
)
ocv_compiler_optimization_process_sources
(
OPENCV_MODULE_
${
the_module
}
_SOURCES OPENCV_MODULE_
${
the_module
}
_DEPS_EXT
${
the_module
}
)
set
(
OPENCV_MODULE_
${
the_module
}
_HEADERS
${
OPENCV_MODULE_
${
the_module
}
_HEADERS
}
CACHE INTERNAL
"List of header files for
${
the_module
}
"
)
set
(
OPENCV_MODULE_
${
the_module
}
_SOURCES
${
OPENCV_MODULE_
${
the_module
}
_SOURCES
}
CACHE INTERNAL
"List of source files for
${
the_module
}
"
)
endmacro
()
...
...
@@ -766,6 +770,11 @@ macro(ocv_create_module)
endmacro
()
macro
(
_ocv_create_module
)
ocv_compiler_optimization_process_sources
(
OPENCV_MODULE_
${
the_module
}
_SOURCES OPENCV_MODULE_
${
the_module
}
_DEPS_EXT
${
the_module
}
)
set
(
OPENCV_MODULE_
${
the_module
}
_HEADERS
${
OPENCV_MODULE_
${
the_module
}
_HEADERS
}
CACHE INTERNAL
"List of header files for
${
the_module
}
"
)
set
(
OPENCV_MODULE_
${
the_module
}
_SOURCES
${
OPENCV_MODULE_
${
the_module
}
_SOURCES
}
CACHE INTERNAL
"List of source files for
${
the_module
}
"
)
# The condition we ought to be testing here is whether ocv_add_precompiled_headers will
# be called at some point in the future. We can't look into the future, though,
# so this will have to do.
...
...
cmake/OpenCVPCHSupport.cmake
浏览文件 @
297ba853
...
...
@@ -288,11 +288,12 @@ MACRO(ADD_PRECOMPILED_HEADER _targetName _input)
foreach
(
src
${
_sources
}
)
if
(
NOT
"
${
src
}
"
MATCHES
"
\\
.mm$"
)
get_source_file_property
(
oldProps
"
${
src
}
"
COMPILE_FLAGS
)
if
(
NOT oldProps
)
get_source_file_property
(
oldProps2
"
${
src
}
"
COMPILE_DEFINITIONS
)
if
(
NOT oldProps AND NOT oldProps2
)
set
(
newProperties
"-include
\"
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
_name
}
\"
"
)
set_source_files_properties
(
"
${
src
}
"
PROPERTIES COMPILE_FLAGS
"
${
newProperties
}
"
)
else
()
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
, file:
${
src
}
"
)
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
defines:
${
oldProps2
}
, file:
${
src
}
"
)
endif
()
endif
()
endforeach
()
...
...
@@ -339,11 +340,12 @@ MACRO(ADD_NATIVE_PRECOMPILED_HEADER _targetName _input)
AND NOT
"
${
src
}
"
MATCHES
"^
\$
"
# CMake generator expressions
)
get_source_file_property
(
oldProps
"
${
src
}
"
COMPILE_FLAGS
)
if
(
NOT oldProps
)
get_source_file_property
(
oldProps2
"
${
src
}
"
COMPILE_DEFINITIONS
)
if
(
NOT oldProps AND NOT oldProps2
)
set
(
newProperties
"/Yu
\"
${
_input
}
\"
/FI
\"
${
_input
}
\"
"
)
set_source_files_properties
(
"
${
src
}
"
PROPERTIES COMPILE_FLAGS
"
${
newProperties
}
"
)
else
()
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
, file:
${
src
}
"
)
ocv_debug_message
(
"Skip PCH, flags:
${
oldProps
}
defines:
${
oldProps2
}
, file:
${
src
}
"
)
endif
()
endif
()
endforeach
()
...
...
modules/core/CMakeLists.txt
浏览文件 @
297ba853
set
(
the_description
"The Core Functionality"
)
ocv_add_dispatched_file
(
mathfuncs_core SSE2 AVX AVX2
)
ocv_add_module
(
core
"
${
OPENCV_HAL_LINKER_LIBS
}
"
OPTIONAL opencv_cudev
...
...
modules/core/include/opencv2/core/cv_cpu_dispatch.h
浏览文件 @
297ba853
...
...
@@ -7,6 +7,23 @@
#include "cv_cpu_config.h"
#include "cv_cpu_helper.h"
// Namespace that holds the implementation compiled in the current translation unit:
// opt_<MODE> when CV_CPU_DISPATCH_MODE is defined, cpu_baseline otherwise.
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_NAMESPACE __CV_CAT(opt_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_NAMESPACE cpu_baseline
#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace cpu_baseline {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }
#endif
// Terminator of the dispatch chain: expands to nothing.
#define __CV_CPU_DISPATCH_CHAIN_END(fn, args, mode, ...)
/* done */
// Pastes the first mode of the list onto __CV_CPU_DISPATCH_CHAIN_ and forwards the
// remaining modes to that chain macro.
#define __CV_CPU_DISPATCH(fn, args, mode, ...) __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
// Extra indirection so that a mode list passed as a single macro (e.g.
// CV_CPU_DISPATCH_MODES_ALL) is expanded into separate arguments before the paste above.
#define __CV_CPU_DISPATCH_EXPAND(fn, args, ...) __CV_EXPAND(__CV_CPU_DISPATCH(fn, args, __VA_ARGS__))
// Entry point: CV_CPU_DISPATCH(fn, (arguments), mode1, mode2, ...).
// END is appended so the chain always finishes with __CV_CPU_DISPATCH_CHAIN_END.
#define CV_CPU_DISPATCH(fn, args, ...) __CV_CPU_DISPATCH_EXPAND(fn, args, __VA_ARGS__, END) // expand macros
#if defined CV_ENABLE_INTRINSICS \
&& !defined CV_DISABLE_OPTIMIZATION \
&& !defined __CUDACC__
/* do not include SSE/AVX/NEON headers for NVCC compiler */
\
...
...
@@ -76,6 +93,16 @@
#endif // CV_ENABLE_INTRINSICS && !CV_DISABLE_OPTIMIZATION && !__CUDACC__
#if defined(CV_CPU_COMPILE_AVX) && !defined(CV_CPU_BASELINE_COMPILE_AVX)
// Scope guard for translation units compiled with AVX when AVX is not part of
// the baseline: its destructor issues _mm256_zeroupper() when the guard object
// goes out of scope.
struct VZeroUpperGuard
{
#if defined(__GNUC__)
    __attribute__((always_inline))
#endif
    inline ~VZeroUpperGuard()
    {
        _mm256_zeroupper();
    }
};
// Declares a guard object in the enclosing scope (the trailing ';' is part of the macro).
#define __CV_AVX_GUARD VZeroUpperGuard __vzeroupper_guard;
#endif
#endif // __OPENCV_BUILD
...
...
modules/core/include/opencv2/core/cv_cpu_helper.h
浏览文件 @
297ba853
...
...
@@ -2,132 +2,147 @@
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE
# define CV_CPU_HAS_SUPPORT_SSE 1
# define CV_CPU_CALL_SSE(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE(
fn, args) return (opt_SSE::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE
# define CV_CPU_HAS_SUPPORT_SSE (cv::checkHardwareSupport(CV_CPU_SSE))
# define CV_CPU_CALL_SSE(
...) if (CV_CPU_HAS_SUPPORT_SSE) return __VA_ARGS__
# define CV_CPU_CALL_SSE(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE) return (opt_SSE::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE 0
# define CV_CPU_CALL_SSE(
...
)
# define CV_CPU_CALL_SSE(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE(fn, args, mode, ...) CV_CPU_CALL_SSE(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE2
# define CV_CPU_HAS_SUPPORT_SSE2 1
# define CV_CPU_CALL_SSE2(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE2(
fn, args) return (opt_SSE2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE2
# define CV_CPU_HAS_SUPPORT_SSE2 (cv::checkHardwareSupport(CV_CPU_SSE2))
# define CV_CPU_CALL_SSE2(
...) if (CV_CPU_HAS_SUPPORT_SSE2) return __VA_ARGS__
# define CV_CPU_CALL_SSE2(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE2) return (opt_SSE2::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE2 0
# define CV_CPU_CALL_SSE2(
...
)
# define CV_CPU_CALL_SSE2(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE2(fn, args, mode, ...) CV_CPU_CALL_SSE2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE3
# define CV_CPU_HAS_SUPPORT_SSE3 1
# define CV_CPU_CALL_SSE3(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE3(
fn, args) return (opt_SSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE3
# define CV_CPU_HAS_SUPPORT_SSE3 (cv::checkHardwareSupport(CV_CPU_SSE3))
# define CV_CPU_CALL_SSE3(
...) if (CV_CPU_HAS_SUPPORT_SSE3) return __VA_ARGS__
# define CV_CPU_CALL_SSE3(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE3) return (opt_SSE3::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE3 0
# define CV_CPU_CALL_SSE3(
...
)
# define CV_CPU_CALL_SSE3(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE3(fn, args, mode, ...) CV_CPU_CALL_SSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSSE3
# define CV_CPU_HAS_SUPPORT_SSSE3 1
# define CV_CPU_CALL_SSSE3(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSSE3(
fn, args) return (opt_SSSE3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSSE3
# define CV_CPU_HAS_SUPPORT_SSSE3 (cv::checkHardwareSupport(CV_CPU_SSSE3))
# define CV_CPU_CALL_SSSE3(
...) if (CV_CPU_HAS_SUPPORT_SSSE3) return __VA_ARGS__
# define CV_CPU_CALL_SSSE3(
fn, args) if (CV_CPU_HAS_SUPPORT_SSSE3) return (opt_SSSE3::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSSE3 0
# define CV_CPU_CALL_SSSE3(
...
)
# define CV_CPU_CALL_SSSE3(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSSE3(fn, args, mode, ...) CV_CPU_CALL_SSSE3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_1
# define CV_CPU_HAS_SUPPORT_SSE4_1 1
# define CV_CPU_CALL_SSE4_1(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_1(
fn, args) return (opt_SSE4_1::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_1
# define CV_CPU_HAS_SUPPORT_SSE4_1 (cv::checkHardwareSupport(CV_CPU_SSE4_1))
# define CV_CPU_CALL_SSE4_1(
...) if (CV_CPU_HAS_SUPPORT_SSE4_1) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_1(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_1) return (opt_SSE4_1::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE4_1 0
# define CV_CPU_CALL_SSE4_1(
...
)
# define CV_CPU_CALL_SSE4_1(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_1(fn, args, mode, ...) CV_CPU_CALL_SSE4_1(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_SSE4_2
# define CV_CPU_HAS_SUPPORT_SSE4_2 1
# define CV_CPU_CALL_SSE4_2(
...) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_2(
fn, args) return (opt_SSE4_2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_SSE4_2
# define CV_CPU_HAS_SUPPORT_SSE4_2 (cv::checkHardwareSupport(CV_CPU_SSE4_2))
# define CV_CPU_CALL_SSE4_2(
...) if (CV_CPU_HAS_SUPPORT_SSE4_2) return __VA_ARGS__
# define CV_CPU_CALL_SSE4_2(
fn, args) if (CV_CPU_HAS_SUPPORT_SSE4_2) return (opt_SSE4_2::fn args)
#else
# define CV_CPU_HAS_SUPPORT_SSE4_2 0
# define CV_CPU_CALL_SSE4_2(
...
)
# define CV_CPU_CALL_SSE4_2(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_SSE4_2(fn, args, mode, ...) CV_CPU_CALL_SSE4_2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_POPCNT
# define CV_CPU_HAS_SUPPORT_POPCNT 1
# define CV_CPU_CALL_POPCNT(
...) return __VA_ARGS__
# define CV_CPU_CALL_POPCNT(
fn, args) return (opt_POPCNT::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_POPCNT
# define CV_CPU_HAS_SUPPORT_POPCNT (cv::checkHardwareSupport(CV_CPU_POPCNT))
# define CV_CPU_CALL_POPCNT(
...) if (CV_CPU_HAS_SUPPORT_POPCNT) return __VA_ARGS__
# define CV_CPU_CALL_POPCNT(
fn, args) if (CV_CPU_HAS_SUPPORT_POPCNT) return (opt_POPCNT::fn args)
#else
# define CV_CPU_HAS_SUPPORT_POPCNT 0
# define CV_CPU_CALL_POPCNT(
...
)
# define CV_CPU_CALL_POPCNT(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_POPCNT(fn, args, mode, ...) CV_CPU_CALL_POPCNT(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX
# define CV_CPU_HAS_SUPPORT_AVX 1
# define CV_CPU_CALL_AVX(
...) return __VA_ARGS__
# define CV_CPU_CALL_AVX(
fn, args) return (opt_AVX::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX
# define CV_CPU_HAS_SUPPORT_AVX (cv::checkHardwareSupport(CV_CPU_AVX))
# define CV_CPU_CALL_AVX(
...) if (CV_CPU_HAS_SUPPORT_AVX) return __VA_ARGS__
# define CV_CPU_CALL_AVX(
fn, args) if (CV_CPU_HAS_SUPPORT_AVX) return (opt_AVX::fn args)
#else
# define CV_CPU_HAS_SUPPORT_AVX 0
# define CV_CPU_CALL_AVX(
...
)
# define CV_CPU_CALL_AVX(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX(fn, args, mode, ...) CV_CPU_CALL_AVX(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FP16
# define CV_CPU_HAS_SUPPORT_FP16 1
# define CV_CPU_CALL_FP16(
...) return __VA_ARGS__
# define CV_CPU_CALL_FP16(
fn, args) return (opt_FP16::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FP16
# define CV_CPU_HAS_SUPPORT_FP16 (cv::checkHardwareSupport(CV_CPU_FP16))
# define CV_CPU_CALL_FP16(
...) if (CV_CPU_HAS_SUPPORT_FP16) return __VA_ARGS__
# define CV_CPU_CALL_FP16(
fn, args) if (CV_CPU_HAS_SUPPORT_FP16) return (opt_FP16::fn args)
#else
# define CV_CPU_HAS_SUPPORT_FP16 0
# define CV_CPU_CALL_FP16(
...
)
# define CV_CPU_CALL_FP16(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FP16(fn, args, mode, ...) CV_CPU_CALL_FP16(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_AVX2
# define CV_CPU_HAS_SUPPORT_AVX2 1
# define CV_CPU_CALL_AVX2(
...) return __VA_ARGS__
# define CV_CPU_CALL_AVX2(
fn, args) return (opt_AVX2::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_AVX2
# define CV_CPU_HAS_SUPPORT_AVX2 (cv::checkHardwareSupport(CV_CPU_AVX2))
# define CV_CPU_CALL_AVX2(
...) if (CV_CPU_HAS_SUPPORT_AVX2) return __VA_ARGS__
# define CV_CPU_CALL_AVX2(
fn, args) if (CV_CPU_HAS_SUPPORT_AVX2) return (opt_AVX2::fn args)
#else
# define CV_CPU_HAS_SUPPORT_AVX2 0
# define CV_CPU_CALL_AVX2(
...
)
# define CV_CPU_CALL_AVX2(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_AVX2(fn, args, mode, ...) CV_CPU_CALL_AVX2(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_FMA3
# define CV_CPU_HAS_SUPPORT_FMA3 1
# define CV_CPU_CALL_FMA3(
...) return __VA_ARGS__
# define CV_CPU_CALL_FMA3(
fn, args) return (opt_FMA3::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_FMA3
# define CV_CPU_HAS_SUPPORT_FMA3 (cv::checkHardwareSupport(CV_CPU_FMA3))
# define CV_CPU_CALL_FMA3(
...) if (CV_CPU_HAS_SUPPORT_FMA3) return __VA_ARGS__
# define CV_CPU_CALL_FMA3(
fn, args) if (CV_CPU_HAS_SUPPORT_FMA3) return (opt_FMA3::fn args)
#else
# define CV_CPU_HAS_SUPPORT_FMA3 0
# define CV_CPU_CALL_FMA3(
...
)
# define CV_CPU_CALL_FMA3(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_FMA3(fn, args, mode, ...) CV_CPU_CALL_FMA3(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#if !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_COMPILE_NEON
# define CV_CPU_HAS_SUPPORT_NEON 1
# define CV_CPU_CALL_NEON(
...) return __VA_ARGS__
# define CV_CPU_CALL_NEON(
fn, args) return (opt_NEON::fn args)
#elif !defined CV_DISABLE_OPTIMIZATION && defined CV_ENABLE_INTRINSICS && defined CV_CPU_DISPATCH_COMPILE_NEON
# define CV_CPU_HAS_SUPPORT_NEON (cv::checkHardwareSupport(CV_CPU_NEON))
# define CV_CPU_CALL_NEON(
...) if (CV_CPU_HAS_SUPPORT_NEON) return __VA_ARGS__
# define CV_CPU_CALL_NEON(
fn, args) if (CV_CPU_HAS_SUPPORT_NEON) return (opt_NEON::fn args)
#else
# define CV_CPU_HAS_SUPPORT_NEON 0
# define CV_CPU_CALL_NEON(
...
)
# define CV_CPU_CALL_NEON(
fn, args
)
#endif
#define __CV_CPU_DISPATCH_CHAIN_NEON(fn, args, mode, ...) CV_CPU_CALL_NEON(fn, args); __CV_EXPAND(__CV_CPU_DISPATCH_CHAIN_ ## mode(fn, args, __VA_ARGS__))
#define CV_CPU_CALL_BASELINE(fn, args) return (cpu_baseline::fn args)
#define __CV_CPU_DISPATCH_CHAIN_BASELINE(fn, args, mode, ...) CV_CPU_CALL_BASELINE(fn, args)
/* last in sequence */
modules/core/include/opencv2/core/cvdef.h
浏览文件 @
297ba853
...
...
@@ -52,6 +52,17 @@
#include "cvconfig.h"
#endif
// Identity macro: wrapping an expression in __CV_EXPAND forces one more
// macro-expansion pass over it.
#ifndef __CV_EXPAND
#define __CV_EXPAND(x) x
#endif
// Token concatenation with argument expansion: the extra levels of indirection
// make sure x and y are macro-expanded before they reach the ## operator
// (a direct x ## y would paste the unexpanded macro names).
#ifndef __CV_CAT
#define __CV_CAT__(x, y) x ## y
#define __CV_CAT_(x, y) __CV_CAT__(x, y)
#define __CV_CAT(x, y) __CV_CAT_(x, y)
#endif
#if !defined _CRT_SECURE_NO_DEPRECATE && defined _MSC_VER && _MSC_VER > 1300
# define _CRT_SECURE_NO_DEPRECATE
/* to avoid multiple Visual Studio warnings */
#endif
...
...
modules/core/include/opencv2/core/hal/intrin.hpp
浏览文件 @
297ba853
...
...
@@ -60,6 +60,25 @@
// access from within opencv code more accessible
namespace
cv
{
#ifndef CV_DOXYGEN
#ifdef CV_CPU_DISPATCH_MODE
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE __CV_CAT(hal_, CV_CPU_DISPATCH_MODE)
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace __CV_CAT(hal_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#else
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE hal_baseline
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN namespace hal_baseline {
#define CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END }
#endif
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
using
namespace
CV_CPU_OPTIMIZATION_HAL_NAMESPACE
;
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
//! @addtogroup core_hal_intrin
//! @{
...
...
@@ -281,6 +300,9 @@ template <typename T> struct V_SIMD128Traits
//! @}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
#ifdef CV_DOXYGEN
...
...
@@ -323,6 +345,10 @@ template <typename T> struct V_SIMD128Traits
namespace
cv
{
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
template
<
typename
R
>
struct
V_RegTrait128
;
template
<
>
struct
V_RegTrait128
<
uchar
>
{
...
...
@@ -407,6 +433,10 @@ template <> struct V_RegTrait128<double> {
};
#endif
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
// cv::
//! @endcond
...
...
modules/core/include/opencv2/core/hal/intrin_cpp.hpp
浏览文件 @
297ba853
...
...
@@ -53,6 +53,10 @@
namespace
cv
{
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#endif
/** @addtogroup core_hal_intrin
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on
...
...
@@ -1827,7 +1831,9 @@ static inline bool hasSIMD128()
//! @}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
}
#endif
modules/core/include/opencv2/core/hal/intrin_neon.hpp
浏览文件 @
297ba853
...
...
@@ -53,6 +53,8 @@ namespace cv
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
#define CV_SIMD128 1
#if defined(__aarch64__)
#define CV_SIMD128_64F 1
...
...
@@ -1238,11 +1240,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a)
//! @brief Check CPU capability of SIMD operation
static
inline
bool
hasSIMD128
()
{
return
checkHardwareSupport
(
CV_CPU_NEON
)
;
return
(
CV_CPU_HAS_SUPPORT_NEON
)
?
true
:
false
;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
}
...
...
modules/core/include/opencv2/core/hal/intrin_sse.hpp
浏览文件 @
297ba853
...
...
@@ -56,6 +56,8 @@ namespace cv
//! @cond IGNORED
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_BEGIN
struct
v_uint8x16
{
typedef
uchar
lane_type
;
...
...
@@ -1791,11 +1793,13 @@ inline v_float16x4 v_cvt_f16(const v_float32x4& a)
//! @brief Check CPU capability of SIMD operation
static
inline
bool
hasSIMD128
()
{
return
checkHardwareSupport
(
CV_CPU_SSE2
)
;
return
(
CV_CPU_HAS_SUPPORT_SSE2
)
?
true
:
false
;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
}
...
...
modules/core/include/opencv2/core/private.hpp
浏览文件 @
297ba853
...
...
@@ -540,7 +540,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
///// General instrumentation
// General OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION
()
CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
#define CV_INSTRUMENT_REGION
_()
CV_INSTRUMENT_REGION_META(__FUNCTION__, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Custom OpenCV region instrumentation macro
#define CV_INSTRUMENT_REGION_NAME(NAME) CV_INSTRUMENT_REGION_CUSTOM_META(NAME, false, ::cv::instr::TYPE_GENERAL, ::cv::instr::IMPL_PLAIN)
// Instrumentation for parallel_for_ or other regions which forks and gathers threads
...
...
@@ -566,7 +566,7 @@ CV_EXPORTS InstrNode* getCurrentNode();
#else
#define CV_INSTRUMENT_REGION_META(...)
#define CV_INSTRUMENT_REGION()
#define CV_INSTRUMENT_REGION
_
()
#define CV_INSTRUMENT_REGION_NAME(...)
#define CV_INSTRUMENT_REGION_MT_FORK()
...
...
@@ -580,6 +580,12 @@ CV_EXPORTS InstrNode* getCurrentNode();
#define CV_INSTRUMENT_MARK_OPENCL(...)
#endif
// Public CV_INSTRUMENT_REGION(): forwards to CV_INSTRUMENT_REGION_() and, when
// __CV_AVX_GUARD is defined, places the guard declaration in front of it so that
// the guard lives in the same scope as the instrumented region.
#ifdef __CV_AVX_GUARD
#define CV_INSTRUMENT_REGION() __CV_AVX_GUARD CV_INSTRUMENT_REGION_()
#else
#define CV_INSTRUMENT_REGION() CV_INSTRUMENT_REGION_()
#endif
//! @endcond
#endif // OPENCV_CORE_PRIVATE_HPP
modules/core/include/opencv2/core/private/cv_cpu_include_simd_declarations.hpp
0 → 100644
浏览文件 @
297ba853
// Helper file to include dispatched functions declaration:
//
// Usage:
//     #define CV_CPU_SIMD_FILENAME "<filename>.simd.hpp"
//     #define CV_CPU_DISPATCH_MODE AVX2
//     #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
//     #define CV_CPU_DISPATCH_MODE SSE2
//     #include "opencv2/core/private/cv_cpu_include_simd_declarations.hpp"
//
// CV_CPU_SIMD_FILENAME is defined once; CV_CPU_DISPATCH_MODE must be defined again
// before every include because this file #undef's it at the end.
// This file intentionally has no include guard: it is meant to be included
// once per dispatch mode.

// MSVC only: silence C4702 (unreachable code) unless optimizations are disabled.
// NOTE(review): the warning state is not restored afterwards - confirm this is intended.
#ifndef CV_DISABLE_OPTIMIZATION
#ifdef _MSC_VER
#pragma warning(disable: 4702) // unreachable code
#endif
#endif

// Tells the included SIMD file that only declarations are wanted.
// NOTE(review): the SIMD file itself is expected to honor this macro - not visible here.
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#define CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
#endif

// Redirect the namespace macros to opt_<CV_CPU_DISPATCH_MODE> for the duration of
// the include below.
#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
#undef CV_CPU_OPTIMIZATION_NAMESPACE_END

#define CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN namespace __CV_CAT(opt_, CV_CPU_DISPATCH_MODE) {
#define CV_CPU_OPTIMIZATION_NAMESPACE_END }

#include CV_CPU_SIMD_FILENAME

// Leave the namespace macros and the dispatch mode undefined so the next
// include of this file (with another mode) starts from a clean state.
#undef CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
#undef CV_CPU_OPTIMIZATION_NAMESPACE_END

#undef CV_CPU_DISPATCH_MODE
modules/core/src/mathfuncs_core.dispatch.cpp
0 → 100644
浏览文件 @
297ba853
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
#include "precomp.hpp"
#include "mathfuncs_core.simd.hpp"
#include "mathfuncs_core.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
// Dispatching entry point for the float array arctangent.
// Order of attempts: external HAL hook (cv_hal_fastAtan32f), then the CPU-specific
// implementation chosen by CV_CPU_DISPATCH from CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): CALL_HAL is defined elsewhere; presumably it returns early when the
// external implementation handled the call - confirm.
void fastAtan32f(const float* Y, const float* X, float* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(fastAtan32f, cv_hal_fastAtan32f, Y, X, angle, len, angleInDegrees);

    CV_CPU_DISPATCH(fastAtan32f, (Y, X, angle, len, angleInDegrees),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the double array arctangent.
// Order of attempts: external HAL hook (cv_hal_fastAtan64f), then the CPU-specific
// implementation chosen by CV_CPU_DISPATCH from CV_CPU_DISPATCH_MODES_ALL.
// NOTE(review): CALL_HAL is defined elsewhere; presumably it returns early when the
// external implementation handled the call - confirm.
void fastAtan64f(const double* Y, const double* X, double* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(fastAtan64f, cv_hal_fastAtan64f, Y, X, angle, len, angleInDegrees);

    CV_CPU_DISPATCH(fastAtan64f, (Y, X, angle, len, angleInDegrees),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Deprecated alias: kept for older callers, simply forwards to fastAtan32f().
void fastAtan2(const float* Y, const float* X, float* angle, int len, bool angleInDegrees)
{
    CV_INSTRUMENT_REGION()

    fastAtan32f(Y, X, angle, len, angleInDegrees);
}
// Dispatching entry point for the float array magnitude.
// Order of attempts: external HAL hook (cv_hal_magnitude32f), IPP (ippsMagnitude_32f),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void magnitude32f(const float* x, const float* y, float* mag, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude32f, cv_hal_magnitude32f, x, y, mag, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_32f, x, y, mag, len) >= 0);

    CV_CPU_DISPATCH(magnitude32f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the double array magnitude.
// Order of attempts: external HAL hook (cv_hal_magnitude64f), IPP (ippsMagnitude_64f),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void magnitude64f(const double* x, const double* y, double* mag, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(magnitude64f, cv_hal_magnitude64f, x, y, mag, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsMagnitude_64f, x, y, mag, len) >= 0);

    CV_CPU_DISPATCH(magnitude64f, (x, y, mag, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the float array inverse square root.
// Order of attempts: external HAL hook (cv_hal_invSqrt32f), IPP (ippsInvSqrt_32f_A21),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void invSqrt32f(const float* src, float* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(invSqrt32f, cv_hal_invSqrt32f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_32f_A21, src, dst, len) >= 0);

    CV_CPU_DISPATCH(invSqrt32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the double array inverse square root.
// Order of attempts: external HAL hook (cv_hal_invSqrt64f), IPP (ippsInvSqrt_64f_A50),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void invSqrt64f(const double* src, double* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(invSqrt64f, cv_hal_invSqrt64f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsInvSqrt_64f_A50, src, dst, len) >= 0);

    CV_CPU_DISPATCH(invSqrt64f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the float array square root.
// Order of attempts: external HAL hook (cv_hal_sqrt32f), IPP (ippsSqrt_32f_A21),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void sqrt32f(const float* src, float* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(sqrt32f, cv_hal_sqrt32f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_32f_A21, src, dst, len) >= 0);

    CV_CPU_DISPATCH(sqrt32f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the double array square root.
// Order of attempts: external HAL hook (cv_hal_sqrt64f), IPP (ippsSqrt_64f_A50),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void sqrt64f(const double* src, double* dst, int len)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(sqrt64f, cv_hal_sqrt64f, src, dst, len);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsSqrt_64f_A50, src, dst, len) >= 0);

    CV_CPU_DISPATCH(sqrt64f, (src, dst, len),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the float array exponent.
// Order of attempts: external HAL hook (cv_hal_exp32f), IPP (ippsExp_32f_A21),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void exp32f(const float* src, float* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(exp32f, cv_hal_exp32f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_32f_A21, src, dst, n) >= 0);

    CV_CPU_DISPATCH(exp32f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the double array exponent.
// Order of attempts: external HAL hook (cv_hal_exp64f), IPP (ippsExp_64f_A50),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void exp64f(const double* src, double* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(exp64f, cv_hal_exp64f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsExp_64f_A50, src, dst, n) >= 0);

    CV_CPU_DISPATCH(exp64f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the float array natural logarithm.
// Order of attempts: external HAL hook (cv_hal_log32f), IPP (ippsLn_32f_A21),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void log32f(const float* src, float* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(log32f, cv_hal_log32f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_32f_A21, src, dst, n) >= 0);

    CV_CPU_DISPATCH(log32f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
// Dispatching entry point for the double array natural logarithm.
// Order of attempts: external HAL hook (cv_hal_log64f), IPP (ippsLn_64f_A50),
// then the CPU-specific implementation chosen by CV_CPU_DISPATCH.
// NOTE(review): CALL_HAL / CV_IPP_RUN_FAST are defined elsewhere; presumably each
// returns early when its backend handled the call - confirm.
void log64f(const double* src, double* dst, int n)
{
    CV_INSTRUMENT_REGION()

    CALL_HAL(log64f, cv_hal_log64f, src, dst, n);
    CV_IPP_RUN_FAST(CV_INSTRUMENT_FUN_IPP(ippsLn_64f_A50, src, dst, n) >= 0);

    CV_CPU_DISPATCH(log64f, (src, dst, n),
        CV_CPU_DISPATCH_MODES_ALL);
}
//=============================================================================
// for compatibility with 3.0
// 3.0-compatible overload: float arrays are forwarded to exp32f().
void exp(const float* src, float* dst, int n)
{
    exp32f(src, dst, n);
}
// 3.0-compatible overload: double arrays are forwarded to exp64f().
void exp(const double* src, double* dst, int n)
{
    exp64f(src, dst, n);
}
// 3.0-compatible overload: float arrays are forwarded to log32f().
void log(const float* src, float* dst, int n)
{
    log32f(src, dst, n);
}
// 3.0-compatible overload: double arrays are forwarded to log64f().
void log(const double* src, double* dst, int n)
{
    log64f(src, dst, n);
}
// 3.0-compatible overload: float arrays are forwarded to magnitude32f().
void magnitude(const float* x, const float* y, float* dst, int n)
{
    magnitude32f(x, y, dst, n);
}
// 3.0-compatible overload: double arrays are forwarded to magnitude64f().
void magnitude(const double* x, const double* y, double* dst, int n)
{
    magnitude64f(x, y, dst, n);
}
// 3.0-compatible overload: float arrays are forwarded to sqrt32f().
void sqrt(const float* src, float* dst, int len)
{
    sqrt32f(src, dst, len);
}
// 3.0-compatible overload: double arrays are forwarded to sqrt64f().
void sqrt(const double* src, double* dst, int len)
{
    sqrt64f(src, dst, len);
}
// 3.0-compatible overload: float arrays are forwarded to invSqrt32f().
void invSqrt(const float* src, float* dst, int len)
{
    invSqrt32f(src, dst, len);
}
// 3.0-compatible overload: double arrays are forwarded to invSqrt64f().
void invSqrt(const double* src, double* dst, int len)
{
    invSqrt64f(src, dst, len);
}
}}
// namespace cv::hal::
// Scalar cv::fastAtan2(): handed to the fastAtan2(y, x) implementation through
// CV_CPU_CALL_BASELINE, with cv::hal names made visible for the lookup.
// NOTE(review): there is no explicit `return` here; presumably CV_CPU_CALL_BASELINE
// expands to a return statement - confirm against its definition.
float cv::fastAtan2(float y, float x)
{
    using namespace cv::hal;

    CV_CPU_CALL_BASELINE(fastAtan2, (y, x));
}
modules/core/src/mathfuncs_core.
c
pp
→
modules/core/src/mathfuncs_core.
simd.h
pp
浏览文件 @
297ba853
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
namespace
cv
{
namespace
hal
{
CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
// Forward declarations of every function provided by this SIMD file.
// They sit before the CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY check below,
// so they are visible even when only declarations are requested.

// arctangent
void fastAtan32f(const float* Y, const float* X, float* angle, int len, bool angleInDegrees);
void fastAtan64f(const double* Y, const double* X, double* angle, int len, bool angleInDegrees);
void fastAtan2(const float* Y, const float* X, float* angle, int len, bool angleInDegrees);

// magnitude
void magnitude32f(const float* x, const float* y, float* mag, int len);
void magnitude64f(const double* x, const double* y, double* mag, int len);

// square root and inverse square root
void invSqrt32f(const float* src, float* dst, int len);
void invSqrt64f(const double* src, double* dst, int len);
void sqrt32f(const float* src, float* dst, int len);
void sqrt64f(const double* src, double* dst, int len);

// exponent and logarithm
void exp32f(const float* src, float* dst, int n);
void exp64f(const double* src, double* dst, int n);
void log32f(const float* src, float* dst, int n);
void log64f(const double* src, double* dst, int n);

// scalar arctangent
float fastAtan2(float y, float x);
#ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
using
namespace
std
;
...
...
@@ -197,23 +180,17 @@ static inline void atanImpl(const T *Y, const T *X, T *angle, int len, bool angl
}
// anonymous::
namespace
cv
{
namespace
hal
{
///////////////////////////////////// ATAN2 ////////////////////////////////////
void
fastAtan32f
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
fastAtan32f
,
cv_hal_fastAtan32f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
float
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
void
fastAtan64f
(
const
double
*
Y
,
const
double
*
X
,
double
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
fastAtan64f
,
cv_hal_fastAtan64f
,
Y
,
X
,
angle
,
len
,
angleInDegrees
);
atanImpl
<
double
>
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
...
...
@@ -221,7 +198,6 @@ void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool
void
fastAtan2
(
const
float
*
Y
,
const
float
*
X
,
float
*
angle
,
int
len
,
bool
angleInDegrees
)
{
CV_INSTRUMENT_REGION
()
fastAtan32f
(
Y
,
X
,
angle
,
len
,
angleInDegrees
);
}
...
...
@@ -229,9 +205,6 @@ void magnitude32f(const float* x, const float* y, float* mag, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
magnitude32f
,
cv_hal_magnitude32f
,
x
,
y
,
mag
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsMagnitude_32f
,
x
,
y
,
mag
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -257,9 +230,6 @@ void magnitude64f(const double* x, const double* y, double* mag, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
magnitude64f
,
cv_hal_magnitude64f
,
x
,
y
,
mag
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsMagnitude_64f
,
x
,
y
,
mag
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128_64F
...
...
@@ -286,9 +256,6 @@ void invSqrt32f(const float* src, float* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
invSqrt32f
,
cv_hal_invSqrt32f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsInvSqrt_32f_A21
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -310,9 +277,6 @@ void invSqrt64f(const double* src, double* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
invSqrt64f
,
cv_hal_invSqrt64f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsInvSqrt_64f_A50
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SSE2
...
...
@@ -330,9 +294,6 @@ void sqrt32f(const float* src, float* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
sqrt32f
,
cv_hal_sqrt32f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsSqrt_32f_A21
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128
...
...
@@ -354,9 +315,6 @@ void sqrt64f(const double* src, double* dst, int len)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
sqrt64f
,
cv_hal_sqrt64f
,
src
,
dst
,
len
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsSqrt_64f_A50
,
src
,
dst
,
len
)
>=
0
);
int
i
=
0
;
#if CV_SIMD128_64F
...
...
@@ -381,9 +339,6 @@ void exp32f(const float *src, float *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_32f_A21
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
exp
(
src
[
i
]);
...
...
@@ -394,9 +349,6 @@ void exp64f(const double *src, double *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_64f_A50
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
exp
(
src
[
i
]);
...
...
@@ -407,9 +359,6 @@ void log32f(const float *src, float *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log32f
,
cv_hal_log32f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_32f_A21
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
log
(
src
[
i
]);
...
...
@@ -419,9 +368,6 @@ void log64f(const double *src, double *dst, int n)
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log64f
,
cv_hal_log64f
,
src
,
dst
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_64f_A50
,
src
,
dst
,
n
)
>=
0
);
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
dst
[
i
]
=
std
::
log
(
src
[
i
]);
...
...
@@ -534,9 +480,6 @@ void exp32f( const float *_x, float *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp32f
,
cv_hal_exp32f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_32f_A21
,
_x
,
y
,
n
)
>=
0
);
static
const
float
A4
=
(
float
)(
1.000000000000002438532970795181890933776
/
EXPPOLY_32F_A0
),
A3
=
(
float
)(
.6931471805521448196800669615864773144641
/
EXPPOLY_32F_A0
),
...
...
@@ -551,7 +494,90 @@ void exp32f( const float *_x, float *y, int n )
const
Cv32suf
*
x
=
(
const
Cv32suf
*
)
_x
;
Cv32suf
buf
[
4
];
#if CV_SSE2
#if CV_AVX2
// AVX2 path: handles 8 floats per iteration while at least 8 elements remain.
// Relies on `i`, `x`, `y`, `n`, A1..A4, expTab and the exp_* constants from the
// enclosing function.
if( n >= 8 )
{
    static const __m256d prescale4 = _mm256_set1_pd(exp_prescale);
    static const __m256 postscale8 = _mm256_set1_ps((float)exp_postscale);
    static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
    static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));

    static const __m256 mA1 = _mm256_set1_ps(A1);
    static const __m256 mA2 = _mm256_set1_ps(A2);
    static const __m256 mA3 = _mm256_set1_ps(A3);
    static const __m256 mA4 = _mm256_set1_ps(A4);

    // Checked once for the base pointer; selects the aligned or unaligned store below.
    bool y_aligned = (size_t)(void*)y % 32 == 0;

    ushort CV_DECL_ALIGNED(32) tab_idx[16];

    for( ; i <= n - 8; i += 8 )
    {
        __m128i xi0, xi1;

        // Clamp the inputs to [minval4, maxval4] and widen them to double.
        __m256d xd0 = _mm256_cvtps_pd(_mm_min_ps(_mm_max_ps(_mm_loadu_ps(&x[i].f), minval4), maxval4));
        __m256d xd1 = _mm256_cvtps_pd(_mm_min_ps(_mm_max_ps(_mm_loadu_ps(&x[i+4].f), minval4), maxval4));

        xd0 = _mm256_mul_pd(xd0, prescale4);
        xd1 = _mm256_mul_pd(xd1, prescale4);

        // Split the scaled values into an integer part (xi*) and the remainder (xd*).
        xi0 = _mm256_cvtpd_epi32(xd0);
        xi1 = _mm256_cvtpd_epi32(xd1);

        xd0 = _mm256_sub_pd(xd0, _mm256_cvtepi32_pd(xi0));
        xd1 = _mm256_sub_pd(xd1, _mm256_cvtepi32_pd(xi1));

        // gcc does not support _mm256_set_m128
        //xf = _mm256_set_m128(_mm256_cvtpd_ps(xd1), _mm256_cvtpd_ps(xd0));
        __m256 xf = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm256_cvtpd_ps(xd0)), _mm256_cvtpd_ps(xd1), 1);

        xf = _mm256_mul_ps(xf, postscale8);

        xi0 = _mm_packs_epi32(xi0, xi1);

        // Low bits (EXPTAB_MASK) are the expTab indices for the 8 lanes.
        _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));

        // Remaining bits, offset by 127 and clamped to [0, 255], are shifted into
        // bit 23 of each 32-bit lane further below.
        xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
        xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
        xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
        xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
        xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());

        __m256d yd0 = _mm256_set_pd(expTab[tab_idx[3]], expTab[tab_idx[2]], expTab[tab_idx[1]], expTab[tab_idx[0]]);
        __m256d yd1 = _mm256_set_pd(expTab[tab_idx[7]], expTab[tab_idx[6]], expTab[tab_idx[5]], expTab[tab_idx[4]]);

        // gcc does not support _mm256_set_m128
        //__m256 yf = _mm256_set_m128(_mm256_cvtpd_ps(yd1), _mm256_cvtpd_ps(yd0));
        __m256 yf = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm256_cvtpd_ps(yd0)), _mm256_cvtpd_ps(yd1), 1);

        // Equivalent of _mm256_set_m128i(xi1, xi0). Built with integer-domain intrinsics
        // instead of C-style casts between vector types: those casts are a GCC/Clang
        // extension and are rejected by MSVC.
        __m256i temp = _mm256_insertf128_si256(_mm256_castsi128_si256(xi0), xi1, 1);

        yf = _mm256_mul_ps(yf, _mm256_castsi256_ps(_mm256_slli_epi32(temp, 23)));

        // Polynomial in xf: (((xf + A1)*xf + A2)*xf + A3)*xf + A4
        __m256 zf = _mm256_add_ps(xf, mA1);

#if CV_FMA3
        zf = _mm256_fmadd_ps(zf, xf, mA2);
        zf = _mm256_fmadd_ps(zf, xf, mA3);
        zf = _mm256_fmadd_ps(zf, xf, mA4);
#else
        zf = _mm256_add_ps(_mm256_mul_ps(zf, xf), mA2);
        zf = _mm256_add_ps(_mm256_mul_ps(zf, xf), mA3);
        zf = _mm256_add_ps(_mm256_mul_ps(zf, xf), mA4);
#endif
        zf = _mm256_mul_ps(zf, yf);

        if( y_aligned )
        {
            _mm256_store_ps(y + i, zf);
        }
        else
        {
            _mm256_storeu_ps(y + i, zf);
        }
    }
}
#elif CV_SSE2
if
(
n
>=
8
)
{
static
const
__m128d
prescale2
=
_mm_set1_pd
(
exp_prescale
);
...
...
@@ -738,9 +764,6 @@ void exp64f( const double *_x, double *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
exp64f
,
cv_hal_exp64f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsExp_64f_A50
,
_x
,
y
,
n
)
>=
0
);
static
const
double
A5
=
.99999999999999999998285227504999
/
EXPPOLY_32F_A0
,
A4
=
.69314718055994546743029643825322
/
EXPPOLY_32F_A0
,
...
...
@@ -1187,9 +1210,6 @@ void log32f( const float *_x, float *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log32f
,
cv_hal_log32f
,
_x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_32f_A21
,
_x
,
y
,
n
)
>=
0
);
static
const
float
shift
[]
=
{
0
,
-
1.
f
/
512
};
static
const
float
A0
=
0.3333333333333333333333333
f
,
...
...
@@ -1336,9 +1356,6 @@ void log64f( const double *x, double *y, int n )
{
CV_INSTRUMENT_REGION
()
CALL_HAL
(
log64f
,
cv_hal_log64f
,
x
,
y
,
n
);
CV_IPP_RUN_FAST
(
CV_INSTRUMENT_FUN_IPP
(
ippsLn_64f_A50
,
x
,
y
,
n
)
>=
0
);
static
const
double
shift
[]
=
{
0
,
-
1.
/
512
};
static
const
double
A7
=
1.0
,
...
...
@@ -1524,64 +1541,13 @@ void log64f( const double *x, double *y, int n )
#endif // issue 7795
//=============================================================================
// for compatibility with 3.0
void
exp
(
const
float
*
src
,
float
*
dst
,
int
n
)
{
exp32f
(
src
,
dst
,
n
);
}
void
exp
(
const
double
*
src
,
double
*
dst
,
int
n
)
{
exp64f
(
src
,
dst
,
n
);
}
void
log
(
const
float
*
src
,
float
*
dst
,
int
n
)
float
fastAtan2
(
float
y
,
float
x
)
{
log32f
(
src
,
dst
,
n
);
}
void
log
(
const
double
*
src
,
double
*
dst
,
int
n
)
{
log64f
(
src
,
dst
,
n
);
}
void
magnitude
(
const
float
*
x
,
const
float
*
y
,
float
*
dst
,
int
n
)
{
magnitude32f
(
x
,
y
,
dst
,
n
);
}
void
magnitude
(
const
double
*
x
,
const
double
*
y
,
double
*
dst
,
int
n
)
{
magnitude64f
(
x
,
y
,
dst
,
n
);
}
void
sqrt
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
sqrt32f
(
src
,
dst
,
len
);
}
void
sqrt
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
sqrt64f
(
src
,
dst
,
len
);
}
void
invSqrt
(
const
float
*
src
,
float
*
dst
,
int
len
)
{
invSqrt32f
(
src
,
dst
,
len
);
}
void
invSqrt
(
const
double
*
src
,
double
*
dst
,
int
len
)
{
invSqrt64f
(
src
,
dst
,
len
);
return
atanImpl
<
float
>
(
y
,
x
);
}
#endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
}
// cv::hal::
}
// cv::
CV_CPU_OPTIMIZATION_NAMESPACE_END
float
cv
::
fastAtan2
(
float
y
,
float
x
)
{
return
atanImpl
<
float
>
(
y
,
x
);
}
}}
// namespace cv::hal
modules/world/CMakeLists.txt
浏览文件 @
297ba853
...
...
@@ -24,6 +24,7 @@ if(NOT OPENCV_INITIAL_PASS)
message
(
STATUS
"Processing WORLD modules..."
)
foreach
(
m
${
OPENCV_MODULES_BUILD
}
)
set
(
the_module
${
m
}
)
if
(
OPENCV_MODULE_
${
m
}
_IS_PART_OF_WORLD
)
message
(
STATUS
" module
${
m
}
..."
)
set
(
CMAKE_CURRENT_SOURCE_DIR
"
${
OPENCV_MODULE_
${
m
}
_LOCATION
}
"
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录