提交 d34f2cfe 编写于 作者: A Alexander Alekhin

Merge pull request #7317 from tomoaki0705:fixIfdefFp16

...@@ -151,7 +151,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) ...@@ -151,7 +151,7 @@ if(CMAKE_COMPILER_IS_GNUCXX)
add_extra_compiler_option("-mfp16-format=ieee") add_extra_compiler_option("-mfp16-format=ieee")
endif(ARM) endif(ARM)
if(ENABLE_NEON) if(ENABLE_NEON)
add_extra_compiler_option("-mfpu=neon-fp16") add_extra_compiler_option("-mfpu=neon")
endif() endif()
if(ENABLE_VFPV3 AND NOT ENABLE_NEON) if(ENABLE_VFPV3 AND NOT ENABLE_NEON)
add_extra_compiler_option("-mfpu=vfpv3") add_extra_compiler_option("-mfpu=vfpv3")
...@@ -336,6 +336,34 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) ...@@ -336,6 +336,34 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399)
add_extra_compiler_option(-fvisibility-inlines-hidden) add_extra_compiler_option(-fvisibility-inlines-hidden)
endif() endif()
# Probe for compiler support of half-precision floats (FP16).
#
# Skipped entirely when OPENCV_FP16_DISABLE is set. Otherwise the check source
# cmake/checks/fp16.cpp is compiled with -DCHECK_FP16 plus the architecture
# specific flag chosen below. On success HAVE_FP16 is set to 1 and the chosen
# flag (if any) is handed to add_extra_compiler_option().
# NOTE(review): presumably this has to run before the "extra" options are
# combined into CMAKE_C_FLAGS / CMAKE_CXX_FLAGS -- confirm when moving it.
if(NOT OPENCV_FP16_DISABLE)
  # Start from a known-empty value so that a stale FP16_OPTION coming from the
  # cache or an enclosing scope can never reach try_compile() or the global
  # flags on platforms where neither branch below assigns it.
  set(FP16_OPTION "")
  if(ARM AND ENABLE_NEON)
    set(FP16_OPTION "-mfpu=neon-fp16")
  elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX)
    set(FP16_OPTION "-mf16c")
  endif()

  # The result lands in __VALID_FP16; compiler output is captured in TRY_OUT.
  try_compile(__VALID_FP16
    "${OpenCV_BINARY_DIR}"
    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
    COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}"
    OUTPUT_VARIABLE TRY_OUT
    )

  if(NOT __VALID_FP16)
    if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX)
      # GCC enables AVX when mf16c is passed
      message(STATUS "FP16: Feature disabled")
    else()
      message(STATUS "FP16: Compiler support is not available")
    endif()
  else()
    message(STATUS "FP16: Compiler support is available")
    set(HAVE_FP16 1)
    # Quote the expansion: unquoted, an empty FP16_OPTION would collapse the
    # condition to `if(NOT STREQUAL "")`, which only behaves by accident.
    if(NOT "${FP16_OPTION}" STREQUAL "")
      add_extra_compiler_option(${FP16_OPTION})
    endif()
  endif()
endif()
#combine all "extra" options #combine all "extra" options
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}")
...@@ -376,21 +404,6 @@ if(MSVC) ...@@ -376,21 +404,6 @@ if(MSVC)
endif() endif()
endif() endif()
# FP16 compiler check: unless OPENCV_FP16_DISABLE is set, try to build
# cmake/checks/fp16.cpp with -DCHECK_FP16 and report the outcome.
# HAVE_FP16 is set to 1 only when the check source compiles.
if(NOT OPENCV_FP16_DISABLE)
  try_compile(__VALID_FP16
    "${OpenCV_BINARY_DIR}"
    "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp"
    COMPILE_DEFINITIONS "-DCHECK_FP16"
    OUTPUT_VARIABLE TRY_OUT
  )
  if(__VALID_FP16)
    # Success path first: record availability for the rest of the build.
    message(STATUS "FP16: Compiler support is available")
    set(HAVE_FP16 1)
  else()
    message(STATUS "FP16: Compiler support is not available")
  endif()
endif()
if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib") if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib")
link_directories("/usr/local/lib") link_directories("/usr/local/lib")
endif() endif()
...@@ -310,7 +310,7 @@ enum CpuFeatures { ...@@ -310,7 +310,7 @@ enum CpuFeatures {
typedef union Cv16suf typedef union Cv16suf
{ {
short i; short i;
#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) #if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
__fp16 h; __fp16 h;
#endif #endif
struct _fp16Format struct _fp16Format
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include "precomp.hpp" #include "precomp.hpp"
#include "opencl_kernels_core.hpp" #include "opencl_kernels_core.hpp"
#include "opencv2/core/hal/intrin.hpp"
#ifdef __APPLE__ #ifdef __APPLE__
#undef CV_NEON #undef CV_NEON
...@@ -4379,7 +4380,7 @@ struct Cvt_SIMD<float, int> ...@@ -4379,7 +4380,7 @@ struct Cvt_SIMD<float, int>
#endif #endif
#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) ) #if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) )
// const numbers for floating points format // const numbers for floating points format
const unsigned int kShiftSignificand = 13; const unsigned int kShiftSignificand = 13;
const unsigned int kMaskFp16Significand = 0x3ff; const unsigned int kMaskFp16Significand = 0x3ff;
...@@ -4387,7 +4388,7 @@ const unsigned int kBiasFp16Exponent = 15; ...@@ -4387,7 +4388,7 @@ const unsigned int kBiasFp16Exponent = 15;
const unsigned int kBiasFp32Exponent = 127; const unsigned int kBiasFp32Exponent = 127;
#endif #endif
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) #if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
static float convertFp16SW(short fp16) static float convertFp16SW(short fp16)
{ {
// Fp16 -> Fp32 // Fp16 -> Fp32
...@@ -4449,7 +4450,7 @@ static float convertFp16SW(short fp16) ...@@ -4449,7 +4450,7 @@ static float convertFp16SW(short fp16)
} }
#endif #endif
#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) #if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) )
static short convertFp16SW(float fp32) static short convertFp16SW(float fp32)
{ {
// Fp32 -> Fp16 // Fp32 -> Fp16
...@@ -4557,7 +4558,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t ...@@ -4557,7 +4558,7 @@ cvtScaleHalf_<float, short>( const float* src, size_t sstep, short* dst, size_t
if ( ( (intptr_t)dst & 0xf ) == 0 ) if ( ( (intptr_t)dst & 0xf ) == 0 )
#endif #endif
{ {
#if CV_FP16 #if CV_FP16 && CV_SIMD128
for ( ; x <= size.width - 4; x += 4) for ( ; x <= size.width - 4; x += 4)
{ {
v_float32x4 v_src = v_load(src + x); v_float32x4 v_src = v_load(src + x);
...@@ -4603,7 +4604,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t ...@@ -4603,7 +4604,7 @@ cvtScaleHalf_<short, float>( const short* src, size_t sstep, float* dst, size_t
if ( ( (intptr_t)src & 0xf ) == 0 ) if ( ( (intptr_t)src & 0xf ) == 0 )
#endif #endif
{ {
#if CV_FP16 #if CV_FP16 && CV_SIMD128
for ( ; x <= size.width - 4; x += 4) for ( ; x <= size.width - 4; x += 4)
{ {
v_float16x4 v_src = v_load_f16(src + x); v_float16x4 v_src = v_load_f16(src + x);
......
...@@ -711,12 +711,14 @@ template<typename R> struct TheTest ...@@ -711,12 +711,14 @@ template<typename R> struct TheTest
return *this; return *this;
} }
#if CV_FP16
TheTest & test_loadstore_fp16() TheTest & test_loadstore_fp16()
{ {
#if CV_FP16
AlignedData<R> data; AlignedData<R> data;
AlignedData<R> out; AlignedData<R> out;
if(checkHardwareSupport(CV_CPU_FP16))
{
// check if addresses are aligned and unaligned respectively // check if addresses are aligned and unaligned respectively
EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16);
EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); EXPECT_NE((size_t)0, (size_t)&data.u.d % 16);
...@@ -735,24 +737,30 @@ template<typename R> struct TheTest ...@@ -735,24 +737,30 @@ template<typename R> struct TheTest
out.a.clear(); out.a.clear();
v_store_f16(out.a.d, r1); v_store_f16(out.a.d, r1);
EXPECT_EQ(data.a, out.a); EXPECT_EQ(data.a, out.a);
}
return *this; return *this;
#endif
} }
TheTest & test_float_cvt_fp16() TheTest & test_float_cvt_fp16()
{ {
#if CV_FP16
AlignedData<v_float32x4> data; AlignedData<v_float32x4> data;
if(checkHardwareSupport(CV_CPU_FP16))
{
// check conversion // check conversion
v_float32x4 r1 = v_load(data.a.d); v_float32x4 r1 = v_load(data.a.d);
v_float16x4 r2 = v_cvt_f16(r1); v_float16x4 r2 = v_cvt_f16(r1);
v_float32x4 r3 = v_cvt_f32(r2); v_float32x4 r3 = v_cvt_f32(r2);
EXPECT_EQ(0x3c00, r2.get0()); EXPECT_EQ(0x3c00, r2.get0());
EXPECT_EQ(r3.get0(), r1.get0()); EXPECT_EQ(r3.get0(), r1.get0());
}
return *this; return *this;
}
#endif #endif
}
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册