diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 15e4db11259b7620736c3b28ddf5b71c2aedde44..686b0b885ef06a7dd9f38b3cc487a7cc775ecb21 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -151,7 +151,7 @@ if(CMAKE_COMPILER_IS_GNUCXX) add_extra_compiler_option("-mfp16-format=ieee") endif(ARM) if(ENABLE_NEON) - add_extra_compiler_option("-mfpu=neon-fp16") + add_extra_compiler_option("-mfpu=neon") endif() if(ENABLE_VFPV3 AND NOT ENABLE_NEON) add_extra_compiler_option("-mfpu=vfpv3") @@ -336,6 +336,34 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) add_extra_compiler_option(-fvisibility-inlines-hidden) endif() +if(NOT OPENCV_FP16_DISABLE) + if(ARM AND ENABLE_NEON) + set(FP16_OPTION "-mfpu=neon-fp16") + elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) + set(FP16_OPTION "-mf16c") + endif() + try_compile(__VALID_FP16 + "${OpenCV_BINARY_DIR}" + "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" + COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" + OUTPUT_VARIABLE TRY_OUT + ) + if(NOT __VALID_FP16) + if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) + # GCC enables AVX when mf16c is passed + message(STATUS "FP16: Feature disabled") + else() + message(STATUS "FP16: Compiler support is not available") + endif() + else() + message(STATUS "FP16: Compiler support is available") + set(HAVE_FP16 1) + if(NOT ${FP16_OPTION} STREQUAL "") + add_extra_compiler_option(${FP16_OPTION}) + endif() + endif() +endif() + #combine all "extra" options set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") @@ -376,21 +404,6 @@ if(MSVC) endif() endif() -if(NOT OPENCV_FP16_DISABLE) - try_compile(__VALID_FP16 - "${OpenCV_BINARY_DIR}" - "${OpenCV_SOURCE_DIR}/cmake/checks/fp16.cpp" - COMPILE_DEFINITIONS "-DCHECK_FP16" - OUTPUT_VARIABLE TRY_OUT - ) - if(NOT __VALID_FP16) - message(STATUS "FP16: Compiler support is not available") - else() - message(STATUS "FP16: Compiler support is available") - set(HAVE_FP16 1) - endif() -endif() - if(APPLE AND NOT CMAKE_CROSSCOMPILING AND NOT DEFINED ENV{LDFLAGS} AND EXISTS "/usr/local/lib") link_directories("/usr/local/lib") endif() diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index a26cd1caf6e8180a78c17a070e142d8b22ae4278..87fe88594e621747256381d2bed348192b54eb40 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -310,7 +310,7 @@ enum CpuFeatures { typedef union Cv16suf { short i; -#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) +#if ( defined (__arm__) || defined (__aarch64__) ) && !defined (__CUDACC__) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) __fp16 h; #endif struct _fp16Format diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index c405919f17d66acf889651540827504ea0c2b7c7..dc2da5e61141f2c01f8debc52f0254a3222b1987 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -44,6 +44,7 @@ #include "precomp.hpp" #include "opencl_kernels_core.hpp" +#include "opencv2/core/hal/intrin.hpp" #ifdef __APPLE__ #undef CV_NEON @@ -4379,7 +4380,7 @@ struct Cvt_SIMD #endif -#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) ) +#if !( ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) ) // const numbers for floating points format const unsigned int kShiftSignificand = 13; const unsigned int kMaskFp16Significand = 0x3ff; @@ -4387,7 +4388,7 @@ const unsigned int kBiasFp16Exponent = 15; const unsigned int kBiasFp32Exponent = 127; #endif -#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) +#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) static float convertFp16SW(short fp16) { // Fp16 -> Fp32 @@ -4449,7 +4450,7 @@ static float convertFp16SW(short fp16) } #endif -#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC__ ) ) || ( 5 <= __GNUC__ ) ) ) +#if ( defined (__arm__) || defined (__aarch64__) ) && ( defined (__GNUC__) && ( ( ( 4 <= __GNUC__ ) && ( 7 <= __GNUC_MINOR__ ) ) || ( 5 <= __GNUC__ ) ) ) static short convertFp16SW(float fp32) { // Fp32 -> Fp16 @@ -4557,7 +4558,7 @@ cvtScaleHalf_( const float* src, size_t sstep, short* dst, size_t if ( ( (intptr_t)dst & 0xf ) == 0 ) #endif { -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 for ( ; x <= size.width - 4; x += 4) { v_float32x4 v_src = v_load(src + x); @@ -4603,7 +4604,7 @@ cvtScaleHalf_( const short* src, size_t sstep, float* dst, size_t if ( ( (intptr_t)src & 0xf ) == 0 ) #endif { -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 for ( ; x <= size.width - 4; x += 4) { v_float16x4 v_src = v_load_f16(src + x); diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index d9704ef4327b3337c52e53a46dcbdd7510e54816..7ebd8e979cda9b52d0c62e12386f805f9ea2f181 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -711,48 +711,56 @@ template struct TheTest return *this; } -#if CV_FP16 TheTest & test_loadstore_fp16() { +#if CV_FP16 AlignedData data; AlignedData out; - // check if addresses are aligned and unaligned respectively - EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); - EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); - EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); - - // check some initialization methods - R r1 = data.u; - R r2 = v_load_f16(data.a.d); - R r3(r2); - EXPECT_EQ(data.u[0], r1.get0()); - EXPECT_EQ(data.a[0], r2.get0()); - EXPECT_EQ(data.a[0], r3.get0()); - - // check some store methods - out.a.clear(); - v_store_f16(out.a.d, r1); - EXPECT_EQ(data.a, out.a); + if(checkHardwareSupport(CV_CPU_FP16)) + { + // check if addresses are aligned and unaligned respectively + EXPECT_EQ((size_t)0, (size_t)&data.a.d % 16); + EXPECT_NE((size_t)0, (size_t)&data.u.d % 16); + EXPECT_EQ((size_t)0, (size_t)&out.a.d % 16); + EXPECT_NE((size_t)0, (size_t)&out.u.d % 16); + + // check some initialization methods + R r1 = data.u; + R r2 = v_load_f16(data.a.d); + R r3(r2); + EXPECT_EQ(data.u[0], r1.get0()); + EXPECT_EQ(data.a[0], r2.get0()); + EXPECT_EQ(data.a[0], r3.get0()); + + // check some store methods + out.a.clear(); + v_store_f16(out.a.d, r1); + EXPECT_EQ(data.a, out.a); + } return *this; +#endif } TheTest & test_float_cvt_fp16() { +#if CV_FP16 AlignedData data; - // check conversion - v_float32x4 r1 = v_load(data.a.d); - v_float16x4 r2 = v_cvt_f16(r1); - v_float32x4 r3 = v_cvt_f32(r2); - EXPECT_EQ(0x3c00, r2.get0()); - EXPECT_EQ(r3.get0(), r1.get0()); + if(checkHardwareSupport(CV_CPU_FP16)) + { + // check conversion + v_float32x4 r1 = v_load(data.a.d); + v_float16x4 r2 = v_cvt_f16(r1); + v_float32x4 r3 = v_cvt_f32(r2); + EXPECT_EQ(0x3c00, r2.get0()); + EXPECT_EQ(r3.get0(), r1.get0()); + } return *this; - } #endif + } };