diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 1656840441b3e4afbb1d831c00eafb112baee599..030837a5d09b5ce2ce6056539e9bb1f7afed9721 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -273,35 +273,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) add_extra_compiler_option(-fvisibility-inlines-hidden) endif() -# TODO !!!!! -if(NOT OPENCV_FP16_DISABLE AND NOT IOS) - if(ARM AND ENABLE_NEON) - set(FP16_OPTION "-mfpu=neon-fp16") - elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) - set(FP16_OPTION "-mf16c") - endif() - try_compile(__VALID_FP16 - "${OpenCV_BINARY_DIR}" - "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp" - COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" - OUTPUT_VARIABLE TRY_OUT - ) - if(NOT __VALID_FP16) - if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) - # GCC enables AVX when mf16c is passed - message(STATUS "FP16: Feature disabled") - else() - message(STATUS "FP16: Compiler support is not available") - endif() - else() - message(STATUS "FP16: Compiler support is available") - set(HAVE_FP16 1) - if(NOT ${FP16_OPTION} STREQUAL "") - add_extra_compiler_option(${FP16_OPTION}) - endif() - endif() -endif() - #combine all "extra" options set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 658d12c14cdb995d4e47ec82157c5fafa01c1611..f20ff0c1743099943865015110355d5cd3bf9184 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -207,9 +207,6 @@ /* Lapack */ #cmakedefine HAVE_LAPACK -/* FP16 */ -#cmakedefine HAVE_FP16 - /* Library was compiled with functions instrumentation */ #cmakedefine ENABLE_INSTRUMENTATION diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index aaabea38d4c42e0e9b7743678843e98eb848984e..c6beacaba84e85ef47dd4c6a942f352d0a9eebbe 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -70,6 +70,10 @@ # include # define CV_AVX 1 #endif +#ifdef CV_CPU_COMPILE_FP16 +# include +# define CV_FP16 1 +#endif #ifdef CV_CPU_COMPILE_AVX2 # include # define CV_AVX2 1 @@ -154,6 +158,9 @@ struct VZeroUpperGuard { #ifndef CV_AVX # define CV_AVX 0 #endif +#ifndef CV_FP16 +# define CV_FP16 0 +#endif #ifndef CV_AVX2 # define CV_AVX2 0 #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index e8c166405e05f62091dedfab12b72e1e166777df..a5675e4f4dd442a00915598d1aeb23cd071e0288 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -721,7 +721,7 @@ inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, { typedef typename V_TypeTraits<_Tp>::abs_type rtype; v_reg c; - const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0; + const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); for( int i = 0; i < n; i++ ) { rtype ua = a.s[i] ^ mask; diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index c7f4e90a36d8787b2656d42446a596334f338264..6dd02bd0dbee20ad8d8852cadda56fe21a3785e8 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -278,7 +278,7 @@ struct v_float64x2 }; #endif -#if defined (HAVE_FP16) +#if CV_FP16 // Workaround for old comiplers template static inline int16x4_t vreinterpret_s16_f16(T a) { return (int16x4_t)a; } @@ -775,7 +775,7 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32) OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64) #endif -#if defined (HAVE_FP16) +#if CV_FP16 // Workaround for old comiplers inline v_float16x4 v_load_f16(const short* ptr) { return v_float16x4(vld1_f16(ptr)); } @@ -1223,7 +1223,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) } #endif -#if defined (HAVE_FP16) +#if CV_FP16 inline v_float32x4 v_cvt_f32(const v_float16x4& a) { return v_float32x4(vcvt_f32_f16(a.val)); diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 5b9a27fd8a1e37203a79ebd2fd1a590c6cd9f17c..364f5d76f4cd82d08d97b1b169af0a40ab07d189 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -255,7 +255,7 @@ struct v_float64x2 __m128d val; }; -#if defined(HAVE_FP16) +#if CV_FP16 struct v_float16x4 { typedef short lane_type; @@ -1056,7 +1056,7 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) -#if defined(HAVE_FP16) +#if CV_FP16 inline v_float16x4 v_load_f16(const short* ptr) { return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); } inline void v_store_f16(short* ptr, v_float16x4& a) @@ -1776,7 +1776,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8)))); } -#if defined(HAVE_FP16) +#if CV_FP16 inline v_float32x4 v_cvt_f32(const v_float16x4& a) { return v_float32x4(_mm_cvtph_ps(a.val)); diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index afbc55716686a40a68478670811cacc286bf20f5..c43bd727e7ba85f22f01f5048cc4f9ffcf7c9905 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -66,17 +66,6 @@ # undef max #endif -#if defined HAVE_FP16 && (defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)) -# include -# define CV_FP16 1 -#elif defined HAVE_FP16 && defined __GNUC__ -# define CV_FP16 1 -#endif - -#ifndef CV_FP16 -# define CV_FP16 0 -#endif - //! @cond IGNORED namespace cv diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 0ec24ef1fb7bbbab95406436c9b7e46c98876ecd..7d51bd3a6fae92b413973df9485da2659db2aba2 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -743,7 +743,7 @@ template struct TheTest TheTest & test_loadstore_fp16() { -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 AlignedData data; AlignedData out; @@ -775,7 +775,7 @@ template struct TheTest TheTest & test_float_cvt_fp16() { -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 AlignedData data; if(checkHardwareSupport(CV_CPU_FP16)) @@ -1008,7 +1008,7 @@ TEST(hal_intrin, float64x2) { } #endif -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 TEST(hal_intrin, float16x4) { TheTest() .test_loadstore_fp16()