From e5d9b608c47d54e43496041595025fa282fa9de5 Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 4 Apr 2017 20:34:58 +0300 Subject: [PATCH] cmake: fix fp16 support --- cmake/OpenCVCompilerOptions.cmake | 29 ------------------- cmake/templates/cvconfig.h.in | 3 -- .../include/opencv2/core/cv_cpu_dispatch.h | 7 +++++ .../include/opencv2/core/hal/intrin_cpp.hpp | 2 +- .../include/opencv2/core/hal/intrin_neon.hpp | 6 ++-- .../include/opencv2/core/hal/intrin_sse.hpp | 6 ++-- modules/core/include/opencv2/core/private.hpp | 11 ------- modules/core/test/test_intrin.cpp | 6 ++-- 8 files changed, 17 insertions(+), 53 deletions(-) diff --git a/cmake/OpenCVCompilerOptions.cmake b/cmake/OpenCVCompilerOptions.cmake index 1656840441..030837a5d0 100644 --- a/cmake/OpenCVCompilerOptions.cmake +++ b/cmake/OpenCVCompilerOptions.cmake @@ -273,35 +273,6 @@ if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_OPENCV_GCC_VERSION_NUM GREATER 399) add_extra_compiler_option(-fvisibility-inlines-hidden) endif() -# TODO !!!!! -if(NOT OPENCV_FP16_DISABLE AND NOT IOS) - if(ARM AND ENABLE_NEON) - set(FP16_OPTION "-mfpu=neon-fp16") - elseif((X86 OR X86_64) AND NOT MSVC AND ENABLE_AVX) - set(FP16_OPTION "-mf16c") - endif() - try_compile(__VALID_FP16 - "${OpenCV_BINARY_DIR}" - "${OpenCV_SOURCE_DIR}/cmake/checks/cpu_fp16.cpp" - COMPILE_DEFINITIONS "-DCHECK_FP16" "${FP16_OPTION}" - OUTPUT_VARIABLE TRY_OUT - ) - if(NOT __VALID_FP16) - if((X86 OR X86_64) AND NOT MSVC AND NOT ENABLE_AVX) - # GCC enables AVX when mf16c is passed - message(STATUS "FP16: Feature disabled") - else() - message(STATUS "FP16: Compiler support is not available") - endif() - else() - message(STATUS "FP16: Compiler support is available") - set(HAVE_FP16 1) - if(NOT ${FP16_OPTION} STREQUAL "") - add_extra_compiler_option(${FP16_OPTION}) - endif() - endif() -endif() - #combine all "extra" options set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENCV_EXTRA_FLAGS} ${OPENCV_EXTRA_CXX_FLAGS}") diff --git a/cmake/templates/cvconfig.h.in b/cmake/templates/cvconfig.h.in index 658d12c14c..f20ff0c174 100644 --- a/cmake/templates/cvconfig.h.in +++ b/cmake/templates/cvconfig.h.in @@ -207,9 +207,6 @@ /* Lapack */ #cmakedefine HAVE_LAPACK -/* FP16 */ -#cmakedefine HAVE_FP16 - /* Library was compiled with functions instrumentation */ #cmakedefine ENABLE_INSTRUMENTATION diff --git a/modules/core/include/opencv2/core/cv_cpu_dispatch.h b/modules/core/include/opencv2/core/cv_cpu_dispatch.h index aaabea38d4..c6beacaba8 100644 --- a/modules/core/include/opencv2/core/cv_cpu_dispatch.h +++ b/modules/core/include/opencv2/core/cv_cpu_dispatch.h @@ -70,6 +70,10 @@ # include # define CV_AVX 1 #endif +#ifdef CV_CPU_COMPILE_FP16 +# include +# define CV_FP16 1 +#endif #ifdef CV_CPU_COMPILE_AVX2 # include # define CV_AVX2 1 @@ -154,6 +158,9 @@ struct VZeroUpperGuard { #ifndef CV_AVX # define CV_AVX 0 #endif +#ifndef CV_FP16 +# define CV_FP16 0 +#endif #ifndef CV_AVX2 # define CV_AVX2 0 #endif diff --git a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp index e8c166405e..a5675e4f4d 100644 --- a/modules/core/include/opencv2/core/hal/intrin_cpp.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_cpp.hpp @@ -721,7 +721,7 @@ inline v_reg::abs_type, n> v_absdiff(const v_reg<_Tp, { typedef typename V_TypeTraits<_Tp>::abs_type rtype; v_reg c; - const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0; + const rtype mask = (rtype)(std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0); for( int i = 0; i < n; i++ ) { rtype ua = a.s[i] ^ mask; diff --git a/modules/core/include/opencv2/core/hal/intrin_neon.hpp b/modules/core/include/opencv2/core/hal/intrin_neon.hpp index c7f4e90a36..6dd02bd0db 100644 --- a/modules/core/include/opencv2/core/hal/intrin_neon.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_neon.hpp @@ -278,7 +278,7 @@ struct v_float64x2 }; #endif -#if defined (HAVE_FP16) +#if CV_FP16 // Workaround for old comiplers template static inline int16x4_t vreinterpret_s16_f16(T a) { return (int16x4_t)a; } @@ -775,7 +775,7 @@ OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32) OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64) #endif -#if defined (HAVE_FP16) +#if CV_FP16 // Workaround for old comiplers inline v_float16x4 v_load_f16(const short* ptr) { return v_float16x4(vld1_f16(ptr)); } @@ -1223,7 +1223,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) } #endif -#if defined (HAVE_FP16) +#if CV_FP16 inline v_float32x4 v_cvt_f32(const v_float16x4& a) { return v_float32x4(vcvt_f32_f16(a.val)); diff --git a/modules/core/include/opencv2/core/hal/intrin_sse.hpp b/modules/core/include/opencv2/core/hal/intrin_sse.hpp index 5b9a27fd8a..364f5d76f4 100644 --- a/modules/core/include/opencv2/core/hal/intrin_sse.hpp +++ b/modules/core/include/opencv2/core/hal/intrin_sse.hpp @@ -255,7 +255,7 @@ struct v_float64x2 __m128d val; }; -#if defined(HAVE_FP16) +#if CV_FP16 struct v_float16x4 { typedef short lane_type; @@ -1056,7 +1056,7 @@ inline void v_store_high(_Tp* ptr, const _Tpvec& a) \ OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float32x4, float, ps) OPENCV_HAL_IMPL_SSE_LOADSTORE_FLT_OP(v_float64x2, double, pd) -#if defined(HAVE_FP16) +#if CV_FP16 inline v_float16x4 v_load_f16(const short* ptr) { return v_float16x4(_mm_loadl_epi64((const __m128i*)ptr)); } inline void v_store_f16(short* ptr, v_float16x4& a) @@ -1776,7 +1776,7 @@ inline v_float64x2 v_cvt_f64_high(const v_float32x4& a) return v_float64x2(_mm_cvtps_pd(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(a.val),8)))); } -#if defined(HAVE_FP16) +#if CV_FP16 inline v_float32x4 v_cvt_f32(const v_float16x4& a) { return v_float32x4(_mm_cvtph_ps(a.val)); diff --git a/modules/core/include/opencv2/core/private.hpp b/modules/core/include/opencv2/core/private.hpp index afbc557166..c43bd727e7 100644 --- a/modules/core/include/opencv2/core/private.hpp +++ b/modules/core/include/opencv2/core/private.hpp @@ -66,17 +66,6 @@ # undef max #endif -#if defined HAVE_FP16 && (defined __F16C__ || (defined _MSC_VER && _MSC_VER >= 1700)) -# include -# define CV_FP16 1 -#elif defined HAVE_FP16 && defined __GNUC__ -# define CV_FP16 1 -#endif - -#ifndef CV_FP16 -# define CV_FP16 0 -#endif - //! @cond IGNORED namespace cv diff --git a/modules/core/test/test_intrin.cpp b/modules/core/test/test_intrin.cpp index 0ec24ef1fb..7d51bd3a6f 100644 --- a/modules/core/test/test_intrin.cpp +++ b/modules/core/test/test_intrin.cpp @@ -743,7 +743,7 @@ template struct TheTest TheTest & test_loadstore_fp16() { -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 AlignedData data; AlignedData out; @@ -775,7 +775,7 @@ template struct TheTest TheTest & test_float_cvt_fp16() { -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 AlignedData data; if(checkHardwareSupport(CV_CPU_FP16)) @@ -1008,7 +1008,7 @@ TEST(hal_intrin, float64x2) { } #endif -#if CV_FP16 +#if CV_FP16 && CV_SIMD128 TEST(hal_intrin, float16x4) { TheTest() .test_loadstore_fp16() -- GitLab