From 44aa68cbffdfbfc31efbfed4efab8f17750898e6 Mon Sep 17 00:00:00 2001 From: xiguadong <55774832+xiguadong@users.noreply.github.com> Date: Tue, 18 Jan 2022 23:21:33 +0800 Subject: [PATCH] fix fp16 bug (#1249) * fix fp16 bug * apply code-format changes Co-authored-by: root Co-authored-by: xiguadong --- source/utility/float.c | 6 +++--- source/utility/float.h | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/source/utility/float.c b/source/utility/float.c index 09896fac..d2f803f1 100644 --- a/source/utility/float.c +++ b/source/utility/float.c @@ -23,7 +23,6 @@ */ #include "utility/float.h" -#include <math.h> #define BF16_EXP_MAX (256 - 1) // 2^8 - 1 #define FP16_EXP_MAX (32 - 1) // 2^5 - 1 @@ -37,7 +36,8 @@ #define FP32_NAN ((FP32_EXP_MAX << 23) + 1) #define FP32_INF ((FP32_EXP_MAX << 23) + 0) -#ifndef __ARM_ARCH +#if !defined(__ARM_ARCH) || (defined(__ARM_ARCH) && (0 == __ARM_FEATURE_FP16_VECTOR_ARITHMETIC)) + fp32_t fp16_to_fp32(fp16_t package) { fp32_pack_t data; @@ -100,7 +100,7 @@ fp32_t fp16_to_fp32(fp16_t package) return data.value; } - return NAN; + return data.value; } fp16_t fp32_to_fp16(fp32_t value) diff --git a/source/utility/float.h b/source/utility/float.h index e7fdef12..4675fbfa 100644 --- a/source/utility/float.h +++ b/source/utility/float.h @@ -121,10 +121,14 @@ typedef union fp64_pack #endif #ifdef __ARM_ARCH +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC typedef __fp16 fp16_t; #else typedef fp16_pack_t fp16_t; #endif +#else +typedef fp16_pack_t fp16_t; +#endif typedef bf16_pack_t bf16_t; typedef float fp32_t; typedef double fp64_t; @@ -168,6 +172,28 @@ fp32_t bf16_to_fp32(bf16_t package); bf16_t fp32_to_bf16(fp32_t package); #ifdef __ARM_ARCH + +#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC #define fp16_to_fp32(data) ({ float f = data; f; }) #define fp32_to_fp16(data) ({ __fp16 f = data; f; }) +#else +/*! +* @brief Convert a number from float16 to float32. +* +* @param [in] package: Input float16 precision number. 
+* +* @return The converted float32 precision number. +*/ +fp32_t fp16_to_fp32(fp16_t package); + +/*! +* @brief Convert a number from float32 to float16. +* +* @param [in] package: Input float32 precision number. +* +* @return The converted float16 precision number. +*/ +fp16_t fp32_to_fp16(fp32_t package); +#endif + #endif -- GitLab