未验证 提交 44aa68cb 编写于 作者: X xiguadong 提交者: GitHub

fix fp16 bug (#1249)

* fix fp16 bug

* apply code-format changes
Co-authored-by: Nroot <root@ddzhao.openailab.local>
Co-authored-by: Nxiguadong <xiguadong@users.noreply.github.com>
上级 1fde79ef
......@@ -23,7 +23,6 @@
*/
#include "utility/float.h"
#include <math.h>
/* Presumably the largest raw value of the exponent bit-field for each format
 * (8 exponent bits for bf16, 5 for fp16, per the trailing notes) - confirm
 * against the pack-type definitions, which are outside this view. */
#define BF16_EXP_MAX (256 - 1) // 2^8 - 1
#define FP16_EXP_MAX (32 - 1) // 2^5 - 1
......@@ -37,7 +36,8 @@
/* Raw 32-bit patterns: FP32_EXP_MAX shifted up to bit 23, with FP32_NAN also
 * setting the lowest bit and FP32_INF leaving the low bits clear.
 * NOTE(review): presumably the binary32 NaN / infinity encodings (exponent
 * field all ones) - FP32_EXP_MAX is defined outside this view, confirm. */
#define FP32_NAN ((FP32_EXP_MAX << 23) + 1)
#define FP32_INF ((FP32_EXP_MAX << 23) + 0)
#ifndef __ARM_ARCH
#if !defined(__ARM_ARCH) || (defined(__ARM_ARCH) && (0 == __ARM_FEATURE_FP16_VECTOR_ARITHMETIC))
fp32_t fp16_to_fp32(fp16_t package)
{
fp32_pack_t data;
......@@ -100,7 +100,7 @@ fp32_t fp16_to_fp32(fp16_t package)
return data.value;
}
return NAN;
return data.value;
}
fp16_t fp32_to_fp16(fp32_t value)
......
......@@ -121,10 +121,14 @@ typedef union fp64_pack
#endif
/* Select the type behind fp16_t: the compiler's native __fp16 only on ARM
 * builds where __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is non-zero; every other
 * build (non-ARM, or ARM without that feature) uses fp16_pack_t. An undefined
 * feature macro evaluates to 0 inside #if, so it selects the fallback. */
#if defined(__ARM_ARCH) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
typedef __fp16 fp16_t;
#else
typedef fp16_pack_t fp16_t;
#endif
typedef bf16_pack_t bf16_t;
typedef float fp32_t;
typedef double fp64_t;
......@@ -168,6 +172,28 @@ fp32_t bf16_to_fp32(bf16_t package);
bf16_t fp32_to_bf16(fp32_t package);
#ifdef __ARM_ARCH
#if __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
/*
 * Native half-precision path: conversion between __fp16 and float is left to
 * the compiler, so both helpers are plain value conversions.
 *
 * They are written as parenthesised casts instead of a statement expression
 * with a temporary named `f`: with the temporary, a call such as
 * fp16_to_fp32(f) expanded to `float f = f;`, which shadows the caller's
 * variable and reads an uninitialised value. The cast form also evaluates
 * `data` exactly once, keeps arbitrary argument expressions grouped, and does
 * not depend on the GNU statement-expression extension.
 */
#define fp16_to_fp32(data) ((float)(data))
#define fp32_to_fp16(data) ((__fp16)(data))
#else
/*!
 * @brief Convert a number from float16 to float32.
 *
 * Function form of the conversion. In this conditional it is declared for
 * ARM builds where __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is zero or undefined;
 * when the feature is available the same name is provided as a macro instead.
 *
 * @param [in] package: Input float16 precision number.
 *
 * @return The converted float32 precision number.
 */
fp32_t fp16_to_fp32(fp16_t package);
/*!
 * @brief Convert a number from float32 to float16.
 *
 * Function form of the conversion. In this conditional it is declared for
 * ARM builds where __ARM_FEATURE_FP16_VECTOR_ARITHMETIC is zero or undefined;
 * when the feature is available the same name is provided as a macro instead.
 *
 * @param [in] package: Input float32 precision number.
 *
 * @return The converted float16 precision number.
 */
fp16_t fp32_to_fp16(fp32_t package);
#endif
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册