提交 3ad4d02b 编写于 作者: Peacoor Zomboss's avatar Peacoor Zomboss

Add a simple soft floating-point impl

上级 68141969
/* 测试代码,仅用于展示目的
编译命令:
$ gcc sftest.c -o sftest -O3
$ gcc sftest.c -o sftest_trunc -O3 -DROUND_TRUNC
*/
#include <stdio.h> // printf
#include <fenv.h> // fesetround
// #define ROUND_TRUNC
#ifdef MAKEFILE
#include "softfloat.h"
#else
#include "softfloat.c"
#endif
#define INF (1.f / 0.f)
#define NAN (0.f / 0.f)
static char *if_true_false(bool b)
{
return b ? "true" : "false";
}
static void test_cmp(float f1, float f2)
{
sfloat32 sf1 = sf32_from_float(f1);
sfloat32 sf2 = sf32_from_float(f2);
printf("%f == %f [%s] \n", f1, f2, if_true_false(sf32_eq(sf1, sf2)));
printf("%f < %f [%s] \n", f1, f2, if_true_false(sf32_lt(sf1, sf2)));
printf("%f <= %f [%s] \n", f1, f2, if_true_false(sf32_le(sf1, sf2)));
printf("%f != %f [%s] \n", f1, f2, if_true_false(sf32_ne(sf1, sf2)));
printf("%f > %f [%s] \n", f1, f2, if_true_false(sf32_gt(sf1, sf2)));
printf("%f >= %f [%s] \n", f1, f2, if_true_false(sf32_ge(sf1, sf2)));
printf("\n");
}
static void test_cmps()
{
test_cmp(0, 1);
test_cmp(1, INF);
test_cmp(1, -INF);
test_cmp(INF, INF);
test_cmp(INF, -INF);
test_cmp(INF, NAN);
test_cmp(NAN, NAN);
test_cmp(0, NAN);
test_cmp(+0, -0);
printf("\n");
}
static void test_add(float f1, float f2)
{
float f = f1 + f2;
sfloat32 sf1 = sf32_from_float(f1);
sfloat32 sf2 = sf32_from_float(f2);
sfloat32 sf = sf32_add(sf1, sf2);
printf("%.10e + %.10e = [hard]%.10e, [soft]%.10e\n", f1, f2, f, sf32_to_float(sf));
}
static void test_adds()
{
test_add(1.1, 2.2);
test_add(3e38, 2e38);
test_add(1.1, 0);
test_add(2e20, 2);
test_add(2, INF);
test_add(INF, 100);
test_add(NAN, NAN);
test_add(NAN, INF);
test_add(INF, NAN);
test_add(NAN, 2);
test_add(100, NAN);
printf("\n");
}
static void test_sub(float f1, float f2)
{
float f = f1 - f2;
sfloat32 sf1 = sf32_from_float(f1);
sfloat32 sf2 = sf32_from_float(f2);
sfloat32 sf = sf32_sub(sf1, sf2);
printf("%.10e - %.10e = [hard]%.10e, [soft]%.10e\n", f1, f2, f, sf32_to_float(sf));
}
static void test_subs()
{
test_sub(1.1, 2.2);
test_sub(1.1, 0);
test_sub(2e20, 2);
test_sub(2, INF);
test_sub(INF, 100);
test_sub(NAN, NAN);
test_sub(NAN, INF);
test_sub(INF, NAN);
test_sub(NAN, 2);
test_sub(100, NAN);
printf("\n");
}
static void test_mul(float f1, float f2)
{
float f = f1 * f2;
sfloat32 sf1 = sf32_from_float(f1);
sfloat32 sf2 = sf32_from_float(f2);
sfloat32 sf = sf32_mul(sf1, sf2);
printf("%.10e * %.10e = [hard]%.10e, [soft]%.10e\n", f1, f2, f, sf32_to_float(sf));
}
static void test_muls()
{
test_mul(1.1, 2.2);
test_mul(1.1, 0);
test_mul(2e20, 2);
test_mul(2, INF);
test_mul(INF, 100);
test_mul(NAN, NAN);
test_mul(NAN, INF);
test_mul(INF, NAN);
test_mul(NAN, 2);
test_mul(100, NAN);
test_mul(0, INF);
test_mul(INF, 0);
test_mul(2e20, 2e20);
printf("\n");
}
static void test_div(float f1, float f2)
{
float f = f1 / f2;
sfloat32 sf1 = sf32_from_float(f1);
sfloat32 sf2 = sf32_from_float(f2);
sfloat32 sf = sf32_div(sf1, sf2);
printf("%.10e / %.10e = [hard]%.10e, [soft]%.10e\n", f1, f2, f, sf32_to_float(sf));
}
static void test_divs()
{
test_div(1.1, 2.2);
test_div(1.1, 0);
test_div(2e20, 2);
test_div(2, INF);
test_div(INF, 100);
test_div(NAN, NAN);
test_div(NAN, INF);
test_div(INF, NAN);
test_div(NAN, 2);
test_div(100, NAN);
test_div(0, INF);
test_div(INF, 0);
printf("\n");
}
static void test_conv_sf_to_i(float f)
{
sfloat32 sf = sf32_from_float(f);
int i = sf32_to_i32(sf);
printf("int of %f = %d\n", f, i);
}
static void test_conv_i_to_sf(int i)
{
sfloat32 sf = sf32_from_i32(i);
float f = sf32_to_float(sf);
printf("float from %d = %f\n", i, f);
}
static void test_convs()
{
test_conv_sf_to_i(-1);
test_conv_sf_to_i(0);
test_conv_sf_to_i(0.5);
test_conv_sf_to_i(2.5);
test_conv_sf_to_i(8388607);
test_conv_sf_to_i(8388608);
test_conv_sf_to_i(8388609);
test_conv_sf_to_i(16777217); // == 16777216
test_conv_sf_to_i(1e20); // 溢出
test_conv_sf_to_i(-1e20);
printf("\n");
test_conv_i_to_sf(-1);
test_conv_i_to_sf(0);
test_conv_i_to_sf(2);
test_conv_i_to_sf(0x7fffff);
test_conv_i_to_sf(0x800000);
test_conv_i_to_sf(0x800001);
test_conv_i_to_sf(0x8fffff);
test_conv_i_to_sf(0x1000000);
test_conv_i_to_sf(0x1000001); // == 16777216
printf("\n");
}
int main()
{
#ifdef ROUND_TRUNC
fesetround(FE_TOWARDZERO);
#endif
test_cmps();
test_adds();
test_subs();
test_muls();
test_divs();
test_convs();
}
#include "softfloat.h"
#define SF32_INF 0x7f800000 // Inf,指数为0xff,有效位为0
#define SF32_NAN 0xffc00000 // NaN,指数为0xff,有效位不为0
#define SF32_MAX 0x7f7fffff // 单精度浮点表示的最大值
#define SF32_EXP(sf) (((sf) >> 23) & 0xff) // 取8位指数
#define SF32_SIG(sf) ((sf) & 0x7fffff) // 取23位有效位
#define SF32_SIGN(sf) ((sf) >> 31) // 取符号位
/* 构造一个32位浮点数 */
static inline sfloat32 sf32_pack(bool sign, uint16_t exp, uint32_t sig)
{
return ((uint32_t)sign << 31) | ((uint32_t)exp << 23) | (sig & 0x7fffff);
}
inline sfloat32 sf32_from_float(float f)
{
unionsf32 usf;
usf.f = f;
return usf.u;
}
inline float sf32_to_float(sfloat32 sf)
{
unionsf32 usf;
usf.u = sf;
return usf.f;
}
bool sf32_isnan(sfloat32 sf)
{
return (sf << 9 != 0) && (((sf >> 23) & 0xff) == 0xff);
}
bool sf32_isinf(sfloat32 sf)
{
return (sf & 0x7fffffff) == SF32_INF;
}
bool sf32_sign(sfloat32 sf)
{
return SF32_SIGN(sf);
}
static bool sf32_real_lt(sfloat32 sf1, sfloat32 sf2)
{
bool sign1 = SF32_SIGN(sf1);
bool sign2 = SF32_SIGN(sf2);
if (sign1 != sign2) // 符号不同,也可以用异或判断
// 若左边为负数且两数不为0,则说明左边小于右边
return sign1 && ((sf1 | sf2) << 1);
// 若符号相同,则左边小于右边的条件是两数不相等
// 同时,若两数为正数,绝对值小的小,否则绝对值大的小
return (sf1 != sf2) && (sign1 ^ (sf1 < sf2));
}
static bool sf32_real_le(sfloat32 sf1, sfloat32 sf2)
{
bool sign1 = SF32_SIGN(sf1);
bool sign2 = SF32_SIGN(sf2);
if (sign1 != sign2)
// 与之前不同,两数可以为0
return sign1 && (((sf1 | sf2) << 1) == 0);
// 与之前不同,相等或小于
return (sf1 == sf2) || (sign1 ^ (sf1 < sf2));
}
bool sf32_eq(sfloat32 sf1, sfloat32 sf2)
{
if (sf32_isnan(sf1) || sf32_isnan(sf2))
return false;
return
(sf1 == sf2) || // 内容完全一样
(((sf1 | sf2) << 1) == 0); // 对于0则忽略符号位
}
bool sf32_ne(sfloat32 sf1, sfloat32 sf2)
{
return !sf32_eq(sf1, sf2);
}
bool sf32_lt(sfloat32 sf1, sfloat32 sf2)
{
if (sf32_isnan(sf1) || sf32_isnan(sf2))
return false;
return sf32_real_lt(sf1, sf2);
}
bool sf32_le(sfloat32 sf1, sfloat32 sf2)
{
if (sf32_isnan(sf1) || sf32_isnan(sf2))
return false;
return sf32_real_le(sf1, sf2);
}
bool sf32_gt(sfloat32 sf1, sfloat32 sf2)
{
if (sf32_isnan(sf1) || sf32_isnan(sf2))
return false;
return !sf32_real_le(sf1, sf2);
}
bool sf32_ge(sfloat32 sf1, sfloat32 sf2)
{
if (sf32_isnan(sf1) || sf32_isnan(sf2))
return false;
return !sf32_real_lt(sf1, sf2);
}
// 对于32位数,右移保留粘贴位
static uint32_t shift_right_sticky32(uint32_t sig, uint16_t n)
{
uint32_t res = 0;
if (n < 31) {
res = sig >> n;
if (sig << (32 - n))
res = res | 1;
}
else {
if (sig)
res = 1;
}
return res;
}
// 对于64位数,右移保留粘贴位
static uint64_t shift_right_sticky64(uint64_t sig, uint16_t n)
{
uint64_t res = 0;
if (n < 63) {
res = sig >> n;
if (sig << (64 - n))
res = res | 1;
}
else {
if (sig)
res = 1;
}
return res;
}
// 对于32位数,进行向偶数舍入操作,低3位分别为GRS位
static uint32_t round_even_grs_32(uint32_t sig)
{
uint8_t grs = sig & 7;
if ((grs > 4) || ((grs == 4) && ((sig >> 3) & 1)))
sig = sig + 8;
return sig;
}
// 判断NaN或Inf
static inline sfloat32 sf32_nan_inf(bool sign, uint32_t sig)
{
if (sig)
return SF32_NAN;
return (SF32_INF | ((uint32_t)sign << 31));
}
// 64位无符号整数除以32位无符号整数,返回商,余数可选
// 确保结果不超过32位宽度
static inline uint32_t div_64_32_32_32(uint64_t n, uint32_t base, uint32_t *prem)
{
uint64_t rem = n;
uint64_t b = base;
uint64_t d = 1;
uint32_t r = 0;
while ((int64_t)b > 0 && b < rem) {
b = b + b;
d = d + d;
}
do {
if (rem >= b) {
rem -= b;
r += d;
}
b = b >> 1;
d = d >> 1;
} while (d);
if (prem)
*prem = rem;
return r;
}
/* 加法,两数绝对值相加,符号取决于第一个参数 */
static sfloat32 real_sf32_add(sfloat32 sf1, sfloat32 sf2)
{
int exp1 = SF32_EXP(sf1);
int exp2 = SF32_EXP(sf2);
uint32_t sig1 = SF32_SIG(sf1);
uint32_t sig2 = SF32_SIG(sf2);
bool sign = SF32_SIGN(sf1);
int expdiff = exp1 - exp2;
if (expdiff == 0) {
if (exp1 == 0) // 忽略非规格化数
return ((uint32_t)sign << 31);
if (exp1 == 0xff) // 判断无穷或NaN
return sf32_nan_inf(sign, sig1 | sig2);
}
if (exp1 == 0xff)
return sf32_nan_inf(sign, sig1);
if (exp2 == 0xff)
return sf32_nan_inf(sign, sig2);
if (exp1 == 0)
return (sf2 & 0x7fffffff) | ((uint32_t)sign << 31);
if (exp2 == 0)
return (sf1 & 0x7fffffff) | ((uint32_t)sign << 31);
sig1 |= 0x800000;
sig2 |= 0x800000;
#ifndef ROUND_TRUNC
sig1 = sig1 << 2;
sig2 = sig2 << 2;
#endif
if (expdiff < 0) {
expdiff = -expdiff;
#ifdef ROUND_TRUNC
if (expdiff > 24)
sig1 = 0;
else
sig1 = sig1 >> expdiff;
#else
sig1 = shift_right_sticky32(sig1, expdiff);
#endif
exp1 = exp2;
}
else if (expdiff > 0) {
#ifdef ROUND_TRUNC
if (expdiff > 24)
sig2 = 0;
else
sig2 = sig2 >> expdiff;
#else
sig2 = shift_right_sticky32(sig2, expdiff);
#endif
}
uint32_t sig = sig1 + sig2;
#ifdef ROUND_TRUNC
if (sig > 0xffffff) {
sig = sig >> 1;
exp1++;
}
#else
if (sig < 0x4000000)
sig = sig << 1;
else
exp1++;
sig = round_even_grs_32(sig);
if (sig > 0x7ffffff) {
sig = sig >> 1;
exp1++;
}
sig = sig >> 3;
#endif
if (exp1 > 254)
#ifdef ROUND_TRUNC
return (SF32_MAX | ((uint32_t)sign << 31));
#else
return (SF32_INF | ((uint32_t)sign << 31));
#endif
return sf32_pack(sign, exp1, sig);
}
/* 减法,两数绝对值相减,符号为绝对值大的那个 */
static sfloat32 real_sf32_sub(sfloat32 sf1, sfloat32 sf2)
{
int exp1 = SF32_EXP(sf1);
int exp2 = SF32_EXP(sf2);
uint32_t sig1 = SF32_SIG(sf1);
uint32_t sig2 = SF32_SIG(sf2);
uint32_t sig;
bool sign = SF32_SIGN(sf1);
int expdiff = exp1 - exp2;
if (expdiff == 0) {
if (exp1 == 0xff)
return SF32_NAN;
if (sig1 == sig2)
return 0;
if (sig1 > sig2) {
sig = sig1 - sig2;
}
else {
sig = sig2 - sig1;
sign = !sign;
}
sig = sig << 3;
}
else if (expdiff < 0) {
sign = !sign;
if (exp2 == 0xff)
return sf32_nan_inf(sign, sig2);
if (exp1 == 0)
return (sf2 & 0x7fffffff) | ((uint32_t)sign << 31);
exp1 = exp2;
expdiff = -expdiff;
sig1 = (sig1 | 0x800000) << 3;
sig1 = shift_right_sticky32(sig1, expdiff);
sig = ((sig2 | 0x800000) << 3) - sig1;
}
else {
if (exp1 == 0xff)
return sf32_nan_inf(sign, sig1);
if (exp2 == 0)
return (sf1 & 0x7fffffff) | ((uint32_t)sign << 31);
sig2 = (sig2 | 0x800000) << 3;
sig2 = shift_right_sticky32(sig2, expdiff);
sig = ((sig1 | 0x800000) << 3) - sig2;
}
while (sig < 0x4000000) { // 可以优化为求前导0数量
sig = sig << 1;
exp1--;
}
#ifndef ROUND_TRUNC
sig = round_even_grs_32(sig);
if (sig > 0x7ffffff) {
sig = sig >> 1;
exp1++;
}
#endif
sig = sig >> 3;
if (exp1 < 1)
return ((uint32_t)sign << 31);
return sf32_pack(sign, exp1, sig);
}
inline sfloat32 sf32_add(sfloat32 sf1, sfloat32 sf2)
{
if (SF32_SIGN(sf1) == SF32_SIGN(sf2))
return real_sf32_add(sf1, sf2);
return real_sf32_sub(sf1, sf2);
}
inline sfloat32 sf32_sub(sfloat32 sf1, sfloat32 sf2)
{
if (SF32_SIGN(sf1) == SF32_SIGN(sf2))
return real_sf32_sub(sf1, sf2);
return real_sf32_add(sf1, sf2);
}
sfloat32 sf32_mul(sfloat32 sf1, sfloat32 sf2)
{
int exp1 = SF32_EXP(sf1);
int exp2 = SF32_EXP(sf2);
uint32_t sig1 = SF32_SIG(sf1);
uint32_t sig2 = SF32_SIG(sf2);
bool sign = SF32_SIGN(sf1) ^ SF32_SIGN(sf2);
if (exp1 == 0xff) {
if (sig1 || ((exp2 == 0xff) && sig2) || (exp2 == 0))
return SF32_NAN;
return (SF32_INF | ((uint32_t)sign << 31));
}
if (exp2 == 0xff) {
if (sig2 || (exp1 == 0))
return SF32_NAN;
return (SF32_INF | ((uint32_t)sign << 31));
}
if ((exp1 == 0) || (exp2 == 0))
return ((uint32_t)sign << 31);
uint32_t sig;
int exp = exp1 + exp2 - 127;
sig1 |= 0x800000;
sig2 |= 0x800000;
#ifdef ROUND_TRUNC
sig = ((uint64_t)sig1 * sig2) >> 23;
if (sig > 0xffffff) {
sig = sig >> 1;
exp++;
}
#else
sig = shift_right_sticky64((uint64_t)sig1 * sig2, 21);
if (sig < 0x4000000)
sig = sig << 1;
else
exp++;
sig = round_even_grs_32(sig);
if (sig > 0x7ffffff) {
sig = sig >> 1;
exp++;
}
sig = sig >> 3;
#endif
if (exp < 1)
return ((uint32_t)sign << 31);
if (exp > 254)
#ifdef ROUND_TRUNC
return (SF32_MAX | ((uint32_t)sign << 31));
#else
return (SF32_INF | ((uint32_t)sign << 31));
#endif
return sf32_pack(sign, exp, sig);
}
sfloat32 sf32_div(sfloat32 sf1, sfloat32 sf2)
{
int exp1 = SF32_EXP(sf1);
int exp2 = SF32_EXP(sf2);
uint32_t sig1 = SF32_SIG(sf1);
uint32_t sig2 = SF32_SIG(sf2);
bool sign = SF32_SIGN(sf1) ^ SF32_SIGN(sf2);
if (exp1 == 0xff) {
if (sig1 || (exp2 == 0xff))
return SF32_NAN;
return (SF32_INF | ((uint32_t)sign << 31));
}
if (exp2 == 0xff) {
if (sig2)
return SF32_NAN;
return ((uint32_t)sign << 31);
}
if (exp1 == 0) {
if (exp2 == 0)
return SF32_NAN;
return ((uint32_t)sign << 31);
}
if (exp2 == 0)
return (SF32_INF | ((uint32_t)sign << 31));
int exp = exp1 - exp2 + 127;
sig1 = sig1 | 0x800000;
sig2 = sig2 | 0x800000;
uint64_t sig64;
if (sig1 < sig2) {
exp--;
sig64 = (uint64_t)sig1 << 32;
}
else {
sig64 = (uint64_t)sig1 << 31;
}
uint32_t sig;
#ifdef ROUND_TRUNC
sig = div_64_32_32_32(sig64, sig2, NULL) >> 8;
#else
uint32_t rem;
sig = div_64_32_32_32(sig64, sig2, &rem);
if (rem)
sig |= 1;
sig = shift_right_sticky32(sig, 5);
sig = round_even_grs_32(sig) >> 3;
#endif
if (exp < 1)
return ((uint32_t)sign << 31);
if (exp > 254)
return (SF32_INF | ((uint32_t)sign << 31));
return sf32_pack(sign, exp, sig);
}
int32_t sf32_to_i32(sfloat32 sf)
{
int exp = SF32_EXP(sf) - 127;
if (exp < 0)
return 0;
bool sign = SF32_SIGN(sf);
if (exp > 30) {
if (sign)
return (int32_t)0x80000000;
else
return 0x7fffffff;
}
uint32_t sig = SF32_SIG(sf) | 0x800000;
int shift = 23 - exp;
int32_t res;
if (shift > 0)
res = sig >> shift;
else
res = sig << (-shift);
if (sign)
res = -res;
return res;
}
sfloat32 sf32_from_i32(int32_t i)
{
if (i == 0)
return 0;
bool sign = i >> 31;
if (sign)
i = -i;
int exp = 127 + 23;
if (i >= 0x1000000) {
while (i >= 0x1000000) {
i = i >> 1;
exp++;
}
}
else {
while (i < 0x800000) {
i = i << 1;
exp--;
}
}
return sf32_pack(sign, exp, i);
}
/*
软浮点,目前实现了32位浮点数的加减乘除以及转化
性能不是主要目的,重点是实现原理以及代码可读性
代码部分参考了伯克利的软浮点实现
http://www.jhauser.us/arithmetic/SoftFloat.html
https://github.com/ucb-bar/berkeley-softfloat-3
*/
#pragma once
#include <stdint.h>
#include <stdbool.h>
// 截断模式,可以减少代码体积,但是精度略低
// 需要则可以在include文件之前定义
// #define ROUND_TRUNC
typedef uint32_t sfloat32;
typedef union
{
float f;
uint32_t u;
} unionsf32;
// 将浮点数转换到软浮点数
sfloat32 sf32_from_float(float f);
// 将软浮点数转换到浮点数
float sf32_to_float(sfloat32 sf);
// 判断是否是NaN
bool sf32_isnan(sfloat32 sf);
// 判断是否无穷
bool sf32_isinf(sfloat32 sf);
// 获取符号位
bool sf32_sign(sfloat32 sf);
// 判断相等
bool sf32_eq(sfloat32 sf1, sfloat32 sf2);
// 判断不相等
bool sf32_ne(sfloat32 sf1, sfloat32 sf2);
// 判断小于
bool sf32_lt(sfloat32 sf1, sfloat32 sf2);
// 判断小于等于
bool sf32_le(sfloat32 sf1, sfloat32 sf2);
// 判断大于
bool sf32_gt(sfloat32 sf1, sfloat32 sf2);
// 判断大于等于
bool sf32_ge(sfloat32 sf1, sfloat32 sf2);
// 加法运算
sfloat32 sf32_add(sfloat32 sf1, sfloat32 sf2);
// 减法运算
sfloat32 sf32_sub(sfloat32 sf1, sfloat32 sf2);
// 乘法运算
sfloat32 sf32_mul(sfloat32 sf1, sfloat32 sf2);
// 除法运算
sfloat32 sf32_div(sfloat32 sf1, sfloat32 sf2);
// 转换到整数
int32_t sf32_to_i32(sfloat32 sf);
// 从整数转换
sfloat32 sf32_from_i32(int32_t i);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册