math: cbrt cleanup and long double fix

* use float_t and double_t
* clean up subnormal handling
* bithacks according to the new convention (ldshape for long double and explicit unions for float and double)

math: cbrt cleanup and long double fix
* use float_t and double_t
* clean up subnormal handling
* bithacks according to the new convention (ldshape for long double and explicit unions for float and double)
535104ab · Szabolcs Nagy · 39c910fb · 535104ab · 535104ab · 535104ab
隐藏空白更改
内联并排

Showing 3 changed files with 59 additions and 72 deletions

src/math/cbrt.c src/math/cbrt.c +17 -19

src/math/cbrtf.c src/math/cbrtf.c +14 -17

src/math/cbrtl.c src/math/cbrtl.c +28 -36

未找到文件。
--- a/src/math/cbrt.c
+++ b/src/math/cbrt.c
@@ -15,7 +15,8 @@
 * Return cube root of x
 */
-#include "libm.h"
+#include <math.h>
+#include <stdint.h>
 static const uint32_t
 B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
@@ -31,15 +32,10 @@ P4 =  0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
 double cbrt(double x)
 {
-	int32_t hx;
+	union {double f; uint64_t i;} u = {x};
-	union dshape u;
+	double_t r,s,t,w;
-	double r,s,t=0.0,w;
+	uint32_t hx = u.i>>32 & 0x7fffffff;
-	uint32_t sign;
-	uint32_t high,low;
-	EXTRACT_WORDS(hx, low, x);
-	sign = hx & 0x80000000;
-	hx ^= sign;
 	if (hx >= 0x7ff00000)  /* cbrt(NaN,INF) is itself */
 		return x+x;
@@ -59,14 +55,16 @@ double cbrt(double x)
 	 * division rounds towards minus infinity; this is also efficient.
 	 */
 	if (hx < 0x00100000) { /* zero or subnormal? */
-		if ((hx|low) == 0)
+		u.f = x*0x1p54;
+		hx = u.i>>32 & 0x7fffffff;
+		if (hx == 0)
 			return x;  /* cbrt(0) is itself */
-		SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */
+		hx = hx/3 + B2;
-		t *= x;
-		GET_HIGH_WORD(high, t);
-		INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0);
 	} else
-		INSERT_WORDS(t, sign|(hx/3+B1), 0);
+		hx = hx/3 + B1;
+	u.i &= 1ULL<<63;
+	u.i |= (uint64_t)hx << 32;
+	t = u.f;
 	/*
 	 * New cbrt to 23 bits:
@@ -76,7 +74,7 @@ double cbrt(double x)
 	 * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
 	 * gives us bounds for r = t**3/x.
 	 *
-	 * Try to optimize for parallel evaluation as in k_tanf.c.
+	 * Try to optimize for parallel evaluation as in __tanf.c.
 	 */
 	r = (t*t)*(t/x);
 	t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
@@ -91,9 +89,9 @@ double cbrt(double x)
 	 * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
 	 * before the final error is larger than 0.667 ulps.
 	 */
-	u.value = t;
+	u.f = t;
-	u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
+	u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
-	t = u.value;
+	t = u.f;
 	/* one step Newton iteration to 53 bits with error < 0.667 ulps */
 	s = t*t;         /* t*t is exact */

--- a/src/math/cbrtf.c
+++ b/src/math/cbrtf.c
@@ -17,7 +17,8 @@
 * Return cube root of x
 */
-#include "libm.h"
+#include <math.h>
+#include <stdint.h>
 static const unsigned
 B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
@@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
 float cbrtf(float x)
 {
-	double r,T;
+	double_t r,T;
-	float t;
+	union {float f; uint32_t i;} u = {x};
-	int32_t hx;
+	uint32_t hx = u.i & 0x7fffffff;
-	uint32_t sign;
-	uint32_t high;
-	GET_FLOAT_WORD(hx, x);
-	sign = hx & 0x80000000;
-	hx ^= sign;
 	if (hx >= 0x7f800000)  /* cbrt(NaN,INF) is itself */
 		return x + x;
@@ -41,28 +37,29 @@ float cbrtf(float x)
 	if (hx < 0x00800000) {  /* zero or subnormal? */
 		if (hx == 0)
 			return x;  /* cbrt(+-0) is itself */
-		SET_FLOAT_WORD(t, 0x4b800000);  /* set t = 2**24 */
+		u.f = x*0x1p24f;
-		t *= x;
+		hx = u.i & 0x7fffffff;
-		GET_FLOAT_WORD(high, t);
+		hx = hx/3 + B2;
-		SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2));
 	} else
-		SET_FLOAT_WORD(t, sign|(hx/3+B1));
+		hx = hx/3 + B1;
+	u.i &= 0x80000000;
+	u.i |= hx;
 	/*
 	 * First step Newton iteration (solving t*t-x/t == 0) to 16 bits.  In
 	 * double precision so that its terms can be arranged for efficiency
 	 * without causing overflow or underflow.
 	 */
-	T = t;
+	T = u.f;
 	r = T*T*T;
-	T = T*((double)x+x+r)/(x+r+r);
+	T = T*((double_t)x+x+r)/(x+r+r);
 	/*
 	 * Second step Newton iteration to 47 bits.  In double precision for
 	 * efficiency and accuracy.
 	 */
 	r = T*T*T;
-	T = T*((double)x+x+r)/(x+r+r);
+	T = T*((double_t)x+x+r)/(x+r+r);
 	/* rounding to 24 bits is perfect in round-to-nearest mode */
 	return T;

--- a/src/math/cbrtl.c
+++ b/src/math/cbrtl.c
@@ -23,58 +23,50 @@ long double cbrtl(long double x)
 	return cbrt(x);
 }
 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
-#define BIAS (LDBL_MAX_EXP - 1)
 static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
 long double cbrtl(long double x)
 {
-	union IEEEl2bits u, v;
+	union ldshape u = {x}, v;
+	union {float f; uint32_t i;} uft;
 	long double r, s, t, w;
-	double dr, dt, dx;
+	double_t dr, dt, dx;
-	float ft, fx;
+	float_t ft;
-	uint32_t hx;
+	int e = u.i.se & 0x7fff;
-	uint16_t expsign;
+	int sign = u.i.se & 0x8000;
-	int k;
-	u.e = x;
-	expsign = u.xbits.expsign;
-	k = expsign & 0x7fff;
 	/*
 	 * If x = +-Inf, then cbrt(x) = +-Inf.
 	 * If x = NaN, then cbrt(x) = NaN.
 	 */
-	if (k == BIAS + LDBL_MAX_EXP)
+	if (e == 0x7fff)
 		return x + x;
+	if (e == 0) {
-	if (k == 0) {
+		/* Adjust subnormal numbers. */
+		u.f *= 0x1p120;
+		e = u.i.se & 0x7fff;
 		/* If x = +-0, then cbrt(x) = +-0. */
-		if ((u.bits.manh | u.bits.manl) == 0)
+		if (e == 0)
 			return x;
-		/* Adjust subnormal numbers. */
+		e -= 120;
-		u.e *= 0x1.0p514;
+	}
-		k = u.bits.exp;
+	e -= 0x3fff;
-		k -= BIAS + 514;
+	u.i.se = 0x3fff;
-	} else
+	x = u.f;
-		k -= BIAS;
+	switch (e % 3) {
-	u.xbits.expsign = BIAS;
-	v.e = 1;
-	x = u.e;
-	switch (k % 3) {
 	case 1:
 	case -2:
-		x = 2*x;
+		x *= 2;
-		k--;
+		e--;
 		break;
 	case 2:
 	case -1:
-		x = 4*x;
+		x *= 4;
-		k -= 2;
+		e -= 2;
 		break;
 	}
-	v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3);
+	v.f = 1.0;
+	v.i.se = sign | (0x3fff + e/3);
 	/*
 	 * The following is the guts of s_cbrtf, with the handling of
@@ -83,9 +75,9 @@ long double cbrtl(long double x)
 	 */
 	/* ~5-bit estimate: */
-	fx = x;
+	uft.f = x;
-	GET_FLOAT_WORD(hx, fx);
+	uft.i = (uft.i & 0x7fffffff)/3 + B1;
-	SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
+	ft = uft.f;
 	/* ~16-bit estimate: */
 	dx = x;
@@ -126,7 +118,7 @@ long double cbrtl(long double x)
 	r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
 	t = t+t*r;       /* error <= 0.5 + 0.5/3 + epsilon */
-	t *= v.e;
+	t *= v.f;
 	return t;
 }
 #endif