提交 535104ab 编写于 作者: S Szabolcs Nagy

math: cbrt cleanup and long double fix

* use float_t and double_t
* cleanup subnormal handling
* bithacks according to the new convention (ldshape for long double
and explicit unions for float and double)
上级 39c910fb
...@@ -15,7 +15,8 @@ ...@@ -15,7 +15,8 @@
* Return cube root of x * Return cube root of x
*/ */
#include "libm.h" #include <math.h>
#include <stdint.h>
static const uint32_t static const uint32_t
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */ B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
...@@ -31,15 +32,10 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */ ...@@ -31,15 +32,10 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
double cbrt(double x) double cbrt(double x)
{ {
int32_t hx; union {double f; uint64_t i;} u = {x};
union dshape u; double_t r,s,t,w;
double r,s,t=0.0,w; uint32_t hx = u.i>>32 & 0x7fffffff;
uint32_t sign;
uint32_t high,low;
EXTRACT_WORDS(hx, low, x);
sign = hx & 0x80000000;
hx ^= sign;
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */ if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
return x+x; return x+x;
...@@ -59,14 +55,16 @@ double cbrt(double x) ...@@ -59,14 +55,16 @@ double cbrt(double x)
* division rounds towards minus infinity; this is also efficient. * division rounds towards minus infinity; this is also efficient.
*/ */
if (hx < 0x00100000) { /* zero or subnormal? */ if (hx < 0x00100000) { /* zero or subnormal? */
if ((hx|low) == 0) u.f = x*0x1p54;
hx = u.i>>32 & 0x7fffffff;
if (hx == 0)
return x; /* cbrt(0) is itself */ return x; /* cbrt(0) is itself */
SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */ hx = hx/3 + B2;
t *= x;
GET_HIGH_WORD(high, t);
INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0);
} else } else
INSERT_WORDS(t, sign|(hx/3+B1), 0); hx = hx/3 + B1;
u.i &= 1ULL<<63;
u.i |= (uint64_t)hx << 32;
t = u.f;
/* /*
* New cbrt to 23 bits: * New cbrt to 23 bits:
...@@ -76,7 +74,7 @@ double cbrt(double x) ...@@ -76,7 +74,7 @@ double cbrt(double x)
* has produced t such that |t/cbrt(x) - 1| ~< 1/32, and cubing this * has produced t such that |t/cbrt(x) - 1| ~< 1/32, and cubing this
* gives us bounds for r = t**3/x. * gives us bounds for r = t**3/x.
* *
* Try to optimize for parallel evaluation as in k_tanf.c. * Try to optimize for parallel evaluation as in __tanf.c.
*/ */
r = (t*t)*(t/x); r = (t*t)*(t/x);
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4)); t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
...@@ -91,9 +89,9 @@ double cbrt(double x) ...@@ -91,9 +89,9 @@ double cbrt(double x)
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
* before the final error is larger than 0.667 ulps. * before the final error is larger than 0.667 ulps.
*/ */
u.value = t; u.f = t;
u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL; u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
t = u.value; t = u.f;
/* one step Newton iteration to 53 bits with error < 0.667 ulps */ /* one step Newton iteration to 53 bits with error < 0.667 ulps */
s = t*t; /* t*t is exact */ s = t*t; /* t*t is exact */
......
...@@ -17,7 +17,8 @@ ...@@ -17,7 +17,8 @@
* Return cube root of x * Return cube root of x
*/ */
#include "libm.h" #include <math.h>
#include <stdint.h>
static const unsigned static const unsigned
B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */ B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
...@@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */ ...@@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
float cbrtf(float x) float cbrtf(float x)
{ {
double r,T; double_t r,T;
float t; union {float f; uint32_t i;} u = {x};
int32_t hx; uint32_t hx = u.i & 0x7fffffff;
uint32_t sign;
uint32_t high;
GET_FLOAT_WORD(hx, x);
sign = hx & 0x80000000;
hx ^= sign;
if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */ if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
return x + x; return x + x;
...@@ -41,28 +37,29 @@ float cbrtf(float x) ...@@ -41,28 +37,29 @@ float cbrtf(float x)
if (hx < 0x00800000) { /* zero or subnormal? */ if (hx < 0x00800000) { /* zero or subnormal? */
if (hx == 0) if (hx == 0)
return x; /* cbrt(+-0) is itself */ return x; /* cbrt(+-0) is itself */
SET_FLOAT_WORD(t, 0x4b800000); /* set t = 2**24 */ u.f = x*0x1p24f;
t *= x; hx = u.i & 0x7fffffff;
GET_FLOAT_WORD(high, t); hx = hx/3 + B2;
SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2));
} else } else
SET_FLOAT_WORD(t, sign|(hx/3+B1)); hx = hx/3 + B1;
u.i &= 0x80000000;
u.i |= hx;
/* /*
* First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In * First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In
* double precision so that its terms can be arranged for efficiency * double precision so that its terms can be arranged for efficiency
* without causing overflow or underflow. * without causing overflow or underflow.
*/ */
T = t; T = u.f;
r = T*T*T; r = T*T*T;
T = T*((double)x+x+r)/(x+r+r); T = T*((double_t)x+x+r)/(x+r+r);
/* /*
* Second step Newton iteration to 47 bits. In double precision for * Second step Newton iteration to 47 bits. In double precision for
* efficiency and accuracy. * efficiency and accuracy.
*/ */
r = T*T*T; r = T*T*T;
T = T*((double)x+x+r)/(x+r+r); T = T*((double_t)x+x+r)/(x+r+r);
/* rounding to 24 bits is perfect in round-to-nearest mode */ /* rounding to 24 bits is perfect in round-to-nearest mode */
return T; return T;
......
...@@ -23,58 +23,50 @@ long double cbrtl(long double x) ...@@ -23,58 +23,50 @@ long double cbrtl(long double x)
return cbrt(x); return cbrt(x);
} }
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384 #elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#define BIAS (LDBL_MAX_EXP - 1)
static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */ static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
long double cbrtl(long double x) long double cbrtl(long double x)
{ {
union IEEEl2bits u, v; union ldshape u = {x}, v;
union {float f; uint32_t i;} uft;
long double r, s, t, w; long double r, s, t, w;
double dr, dt, dx; double_t dr, dt, dx;
float ft, fx; float_t ft;
uint32_t hx; int e = u.i.se & 0x7fff;
uint16_t expsign; int sign = u.i.se & 0x8000;
int k;
u.e = x;
expsign = u.xbits.expsign;
k = expsign & 0x7fff;
/* /*
* If x = +-Inf, then cbrt(x) = +-Inf. * If x = +-Inf, then cbrt(x) = +-Inf.
* If x = NaN, then cbrt(x) = NaN. * If x = NaN, then cbrt(x) = NaN.
*/ */
if (k == BIAS + LDBL_MAX_EXP) if (e == 0x7fff)
return x + x; return x + x;
if (e == 0) {
if (k == 0) { /* Adjust subnormal numbers. */
u.f *= 0x1p120;
e = u.i.se & 0x7fff;
/* If x = +-0, then cbrt(x) = +-0. */ /* If x = +-0, then cbrt(x) = +-0. */
if ((u.bits.manh | u.bits.manl) == 0) if (e == 0)
return x; return x;
/* Adjust subnormal numbers. */ e -= 120;
u.e *= 0x1.0p514; }
k = u.bits.exp; e -= 0x3fff;
k -= BIAS + 514; u.i.se = 0x3fff;
} else x = u.f;
k -= BIAS; switch (e % 3) {
u.xbits.expsign = BIAS;
v.e = 1;
x = u.e;
switch (k % 3) {
case 1: case 1:
case -2: case -2:
x = 2*x; x *= 2;
k--; e--;
break; break;
case 2: case 2:
case -1: case -1:
x = 4*x; x *= 4;
k -= 2; e -= 2;
break; break;
} }
v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3); v.f = 1.0;
v.i.se = sign | (0x3fff + e/3);
/* /*
* The following is the guts of s_cbrtf, with the handling of * The following is the guts of s_cbrtf, with the handling of
...@@ -83,9 +75,9 @@ long double cbrtl(long double x) ...@@ -83,9 +75,9 @@ long double cbrtl(long double x)
*/ */
/* ~5-bit estimate: */ /* ~5-bit estimate: */
fx = x; uft.f = x;
GET_FLOAT_WORD(hx, fx); uft.i = (uft.i & 0x7fffffff)/3 + B1;
SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1)); ft = uft.f;
/* ~16-bit estimate: */ /* ~16-bit estimate: */
dx = x; dx = x;
...@@ -126,7 +118,7 @@ long double cbrtl(long double x) ...@@ -126,7 +118,7 @@ long double cbrtl(long double x)
r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */ r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */ t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
t *= v.e; t *= v.f;
return t; return t;
} }
#endif #endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册