提交 535104ab 编写于 作者: S Szabolcs Nagy

math: cbrt cleanup and long double fix

* use float_t and double_t
* cleanup subnormal handling
* bithacks according to the new convention (ldshape for long double
and explicit unions for float and double)
上级 39c910fb
......@@ -15,7 +15,8 @@
* Return cube root of x
*/
#include "libm.h"
#include <math.h>
#include <stdint.h>
static const uint32_t
B1 = 715094163, /* B1 = (1023-1023/3-0.03306235651)*2**20 */
......@@ -31,15 +32,10 @@ P4 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
double cbrt(double x)
{
int32_t hx;
union dshape u;
double r,s,t=0.0,w;
uint32_t sign;
uint32_t high,low;
union {double f; uint64_t i;} u = {x};
double_t r,s,t,w;
uint32_t hx = u.i>>32 & 0x7fffffff;
EXTRACT_WORDS(hx, low, x);
sign = hx & 0x80000000;
hx ^= sign;
if (hx >= 0x7ff00000) /* cbrt(NaN,INF) is itself */
return x+x;
......@@ -59,14 +55,16 @@ double cbrt(double x)
* division rounds towards minus infinity; this is also efficient.
*/
if (hx < 0x00100000) { /* zero or subnormal? */
if ((hx|low) == 0)
u.f = x*0x1p54;
hx = u.i>>32 & 0x7fffffff;
if (hx == 0)
return x; /* cbrt(0) is itself */
SET_HIGH_WORD(t, 0x43500000); /* set t = 2**54 */
t *= x;
GET_HIGH_WORD(high, t);
INSERT_WORDS(t, sign|((high&0x7fffffff)/3+B2), 0);
hx = hx/3 + B2;
} else
INSERT_WORDS(t, sign|(hx/3+B1), 0);
hx = hx/3 + B1;
u.i &= 1ULL<<63;
u.i |= (uint64_t)hx << 32;
t = u.f;
/*
* New cbrt to 23 bits:
......@@ -76,7 +74,7 @@ double cbrt(double x)
* has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
* gives us bounds for r = t**3/x.
*
* Try to optimize for parallel evaluation as in k_tanf.c.
* Try to optimize for parallel evaluation as in __tanf.c.
*/
r = (t*t)*(t/x);
t = t*((P0+r*(P1+r*P2))+((r*r)*r)*(P3+r*P4));
......@@ -91,9 +89,9 @@ double cbrt(double x)
* 0.667; the error in the rounded t can be up to about 3 23-bit ulps
* before the final error is larger than 0.667 ulps.
*/
u.value = t;
u.bits = (u.bits + 0x80000000) & 0xffffffffc0000000ULL;
t = u.value;
u.f = t;
u.i = (u.i + 0x80000000) & 0xffffffffc0000000ULL;
t = u.f;
/* one step Newton iteration to 53 bits with error < 0.667 ulps */
s = t*t; /* t*t is exact */
......
......@@ -17,7 +17,8 @@
* Return cube root of x
*/
#include "libm.h"
#include <math.h>
#include <stdint.h>
static const unsigned
B1 = 709958130, /* B1 = (127-127.0/3-0.03306235651)*2**23 */
......@@ -25,15 +26,10 @@ B2 = 642849266; /* B2 = (127-127.0/3-24/3-0.03306235651)*2**23 */
float cbrtf(float x)
{
double r,T;
float t;
int32_t hx;
uint32_t sign;
uint32_t high;
double_t r,T;
union {float f; uint32_t i;} u = {x};
uint32_t hx = u.i & 0x7fffffff;
GET_FLOAT_WORD(hx, x);
sign = hx & 0x80000000;
hx ^= sign;
if (hx >= 0x7f800000) /* cbrt(NaN,INF) is itself */
return x + x;
......@@ -41,28 +37,29 @@ float cbrtf(float x)
if (hx < 0x00800000) { /* zero or subnormal? */
if (hx == 0)
return x; /* cbrt(+-0) is itself */
SET_FLOAT_WORD(t, 0x4b800000); /* set t = 2**24 */
t *= x;
GET_FLOAT_WORD(high, t);
SET_FLOAT_WORD(t, sign|((high&0x7fffffff)/3+B2));
u.f = x*0x1p24f;
hx = u.i & 0x7fffffff;
hx = hx/3 + B2;
} else
SET_FLOAT_WORD(t, sign|(hx/3+B1));
hx = hx/3 + B1;
u.i &= 0x80000000;
u.i |= hx;
/*
* First step Newton iteration (solving t*t-x/t == 0) to 16 bits. In
* double precision so that its terms can be arranged for efficiency
* without causing overflow or underflow.
*/
T = t;
T = u.f;
r = T*T*T;
T = T*((double)x+x+r)/(x+r+r);
T = T*((double_t)x+x+r)/(x+r+r);
/*
* Second step Newton iteration to 47 bits. In double precision for
* efficiency and accuracy.
*/
r = T*T*T;
T = T*((double)x+x+r)/(x+r+r);
T = T*((double_t)x+x+r)/(x+r+r);
/* rounding to 24 bits is perfect in round-to-nearest mode */
return T;
......
......@@ -23,58 +23,50 @@ long double cbrtl(long double x)
return cbrt(x);
}
#elif (LDBL_MANT_DIG == 64 || LDBL_MANT_DIG == 113) && LDBL_MAX_EXP == 16384
#define BIAS (LDBL_MAX_EXP - 1)
static const unsigned B1 = 709958130; /* B1 = (127-127.0/3-0.03306235651)*2**23 */
long double cbrtl(long double x)
{
union IEEEl2bits u, v;
union ldshape u = {x}, v;
union {float f; uint32_t i;} uft;
long double r, s, t, w;
double dr, dt, dx;
float ft, fx;
uint32_t hx;
uint16_t expsign;
int k;
u.e = x;
expsign = u.xbits.expsign;
k = expsign & 0x7fff;
double_t dr, dt, dx;
float_t ft;
int e = u.i.se & 0x7fff;
int sign = u.i.se & 0x8000;
/*
* If x = +-Inf, then cbrt(x) = +-Inf.
* If x = NaN, then cbrt(x) = NaN.
*/
if (k == BIAS + LDBL_MAX_EXP)
if (e == 0x7fff)
return x + x;
if (k == 0) {
if (e == 0) {
/* Adjust subnormal numbers. */
u.f *= 0x1p120;
e = u.i.se & 0x7fff;
/* If x = +-0, then cbrt(x) = +-0. */
if ((u.bits.manh | u.bits.manl) == 0)
if (e == 0)
return x;
/* Adjust subnormal numbers. */
u.e *= 0x1.0p514;
k = u.bits.exp;
k -= BIAS + 514;
} else
k -= BIAS;
u.xbits.expsign = BIAS;
v.e = 1;
x = u.e;
switch (k % 3) {
e -= 120;
}
e -= 0x3fff;
u.i.se = 0x3fff;
x = u.f;
switch (e % 3) {
case 1:
case -2:
x = 2*x;
k--;
x *= 2;
e--;
break;
case 2:
case -1:
x = 4*x;
k -= 2;
x *= 4;
e -= 2;
break;
}
v.xbits.expsign = (expsign & 0x8000) | (BIAS + k / 3);
v.f = 1.0;
v.i.se = sign | (0x3fff + e/3);
/*
* The following is the guts of s_cbrtf, with the handling of
......@@ -83,9 +75,9 @@ long double cbrtl(long double x)
*/
/* ~5-bit estimate: */
fx = x;
GET_FLOAT_WORD(hx, fx);
SET_FLOAT_WORD(ft, ((hx & 0x7fffffff) / 3 + B1));
uft.f = x;
uft.i = (uft.i & 0x7fffffff)/3 + B1;
ft = uft.f;
/* ~16-bit estimate: */
dx = x;
......@@ -126,7 +118,7 @@ long double cbrtl(long double x)
r = (r-t)/(w+r); /* r-t is exact; w+r ~= 3*t */
t = t+t*r; /* error <= 0.5 + 0.5/3 + epsilon */
t *= v.e;
t *= v.f;
return t;
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册