提交 1d5ba3bb 编写于 作者: S Szabolcs Nagy

math: tan cleanups

* use unsigned arithmetics on the representation
* store arg reduction quotient in unsigned (so n%2 would work like n&1)
* use different convention to pass the arg reduction bit to __tan
  (this argument used to be 1 for even and -1 for odd reduction
  which meant obscure bithacks, the new n&1 is cleaner)
* raise inexact and underflow flags correctly for small x
  (tanl(x) may still raise spurious underflow for small but normal x)
  (this exception raising code increases codesize a bit, similar fixes
  are needed in many other places, it may worth investigating at some
  point if the inexact and underflow flags are worth raising correctly
  as this is not strictly required by the standard)
* tanf manual reduction optimization is kept for now
* tanl code path is cleaned up to follow similar logic to tan and tanf
上级 22730d65
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854 * kernel tan function on ~[-pi/4, pi/4] (except on -0), pi/4 ~ 0.7854
* Input x is assumed to be bounded by ~pi/4 in magnitude. * Input x is assumed to be bounded by ~pi/4 in magnitude.
* Input y is the tail of x. * Input y is the tail of x.
* Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned. * Input odd indicates whether tan (if odd = 0) or -1/tan (if odd = 1) is returned.
* *
* Algorithm * Algorithm
* 1. Since tan(-x) = -tan(x), we need only to consider positive x. * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
...@@ -63,21 +63,22 @@ static const double T[] = { ...@@ -63,21 +63,22 @@ static const double T[] = {
pio4 = 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */ pio4 = 7.85398163397448278999e-01, /* 3FE921FB, 54442D18 */
pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */ pio4lo = 3.06161699786838301793e-17; /* 3C81A626, 33145C07 */
double __tan(double x, double y, int iy) double __tan(double x, double y, int odd)
{ {
double_t z, r, v, w, s, sign; double_t z, r, v, w, s, a;
int32_t ix, hx; double w0, a0;
uint32_t hx;
int big, sign;
GET_HIGH_WORD(hx,x); GET_HIGH_WORD(hx,x);
ix = hx & 0x7fffffff; /* high word of |x| */ big = (hx&0x7fffffff) >= 0x3FE59428; /* |x| >= 0.6744 */
if (ix >= 0x3FE59428) { /* |x| >= 0.6744 */ if (big) {
if (hx < 0) { sign = hx>>31;
if (sign) {
x = -x; x = -x;
y = -y; y = -y;
} }
z = pio4 - x; x = (pio4 - x) + (pio4lo - y);
w = pio4lo - y;
x = z + w;
y = 0.0; y = 0.0;
} }
z = x * x; z = x * x;
...@@ -90,30 +91,20 @@ double __tan(double x, double y, int iy) ...@@ -90,30 +91,20 @@ double __tan(double x, double y, int iy)
r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11])))); r = T[1] + w*(T[3] + w*(T[5] + w*(T[7] + w*(T[9] + w*T[11]))));
v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12]))))); v = z*(T[2] + w*(T[4] + w*(T[6] + w*(T[8] + w*(T[10] + w*T[12])))));
s = z * x; s = z * x;
r = y + z * (s * (r + v) + y); r = y + z*(s*(r + v) + y) + s*T[0];
r += T[0] * s;
w = x + r; w = x + r;
if (ix >= 0x3FE59428) { if (big) {
v = iy; s = 1 - 2*odd;
sign = 1 - ((hx >> 30) & 2); v = s - 2.0 * (x + (r - w*w/(w + s)));
return sign * (v - 2.0 * (x - (w * w / (w + v) - r))); return sign ? -v : v;
} }
if (iy == 1) if (!odd)
return w; return w;
else { /* -1.0/(x+r) has up to 2ulp error, so compute it accurately */
/* w0 = w;
* if allow error up to 2 ulp, simply return SET_LOW_WORD(w0, 0);
* -1.0 / (x+r) here v = r - (w0 - x); /* w0+v = r+x */
*/ a0 = a = -1.0 / w;
/* compute -1.0 / (x+r) accurately */ SET_LOW_WORD(a0, 0);
double_t a; return a0 + a*(1.0 + a0*w0 + a0*v);
double z, t;
z = w;
SET_LOW_WORD(z,0);
v = r - (z - x); /* z+v = r+x */
t = a = -1.0 / w; /* a = -1.0/w */
SET_LOW_WORD(t,0);
s = 1.0 + t * z;
return t + a * (s + t * v);
}
} }
...@@ -25,7 +25,7 @@ static const double T[] = { ...@@ -25,7 +25,7 @@ static const double T[] = {
0x1362b9bf971bcd.0p-59, /* 0.00946564784943673166728 */ 0x1362b9bf971bcd.0p-59, /* 0.00946564784943673166728 */
}; };
float __tandf(double x, int iy) float __tandf(double x, int odd)
{ {
double_t z,r,w,s,t,u; double_t z,r,w,s,t,u;
...@@ -50,6 +50,5 @@ float __tandf(double x, int iy) ...@@ -50,6 +50,5 @@ float __tandf(double x, int iy)
s = z*x; s = z*x;
u = T[0] + z*T[1]; u = T[0] + z*T[1];
r = (x + s*u) + (s*w)*(t + w*r); r = (x + s*u) + (s*w)*(t + w*r);
if(iy==1) return r; return odd ? -1.0/r : r;
else return -1.0/r;
} }
...@@ -45,25 +45,21 @@ T29 = 0.0000078293456938132840, /* 0x106b59141a6cb3.0p-69 */ ...@@ -45,25 +45,21 @@ T29 = 0.0000078293456938132840, /* 0x106b59141a6cb3.0p-69 */
T31 = -0.0000032609076735050182, /* -0x1b5abef3ba4b59.0p-71 */ T31 = -0.0000032609076735050182, /* -0x1b5abef3ba4b59.0p-71 */
T33 = 0.0000023261313142559411; /* 0x13835436c0c87f.0p-71 */ T33 = 0.0000023261313142559411; /* 0x13835436c0c87f.0p-71 */
long double __tanl(long double x, long double y, int iy) { long double __tanl(long double x, long double y, int odd) {
long double z, r, v, w, s, a, t; long double z, r, v, w, s, a, t;
long double osign; int big, sign;
int i;
iy = iy == 1 ? -1 : 1; /* XXX recover original interface */ big = fabsl(x) >= 0.67434;
osign = copysignl(1.0, x); if (big) {
if (fabsl(x) >= 0.67434) { sign = 0;
if (x < 0) { if (x < 0) {
sign = 1;
x = -x; x = -x;
y = -y; y = -y;
} }
z = pio4 - x; x = (pio4 - x) + (pio4lo - y);
w = pio4lo - y;
x = z + w;
y = 0.0; y = 0.0;
i = 1; }
} else
i = 0;
z = x * x; z = x * x;
w = z * z; w = z * z;
r = T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 + r = T5 + w * (T9 + w * (T13 + w * (T17 + w * (T21 +
...@@ -71,14 +67,14 @@ long double __tanl(long double x, long double y, int iy) { ...@@ -71,14 +67,14 @@ long double __tanl(long double x, long double y, int iy) {
v = z * (T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 + v = z * (T7 + w * (T11 + w * (T15 + w * (T19 + w * (T23 +
w * (T27 + w * T31)))))); w * (T27 + w * T31))))));
s = z * x; s = z * x;
r = y + z * (s * (r + v) + y); r = y + z * (s * (r + v) + y) + T3 * s;
r += T3 * s;
w = x + r; w = x + r;
if (i == 1) { if (big) {
v = (long double)iy; s = 1 - 2*odd;
return osign * (v - 2.0 * (x - (w * w / (w + v) - r))); v = s - 2.0 * (x + (r - w * w / (w + s)));
return sign ? -v : v;
} }
if (iy == 1) if (!odd)
return w; return w;
/* /*
......
...@@ -43,27 +43,28 @@ ...@@ -43,27 +43,28 @@
double tan(double x) double tan(double x)
{ {
double y[2], z = 0.0; double y[2];
int32_t n, ix; uint32_t ix;
unsigned n;
/* High word of x. */
GET_HIGH_WORD(ix, x); GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff;
/* |x| ~< pi/4 */ /* |x| ~< pi/4 */
ix &= 0x7fffffff;
if (ix <= 0x3fe921fb) { if (ix <= 0x3fe921fb) {
if (ix < 0x3e400000) /* x < 2**-27 */ if (ix < 0x3e400000) { /* |x| < 2**-27 */
/* raise inexact if x != 0 */ /* raise inexact if x!=0 and underflow if subnormal */
if ((int)x == 0) FORCE_EVAL(ix < 0x00100000 ? x/0x1p120f : x+0x1p120f);
return x; return x;
return __tan(x, z, 1); }
return __tan(x, 0.0, 0);
} }
/* tan(Inf or NaN) is NaN */ /* tan(Inf or NaN) is NaN */
if (ix >= 0x7ff00000) if (ix >= 0x7ff00000)
return x - x; return x - x;
/* argument reduction needed */ /* argument reduction */
n = __rem_pio2(x, y); n = __rem_pio2(x, y);
return __tan(y[0], y[1], 1 - ((n&1)<<1)); /* n even: 1, n odd: -1 */ return __tan(y[0], y[1], n&1);
} }
...@@ -26,37 +26,39 @@ t4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */ ...@@ -26,37 +26,39 @@ t4pio2 = 4*M_PI_2; /* 0x401921FB, 0x54442D18 */
float tanf(float x) float tanf(float x)
{ {
double y; double y;
int32_t n, hx, ix; uint32_t ix;
unsigned n, sign;
GET_FLOAT_WORD(hx, x); GET_FLOAT_WORD(ix, x);
ix = hx & 0x7fffffff; sign = ix >> 31;
ix &= 0x7fffffff;
if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */ if (ix <= 0x3f490fda) { /* |x| ~<= pi/4 */
if (ix < 0x39800000) /* |x| < 2**-12 */ if (ix < 0x39800000) { /* |x| < 2**-12 */
/* return x and raise inexact if x != 0 */ /* raise inexact if x!=0 and underflow if subnormal */
if ((int)x == 0) FORCE_EVAL(ix < 0x00800000 ? x/0x1p120f : x+0x1p120f);
return x; return x;
return __tandf(x, 1); }
return __tandf(x, 0);
} }
if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */ if (ix <= 0x407b53d1) { /* |x| ~<= 5*pi/4 */
if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */ if (ix <= 0x4016cbe3) /* |x| ~<= 3pi/4 */
return __tandf((hx > 0 ? x-t1pio2 : x+t1pio2), -1); return __tandf((sign ? x+t1pio2 : x-t1pio2), 1);
else else
return __tandf((hx > 0 ? x-t2pio2 : x+t2pio2), 1); return __tandf((sign ? x+t2pio2 : x-t2pio2), 0);
} }
if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */ if (ix <= 0x40e231d5) { /* |x| ~<= 9*pi/4 */
if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */ if (ix <= 0x40afeddf) /* |x| ~<= 7*pi/4 */
return __tandf((hx > 0 ? x-t3pio2 : x+t3pio2), -1); return __tandf((sign ? x+t3pio2 : x-t3pio2), 1);
else else
return __tandf((hx > 0 ? x-t4pio2 : x+t4pio2), 1); return __tandf((sign ? x+t4pio2 : x-t4pio2), 0);
} }
/* tan(Inf or NaN) is NaN */ /* tan(Inf or NaN) is NaN */
if (ix >= 0x7f800000) if (ix >= 0x7f800000)
return x - x; return x - x;
/* general argument reduction needed */ /* argument reduction */
n = __rem_pio2f(x, &y); n = __rem_pio2f(x, &y);
/* integer parameter: n even: 1; n odd: -1 */ return __tandf(y, n&1);
return __tandf(y, 1-((n&1)<<1));
} }
...@@ -41,42 +41,27 @@ long double tanl(long double x) ...@@ -41,42 +41,27 @@ long double tanl(long double x)
long double tanl(long double x) long double tanl(long double x)
{ {
union IEEEl2bits z; union IEEEl2bits z;
int e0, s;
long double y[2]; long double y[2];
long double hi, lo; unsigned n;
z.e = x; z.e = x;
s = z.bits.sign;
z.bits.sign = 0; z.bits.sign = 0;
/* If x = +-0 or x is subnormal, then tan(x) = x. */
if (z.bits.exp == 0)
return x;
/* If x = NaN or Inf, then tan(x) = NaN. */ /* If x = NaN or Inf, then tan(x) = NaN. */
if (z.bits.exp == 32767) if (z.bits.exp == 0x7fff)
return (x - x) / (x - x); return (x - x) / (x - x);
/* Optimize the case where x is already within range. */ /* |x| < (double)pi/4 */
if (z.e < M_PI_4) { if (z.e < M_PI_4) {
hi = __tanl(z.e, 0, 0); /* x = +-0 or x is subnormal */
return s ? -hi : hi; if (z.bits.exp == 0)
/* inexact and underflow if x!=0 */
return x + x*0x1p-120f;
/* can raise spurious underflow */
return __tanl(x, 0, 0);
} }
e0 = __rem_pio2l(x, y); n = __rem_pio2l(x, y);
hi = y[0]; return __tanl(y[0], y[1], n&1);
lo = y[1];
switch (e0 & 3) {
case 0:
case 2:
hi = __tanl(hi, lo, 0);
break;
case 1:
case 3:
hi = __tanl(hi, lo, 1);
break;
}
return hi;
} }
#endif #endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册