提交 3fdf94ec 编写于 作者: S Szabolcs Nagy

math: clean up __rem_pio2

- remove the HAVE_EFFICIENT_IRINT case: fn is an exact integer, so
  it can be converted to int32_t a bit more efficiently than with a
  cast (the rounding mode change can be avoided), but musl does not
  support this case on any arch.
- __rem_pio2: use double_t where possible
- __rem_pio2f: use fewer assignments to avoid stores on i386
- use unsigned int bit manipulation (and union instead of macros)
- use hexfloat literals instead of named constants
上级 10c8b714
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
*/ */
static const double static const double
two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */ pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */ pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
...@@ -41,18 +40,19 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ ...@@ -41,18 +40,19 @@ pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ /* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
int __rem_pio2(double x, double *y) int __rem_pio2(double x, double *y)
{ {
double z,w,t,r,fn; union {double f; uint64_t i;} u = {x};
double tx[3],ty[2]; double_t z,w,t,r;
int32_t e0,i,j,nx,n,ix,hx; double tx[3],ty[2],fn;
uint32_t low; uint32_t ix;
int sign, n, ex, ey, i;
GET_HIGH_WORD(hx,x); sign = u.i>>63;
ix = hx & 0x7fffffff; ix = u.i>>32 & 0x7fffffff;
if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */ if (ix <= 0x400f6a7a) { /* |x| ~<= 5pi/4 */
if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */ if ((ix & 0xfffff) == 0x921fb) /* |x| ~= pi/2 or 2pi/2 */
goto medium; /* cancellation -- use medium case */ goto medium; /* cancellation -- use medium case */
if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */ if (ix <= 0x4002d97c) { /* |x| ~<= 3pi/4 */
if (hx > 0) { if (!sign) {
z = x - pio2_1; /* one round good to 85 bits */ z = x - pio2_1; /* one round good to 85 bits */
y[0] = z - pio2_1t; y[0] = z - pio2_1t;
y[1] = (z-y[0]) - pio2_1t; y[1] = (z-y[0]) - pio2_1t;
...@@ -64,7 +64,7 @@ int __rem_pio2(double x, double *y) ...@@ -64,7 +64,7 @@ int __rem_pio2(double x, double *y)
return -1; return -1;
} }
} else { } else {
if (hx > 0) { if (!sign) {
z = x - 2*pio2_1; z = x - 2*pio2_1;
y[0] = z - 2*pio2_1t; y[0] = z - 2*pio2_1t;
y[1] = (z-y[0]) - 2*pio2_1t; y[1] = (z-y[0]) - 2*pio2_1t;
...@@ -81,7 +81,7 @@ int __rem_pio2(double x, double *y) ...@@ -81,7 +81,7 @@ int __rem_pio2(double x, double *y)
if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */ if (ix <= 0x4015fdbc) { /* |x| ~<= 7pi/4 */
if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */ if (ix == 0x4012d97c) /* |x| ~= 3pi/2 */
goto medium; goto medium;
if (hx > 0) { if (!sign) {
z = x - 3*pio2_1; z = x - 3*pio2_1;
y[0] = z - 3*pio2_1t; y[0] = z - 3*pio2_1t;
y[1] = (z-y[0]) - 3*pio2_1t; y[1] = (z-y[0]) - 3*pio2_1t;
...@@ -95,7 +95,7 @@ int __rem_pio2(double x, double *y) ...@@ -95,7 +95,7 @@ int __rem_pio2(double x, double *y)
} else { } else {
if (ix == 0x401921fb) /* |x| ~= 4pi/2 */ if (ix == 0x401921fb) /* |x| ~= 4pi/2 */
goto medium; goto medium;
if (hx > 0) { if (!sign) {
z = x - 4*pio2_1; z = x - 4*pio2_1;
y[0] = z - 4*pio2_1t; y[0] = z - 4*pio2_1t;
y[1] = (z-y[0]) - 4*pio2_1t; y[1] = (z-y[0]) - 4*pio2_1t;
...@@ -109,32 +109,26 @@ int __rem_pio2(double x, double *y) ...@@ -109,32 +109,26 @@ int __rem_pio2(double x, double *y)
} }
} }
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */ if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
uint32_t high;
medium: medium:
/* Use a specialized rint() to get fn. Assume round-to-nearest. */ /* rint(x/(pi/2)), Assume round-to-nearest. */
fn = x*invpio2 + 0x1.8p52; fn = x*invpio2 + 0x1.8p52;
fn = fn - 0x1.8p52; fn = fn - 0x1.8p52;
// FIXME
#ifdef HAVE_EFFICIENT_IRINT
n = irint(fn);
#else
n = (int32_t)fn; n = (int32_t)fn;
#endif
r = x - fn*pio2_1; r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */ w = fn*pio2_1t; /* 1st round, good to 85 bits */
j = ix>>20;
y[0] = r - w; y[0] = r - w;
GET_HIGH_WORD(high,y[0]); u.f = y[0];
i = j - ((high>>20)&0x7ff); ey = u.i>>52 & 0x7ff;
if (i > 16) { /* 2nd round, good to 118 bits */ ex = ix>>20;
if (ex - ey > 16) { /* 2nd round, good to 118 bits */
t = r; t = r;
w = fn*pio2_2; w = fn*pio2_2;
r = t - w; r = t - w;
w = fn*pio2_2t - ((t-r)-w); w = fn*pio2_2t - ((t-r)-w);
y[0] = r - w; y[0] = r - w;
GET_HIGH_WORD(high,y[0]); u.f = y[0];
i = j - ((high>>20)&0x7ff); ey = u.i>>52 & 0x7ff;
if (i > 49) { /* 3rd round, good to 151 bits, covers all cases */ if (ex - ey > 49) { /* 3rd round, good to 151 bits, covers all cases */
t = r; t = r;
w = fn*pio2_3; w = fn*pio2_3;
r = t - w; r = t - w;
...@@ -142,7 +136,7 @@ medium: ...@@ -142,7 +136,7 @@ medium:
y[0] = r - w; y[0] = r - w;
} }
} }
y[1] = (r-y[0]) - w; y[1] = (r - y[0]) - w;
return n; return n;
} }
/* /*
...@@ -152,19 +146,21 @@ medium: ...@@ -152,19 +146,21 @@ medium:
y[0] = y[1] = x - x; y[0] = y[1] = x - x;
return 0; return 0;
} }
/* set z = scalbn(|x|,ilogb(x)-23) */ /* set z = scalbn(|x|,-ilogb(x)+23) */
GET_LOW_WORD(low,x); u.f = x;
e0 = (ix>>20) - 1046; /* e0 = ilogb(z)-23; */ u.i &= (uint64_t)-1>>12;
INSERT_WORDS(z, ix - ((int32_t)(e0<<20)), low); u.i |= (uint64_t)(0x3ff + 23)<<52;
for (i=0; i<2; i++) { z = u.f;
tx[i] = (double)((int32_t)(z)); for (i=0; i < 2; i++) {
z = (z-tx[i])*two24; tx[i] = (double)(int32_t)z;
z = (z-tx[i])*0x1p24;
} }
tx[2] = z; tx[i] = z;
nx = 3; /* skip zero terms, first term is non-zero */
while (tx[nx-1] == 0.0) nx--; /* skip zero term */ while (tx[i] == 0.0)
n = __rem_pio2_large(tx,ty,e0,nx,1); i--;
if (hx < 0) { n = __rem_pio2_large(tx,ty,(int)(ix>>20)-(0x3ff+23),i+1,1);
if (sign) {
y[0] = -ty[0]; y[0] = -ty[0];
y[1] = -ty[1]; y[1] = -ty[1];
return -n; return -n;
......
...@@ -270,10 +270,6 @@ static const double PIo2[] = { ...@@ -270,10 +270,6 @@ static const double PIo2[] = {
2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
}; };
static const double
two24 = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
{ {
int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; int32_t jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
...@@ -304,8 +300,8 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec) ...@@ -304,8 +300,8 @@ int __rem_pio2_large(double *x, double *y, int e0, int nx, int prec)
recompute: recompute:
/* distill q[] into iq[] reversingly */ /* distill q[] into iq[] reversingly */
for (i=0,j=jz,z=q[jz]; j>0; i++,j--) { for (i=0,j=jz,z=q[jz]; j>0; i++,j--) {
fw = (double)((int32_t)(twon24* z)); fw = (double)(int32_t)(0x1p-24*z);
iq[i] = (int32_t)(z-two24*fw); iq[i] = (int32_t)(z - 0x1p24*fw);
z = q[j-1]+fw; z = q[j-1]+fw;
} }
...@@ -330,7 +326,7 @@ recompute: ...@@ -330,7 +326,7 @@ recompute:
if (carry == 0) { if (carry == 0) {
if (j != 0) { if (j != 0) {
carry = 1; carry = 1;
iq[i] = 0x1000000- j; iq[i] = 0x1000000 - j;
} }
} else } else
iq[i] = 0xffffff - j; iq[i] = 0xffffff - j;
...@@ -378,9 +374,9 @@ recompute: ...@@ -378,9 +374,9 @@ recompute:
} }
} else { /* break z into 24-bit if necessary */ } else { /* break z into 24-bit if necessary */
z = scalbn(z,-q0); z = scalbn(z,-q0);
if (z >= two24) { if (z >= 0x1p24) {
fw = (double)((int32_t)(twon24*z)); fw = (double)(int32_t)(0x1p-24*z);
iq[jz] = (int32_t)(z-two24*fw); iq[jz] = (int32_t)(z - 0x1p24*fw);
jz += 1; jz += 1;
q0 += 24; q0 += 24;
iq[jz] = (int32_t)fw; iq[jz] = (int32_t)fw;
...@@ -392,7 +388,7 @@ recompute: ...@@ -392,7 +388,7 @@ recompute:
fw = scalbn(1.0,q0); fw = scalbn(1.0,q0);
for (i=jz; i>=0; i--) { for (i=jz; i>=0; i--) {
q[i] = fw*(double)iq[i]; q[i] = fw*(double)iq[i];
fw *= twon24; fw *= 0x1p-24;
} }
/* compute PIo2[0,...,jp]*q[jz,...,0] */ /* compute PIo2[0,...,jp]*q[jz,...,0] */
......
...@@ -34,42 +34,32 @@ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ ...@@ -34,42 +34,32 @@ pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
int __rem_pio2f(float x, double *y) int __rem_pio2f(float x, double *y)
{ {
double w,r,fn; union {float f; uint32_t i;} u = {x};
double tx[1],ty[1]; double tx[1],ty[1],fn;
float z; uint32_t ix;
int32_t e0,n,ix,hx; int n, sign, e0;
GET_FLOAT_WORD(hx, x); ix = u.i & 0x7fffffff;
ix = hx & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */ /* 25+53 bit pi is good enough for medium size */
if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */ if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */ /* Use a specialized rint() to get fn. Assume round-to-nearest. */
fn = x*invpio2 + 0x1.8p52; fn = x*invpio2 + 0x1.8p52;
fn = fn - 0x1.8p52; fn = fn - 0x1.8p52;
// FIXME
#ifdef HAVE_EFFICIENT_IRINT
n = irint(fn);
#else
n = (int32_t)fn; n = (int32_t)fn;
#endif *y = x - fn*pio2_1 - fn*pio2_1t;
r = x - fn*pio2_1;
w = fn*pio2_1t;
*y = r - w;
return n; return n;
} }
/*
* all other (large) arguments
*/
if(ix>=0x7f800000) { /* x is inf or NaN */ if(ix>=0x7f800000) { /* x is inf or NaN */
*y = x-x; *y = x-x;
return 0; return 0;
} }
/* set z = scalbn(|x|,ilogb(|x|)-23) */ /* scale x into [2^23, 2^24-1] */
e0 = (ix>>23) - 150; /* e0 = ilogb(|x|)-23; */ sign = u.i>>31;
SET_FLOAT_WORD(z, ix - ((int32_t)(e0<<23))); e0 = (ix>>23) - (0x7f+23); /* e0 = ilogb(|x|)-23, positive */
tx[0] = z; u.i = ix - (e0<<23);
tx[0] = u.f;
n = __rem_pio2_large(tx,ty,e0,1,0); n = __rem_pio2_large(tx,ty,e0,1,0);
if (hx < 0) { if (sign) {
*y = -ty[0]; *y = -ty[0];
return -n; return -n;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册