提交 5bb6b249 编写于 作者: S Szabolcs Nagy

math: bessel cleanup (j1.c and j1f.c)

a common code path in j1 and y1 was factored out so the resulting
object code is a bit smaller

unsigned int arithmetics is used for bit manipulation

j1(-inf) now returns 0 instead of -0

an incorrect threshold in the common code of j1f and y1f got fixed
(this caused spurious overflow and underflow exceptions)

the else branch in pone and pzero functions are fixed
(so code analyzers dont warn about uninitialized values)
上级 697acde6
...@@ -59,10 +59,47 @@ ...@@ -59,10 +59,47 @@
static double pone(double), qone(double); static double pone(double), qone(double);
static const double static const double
huge = 1e300,
invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */ invsqrtpi = 5.64189583547756279280e-01, /* 0x3FE20DD7, 0x50429B6D */
tpi = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */ tpi = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
static double common(uint32_t ix, double x, int y1, int sign)
{
double z,s,c,ss,cc;
/*
* j1(x) = sqrt(2/(pi*x))*(p1(x)*cos(x-3pi/4)-q1(x)*sin(x-3pi/4))
* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x-3pi/4)+q1(x)*cos(x-3pi/4))
*
* sin(x-3pi/4) = -(sin(x) + cos(x))/sqrt(2)
* cos(x-3pi/4) = (sin(x) - cos(x))/sqrt(2)
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
*/
s = sin(x);
if (y1)
s = -s;
c = cos(x);
cc = s-c;
if (ix < 0x7fe00000) {
/* avoid overflow in 2*x */
ss = -s-c;
z = cos(2*x);
if (s*c > 0)
cc = z/ss;
else
ss = z/cc;
if (ix < 0x48000000) {
if (y1)
ss = -ss;
cc = pone(x)*cc-qone(x)*ss;
}
}
if (sign)
cc = -cc;
return invsqrtpi*cc/sqrt(x);
}
/* R0/S0 on [0,2] */ /* R0/S0 on [0,2] */
static const double
r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */ r00 = -6.25000000000000000000e-02, /* 0xBFB00000, 0x00000000 */
r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */ r01 = 1.40705666955189706048e-03, /* 0x3F570D9F, 0x98472C61 */
r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */ r02 = -1.59955631084035597520e-05, /* 0xBEF0C5C6, 0xBA169668 */
...@@ -75,52 +112,26 @@ s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */ ...@@ -75,52 +112,26 @@ s05 = 1.23542274426137913908e-11; /* 0x3DAB2ACF, 0xCFB97ED8 */
double j1(double x) double j1(double x)
{ {
double z,s,c,ss,cc,r,u,v,y; double z,r,s;
int32_t hx,ix; uint32_t ix;
int sign;
GET_HIGH_WORD(hx, x); GET_HIGH_WORD(ix, x);
ix = hx & 0x7fffffff; sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7ff00000) if (ix >= 0x7ff00000)
return 1.0/x; return 1/(x*x);
y = fabs(x); if (ix >= 0x40000000) /* |x| >= 2 */
if (ix >= 0x40000000) { /* |x| >= 2.0 */ return common(ix, fabs(x), 0, sign);
s = sin(y); if (ix >= 0x38000000) { /* |x| >= 2**-127 */
c = cos(y); z = x*x;
ss = -s-c; r = z*(r00+z*(r01+z*(r02+z*r03)));
cc = s-c; s = 1+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
if (ix < 0x7fe00000) { /* make sure y+y not overflow */ z = r/s;
z = cos(y+y); } else
if (s*c > 0.0) /* avoid underflow, raise inexact if x!=0 */
cc = z/ss; z = x;
else return (0.5 + z)*x;
ss = z/cc;
}
/*
* j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
* y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
*/
if (ix > 0x48000000)
z = (invsqrtpi*cc)/sqrt(y);
else {
u = pone(y);
v = qone(y);
z = invsqrtpi*(u*cc-v*ss)/sqrt(y);
}
if (hx < 0)
return -z;
else
return z;
}
if (ix < 0x3e400000) { /* |x| < 2**-27 */
/* raise inexact if x!=0 */
if (huge+x > 1.0)
return 0.5*x;
}
z = x*x;
r = z*(r00+z*(r01+z*(r02+z*r03)));
s = 1.0+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
r *= x;
return x*0.5 + r/s;
} }
static const double U0[5] = { static const double U0[5] = {
...@@ -138,59 +149,28 @@ static const double V0[5] = { ...@@ -138,59 +149,28 @@ static const double V0[5] = {
1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */ 1.66559246207992079114e-11, /* 0x3DB25039, 0xDACA772A */
}; };
double y1(double x) double y1(double x)
{ {
double z,s,c,ss,cc,u,v; double z,u,v;
int32_t hx,ix,lx; uint32_t ix,lx;
EXTRACT_WORDS(hx, lx, x); EXTRACT_WORDS(ix, lx, x);
ix = 0x7fffffff & hx; /* y1(nan)=nan, y1(<0)=nan, y1(0)=-inf, y1(inf)=0 */
/* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */ if ((ix<<1 | lx) == 0)
return -1/0.0;
if (ix>>31)
return 0/0.0;
if (ix >= 0x7ff00000) if (ix >= 0x7ff00000)
return 1.0/(x+x*x); return 1/x;
if ((ix|lx) == 0)
return -1.0/0.0; if (ix >= 0x40000000) /* x >= 2 */
if (hx < 0) return common(ix, x, 1, 0);
return 0.0/0.0; if (ix < 0x3c900000) /* x < 2**-54 */
if (ix >= 0x40000000) { /* |x| >= 2.0 */
s = sin(x);
c = cos(x);
ss = -s-c;
cc = s-c;
if (ix < 0x7fe00000) { /* make sure x+x not overflow */
z = cos(x+x);
if (s*c > 0.0)
cc = z/ss;
else
ss = z/cc;
}
/* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
* where x0 = x-3pi/4
* Better formula:
* cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
* = 1/sqrt(2) * (sin(x) - cos(x))
* sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
* = -1/sqrt(2) * (cos(x) + sin(x))
* To avoid cancellation, use
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.
*/
if (ix > 0x48000000)
z = (invsqrtpi*ss)/sqrt(x);
else {
u = pone(x);
v = qone(x);
z = invsqrtpi*(u*ss+v*cc)/sqrt(x);
}
return z;
}
if (ix <= 0x3c900000) /* x < 2**-54 */
return -tpi/x; return -tpi/x;
z = x*x; z = x*x;
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
v = 1.0+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4])))); v = 1+z*(V0[0]+z*(V0[1]+z*(V0[2]+z*(V0[3]+z*V0[4]))));
return x*(u/v) + tpi*(j1(x)*log(x)-1.0/x); return x*(u/v) + tpi*(j1(x)*log(x)-1/x);
} }
/* For x >= 8, the asymptotic expansions of pone is /* For x >= 8, the asymptotic expansions of pone is
...@@ -271,14 +251,14 @@ static double pone(double x) ...@@ -271,14 +251,14 @@ static double pone(double x)
{ {
const double *p,*q; const double *p,*q;
double z,r,s; double z,r,s;
int32_t ix; uint32_t ix;
GET_HIGH_WORD(ix, x); GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
if (ix >= 0x40200000){p = pr8; q = ps8;} if (ix >= 0x40200000){p = pr8; q = ps8;}
else if (ix >= 0x40122E8B){p = pr5; q = ps5;} else if (ix >= 0x40122E8B){p = pr5; q = ps5;}
else if (ix >= 0x4006DB6D){p = pr3; q = ps3;} else if (ix >= 0x4006DB6D){p = pr3; q = ps3;}
else if (ix >= 0x40000000){p = pr2; q = ps2;} else /*ix >= 0x40000000*/ {p = pr2; q = ps2;}
z = 1.0/(x*x); z = 1.0/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
...@@ -367,14 +347,14 @@ static double qone(double x) ...@@ -367,14 +347,14 @@ static double qone(double x)
{ {
const double *p,*q; const double *p,*q;
double s,r,z; double s,r,z;
int32_t ix; uint32_t ix;
GET_HIGH_WORD(ix, x); GET_HIGH_WORD(ix, x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
if (ix >= 0x40200000){p = qr8; q = qs8;} if (ix >= 0x40200000){p = qr8; q = qs8;}
else if (ix >= 0x40122E8B){p = qr5; q = qs5;} else if (ix >= 0x40122E8B){p = qr5; q = qs5;}
else if (ix >= 0x4006DB6D){p = qr3; q = qs3;} else if (ix >= 0x4006DB6D){p = qr3; q = qs3;}
else if (ix >= 0x40000000){p = qr2; q = qs2;} else /*ix >= 0x40000000*/ {p = qr2; q = qs2;}
z = 1.0/(x*x); z = 1.0/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); s = 1.0+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
......
...@@ -18,10 +18,38 @@ ...@@ -18,10 +18,38 @@
static float ponef(float), qonef(float); static float ponef(float), qonef(float);
static const float static const float
huge = 1e30,
invsqrtpi = 5.6418961287e-01, /* 0x3f106ebb */ invsqrtpi = 5.6418961287e-01, /* 0x3f106ebb */
tpi = 6.3661974669e-01, /* 0x3f22f983 */ tpi = 6.3661974669e-01; /* 0x3f22f983 */
static float common(uint32_t ix, float x, int y1, int sign)
{
double z,s,c,ss,cc;
s = sinf(x);
if (y1)
s = -s;
c = cosf(x);
cc = s-c;
if (ix < 0x7f000000) {
ss = -s-c;
z = cosf(2*x);
if (s*c > 0)
cc = z/ss;
else
ss = z/cc;
if (ix < 0x58800000) {
if (y1)
ss = -ss;
cc = ponef(x)*cc-qonef(x)*ss;
}
}
if (sign)
cc = -cc;
return invsqrtpi*cc/sqrtf(x);
}
/* R0/S0 on [0,2] */ /* R0/S0 on [0,2] */
static const float
r00 = -6.2500000000e-02, /* 0xbd800000 */ r00 = -6.2500000000e-02, /* 0xbd800000 */
r01 = 1.4070566976e-03, /* 0x3ab86cfd */ r01 = 1.4070566976e-03, /* 0x3ab86cfd */
r02 = -1.5995563444e-05, /* 0xb7862e36 */ r02 = -1.5995563444e-05, /* 0xb7862e36 */
...@@ -34,51 +62,26 @@ s05 = 1.2354227016e-11; /* 0x2d59567e */ ...@@ -34,51 +62,26 @@ s05 = 1.2354227016e-11; /* 0x2d59567e */
float j1f(float x) float j1f(float x)
{ {
float z,s,c,ss,cc,r,u,v,y; float z,r,s;
int32_t hx,ix; uint32_t ix;
int sign;
GET_FLOAT_WORD(hx, x); GET_FLOAT_WORD(ix, x);
ix = hx & 0x7fffffff; sign = ix>>31;
ix &= 0x7fffffff;
if (ix >= 0x7f800000) if (ix >= 0x7f800000)
return 1.0f/x; return 1/(x*x);
y = fabsf(x); if (ix >= 0x40000000) /* |x| >= 2 */
if (ix >= 0x40000000) { /* |x| >= 2.0 */ return common(ix, fabsf(x), 0, sign);
s = sinf(y); if (ix >= 0x32000000) { /* |x| >= 2**-27 */
c = cosf(y); z = x*x;
ss = -s-c; r = z*(r00+z*(r01+z*(r02+z*r03)));
cc = s-c; s = 1+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
if (ix < 0x7f000000) { /* make sure y+y not overflow */ z = 0.5f + r/s;
z = cosf(y+y); } else
if (s*c > 0.0f)
cc = z/ss;
else
ss = z/cc;
}
/*
* j1(x) = 1/sqrt(pi) * (P(1,x)*cc - Q(1,x)*ss) / sqrt(x)
* y1(x) = 1/sqrt(pi) * (P(1,x)*ss + Q(1,x)*cc) / sqrt(x)
*/
if (ix > 0x80000000)
z = (invsqrtpi*cc)/sqrtf(y);
else {
u = ponef(y);
v = qonef(y);
z = invsqrtpi*(u*cc-v*ss)/sqrtf(y);
}
if (hx < 0)
return -z;
return z;
}
if (ix < 0x32000000) { /* |x| < 2**-27 */
/* raise inexact if x!=0 */ /* raise inexact if x!=0 */
if (huge+x > 1.0f) z = 0.5f + x;
return 0.5f*x; return z*x;
}
z = x*x;
r = z*(r00+z*(r01+z*(r02+z*r03)));
s = 1.0f+z*(s01+z*(s02+z*(s03+z*(s04+z*s05))));
r *= x;
return 0.5f*x + r/s;
} }
static const float U0[5] = { static const float U0[5] = {
...@@ -98,51 +101,19 @@ static const float V0[5] = { ...@@ -98,51 +101,19 @@ static const float V0[5] = {
float y1f(float x) float y1f(float x)
{ {
float z,s,c,ss,cc,u,v; float z,u,v;
int32_t hx,ix; uint32_t ix;
GET_FLOAT_WORD(hx, x); GET_FLOAT_WORD(ix, x);
ix = 0x7fffffff & hx; if ((ix & 0x7fffffff) == 0)
/* if Y1(NaN) is NaN, Y1(-inf) is NaN, Y1(inf) is 0 */ return -1/0.0f;
if (ix>>31)
return 0/0.0f;
if (ix >= 0x7f800000) if (ix >= 0x7f800000)
return 1.0f/(x+x*x); return 1/x;
if (ix == 0) if (ix >= 0x40000000) /* |x| >= 2.0 */
return -1.0f/0.0f; return common(ix,x,1,0);
if (hx < 0) if (ix < 0x32000000) /* x < 2**-27 */
return 0.0f/0.0f;
if (ix >= 0x40000000) { /* |x| >= 2.0 */
s = sinf(x);
c = cosf(x);
ss = -s-c;
cc = s-c;
if (ix < 0x7f000000) { /* make sure x+x not overflow */
z = cosf(x+x);
if (s*c > 0.0f)
cc = z/ss;
else
ss = z/cc;
}
/* y1(x) = sqrt(2/(pi*x))*(p1(x)*sin(x0)+q1(x)*cos(x0))
* where x0 = x-3pi/4
* Better formula:
* cos(x0) = cos(x)cos(3pi/4)+sin(x)sin(3pi/4)
* = 1/sqrt(2) * (sin(x) - cos(x))
* sin(x0) = sin(x)cos(3pi/4)-cos(x)sin(3pi/4)
* = -1/sqrt(2) * (cos(x) + sin(x))
* To avoid cancellation, use
* sin(x) +- cos(x) = -cos(2x)/(sin(x) -+ cos(x))
* to compute the worse one.
*/
if (ix > 0x48000000)
z = (invsqrtpi*ss)/sqrtf(x);
else {
u = ponef(x);
v = qonef(x);
z = invsqrtpi*(u*ss+v*cc)/sqrtf(x);
}
return z;
}
if (ix <= 0x24800000) /* x < 2**-54 */
return -tpi/x; return -tpi/x;
z = x*x; z = x*x;
u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4]))); u = U0[0]+z*(U0[1]+z*(U0[2]+z*(U0[3]+z*U0[4])));
...@@ -228,14 +199,14 @@ static float ponef(float x) ...@@ -228,14 +199,14 @@ static float ponef(float x)
{ {
const float *p,*q; const float *p,*q;
float z,r,s; float z,r,s;
int32_t ix; uint32_t ix;
GET_FLOAT_WORD(ix, x); GET_FLOAT_WORD(ix, x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
if (ix >= 0x41000000){p = pr8; q = ps8;} if (ix >= 0x41000000){p = pr8; q = ps8;}
else if (ix >= 0x40f71c58){p = pr5; q = ps5;} else if (ix >= 0x40f71c58){p = pr5; q = ps5;}
else if (ix >= 0x4036db68){p = pr3; q = ps3;} else if (ix >= 0x4036db68){p = pr3; q = ps3;}
else if (ix >= 0x40000000){p = pr2; q = ps2;} else /*ix >= 0x40000000*/ {p = pr2; q = ps2;}
z = 1.0f/(x*x); z = 1.0f/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4])))); s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*q[4]))));
...@@ -324,14 +295,14 @@ static float qonef(float x) ...@@ -324,14 +295,14 @@ static float qonef(float x)
{ {
const float *p,*q; const float *p,*q;
float s,r,z; float s,r,z;
int32_t ix; uint32_t ix;
GET_FLOAT_WORD(ix, x); GET_FLOAT_WORD(ix, x);
ix &= 0x7fffffff; ix &= 0x7fffffff;
if (ix >= 0x40200000){p = qr8; q = qs8;} if (ix >= 0x40200000){p = qr8; q = qs8;}
else if (ix >= 0x40f71c58){p = qr5; q = qs5;} else if (ix >= 0x40f71c58){p = qr5; q = qs5;}
else if (ix >= 0x4036db68){p = qr3; q = qs3;} else if (ix >= 0x4036db68){p = qr3; q = qs3;}
else if (ix >= 0x40000000){p = qr2; q = qs2;} else /*ix >= 0x40000000*/ {p = qr2; q = qs2;}
z = 1.0f/(x*x); z = 1.0f/(x*x);
r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5])))); r = p[0]+z*(p[1]+z*(p[2]+z*(p[3]+z*(p[4]+z*p[5]))));
s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5]))))); s = 1.0f+z*(q[0]+z*(q[1]+z*(q[2]+z*(q[3]+z*(q[4]+z*q[5])))));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册