math: fix asin, atan, log1p, tanh to raise underflow on subnormal

For these functions, f(x)=x for small inputs because f(0)=0 and f'(0)=1, but for subnormal values they should raise the underflow flag (required by Annex F). If they are approximated by a polynomial around 0, then spurious underflow should be avoided (not required by Annex F).

All these functions should raise the inexact flag for small x if x!=0, but this is not required by the standard and does not seem a worthy goal, so support for it is removed in some cases.

Raising underflow:
- x*x may not raise underflow for subnormal x if FLT_EVAL_METHOD!=0
- x*x may raise spurious underflow for normal x if FLT_EVAL_METHOD==0
- in case of double subnormal x, store x as float
- in case of float subnormal x, store x*x as float

math: fix asin, atan, log1p, tanh to raise underflow on subnormal
For these functions, f(x)=x for small inputs because f(0)=0 and f'(0)=1, but for subnormal values they should raise the underflow flag (required by Annex F). If they are approximated by a polynomial around 0, then spurious underflow should be avoided (not required by Annex F).

All these functions should raise the inexact flag for small x if x!=0, but this is not required by the standard and does not seem a worthy goal, so support for it is removed in some cases.

Raising underflow:
- x*x may not raise underflow for subnormal x if FLT_EVAL_METHOD!=0
- x*x may raise spurious underflow for normal x if FLT_EVAL_METHOD==0
- in case of double subnormal x, store x as float
- in case of float subnormal x, store x*x as float
c599f4f4 · Szabolcs Nagy · cd18dc85 · c599f4f4 · c599f4f4 · c599f4f4
9 changed files
--- a/src/math/asin.c
+++ b/src/math/asin.c
@@ -82,11 +82,9 @@ double asin(double x)
 	}
 	/* |x| < 0.5 */
 	if (ix < 0x3fe00000) {
-		if (ix < 0x3e500000) {
+		/* if 0x1p-1022 <= |x| < 0x1p-26, avoid raising underflow */
-			/* |x|<0x1p-26, return x with inexact if x!=0*/
+		if (ix < 0x3e500000 && ix >= 0x00100000)
-			FORCE_EVAL(x + 0x1p120f);
 			return x;
-		}
 		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */

--- a/src/math/asinf.c
+++ b/src/math/asinf.c
@@ -46,10 +46,9 @@ float asinf(float x)
 		return 0/(x-x);  /* asin(|x|>1) is NaN */
 	}
 	if (ix < 0x3f000000) {  /* |x| < 0.5 */
-		if (ix < 0x39800000) {  /* |x| < 2**-12 */
+		/* if 0x1p-126 <= |x| < 0x1p-12, avoid raising underflow */
-			FORCE_EVAL(x + 0x1p120f);
+		if (ix < 0x39800000 && ix >= 0x00800000)
-			return x; /* return x with inexact if x!=0 */
+			return x;
-		}
 		return x + x*R(x*x);
 	}
 	/* 1 > |x| >= 0.5 */

--- a/src/math/atan.c
+++ b/src/math/atan.c
@@ -77,8 +77,9 @@ double atan(double x)
 	}
 	if (ix < 0x3fdc0000) {    /* |x| < 0.4375 */
 		if (ix < 0x3e400000) {  /* |x| < 2^-27 */
-			/* raise inexact if x!=0 */
+			if (ix < 0x00100000)
-			FORCE_EVAL(x + 0x1p120f);
+				/* raise underflow for subnormal x */
+				FORCE_EVAL((float)x);
 			return x;
 		}
 		id = -1;

--- a/src/math/atanf.c
+++ b/src/math/atanf.c
@@ -55,8 +55,9 @@ float atanf(float x)
 	}
 	if (ix < 0x3ee00000) {   /* |x| < 0.4375 */
 		if (ix < 0x39800000) {  /* |x| < 2**-12 */
-			/* raise inexact if x!=0 */
+			if (ix < 0x00800000)
-			FORCE_EVAL(x + 0x1p120f);
+				/* raise underflow for subnormal x */
+				FORCE_EVAL(x*x);
 			return x;
 		}
 		id = -1;

--- a/src/math/log1p.c
+++ b/src/math/log1p.c
@@ -104,9 +104,12 @@ double log1p(double x)
 			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */
 		}
 		if (ax < 0x3e200000) {   /* |x| < 2**-29 */
-			/* raise inexact */
+			/* if 0x1p-1022 <= |x| < 0x1p-54, avoid raising underflow */
-			if (two54 + x > 0.0 && ax < 0x3c900000)  /* |x| < 2**-54 */
+			if (ax < 0x3c900000 && ax >= 0x00100000)
 				return x;
+#if FLT_EVAL_METHOD != 0
+			FORCE_EVAL((float)x);
+#endif
 			return x - x*x*0.5;
 		}
 		if (hx > 0 || hx <= (int32_t)0xbfd2bec4) {  /* sqrt(2)/2- <= 1+x < sqrt(2)+ */

--- a/src/math/log1pf.c
+++ b/src/math/log1pf.c
@@ -43,9 +43,12 @@ float log1pf(float x)
 			return (x-x)/(x-x);         /* log1p(x<-1)=NaN */
 		}
 		if (ax < 0x38000000) {   /* |x| < 2**-15 */
-			/* raise inexact */
+			/* if 0x1p-126 <= |x| < 0x1p-24, avoid raising underflow */
-			if (two25 + x > 0.0f && ax < 0x33800000)  /* |x| < 2**-24 */
+			if (ax < 0x33800000 && ax >= 0x00800000)
 				return x;
+#if FLT_EVAL_METHOD != 0
+			FORCE_EVAL(x*x);
+#endif
 			return x - x*x*0.5f;
 		}
 		if (hx > 0 || hx <= (int32_t)0xbe95f619) { /* sqrt(2)/2- <= 1+x < sqrt(2)+ */

--- a/src/math/sinh.c
+++ b/src/math/sinh.c
@@ -23,8 +23,8 @@ double sinh(double x)
 		t = expm1(absx);
 		if (w < 0x3ff00000) {
 			if (w < 0x3ff00000 - (26<<20))
-				/* note: inexact is raised by expm1 */
+				/* note: inexact and underflow are raised by expm1 */
-				/* note: this branch avoids underflow */
+				/* note: this branch avoids spurious underflow */
 				return x;
 			return h*(2*t - t*t/(t+1));
 		}

--- a/src/math/tanh.c
+++ b/src/math/tanh.c
@@ -9,7 +9,7 @@ double tanh(double x)
 	union {double f; uint64_t i;} u = {.f = x};
 	uint32_t w;
 	int sign;
-	double t;
+	double_t t;
 	/* x = |x| */
 	sign = u.i >> 63;
@@ -22,8 +22,7 @@ double tanh(double x)
 		if (w > 0x40340000) {
 			/* |x| > 20 or nan */
 			/* note: this branch avoids raising overflow */
-			/* raise inexact if x!=+-inf and handle nan */
+			t = 1 - 0/x;
-			t = 1 + 0/(x + 0x1p-120f);
 		} else {
 			t = expm1(2*x);
 			t = 1 - 2/(t+2);
@@ -32,10 +31,15 @@ double tanh(double x)
 		/* |x| > log(5/3)/2 ~= 0.2554 */
 		t = expm1(2*x);
 		t = t/(t+2);
-	} else {
+	} else if (w >= 0x00100000) {
-		/* |x| is small, up to 2ulp error in [0.1,0.2554] */
+		/* |x| >= 0x1p-1022, up to 2ulp error in [0.1,0.2554] */
 		t = expm1(-2*x);
 		t = -t/(t+2);
+	} else {
+		/* |x| is subnormal */
+		/* note: the branch above would not raise underflow in [0x1p-1023,0x1p-1022) */
+		FORCE_EVAL((float)x);
+		t = x;
 	}
 	return sign ? -t : t;
 }
--- a/src/math/tanhf.c
+++ b/src/math/tanhf.c
@@ -17,7 +17,7 @@ float tanhf(float x)
 		/* |x| > log(3)/2 ~= 0.5493 or nan */
 		if (w > 0x41200000) {
 			/* |x| > 10 */
-			t = 1 + 0/(x + 0x1p-120f);
+			t = 1 + 0/x;
 		} else {
 			t = expm1f(2*x);
 			t = 1 - 2/(t+2);
@@ -26,10 +26,14 @@ float tanhf(float x)
 		/* |x| > log(5/3)/2 ~= 0.2554 */
 		t = expm1f(2*x);
 		t = t/(t+2);
-	} else {
+	} else if (w >= 0x00800000) {
-		/* |x| is small */
+		/* |x| >= 0x1p-126 */
 		t = expm1f(-2*x);
 		t = -t/(t+2);
+	} else {
+		/* |x| is subnormal */
+		FORCE_EVAL(x*x);
+		t = x;
 	}
 	return sign ? -t : t;
 }