math: fix rare underflow issue in fma

the issue is described in commits 1e5eb735 and ffd8ac2d

math: fix rare underflow issue in fma
the issue is described in commits 1e5eb735 and ffd8ac2d
8f438115 · Szabolcs Nagy · 4b539a82 · 8f438115 · 8f438115 · 8f438115
隐藏空白更改
内联并排

Showing with 55 addition and 13 deletion

src/math/fma.c src/math/fma.c +14 -2

src/math/fmaf.c src/math/fmaf.c +27 -9

src/math/fmal.c src/math/fmal.c +14 -2

未找到文件。
--- a/src/math/fma.c
+++ b/src/math/fma.c
@@ -431,12 +431,24 @@ double fma(double x, double y, double z)
 		/*
 		 * There is no need to worry about double rounding in directed
 		 * rounding modes.
-		 * TODO: underflow is not raised properly, example in downward rounding:
+		 * But underflow may not be raised properly, example in downward rounding:
 		 * fma(0x1.000000001p-1000, 0x1.000000001p-30, -0x1p-1066)
 		 */
+		double ret;
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		int e = fetestexcept(FE_INEXACT);
+		feclearexcept(FE_INEXACT);
+#endif
 		fesetround(oround);
 		adj = r.lo + xy.lo;
-		return scalbn(r.hi + adj, spread);
+		ret = scalbn(r.hi + adj, spread);
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		if (ilogb(ret) < -1022 && fetestexcept(FE_INEXACT))
+			feraiseexcept(FE_UNDERFLOW);
+		else if (e)
+			feraiseexcept(FE_INEXACT);
+#endif
+		return ret;
 	}

 	adj = add_adjusted(r.lo, xy.lo);

--- a/src/math/fmaf.c
+++ b/src/math/fmaf.c
@@ -26,7 +26,8 @@
 */

 #include <fenv.h>
-#include "libm.h"
+#include <math.h>
+#include <stdint.h>

 /*
 * Fused multiply-add: Compute x * y + z with a single rounding error.
@@ -39,21 +40,35 @@ float fmaf(float x, float y, float z)
 {
 	#pragma STDC FENV_ACCESS ON
 	double xy, result;
-	uint32_t hr, lr;
+	union {double f; uint64_t i;} u;
+	int e;

 	xy = (double)x * y;
 	result = xy + z;
-	EXTRACT_WORDS(hr, lr, result);
+	u.f = result;
+	e = u.i>>52 & 0x7ff;
 	/* Common case: The double precision result is fine. */
-	if ((lr & 0x1fffffff) != 0x10000000 ||  /* not a halfway case */
-		(hr & 0x7ff00000) == 0x7ff00000 ||  /* NaN */
+	if ((u.i & 0x1fffffff) != 0x10000000 || /* not a halfway case */
+		e == 0x7ff ||                   /* NaN */
 		result - xy == z ||                 /* exact */
 		fegetround() != FE_TONEAREST)       /* not round-to-nearest */
 	{
 		/*
-		TODO: underflow is not raised correctly, example in
-		downward rouding: fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
+		underflow may not be raised correctly, example:
+		fmaf(0x1p-120f, 0x1p-120f, 0x1p-149f)
 		*/
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		if (e < 0x3ff-126 && e >= 0x3ff-149 && fetestexcept(FE_INEXACT)) {
+			feclearexcept(FE_INEXACT);
+			/* TODO: gcc and clang bug workaround */
+			volatile float vz = z;
+			result = xy + vz;
+			if (fetestexcept(FE_INEXACT))
+				feraiseexcept(FE_UNDERFLOW);
+			else
+				feraiseexcept(FE_INEXACT);
+		}
+#endif
 		z = result;
 		return z;
 	}
@@ -68,8 +83,11 @@ float fmaf(float x, float y, float z)
 	volatile double vxy = xy;  /* XXX work around gcc CSE bug */
 	double adjusted_result = vxy + z;
 	fesetround(FE_TONEAREST);
-	if (result == adjusted_result)
-		SET_LOW_WORD(adjusted_result, lr + 1);
+	if (result == adjusted_result) {
+		u.f = adjusted_result;
+		u.i++;
+		adjusted_result = u.f;
+	}
 	z = adjusted_result;
 	return z;
 }
--- a/src/math/fmal.c
+++ b/src/math/fmal.c
@@ -264,12 +264,24 @@ long double fmal(long double x, long double y, long double z)
 		/*
 		 * There is no need to worry about double rounding in directed
 		 * rounding modes.
-		 * TODO: underflow is not raised correctly, example in downward rounding:
+		 * But underflow may not be raised correctly, example in downward rounding:
 		 * fmal(0x1.0000000001p-16000L, 0x1.0000000001p-400L, -0x1p-16440L)
 		 */
+		long double ret;
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		int e = fetestexcept(FE_INEXACT);
+		feclearexcept(FE_INEXACT);
+#endif
 		fesetround(oround);
 		adj = r.lo + xy.lo;
-		return scalbnl(r.hi + adj, spread);
+		ret = scalbnl(r.hi + adj, spread);
+#if defined(FE_INEXACT) && defined(FE_UNDERFLOW)
+		if (ilogbl(ret) < -16382 && fetestexcept(FE_INEXACT))
+			feraiseexcept(FE_UNDERFLOW);
+		else if (e)
+			feraiseexcept(FE_INEXACT);
+#endif
+		return ret;
 	}

 	adj = add_adjusted(r.lo, xy.lo);