optimize floatscan: avoid excessive upscaling

upscaling by even one step too much creates 3-29 extra iterations for the next loop. this is still suboptimal since it always goes by 2^29 rather than using a smaller upscale factor when nearing the target, but performance on common, small-magnitude, few-digit values has already more than doubled with this change. more optimizations on the way...

optimize floatscan: avoid excessive upscaling
upscaling by even one step too much creates 3-29 extra iterations for the next loop. this is still suboptimal since it always goes by 2^29 rather than using a smaller upscale factor when nearing the target, but performance on common, small-magnitude, few-digit values has already more than doubled with this change. more optimizations on the way...
1bdd5c8b · Rich Felker · 7ef1a9bb · 1bdd5c8b
隐藏空白更改
内联并排

Showing 1 changed file with 27 additions and 27 deletions

src/internal/floatscan.c src/internal/floatscan.c +27 -27

未找到文件。
--- a/src/internal/floatscan.c
+++ b/src/internal/floatscan.c
@@ -138,38 +138,12 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
 	e2 = 0;
 	rp = lrp;

-	while (rp < 18+9*LD_B1B_DIG) {
-		uint32_t carry = 0;
-		e2 -= 29;
-		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
-			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
-			if (tmp > 1000000000) {
-				carry = tmp / 1000000000;
-				x[k] = tmp % 1000000000;
-			} else {
-				carry = 0;
-				x[k] = tmp;
-			}
-			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
-			if (k==a) break;
-		}
-		if (carry) {
-			rp += 9;
-			if (a == z) {
-				z = (z-1 & MASK);
-				x[z-1 & MASK] |= x[z];
-			}
-			a = (a-1 & MASK);
-			x[a] = carry;
-		}
-	}
-
 	if (rp % 9) {
 		static const int p10s[] = {
 			100000000, 10000000, 1000000, 100000,
 			10000, 1000, 100, 10
 		};
-		int rpm9 = rp % 9;
+		int rpm9 = rp>=0 ? rp%9 : rp%9+9;
 		int p10 = p10s[rpm9-1];
 		uint32_t carry = 0;
 		for (k=a; k!=z; k=(k+1 & MASK)) {
@@ -190,6 +164,32 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
 		rp += 9-rpm9;
 	}

+	while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[0]<th[0])) {
+		uint32_t carry = 0;
+		e2 -= 29;
+		for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
+			uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
+			if (tmp > 1000000000) {
+				carry = tmp / 1000000000;
+				x[k] = tmp % 1000000000;
+			} else {
+				carry = 0;
+				x[k] = tmp;
+			}
+			if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
+			if (k==a) break;
+		}
+		if (carry) {
+			rp += 9;
+			if (a == z) {
+				z = (z-1 & MASK);
+				x[z-1 & MASK] |= x[z];
+			}
+			a = (a-1 & MASK);
+			x[a] = carry;
+		}
+	}
+
 	for (;;) {
 		uint32_t carry = 0;
 		int sh = 1;