提交 1bdd5c8b 编写于 作者: Rich Felker

optimize floatscan: avoid excessive upscaling

Upscaling by even one step too much creates 3-29 extra iterations for
the next loop. This is still suboptimal, since it always scales by 2^29
rather than using a smaller upscale factor when nearing the target,
but performance on common, small-magnitude, few-digit values has
already more than doubled with this change.

more optimizations on the way...
上级 7ef1a9bb
......@@ -138,38 +138,12 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
e2 = 0;
rp = lrp;
while (rp < 18+9*LD_B1B_DIG) {
uint32_t carry = 0;
e2 -= 29;
for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
if (tmp > 1000000000) {
carry = tmp / 1000000000;
x[k] = tmp % 1000000000;
} else {
carry = 0;
x[k] = tmp;
}
if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
if (k==a) break;
}
if (carry) {
rp += 9;
if (a == z) {
z = (z-1 & MASK);
x[z-1 & MASK] |= x[z];
}
a = (a-1 & MASK);
x[a] = carry;
}
}
if (rp % 9) {
static const int p10s[] = {
100000000, 10000000, 1000000, 100000,
10000, 1000, 100, 10
};
int rpm9 = rp % 9;
int rpm9 = rp>=0 ? rp%9 : rp%9+9;
int p10 = p10s[rpm9-1];
uint32_t carry = 0;
for (k=a; k!=z; k=(k+1 & MASK)) {
......@@ -190,6 +164,32 @@ static long double decfloat(FILE *f, int c, int bits, int emin, int sign, int po
rp += 9-rpm9;
}
while (rp < 9*LD_B1B_DIG || (rp == 9*LD_B1B_DIG && x[0]<th[0])) {
uint32_t carry = 0;
e2 -= 29;
for (k=(z-1 & MASK); ; k=(k-1 & MASK)) {
uint64_t tmp = ((uint64_t)x[k] << 29) + carry;
if (tmp > 1000000000) {
carry = tmp / 1000000000;
x[k] = tmp % 1000000000;
} else {
carry = 0;
x[k] = tmp;
}
if (k==(z-1 & MASK) && k!=a && !x[k]) z = k;
if (k==a) break;
}
if (carry) {
rp += 9;
if (a == z) {
z = (z-1 & MASK);
x[z-1 & MASK] |= x[z];
}
a = (a-1 & MASK);
x[a] = carry;
}
}
for (;;) {
uint32_t carry = 0;
int sh = 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册