提交 b1756ec8 编写于 作者: S Szabolcs Nagy 提交者: Rich Felker

math: new software sqrtf

same method as in sqrt, this was tested on all inputs against
an sqrtf instruction. (the only difference found was that x86
sqrtf does not signal the x86 specific input-denormal exception
on negative subnormal inputs while the software sqrtf does,
this is fine as it was designed for ieee754 exceptions only.)

there is known faster method:
"Computing Floating-Point Square Roots via Bivariate Polynomial Evaluation"
that computes sqrtf directly via pipelined polynomial evaluation
which allows more parallelism, but the design does not generalize
easily to higher precisions.
上级 97e9b73d
/* origin: FreeBSD /usr/src/lib/msun/src/e_sqrtf.c */
/*
* Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
*/
/*
* ====================================================
* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
*
* Developed at SunPro, a Sun Microsystems, Inc. business.
* Permission to use, copy, modify, and distribute this
* software is freely granted, provided that this notice
* is preserved.
* ====================================================
*/
#include <stdint.h>
#include <math.h>
#include "libm.h"
#include "sqrt_data.h"
static const float tiny = 1.0e-30;
#define FENV_SUPPORT 1
float sqrtf(float x)
static inline uint32_t mul32(uint32_t a, uint32_t b)
{
float z;
int32_t sign = (int)0x80000000;
int32_t ix,s,q,m,t,i;
uint32_t r;
return (uint64_t)a*b >> 32;
}
GET_FLOAT_WORD(ix, x);
/* see sqrt.c for more detailed comments. */
/* take care of Inf and NaN */
if ((ix&0x7f800000) == 0x7f800000)
return x*x + x; /* sqrt(NaN)=NaN, sqrt(+inf)=+inf, sqrt(-inf)=sNaN */
float sqrtf(float x)
{
uint32_t ix, m, m1, m0, even, ey;
/* take care of zero */
if (ix <= 0) {
if ((ix&~sign) == 0)
return x; /* sqrt(+-0) = +-0 */
if (ix < 0)
return (x-x)/(x-x); /* sqrt(-ve) = sNaN */
}
/* normalize x */
m = ix>>23;
if (m == 0) { /* subnormal x */
for (i = 0; (ix&0x00800000) == 0; i++)
ix<<=1;
m -= i - 1;
ix = asuint(x);
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000)) {
/* x < 0x1p-126 or inf or nan. */
if (ix * 2 == 0)
return x;
if (ix == 0x7f800000)
return x;
if (ix > 0x7f800000)
return __math_invalidf(x);
/* x is subnormal, normalize it. */
ix = asuint(x * 0x1p23f);
ix -= 23 << 23;
}
m -= 127; /* unbias exponent */
ix = (ix&0x007fffff)|0x00800000;
if (m&1) /* odd m, double x to make it even */
ix += ix;
m >>= 1; /* m = [m/2] */
/* generate sqrt(x) bit by bit */
ix += ix;
q = s = 0; /* q = sqrt(x) */
r = 0x01000000; /* r = moving bit from right to left */
/* x = 4^e m; with int e and m in [1, 4). */
even = ix & 0x00800000;
m1 = (ix << 8) | 0x80000000;
m0 = (ix << 7) & 0x7fffffff;
m = even ? m0 : m1;
while (r != 0) {
t = s + r;
if (t <= ix) {
s = t+r;
ix -= t;
q += r;
}
ix += ix;
r >>= 1;
}
/* 2^e is the exponent part of the return value. */
ey = ix >> 1;
ey += 0x3f800000 >> 1;
ey &= 0x7f800000;
/* compute r ~ 1/sqrt(m), s ~ sqrt(m) with 2 goldschmidt iterations. */
static const uint32_t three = 0xc0000000;
uint32_t r, s, d, u, i;
i = (ix >> 17) % 128;
r = (uint32_t)__rsqrt_tab[i] << 16;
/* |r*sqrt(m) - 1| < 0x1p-8 */
s = mul32(m, r);
/* |s/sqrt(m) - 1| < 0x1p-8 */
d = mul32(s, r);
u = three - d;
r = mul32(r, u) << 1;
/* |r*sqrt(m) - 1| < 0x1.7bp-16 */
s = mul32(s, u) << 1;
/* |s/sqrt(m) - 1| < 0x1.7bp-16 */
d = mul32(s, r);
u = three - d;
s = mul32(s, u);
/* -0x1.03p-28 < s/sqrt(m) - 1 < 0x1.fp-31 */
s = (s - 1)>>6;
/* s < sqrt(m) < s + 0x1.08p-23 */
/* use floating add to find out rounding direction */
if (ix != 0) {
z = 1.0f - tiny; /* raise inexact flag */
if (z >= 1.0f) {
z = 1.0f + tiny;
if (z > 1.0f)
q += 2;
else
q += q & 1;
}
/* compute nearest rounded result. */
uint32_t d0, d1, d2;
float y, t;
d0 = (m << 16) - s*s;
d1 = s - d0;
d2 = d1 + s + 1;
s += d1 >> 31;
s &= 0x007fffff;
s |= ey;
y = asfloat(s);
if (FENV_SUPPORT) {
/* handle rounding and inexact exception. */
uint32_t tiny = predict_false(d2==0) ? 0 : 0x01000000;
tiny |= (d1^d2) & 0x80000000;
t = asfloat(tiny);
y = eval_as_float(y + t);
}
ix = (q>>1) + 0x3f000000;
SET_FLOAT_WORD(z, ix + ((uint32_t)m << 23));
return z;
return y;
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册