提交 02db27d9 编写于 作者: R Rich Felker

optimize exponential asm for i386

up to 30% faster exp2 by avoiding the slow frndint and fscale
instructions. expm1 also takes a much more direct path for small
arguments (the expected use case).
上级 da7458a6
# expm1f / expm1l / expm1 -- three entry points sharing one x87 body.
#
# Each entry point loads its argument x from the stack slot at 4(%esp)
# onto the x87 stack in its own width (flds = 32-bit, fldt = 80-bit,
# fldl = 64-bit) and then joins the common code at the first local
# label 1 below.  The result is left in st(0) at every ret.
# %ax is overwritten (fnstsw %ax); no memory is written in this block.
#
# Common body:
#   y = x * log2(e)
#   if |y| > 1 : call the next local label 1 further down the file
#                (outside this block -- presumably the shared exp2
#                body computing 2^y; confirm), then subtract 1
#   otherwise  : f2xm1 yields 2^y - 1 directly
.global expm1f
.type expm1f,@function
expm1f:
# st0 = x (single precision argument)
flds 4(%esp)
jmp 1f
.global expm1l
.type expm1l,@function
expm1l:
# st0 = x (extended precision argument)
fldt 4(%esp)
jmp 1f
.global expm1
.type expm1,@function
expm1:
# st0 = x (double precision argument)
fldl 4(%esp)
# Shared body: push log2(e), then multiply-and-pop so st0 = y = x*log2(e).
1: fldl2e
fmulp
# Build a temporary |y| on top of a temporary 1.0 for the range test:
# after these three instructions st0 = |y|, st1 = 1.0, st2 = y.
fld1
fld %st(1)
fabs
# Unordered compare |y| against 1.0 and capture the status word in %ax
# right away, before the pops below can disturb the condition codes.
fucom %st(1)
fnstsw %ax
# Discard the two temporaries; st0 = y again.
fstp %st(0)
fstp %st(0)
# Move the saved x87 condition codes into EFLAGS (C0->CF, C2->PF, C3->ZF).
sahf
# Taken only when |y| > 1 (CF=0 and ZF=0).  An unordered compare (NaN)
# sets all three codes, so NaN falls through to f2xm1 as well.
ja 1f
# |y| <= 1: f2xm1 computes 2^st0 - 1, which is the final result.
f2xm1
ret
# |y| > 1: "call 1f" targets the next local label 1 AFTER this line,
# which lies outside this block.  It is entered with y in st0 and is
# expected to return with its result in st0 -- TODO confirm against
# the callee.
1: call 1f
# st0 = 1.0, st1 = value returned by the call; combine and pop.
# Intent is (returned value) - 1.  NOTE(review): GNU as AT&T syntax
# has a documented fsub/fsubr operand-order quirk for the popping
# forms -- verify the assembled opcode if touching this line.
fld1
fsubrp
ret
.global exp2f
.type exp2f,@function
exp2f:
......@@ -34,22 +68,53 @@ exp:
.type exp2,@function
exp2:
fldl 4(%esp)
1: fxam
fnstsw %ax
1: mov $0x47000000,%eax
push %eax
flds (%esp)
shl $7,%eax
push %eax
add %eax,%eax
push %eax
fld %st(1)
fabs
fucom %st(1)
fnstsw
sahf
jnp 1f
jnc 1f
fstps 4(%esp)
mov $0xfe,%al
and %al,7(%esp)
flds 4(%esp)
1: fld %st(0)
frndint
ja 2f
fstp %st(0)
fstp %st(0)
fld %st(0)
fistpl 8(%esp)
fildl 8(%esp)
fxch %st(1)
fsub %st(1)
mov $0x3fff,%eax
add %eax,8(%esp)
f2xm1
fld1
faddp
fscale
fldt (%esp)
fmulp
fstp %st(1)
add $12,%esp
ret
2: fstp %st(0)
fstp %st(0)
fsts 8(%esp)
mov 8(%esp),%eax
lea (%eax,%eax),%ecx
cmp $0xff000000,%ecx
ja 2f
fstp %st(0)
xor %ecx,%ecx
inc %ecx
add %eax,%eax
jc 1f
mov $0x7ffe,%ecx
1: mov %ecx,8(%esp)
fldt (%esp)
fld %st(0)
fmulp
2: add $12,%esp
ret
# expm1f / expm1l / expm1 -- frndint/fscale based variant.
#
# NOTE(review): this listing defines expm1f/expm1l/expm1 twice.  This
# copy uses frndint and fscale, which the commit message says are being
# avoided, so it is presumably the pre-change version shown by the diff
# -- confirm before assembling; both copies cannot coexist in one file.
#
# Each entry point loads its argument x from 4(%esp) in its own width
# and joins the shared body at the first local label 1.  The result is
# left in st(0) at every ret.  %ax is overwritten, and on the infinity
# path the stack slot at 4(%esp) is overwritten as well.
.global expm1f
.type expm1f,@function
expm1f:
# st0 = x (single precision argument)
flds 4(%esp)
jmp 1f
.global expm1l
.type expm1l,@function
expm1l:
# st0 = x (extended precision argument)
fldt 4(%esp)
jmp 1f
.global expm1
.type expm1,@function
expm1:
# st0 = x (double precision argument)
fldl 4(%esp)
# Classify st0.  After sahf: CF = C0, PF = C2, ZF = C3.
1: fxam
fnstsw %ax
sahf
# Skip the fix-up unless both C2 and C0 are set; per the x87 fxam
# encoding that combination (C2=1, C0=1) is the infinity class.
jnp 1f
jnc 1f
# Infinity: store it as a 32-bit float over the slot at 4(%esp), clear
# bit 0 of that float's highest byte (7(%esp)), and reload it.  This
# turns +/-inf (0x7f800000 / 0xff800000) into a large finite value of
# the same sign (0x7e800000 / 0xfe800000).
fstps 4(%esp)
mov $0xfe,%al
and %al,7(%esp)
flds 4(%esp)
# st0 = y = x * log2(e)
1: fldl2e
fmulp
# st0 = n = y rounded to an integer (current rounding mode), st1 = y
fld %st(0)
frndint
# Compare 0.0 against n and pop the zero; ZF is set when n == 0
# (or when the compare is unordered, i.e. NaN).
fldz
fcomp
fnstsw %ax
sahf
jnz 1f
# n == 0: drop n and let f2xm1 compute 2^y - 1 directly.
fstp %st(0)
f2xm1
ret
# n != 0: st0 = y, st1 = n after the exchange; then st0 = y - n.
1: fxch %st(1)
fsub %st(1)
# st0 = 2^(y-n) - 1, then add 1.0 back to get 2^(y-n).
f2xm1
fld1
faddp
# Scale by 2^n (n is still in st1): st0 = 2^y.
fscale
# Subtract 1.0: intent is 2^y - 1.  NOTE(review): GNU as AT&T syntax
# has a documented fsub/fsubr operand-order quirk for the popping
# forms -- verify the assembled opcode if touching this line.
fld1
fsubrp
# Store the result over n and pop, leaving only the result in st0.
fstp %st(1)
ret
# see exp.s
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册