提交 a732e80d 编写于 作者: S Szabolcs Nagy

math: fix x86_64 and x32 asm not to use sahf instruction

Some early x86_64 CPUs (released before 2006) did not support the
sahf/lahf instructions in 64-bit mode, so they should be avoided (the
Intel manual says they are only valid in 64-bit mode if
CPUID.80000001H:ECX.LAHF-SAHF[bit 0] = 1).

The workaround simplifies exp2l and expm1l because fucomip can be
used instead of the fucomp;fnstsw;sahf sequence copied from i386.

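In fmodl and remainderl, sahf is replaced by a simple bit test of the
x87 C2 flag (testb $4,%ah after fstsw %ax), which fprem/fprem1 leave
set while the reduction is still incomplete.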
上级 de2b9c21
...@@ -6,9 +6,7 @@ expm1l: ...@@ -6,9 +6,7 @@ expm1l:
fmulp fmulp
movl $0xc2820000,-4(%esp) movl $0xc2820000,-4(%esp)
flds -4(%esp) flds -4(%esp)
fucomp %st(1) fucomip %st(1)
fnstsw %ax
sahf
fld1 fld1
jb 1f jb 1f
# x*log2e <= -65, return -1 without underflow # x*log2e <= -65, return -1 without underflow
...@@ -17,11 +15,8 @@ expm1l: ...@@ -17,11 +15,8 @@ expm1l:
ret ret
1: fld %st(1) 1: fld %st(1)
fabs fabs
fucom %st(1) fucomip %st(1)
fnstsw %ax
fstp %st(0) fstp %st(0)
fstp %st(0)
sahf
ja 1f ja 1f
f2xm1 f2xm1
ret ret
...@@ -53,9 +48,7 @@ exp2l: ...@@ -53,9 +48,7 @@ exp2l:
fld %st(1) fld %st(1)
fsub %st(1) fsub %st(1)
faddp faddp
fucomp %st(1) fucomip %st(1)
fnstsw
sahf
je 2f # x - 0x1p63 + 0x1p63 == x je 2f # x - 0x1p63 + 0x1p63 == x
movl $1,(%esp) movl $1,(%esp)
flds (%esp) # 0x1p-149 flds (%esp) # 0x1p-149
......
...@@ -5,7 +5,7 @@ fmodl: ...@@ -5,7 +5,7 @@ fmodl:
fldt 8(%esp) fldt 8(%esp)
1: fprem 1: fprem
fstsw %ax fstsw %ax
sahf testb $4,%ah
jp 1b jnz 1b
fstp %st(1) fstp %st(1)
ret ret
...@@ -5,7 +5,7 @@ remainderl: ...@@ -5,7 +5,7 @@ remainderl:
fldt 8(%esp) fldt 8(%esp)
1: fprem1 1: fprem1
fstsw %ax fstsw %ax
sahf testb $4,%ah
jp 1b jnz 1b
fstp %st(1) fstp %st(1)
ret ret
...@@ -6,9 +6,7 @@ expm1l: ...@@ -6,9 +6,7 @@ expm1l:
fmulp fmulp
movl $0xc2820000,-4(%rsp) movl $0xc2820000,-4(%rsp)
flds -4(%rsp) flds -4(%rsp)
fucomp %st(1) fucomip %st(1)
fnstsw %ax
sahf
fld1 fld1
jb 1f jb 1f
# x*log2e <= -65, return -1 without underflow # x*log2e <= -65, return -1 without underflow
...@@ -17,11 +15,8 @@ expm1l: ...@@ -17,11 +15,8 @@ expm1l:
ret ret
1: fld %st(1) 1: fld %st(1)
fabs fabs
fucom %st(1) fucomip %st(1)
fnstsw %ax
fstp %st(0) fstp %st(0)
fstp %st(0)
sahf
ja 1f ja 1f
f2xm1 f2xm1
ret ret
...@@ -53,9 +48,7 @@ exp2l: ...@@ -53,9 +48,7 @@ exp2l:
fld %st(1) fld %st(1)
fsub %st(1) fsub %st(1)
faddp faddp
fucomp %st(1) fucomip %st(1)
fnstsw
sahf
je 2f # x - 0x1p63 + 0x1p63 == x je 2f # x - 0x1p63 + 0x1p63 == x
movl $1,(%rsp) movl $1,(%rsp)
flds (%rsp) # 0x1p-149 flds (%rsp) # 0x1p-149
......
...@@ -5,7 +5,7 @@ fmodl: ...@@ -5,7 +5,7 @@ fmodl:
fldt 8(%rsp) fldt 8(%rsp)
1: fprem 1: fprem
fstsw %ax fstsw %ax
sahf testb $4,%ah
jp 1b jnz 1b
fstp %st(1) fstp %st(1)
ret ret
...@@ -5,7 +5,7 @@ remainderl: ...@@ -5,7 +5,7 @@ remainderl:
fldt 8(%rsp) fldt 8(%rsp)
1: fprem1 1: fprem1
fstsw %ax fstsw %ax
sahf testb $4,%ah
jp 1b jnz 1b
fstp %st(1) fstp %st(1)
ret ret
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册