Commit 5a9c8c05 authored by Rich Felker

mitigate performance regression in libc-internal locks on x86_64

commit 3c43c076 fixed missing
synchronization in the atomic store operation for i386 and x86_64, but
opted to use mfence for the barrier on x86_64 where it's always
available. however, in practice mfence is significantly slower than
the barrier approach used on i386 (a nop-like lock orl operation).
this commit changes x86_64 (and x32) to use the faster barrier.
Parent c13f2af1
......@@ -83,7 +83,7 @@ static inline void a_dec(volatile int *x)
static inline void a_store(volatile int *p, int x)
{
-	__asm__( "mov %1, %0 ; mfence" : "=m"(*p) : "r"(x) : "memory" );
+	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
}
static inline void a_spin()
......
......@@ -83,7 +83,7 @@ static inline void a_dec(volatile int *x)
static inline void a_store(volatile int *p, int x)
{
-	__asm__( "mov %1, %0 ; mfence" : "=m"(*p) : "r"(x) : "memory" );
+	__asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
}
static inline void a_spin()
......