提交 4a241f14 编写于 作者: Rich Felker

overhaul ARM atomics/tls for performance and compatibility

previously, builds for pre-armv6 targets hard-coded use of the "kuser
helper" system for atomics and thread-pointer access, resulting in
binaries that fail to run (crash) on systems where this functionality
has been disabled (as a security/hardening measure) in the kernel.
additionally, builds for armv6 hard-coded an outdated/deprecated
memory barrier instruction which may require emulation (extremely
slow) on future models.

this overhaul replaces the behavior for all pre-armv7 builds (both of
the above cases) to perform runtime detection of the appropriate
mechanisms for barrier, atomic compare-and-swap, and thread pointer
access. detection is based on information provided by the kernel in
auxv: presence of the HWCAP_TLS bit for AT_HWCAP and the architecture
version encoded in AT_PLATFORM. direct use of the instructions is
preferred when possible, since probing for the existence of the kuser
helper page would be difficult and would incur runtime cost.

for builds targeting armv7 or later, the runtime detection code is not
compiled at all, and much more efficient versions of the non-cas
atomic operations are provided by using ldrex/strex directly rather
than wrapping cas.
上级 d8bdc97d
......@@ -22,37 +22,150 @@ static inline int a_ctz_64(uint64_t x)
return a_ctz_l(y);
}
#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
#define MEM_BARRIER "dmb ish"
#else
#define MEM_BARRIER "mcr p15,0,r0,c7,c10,5"
#endif
static inline int __k_cas(int t, int s, volatile int *p)
static inline void a_barrier()
{
int ret;
__asm__(
" " MEM_BARRIER "\n"
__asm__ __volatile__("dmb ish");
}
static inline int a_cas(volatile int *p, int t, int s)
{
int old;
__asm__ __volatile__(
" dmb ish\n"
"1: ldrex %0,%3\n"
" subs %0,%0,%1\n"
#ifdef __thumb__
" itt eq\n"
#endif
" strexeq %0,%2,%3\n"
" teqeq %0,#1\n"
" beq 1b\n"
" " MEM_BARRIER "\n"
: "=&r"(ret)
" cmp %0,%1\n"
" bne 1f\n"
" strex %0,%2,%3\n"
" cmp %0, #0\n"
" bne 1b\n"
" mov %0, %1\n"
"1: dmb ish\n"
: "=&r"(old)
: "r"(t), "r"(s), "Q"(*p)
: "memory", "cc" );
return ret;
return old;
}
/*
 * Atomically replace *x with v and return the value *x held beforehand.
 * Implemented as an ldrex/strex retry loop with a "dmb ish" barrier
 * before and after the exchange.
 */
static inline int a_swap(volatile int *x, int v)
{
/* old receives the loaded value; tmp receives the strex status */
int old, tmp;
__asm__ __volatile__(
" dmb ish\n"
/* exclusive load of the current value into old */
"1: ldrex %0,%3\n"
/* try to store v; tmp is nonzero if the exclusive store did not succeed */
" strex %1,%2,%3\n"
" cmp %1, #0\n"
/* retry from the load until the store succeeds */
" bne 1b\n"
" dmb ish\n"
: "=&r"(old), "=&r"(tmp)
: "r"(v), "Q"(*x)
: "memory", "cc" );
return old;
}
/*
 * Atomically add v to *x and return the value *x held before the addition.
 * Implemented as an ldrex/strex retry loop with a "dmb ish" barrier
 * before and after the update.
 */
static inline int a_fetch_add(volatile int *x, int v)
{
/* after the asm, old holds the updated (post-add) value; tmp is the strex status */
int old, tmp;
__asm__ __volatile__(
" dmb ish\n"
"1: ldrex %0,%3\n"
/* compute the new value in place */
" add %0,%0,%2\n"
/* try to store it; tmp is nonzero if the exclusive store did not succeed */
" strex %1,%0,%3\n"
" cmp %1, #0\n"
" bne 1b\n"
" dmb ish\n"
: "=&r"(old), "=&r"(tmp)
: "r"(v), "Q"(*x)
: "memory", "cc" );
/* subtract v again to recover the value from before the addition */
return old-v;
}
/*
 * Atomically increment *x by one.
 * Implemented as an ldrex/strex retry loop with a "dmb ish" barrier
 * before and after the update.
 */
static inline void a_inc(volatile int *x)
{
/* tmp holds the loaded/updated value; tmp2 receives the strex status */
int tmp, tmp2;
__asm__ __volatile__(
" dmb ish\n"
"1: ldrex %0,%2\n"
" add %0,%0,#1\n"
/* tmp2 is nonzero if the exclusive store did not succeed */
" strex %1,%0,%2\n"
" cmp %1, #0\n"
/* retry from the load until the store succeeds */
" bne 1b\n"
" dmb ish\n"
: "=&r"(tmp), "=&r"(tmp2)
: "Q"(*x)
: "memory", "cc" );
}
/*
 * Atomically decrement *x by one.
 * Implemented as an ldrex/strex retry loop with a "dmb ish" barrier
 * before and after the update.
 */
static inline void a_dec(volatile int *x)
{
/* tmp holds the loaded/updated value; tmp2 receives the strex status */
int tmp, tmp2;
__asm__ __volatile__(
" dmb ish\n"
"1: ldrex %0,%2\n"
" sub %0,%0,#1\n"
/* tmp2 is nonzero if the exclusive store did not succeed */
" strex %1,%0,%2\n"
" cmp %1, #0\n"
/* retry from the load until the store succeeds */
" bne 1b\n"
" dmb ish\n"
: "=&r"(tmp), "=&r"(tmp2)
: "Q"(*x)
: "memory", "cc" );
}
/*
 * Atomically replace *x with (*x & v).
 * Implemented as an ldrex/strex retry loop with a "dmb ish" barrier
 * before and after the update.
 */
static inline void a_and(volatile int *x, int v)
{
/* tmp holds the loaded/updated value; tmp2 receives the strex status */
int tmp, tmp2;
__asm__ __volatile__(
" dmb ish\n"
"1: ldrex %0,%3\n"
" and %0,%0,%2\n"
/* tmp2 is nonzero if the exclusive store did not succeed */
" strex %1,%0,%3\n"
" cmp %1, #0\n"
/* retry from the load until the store succeeds */
" bne 1b\n"
" dmb ish\n"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(v), "Q"(*x)
: "memory", "cc" );
}
/*
 * Atomically replace *x with (*x | v).
 * Implemented as an ldrex/strex retry loop with a "dmb ish" barrier
 * before and after the update.
 */
static inline void a_or(volatile int *x, int v)
{
/* tmp holds the loaded/updated value; tmp2 receives the strex status */
int tmp, tmp2;
__asm__ __volatile__(
" dmb ish\n"
"1: ldrex %0,%3\n"
" orr %0,%0,%2\n"
/* tmp2 is nonzero if the exclusive store did not succeed */
" strex %1,%0,%3\n"
" cmp %1, #0\n"
/* retry from the load until the store succeeds */
" bne 1b\n"
" dmb ish\n"
: "=&r"(tmp), "=&r"(tmp2)
: "r"(v), "Q"(*x)
: "memory", "cc" );
}
/*
 * Store x to *p with a "dmb ish" barrier on each side of the store.
 * The store itself is a plain str, not an exclusive store.
 */
static inline void a_store(volatile int *p, int x)
{
__asm__ __volatile__(
" dmb ish\n"
/* %0 is the memory operand *p, %1 is the register holding x */
" str %1,%0\n"
" dmb ish\n"
: "=m"(*p)
: "r"(x)
: "memory", "cc" );
}
#else
#define __k_cas ((int (*)(int, int, volatile int *))0xffff0fc0)
#endif
int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
#define __k_cas __a_cas
/*
 * Memory barrier for builds that pick the barrier mechanism at runtime:
 * branches-with-link to the out-of-line __a_barrier routine instead of
 * emitting a barrier instruction inline.
 */
static inline void a_barrier()
{
/* bl overwrites lr; ip, the flags and memory are declared clobbered as well */
__asm__ __volatile__("bl __a_barrier"
: : : "memory", "cc", "ip", "lr" );
}
static inline int a_cas(volatile int *p, int t, int s)
{
......@@ -65,11 +178,6 @@ static inline int a_cas(volatile int *p, int t, int s)
}
}
/*
 * Pointer-typed compare-and-swap: forwards to a_cas with the expected
 * pointer t and replacement pointer s converted to int, and hands the
 * int result back converted to a pointer.
 */
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
	int expected = (int)t;
	int desired = (int)s;
	int result = a_cas(p, expected, desired);

	return (void *)result;
}
static inline int a_swap(volatile int *x, int v)
{
int old;
......@@ -98,19 +206,9 @@ static inline void a_dec(volatile int *x)
static inline void a_store(volatile int *p, int x)
{
while (__k_cas(*p, x, p));
}
#define a_spin a_barrier
static inline void a_barrier()
{
__k_cas(0, 0, &(int){0});
}
static inline void a_crash()
{
*(volatile char *)0=0;
a_barrier();
*p = x;
a_barrier();
}
static inline void a_and(volatile int *p, int v)
......@@ -127,6 +225,20 @@ static inline void a_or(volatile int *p, int v)
while (__k_cas(old, old|v, p));
}
#endif
/*
 * Compare-and-swap on a pointer-sized slot. The expected value t and the
 * replacement s are passed to a_cas as ints, and whatever a_cas returns is
 * converted back to a pointer for the caller.
 */
static inline void *a_cas_p(volatile void *p, void *t, void *s)
{
	int want = (int)t;
	int repl = (int)s;
	int got = a_cas(p, want, repl);

	return (void *)got;
}
#define a_spin a_barrier
/*
 * Deliberately fault by writing a zero byte through a null pointer.
 * The pointer is volatile-qualified so the store is actually emitted.
 */
static inline void a_crash()
{
*(volatile char *)0=0;
}
static inline void a_or_l(volatile void *p, long v)
{
a_or(p, v);
......
......@@ -10,9 +10,17 @@ static inline __attribute__((const)) pthread_t __pthread_self()
#else
typedef char *(*__ptr_func_t)(void) __attribute__((const));
#define __pthread_self() \
((pthread_t)(((__ptr_func_t)0xffff0fe0)()+8-sizeof(struct pthread)))
/*
 * Return the calling thread's pthread_t.
 * The raw thread pointer is obtained by calling the out-of-line __a_gettp
 * routine, which leaves its result in r0; the returned value is that
 * pointer plus 8 minus sizeof(struct pthread).
 */
static inline __attribute__((const)) pthread_t __pthread_self()
{
#ifdef __clang__
/* clang path: copy r0 into an ordinary output operand and list r0 as clobbered.
 * NOTE(review): presumably clang cannot be relied on to honor the register
 * binding used in the #else branch - confirm. */
char *p;
__asm__( "bl __a_gettp\n\tmov %0,r0" : "=r"(p) : : "cc", "r0", "lr" );
#else
/* bind p directly to r0 so the call's result needs no extra move */
register char *p __asm__("r0");
__asm__( "bl __a_gettp" : "=r"(p) : : "cc", "lr" );
#endif
return (void *)(p+8-sizeof(struct pthread));
}
#endif
......
#include <stdint.h>
#include <elf.h>
#include "pthread_impl.h"
#include "libc.h"
/* Bit tested in __hwcap to decide between native instructions and kuser helpers. */
#define HWCAP_TLS (1 << 15)
/* Labels of the individual implementations; only their addresses are used. */
extern const unsigned char __attribute__((__visibility__("hidden")))
__a_barrier_dummy[], __a_barrier_oldkuser[],
__a_barrier_v6[], __a_barrier_v7[],
__a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
__a_gettp_dummy[], __a_gettp_native[];
/* Fixed addresses used for the "kuser" variants of each operation
 * (presumably the kernel-provided kuser helper entry points - confirm
 * against the kernel documentation). */
#define __a_barrier_kuser 0xffff0fa0
#define __a_cas_kuser 0xffff0fc0
#define __a_gettp_kuser 0xffff0fe0
/* Per-operation dispatch words written by SET(). */
extern uintptr_t __attribute__((__visibility__("hidden")))
__a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
/* SET(op,ver): store into __a_<op>_ptr the distance from __a_<op>_dummy to
 * the chosen implementation __a_<op>_<ver>; evaluates to the stored value. */
#define SET(op,ver) (__a_##op##_ptr = \
(uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
/*
 * Choose the barrier / compare-and-swap / thread-pointer implementations
 * (only on builds where the #if below holds), then issue syscall 0xf0005
 * with p and return its result.
 *
 * With HWCAP_TLS set in __hwcap: use the native thread-pointer read and the
 * v7 cas/barrier, downgraded to the v6 variants when the AT_PLATFORM string
 * begins with "v6" and the following character is not a decimal digit.
 * Without HWCAP_TLS: use the fixed-address kuser variants; the int read from
 * 0xffff0ffc (presumably the kuser helper version - confirm) must be at
 * least 2, and values below 3 select the oldkuser barrier.
 */
int __set_thread_area(void *p)
{
#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
	if (!(__hwcap & HWCAP_TLS)) {
		int kuser_version = *(int *)0xffff0ffc;

		SET(gettp, kuser);
		SET(cas, kuser);
		SET(barrier, kuser);
		if (kuser_version < 2) {
			a_crash();
		}
		if (kuser_version < 3) {
			SET(barrier, oldkuser);
		}
	} else {
		size_t *entry;

		SET(gettp, native);
		SET(cas, v7);
		SET(barrier, v7);

		/* Walk the (type, value) pairs until AT_PLATFORM or the terminator. */
		entry = libc.auxv;
		while (*entry && *entry != AT_PLATFORM) {
			entry += 2;
		}
		if (*entry) {
			const char *platform = (void *)entry[1];

			if (platform[0]=='v' && platform[1]=='6' && !(platform[2]-'0'<10u)) {
				SET(cas, v6);
				SET(barrier, v6);
			}
		}
	}
#endif
	return __syscall(0xf0005, p);
}
.text
/*
 * __a_barrier: dispatch to the barrier implementation selected at runtime.
 * The word at __a_barrier_ptr is added to the address of __a_barrier_dummy
 * to form the jump target, so a value of 0 selects the dummy below.
 * Only ip is used as scratch. The pc arithmetic assumes ARM-state
 * encoding, where pc reads as the current instruction's address + 8.
 */
.global __a_barrier
.hidden __a_barrier
.type __a_barrier,%function
__a_barrier:
/* ip = distance from label 1 to __a_barrier_ptr */
ldr ip,1f
/* pc reads as label 1 here, so this loads the word stored at __a_barrier_ptr */
ldr ip,[pc,ip]
/* pc reads as __a_barrier_dummy here; jump to __a_barrier_dummy + ip */
add pc,pc,ip
1: .word __a_barrier_ptr-1b
/* Default target: performs no barrier and returns to the caller. */
.global __a_barrier_dummy
.hidden __a_barrier_dummy
__a_barrier_dummy:
/* bit 0 of lr clear: return with a plain mov; otherwise return with bx.
 * NOTE(review): presumably this keeps the common return path free of bx
 * for cores that lack it - confirm. */
tst lr,#1
moveq pc,lr
bx lr
/*
 * __a_barrier_oldkuser: barrier variant that calls the routine at
 * 0xffff0fc0 on a stack slot, with all caller-visible registers it touches
 * saved and restored around the call.
 * NOTE(review): 0xffff0fc0 is presumably the kuser cmpxchg helper, invoked
 * here only for its barrier side effect - confirm.
 */
.global __a_barrier_oldkuser
.hidden __a_barrier_oldkuser
__a_barrier_oldkuser:
push {r0,r1,r2,r3,ip,lr}
/* r1 = r0 and r2 = sp; after the push, [sp] holds the saved r0, so the
 * first two arguments both equal the word the third argument points to */
mov r1,r0
mov r2,sp
ldr ip,=0xffff0fc0
/* manual call: pc reads as the address of the pop below */
mov lr,pc
mov pc,ip
pop {r0,r1,r2,r3,ip,lr}
/* bit 0 of lr clear: return with a plain mov; otherwise return with bx */
tst lr,#1
moveq pc,lr
bx lr
/*
 * __a_barrier_v6: barrier issued through a coprocessor 15 write
 * (c7, c10, opcode 5), then return.
 */
.global __a_barrier_v6
.hidden __a_barrier_v6
__a_barrier_v6:
mcr p15,0,r0,c7,c10,5
bx lr
/*
 * __a_barrier_v7: barrier using "dmb ish", then return.
 * The instruction is emitted as a raw word (presumably so this file still
 * assembles when targeting architectures without dmb - confirm).
 */
.global __a_barrier_v7
.hidden __a_barrier_v7
__a_barrier_v7:
.word 0xf57ff05b /* dmb ish */
bx lr
/*
 * __a_cas: dispatch to the compare-and-swap implementation selected at
 * runtime. The word at __a_cas_ptr is added to the address of
 * __a_cas_dummy to form the jump target, so 0 selects the dummy below.
 * Only ip is used as scratch, leaving r0-r2 intact for the target.
 * The pc arithmetic assumes ARM-state encoding (pc reads as address + 8).
 */
.global __a_cas
.hidden __a_cas
.type __a_cas,%function
__a_cas:
/* ip = distance from label 1 to __a_cas_ptr */
ldr ip,1f
/* pc reads as label 1 here, so this loads the word stored at __a_cas_ptr */
ldr ip,[pc,ip]
/* pc reads as __a_cas_dummy here; jump to __a_cas_dummy + ip */
add pc,pc,ip
1: .word __a_cas_ptr-1b
/*
 * Default target: plain load / compare / conditional store with no
 * exclusive access and no barrier. Returns r0 = (incoming r0) - [r2];
 * when that is zero, r1 has been stored to [r2].
 * NOTE(review): presumably only adequate before other threads exist - confirm.
 */
.global __a_cas_dummy
.hidden __a_cas_dummy
__a_cas_dummy:
mov r3,r0
ldr r0,[r2]
subs r0,r3,r0
streq r1,[r2]
/* bit 0 of lr clear: return with a plain mov; otherwise return with bx */
tst lr,#1
moveq pc,lr
bx lr
/*
 * __a_cas_v6: compare-and-swap using ldrex/strex, bracketed by the
 * coprocessor 15 (c7, c10, opcode 5) barrier.
 * In: r0 = value to compare against, r1 = value to store, r2 = address.
 * Out: r0 = 0 if the store was performed, otherwise (compare value - loaded
 * value), which is nonzero. r3 is used as scratch.
 * ldrex/strex are emitted as raw words (presumably so the file assembles
 * for targets whose assembler rejects them - confirm).
 */
.global __a_cas_v6
.hidden __a_cas_v6
__a_cas_v6:
mov r3,r0
mcr p15,0,r0,c7,c10,5
1: .word 0xe1920f9f /* ldrex r0,[r2] */
/* r0 = compare value - loaded value; the next two instructions run only if zero */
subs r0,r3,r0
.word 0x01820f91 /* strexeq r0,r1,[r2] */
/* strex status of 1 means the exclusive store did not succeed: retry */
teqeq r0,#1
beq 1b
mcr p15,0,r0,c7,c10,5
bx lr
/*
 * __a_cas_v7: compare-and-swap using ldrex/strex, bracketed by "dmb ish".
 * In: r0 = value to compare against, r1 = value to store, r2 = address.
 * Out: r0 = 0 if the store was performed, otherwise (compare value - loaded
 * value), which is nonzero. r3 is used as scratch.
 * dmb/ldrex/strex are emitted as raw words (presumably so the file
 * assembles for targets whose assembler rejects them - confirm).
 */
.global __a_cas_v7
.hidden __a_cas_v7
__a_cas_v7:
mov r3,r0
.word 0xf57ff05b /* dmb ish */
1: .word 0xe1920f9f /* ldrex r0,[r2] */
/* r0 = compare value - loaded value; the next two instructions run only if zero */
subs r0,r3,r0
.word 0x01820f91 /* strexeq r0,r1,[r2] */
/* strex status of 1 means the exclusive store did not succeed: retry */
teqeq r0,#1
beq 1b
.word 0xf57ff05b /* dmb ish */
bx lr
/*
 * __a_gettp: dispatch to the thread-pointer getter selected at runtime.
 * The word at __a_gettp_ptr is added to the address of __a_gettp_dummy to
 * form the jump target, so 0 selects the dummy below.
 * r0 (the result register) doubles as the only scratch register.
 * The pc arithmetic assumes ARM-state encoding (pc reads as address + 8).
 */
.global __a_gettp
.hidden __a_gettp
.type __a_gettp,%function
__a_gettp:
/* r0 = distance from label 1 to __a_gettp_ptr */
ldr r0,1f
/* pc reads as label 1 here, so this loads the word stored at __a_gettp_ptr */
ldr r0,[pc,r0]
/* pc reads as __a_gettp_dummy here; jump to __a_gettp_dummy + r0 */
add pc,pc,r0
1: .word __a_gettp_ptr-1b
/* Default target: a single raw instruction word and no return sequence.
 * NOTE(review): 0xe7fddef1 appears to be a permanently-undefined encoding,
 * i.e. using the getter before one is selected traps - confirm. */
.global __a_gettp_dummy
.hidden __a_gettp_dummy
__a_gettp_dummy:
.word 0xe7fddef1
/*
 * __a_gettp_native: read the thread pointer directly from coprocessor 15
 * (c13, c0, opcode 3) into r0 and return.
 */
.global __a_gettp_native
.hidden __a_gettp_native
__a_gettp_native:
mrc p15,0,r0,c13,c0,3
bx lr
.data
/*
 * Dispatch words read by the __a_barrier, __a_cas and __a_gettp entry
 * points. Each holds the value added to the address of the corresponding
 * *_dummy label to form the jump target; the initial 0 selects the dummy.
 */
.global __a_barrier_ptr
.hidden __a_barrier_ptr
__a_barrier_ptr:
.word 0
.global __a_cas_ptr
.hidden __a_cas_ptr
__a_cas_ptr:
.word 0
.global __a_gettp_ptr
.hidden __a_gettp_ptr
__a_gettp_ptr:
.word 0
.text
/*
 * __set_thread_area: issue "svc 0" with r7 = 0xf0005, leaving r0 (the
 * caller's argument) untouched on entry to the call, and return with
 * whatever the call leaves in r0. r7 is preserved across the call by
 * parking it in r1.
 * NOTE(review): presumably r7 carries the syscall number; 0xf0005 matches
 * the number passed to __syscall in the C version of this function.
 */
.global __set_thread_area
.type __set_thread_area,%function
__set_thread_area:
mov r1,r7
/* build 0xf0005 in two steps */
mov r7,#0x0f0000
add r7,r7,#5
svc 0
mov r7,r1
/* bit 0 of lr clear: return with a plain mov; otherwise return with bx */
tst lr,#1
moveq pc,lr
bx lr
/* Replaced by C code in arch/arm/src */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册