diff --git a/include/sys/membarrier.h b/include/sys/membarrier.h
new file mode 100644
index 0000000000000000000000000000000000000000..10cb31083c0b2902668e0423f75401269c591980
--- /dev/null
+++ b/include/sys/membarrier.h
@@ -0,0 +1,17 @@
+#ifndef _SYS_MEMBARRIER_H
+#define _SYS_MEMBARRIER_H
+
+#define MEMBARRIER_CMD_QUERY 0
+#define MEMBARRIER_CMD_GLOBAL 1
+#define MEMBARRIER_CMD_GLOBAL_EXPEDITED 2
+#define MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED 4
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED 8 /* same value as the 1<<3 literal removed from ldso/dynlink.c */
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED 16 /* same value as the 1<<4 literal removed from ldso/dynlink.c */
+#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE 32
+#define MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE 64
+
+#define MEMBARRIER_CMD_SHARED MEMBARRIER_CMD_GLOBAL /* second name for MEMBARRIER_CMD_GLOBAL (value 1) */
+
+int membarrier(int, int); /* (cmd, flags); defined in src/linux/membarrier.c as a weak alias of __membarrier */
+
+#endif
diff --git a/ldso/dynlink.c b/ldso/dynlink.c
index c7d9dd393d01ad7fd311a17ea88e7fe0cab9c87f..025ed1b0c81eae20d5b01593393c17931b23d944 100644
--- a/ldso/dynlink.c
+++ b/ldso/dynlink.c
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include /* NOTE(review): header name missing on this line; presumably <sys/membarrier.h>, which declares the names used below - confirm */
 #include "pthread_impl.h"
 #include "libc.h"
 #include "dynlink.h"
@@ -1351,18 +1352,6 @@ static void update_tls_size()
 	tls_align);
 }
 
-void __dl_prepare_for_threads(void)
-{
-	/* MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED */
-	__syscall(SYS_membarrier, 1<<4, 0);
-}
-
-static sem_t barrier_sem;
-static void bcast_barrier(int s)
-{
-	sem_post(&barrier_sem);
-}
-
 static void install_new_tls(void)
 {
 	sigset_t set;
@@ -1397,26 +1386,11 @@ static void install_new_tls(void)
 	}
 
 	/* Broadcast barrier to ensure contents of new dtv is visible
-	 * if the new dtv pointer is. Use SYS_membarrier if it works,
-	 * otherwise emulate with a signal. */
-
-	/* MEMBARRIER_CMD_PRIVATE_EXPEDITED */
-	if (__syscall(SYS_membarrier, 1<<3, 0)) {
-		sem_init(&barrier_sem, 0, 0);
-		struct sigaction sa = {
-			.sa_flags = SA_RESTART,
-			.sa_handler = bcast_barrier
-		};
-		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
-		__libc_sigaction(SIGSYNCCALL, &sa, 0);
-		for (td=self->next; td!=self; td=td->next)
-			__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
-		for (td=self->next; td!=self; td=td->next)
-			sem_wait(&barrier_sem);
-		sa.sa_handler = SIG_IGN;
-		__libc_sigaction(SIGSYNCCALL, &sa, 0);
-		sem_destroy(&barrier_sem);
-	}
+	 * if the new dtv pointer is. The __membarrier function has a
+	 * fallback emulation using signals for kernels that lack the
+	 * feature at the syscall level. */
+
+	__membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED, 0); /* return value is not checked */
 
 	/* Install new dtv for each thread. */
 	for (j=0, td=self; !j || td!=self; j++, td=td->next) {
diff --git a/src/include/sys/membarrier.h b/src/include/sys/membarrier.h
new file mode 100644
index 0000000000000000000000000000000000000000..3654491c2d46b9d9e275e7a9cc6c35431f774a89
--- /dev/null
+++ b/src/include/sys/membarrier.h
@@ -0,0 +1,9 @@
+#ifndef SYS_MEMBARRIER_H
+#define SYS_MEMBARRIER_H
+
+#include "../../../include/sys/membarrier.h"
+#include /* NOTE(review): header name missing on this line; presumably the header that defines the hidden macro - confirm */
+
+hidden int __membarrier(int, int); /* (cmd, flags); internal name that membarrier() is aliased to */
+
+#endif
diff --git a/src/internal/pthread_impl.h b/src/internal/pthread_impl.h
index de089967630dbaaad39a8e93e1907802a2538d22..9b001421200f59a0e276972ad7d40302583469b6 100644
--- a/src/internal/pthread_impl.h
+++ b/src/internal/pthread_impl.h
@@ -130,7 +130,7 @@ hidden int __init_tp(void *);
 hidden void *__copy_tls(unsigned char *);
 hidden void __reset_tls();
 
-hidden void __dl_prepare_for_threads(void);
+hidden void __membarrier_init(void); /* takes over the role of __dl_prepare_for_threads */
 hidden void __dl_thread_cleanup(void);
 hidden void __testcancel();
 hidden void __do_cleanup_push(struct __ptcb *);
diff --git a/src/linux/membarrier.c b/src/linux/membarrier.c
new file mode 100644
index 0000000000000000000000000000000000000000..26d143e79cb026610183d99c26e27eb7fa6aaca8
--- /dev/null
+++ b/src/linux/membarrier.c
@@ -0,0 +1,96 @@
+#include <sys/membarrier.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <string.h>
+#include "pthread_impl.h"
+#include "syscall.h"
+
+/* No-op bodies that the weak aliases below bind to __tl_lock,
+ * __tl_unlock and __tl_sync. */
+static void dummy_0(void)
+{
+}
+
+static void dummy_1(pthread_t t)
+{
+}
+
+weak_alias(dummy_0, __tl_lock);
+weak_alias(dummy_0, __tl_unlock);
+weak_alias(dummy_1, __tl_sync);
+
+/* Posted once by every thread that handles SIGSYNCCALL during the
+ * emulated barrier. */
+static sem_t barrier_sem;
+
+/* SIGSYNCCALL handler installed by the emulation in __membarrier. */
+static void bcast_barrier(int s)
+{
+	sem_post(&barrier_sem);
+}
+
+/* Issue the membarrier syscall with the given cmd and flags and return
+ * its result as converted by __syscall_ret.
+ *
+ * If the syscall fails for MEMBARRIER_CMD_PRIVATE_EXPEDITED with flags
+ * of 0, the barrier is emulated with signals instead and the call
+ * succeeds; should the signal handler fail to install, the error from
+ * the syscall is reported. */
+int __membarrier(int cmd, int flags)
+{
+	int r = __syscall(SYS_membarrier, cmd, flags);
+	/* Emulate the private expedited command, which is needed by the
+	 * dynamic linker for installation of dynamic TLS, for older
+	 * kernels that lack the syscall. Unlike the syscall, this only
+	 * synchronizes with threads of the process, not other processes
+	 * sharing the VM, but such sharing is not a supported usage
+	 * anyway. */
+	if (r && cmd == MEMBARRIER_CMD_PRIVATE_EXPEDITED && !flags) {
+		pthread_t self=__pthread_self(), td;
+		sigset_t set;
+		__block_app_sigs(&set);
+		__tl_lock();
+		sem_init(&barrier_sem, 0, 0);
+		struct sigaction sa = {
+			.sa_flags = SA_RESTART,
+			.sa_handler = bcast_barrier
+		};
+		memset(&sa.sa_mask, -1, sizeof sa.sa_mask);
+		/* Signal the other threads only once the handler is known
+		 * to be installed; it is the handler that posts
+		 * barrier_sem, so without it the waits below could never
+		 * complete. */
+		if (!__libc_sigaction(SIGSYNCCALL, &sa, 0)) {
+			for (td=self->next; td!=self; td=td->next)
+				__syscall(SYS_tkill, td->tid, SIGSYNCCALL);
+			/* One wait per signalled thread. */
+			for (td=self->next; td!=self; td=td->next)
+				sem_wait(&barrier_sem);
+			r = 0;
+			sa.sa_handler = SIG_IGN;
+			__libc_sigaction(SIGSYNCCALL, &sa, 0);
+		}
+		sem_destroy(&barrier_sem);
+		__tl_unlock();
+		__restore_sigs(&set);
+	}
+	return __syscall_ret(r);
+}
+
+/* Called from __pthread_create just before libc.threaded is set. */
+void __membarrier_init(void)
+{
+	/* If membarrier is linked, attempt to pre-register to be able to use
+	 * the private expedited command before the process becomes multi-
+	 * threaded, since registering later has bad, potentially unbounded
+	 * latency. This syscall should be essentially free, and it's arguably
+	 * a mistake in the API design that registration was even required.
+	 * For other commands, registration may impose some cost, so it's left
+	 * to the application to do so if desired. Unfortunately this means
+	 * library code initialized after the process becomes multi-threaded
+	 * cannot use these features without accepting registration latency. */
+	__syscall(SYS_membarrier, MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 0);
+}
+
+/* Public name for __membarrier. */
+weak_alias(__membarrier, membarrier);
diff --git a/src/thread/pthread_create.c b/src/thread/pthread_create.c
index 54c035545003bf98c8d4d8197484a5295f8e2e10..7d4dc2ed5f4ad5c5a1bbdda806fada9e017f5992 100644
--- a/src/thread/pthread_create.c
+++ b/src/thread/pthread_create.c
@@ -15,7 +15,7 @@ weak_alias(dummy_0, __release_ptc);
 weak_alias(dummy_0, __pthread_tsd_run_dtors);
 weak_alias(dummy_0, __do_orphaned_stdio_locks);
 weak_alias(dummy_0, __dl_thread_cleanup);
-weak_alias(dummy_0, __dl_prepare_for_threads);
+weak_alias(dummy_0, __membarrier_init);
 
 static int tl_lock_count;
 static int tl_lock_waiters;
@@ -246,7 +246,7 @@ int __pthread_create(pthread_t *restrict res, const pthread_attr_t *restrict att
 		init_file_lock(__stderr_used);
 		__syscall(SYS_rt_sigprocmask, SIG_UNBLOCK, SIGPT_SET, 0, _NSIG/8);
 		self->tsd = (void **)__pthread_tsd_main;
-		__dl_prepare_for_threads();
+		__membarrier_init();
 		libc.threaded = 1;
 	}
 	if (attrp && !c11) attr = *attrp;