From 3d3efba020da1de57a715e2087cf761ed0ad0904 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Fri, 27 May 2016 15:51:49 +0100 Subject: [PATCH] linux-user: Fix race between multiple signals If multiple host signals are received in quick succession they would be queued in TaskState then delivered to the guest in spite of signals being supposed to be blocked by the guest signal handler's sa_mask. Fix this by decoupling the guest signal mask from the host signal mask, so we can have protected sections where all host signals are blocked. In particular we block signals from when host_signal_handler() queues a signal from the guest until process_pending_signals() has unqueued it. We also block signals while we are manipulating the guest signal mask in emulation of sigprocmask and similar syscalls. Blocking host signals also ensures the correct behaviour with respect to multiple threads and the overrun count of timer related signals. Alas blocking and queuing in qemu is still needed because of virtual processor exceptions, SIGSEGV and SIGBUS. Blocking signals inside process_pending_signals() protects against concurrency problems that would otherwise happen if host_signal_handler() ran and accessed the signal data structures while process_pending_signals() was manipulating them. Since we now track the guest signal mask separately from that of the host, the sigsuspend system calls must track the signal mask passed to them, because when we process signals as we leave the sigsuspend the guest signal mask in force is that passed to sigsuspend. Signed-off-by: Timothy Edward Baldwin Message-id: 1441497448-32489-19-git-send-email-T.E.Baldwin99@members.leeds.ac.uk [PMM: make signal_pending a simple flag rather than a word with two flag bits; ensure we don't call block_signals() twice in sigreturn codepaths; document and assert() the guarantee that using do_sigprocmask() to get the current mask never fails; use the qemu atomics.h functions rather than raw volatile variable access; add extra commentary and documentation; block SIGSEGV/SIGBUS in block_signals() and in process_pending_signals() because they can't occur synchronously here; check the right do_sigprocmask() call for errors in ssetmask syscall; expand commit message; fixed sigsuspend() hanging] Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell Signed-off-by: Riku Voipio --- linux-user/qemu.h | 50 ++++++++++++- linux-user/signal.c | 163 ++++++++++++++++++++++++++++++------------- linux-user/syscall.c | 73 +++++++++++++------ 3 files changed, 213 insertions(+), 73 deletions(-) diff --git a/linux-user/qemu.h b/linux-user/qemu.h index f09b750bbf..5138289bfb 100644 --- a/linux-user/qemu.h +++ b/linux-user/qemu.h @@ -123,14 +123,33 @@ typedef struct TaskState { #endif uint32_t stack_base; int used; /* non zero if used */ - bool sigsegv_blocked; /* SIGSEGV blocked by guest */ struct image_info *info; struct linux_binprm *bprm; struct emulated_sigtable sigtab[TARGET_NSIG]; struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */ struct sigqueue *first_free; /* first free siginfo queue entry */ - int signal_pending; /* non zero if a signal may be pending */ + /* This thread's signal mask, as requested by the guest program. + * The actual signal mask of this thread may differ: + * + we don't let SIGSEGV and SIGBUS be blocked while running guest code + * + sometimes we block all signals to avoid races + */ + sigset_t signal_mask; + /* The signal mask imposed by a guest sigsuspend syscall, if we are + * currently in the middle of such a syscall + */ + sigset_t sigsuspend_mask; + /* Nonzero if we're leaving a sigsuspend and sigsuspend_mask is valid. */ + int in_sigsuspend; + + /* Nonzero if process_pending_signals() needs to do something (either + * handle a pending signal or unblock signals). + * This flag is written from a signal handler so should be accessed via + * the atomic_read() and atomic_write() functions. (It is not accessed + * from multiple threads.) + */ + int signal_pending; + } __attribute__((aligned(16))) TaskState; extern char *exec_path; @@ -235,6 +254,12 @@ unsigned long init_guest_space(unsigned long host_start, * It's also OK to implement these with safe_syscall, though it will be * a little less efficient if a signal is delivered at the 'wrong' moment. * + * Some non-interruptible syscalls need to be handled using block_signals() + * to block signals for the duration of the syscall. This mainly applies + * to code which needs to modify the data structures used by the + * host_signal_handler() function and the functions it calls, including + * all syscalls which change the thread's signal mask. + * * (2) Interruptible syscalls * * These are guest syscalls that can be interrupted by signals and @@ -266,6 +291,8 @@ unsigned long init_guest_space(unsigned long host_start, * you make in the implementation returns either -TARGET_ERESTARTSYS or * EINTR though.) * + * block_signals() cannot be used for interruptible syscalls. + * * * How and why the safe_syscall implementation works: * @@ -352,6 +379,25 @@ long do_sigreturn(CPUArchState *env); long do_rt_sigreturn(CPUArchState *env); abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp); int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset); +/** + * block_signals: block all signals while handling this guest syscall + * + * Block all signals, and arrange that the signal mask is returned to + * its correct value for the guest before we resume execution of guest code. + * If this function returns non-zero, then the caller should immediately + * return -TARGET_ERESTARTSYS to the main loop, which will take the pending + * signal and restart execution of the syscall. + * If block_signals() returns zero, then the caller can continue with + * emulation of the system call knowing that no signals can be taken + * (and therefore that no race conditions will result). + * This should only be called once, because if it is called a second time + * it will always return non-zero. (Think of it like a mutex that can't + * be recursively locked.) + * Signals will be unblocked again by process_pending_signals(). + * + * Return value: non-zero if there was a pending signal, zero if not. + */ +int block_signals(void); /* Returns non zero if signal pending */ #ifdef TARGET_I386 /* vm86.c */ diff --git a/linux-user/signal.c b/linux-user/signal.c index 1b86a8550c..a89853d4eb 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -190,61 +190,81 @@ void target_to_host_old_sigset(sigset_t *sigset, target_to_host_sigset(sigset, &d); } +int block_signals(void) +{ + TaskState *ts = (TaskState *)thread_cpu->opaque; + sigset_t set; + int pending; + + /* It's OK to block everything including SIGSEGV, because we won't + * run any further guest code before unblocking signals in + * process_pending_signals(). + */ + sigfillset(&set); + sigprocmask(SIG_SETMASK, &set, 0); + + pending = atomic_xchg(&ts->signal_pending, 1); + + return pending; +} + /* Wrapper for sigprocmask function * Emulates a sigprocmask in a safe way for the guest. Note that set and oldset - * are host signal set, not guest ones. This wraps the sigprocmask host calls - * that should be protected (calls originated from guest) + * are host signal set, not guest ones. Returns -TARGET_ERESTARTSYS if + * a signal was already pending and the syscall must be restarted, or + * 0 on success. + * If set is NULL, this is guaranteed not to fail. */ int do_sigprocmask(int how, const sigset_t *set, sigset_t *oldset) { - int ret; - sigset_t val; - sigset_t *temp = NULL; - CPUState *cpu = thread_cpu; - TaskState *ts = (TaskState *)cpu->opaque; - bool segv_was_blocked = ts->sigsegv_blocked; + TaskState *ts = (TaskState *)thread_cpu->opaque; + + if (oldset) { + *oldset = ts->signal_mask; + } if (set) { - bool has_sigsegv = sigismember(set, SIGSEGV); - val = *set; - temp = &val; + int i; - sigdelset(temp, SIGSEGV); + if (block_signals()) { + return -TARGET_ERESTARTSYS; + } switch (how) { case SIG_BLOCK: - if (has_sigsegv) { - ts->sigsegv_blocked = true; - } + sigorset(&ts->signal_mask, &ts->signal_mask, set); break; case SIG_UNBLOCK: - if (has_sigsegv) { - ts->sigsegv_blocked = false; + for (i = 1; i <= NSIG; ++i) { + if (sigismember(set, i)) { + sigdelset(&ts->signal_mask, i); + } } break; case SIG_SETMASK: - ts->sigsegv_blocked = has_sigsegv; + ts->signal_mask = *set; break; default: g_assert_not_reached(); } - } - - ret = sigprocmask(how, temp, oldset); - if (oldset && segv_was_blocked) { - sigaddset(oldset, SIGSEGV); + /* Silently ignore attempts to change blocking status of KILL or STOP */ + sigdelset(&ts->signal_mask, SIGKILL); + sigdelset(&ts->signal_mask, SIGSTOP); } - - return ret; + return 0; } #if !defined(TARGET_OPENRISC) && !defined(TARGET_UNICORE32) && \ !defined(TARGET_X86_64) -/* Just set the guest's signal mask to the specified value */ +/* Just set the guest's signal mask to the specified value; the + * caller is assumed to have called block_signals() already. + */ static void set_sigmask(const sigset_t *set) { - do_sigprocmask(SIG_SETMASK, set, NULL); + TaskState *ts = (TaskState *)thread_cpu->opaque; + + ts->signal_mask = *set; } #endif @@ -376,6 +396,7 @@ static int core_dump_signal(int sig) void signal_init(void) { + TaskState *ts = (TaskState *)thread_cpu->opaque; struct sigaction act; struct sigaction oact; int i, j; @@ -391,6 +412,9 @@ void signal_init(void) target_to_host_signal_table[j] = i; } + /* Set the signal mask from the host mask. */ + sigprocmask(0, 0, &ts->signal_mask); + /* set all host signal handlers. ALL signals are blocked during the handlers to serialize them. */ memset(sigact_table, 0, sizeof(sigact_table)); @@ -509,7 +533,7 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) queue = gdb_queuesig (); handler = sigact_table[sig - 1]._sa_handler; - if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) { + if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) { /* Guest has blocked SIGSEGV but we got one anyway. Assume this * is a forced SIGSEGV (ie one the kernel handles via force_sig_info * because it got a real MMU fault). A blocked SIGSEGV in that @@ -565,7 +589,7 @@ int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info) q->next = NULL; k->pending = 1; /* signal that a new signal is pending */ - ts->signal_pending = 1; + atomic_set(&ts->signal_pending, 1); return 1; /* indicates that the signal was queued */ } } @@ -583,6 +607,7 @@ static void host_signal_handler(int host_signum, siginfo_t *info, CPUArchState *env = thread_cpu->env_ptr; int sig; target_siginfo_t tinfo; + ucontext_t *uc = puc; /* the CPU emulator uses some host signals to detect exceptions, we forward to it some signals */ @@ -602,6 +627,16 @@ static void host_signal_handler(int host_signum, siginfo_t *info, host_to_target_siginfo_noswap(&tinfo, info); if (queue_signal(env, sig, &tinfo) == 1) { + /* Block host signals until target signal handler entered. We + * can't block SIGSEGV or SIGBUS while we're executing guest + * code in case the guest code provokes one in the window between + * now and it getting out to the main loop. Signals will be + * unblocked again in process_pending_signals(). + */ + sigfillset(&uc->uc_sigmask); + sigdelset(&uc->uc_sigmask, SIGSEGV); + sigdelset(&uc->uc_sigmask, SIGBUS); + /* interrupt the virtual CPU as soon as possible */ cpu_exit(thread_cpu); } @@ -2673,9 +2708,13 @@ void sparc64_get_context(CPUSPARCState *env) env->pc = env->npc; env->npc += 4; - err = 0; - - do_sigprocmask(0, NULL, &set); + /* If we're only reading the signal mask then do_sigprocmask() + * is guaranteed not to fail, which is important because we don't + * have any way to signal a failure or restart this operation since + * this is not a normal syscall. + */ + err = do_sigprocmask(0, NULL, &set); + assert(err == 0); host_to_target_sigset_internal(&target_set, &set); if (TARGET_NSIG_WORDS == 1) { __put_user(target_set.sig[0], @@ -5778,7 +5817,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) { CPUState *cpu = ENV_GET_CPU(cpu_env); abi_ulong handler; - sigset_t set, old_set; + sigset_t set; target_sigset_t target_old_set; struct target_sigaction *sa; struct sigqueue *q; @@ -5801,7 +5840,7 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) handler = sa->_sa_handler; } - if (ts->sigsegv_blocked && sig == TARGET_SIGSEGV) { + if (sig == TARGET_SIGSEGV && sigismember(&ts->signal_mask, SIGSEGV)) { /* Guest has blocked SIGSEGV but we got one anyway. Assume this * is a forced SIGSEGV (ie one the kernel handles via force_sig_info * because it got a real MMU fault), and treat as if default handler. @@ -5825,17 +5864,23 @@ static void handle_pending_signal(CPUArchState *cpu_env, int sig) force_sig(sig); } else { /* compute the blocked signals during the handler execution */ + sigset_t *blocked_set; + target_to_host_sigset(&set, &sa->sa_mask); /* SA_NODEFER indicates that the current signal should not be blocked during the handler */ if (!(sa->sa_flags & TARGET_SA_NODEFER)) sigaddset(&set, target_to_host_signal(sig)); - /* block signals in the handler using Linux */ - do_sigprocmask(SIG_BLOCK, &set, &old_set); /* save the previous blocked signal state to restore it at the end of the signal execution (see do_sigreturn) */ - host_to_target_sigset_internal(&target_old_set, &old_set); + host_to_target_sigset_internal(&target_old_set, &ts->signal_mask); + + /* block signals in the handler */ + blocked_set = ts->in_sigsuspend ? + &ts->sigsuspend_mask : &ts->signal_mask; + sigorset(&ts->signal_mask, blocked_set, &set); + ts->in_sigsuspend = 0; /* if the CPU is in VM86 mode, we restore the 32 bit values */ #if defined(TARGET_I386) && !defined(TARGET_X86_64) @@ -5869,18 +5914,38 @@ void process_pending_signals(CPUArchState *cpu_env) CPUState *cpu = ENV_GET_CPU(cpu_env); int sig; TaskState *ts = cpu->opaque; + sigset_t set; + sigset_t *blocked_set; - if (!ts->signal_pending) - return; - - /* FIXME: This is not threadsafe. */ - for(sig = 1; sig <= TARGET_NSIG; sig++) { - if (ts->sigtab[sig - 1].pending) { - handle_pending_signal(cpu_env, sig); - return; + while (atomic_read(&ts->signal_pending)) { + /* FIXME: This is not threadsafe. */ + sigfillset(&set); + sigprocmask(SIG_SETMASK, &set, 0); + + for (sig = 1; sig <= TARGET_NSIG; sig++) { + blocked_set = ts->in_sigsuspend ? + &ts->sigsuspend_mask : &ts->signal_mask; + + if (ts->sigtab[sig - 1].pending && + (!sigismember(blocked_set, + target_to_host_signal_table[sig]) + || sig == TARGET_SIGSEGV)) { + handle_pending_signal(cpu_env, sig); + /* Restart scan from the beginning */ + sig = 1; + } } - } - /* if no signal is pending, just return */ - ts->signal_pending = 0; - return; + + /* if no signal is pending, unblock signals and recheck (the act + * of unblocking might cause us to take another host signal which + * will set signal_pending again). + */ + atomic_set(&ts->signal_pending, 0); + ts->in_sigsuspend = 0; + set = ts->signal_mask; + sigdelset(&set, SIGSEGV); + sigdelset(&set, SIGBUS); + sigprocmask(SIG_SETMASK, &set, 0); + } + ts->in_sigsuspend = 0; } diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 5bdfe2a936..639b328c74 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -5387,6 +5387,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, new_cpu->opaque = ts; ts->bprm = parent_ts->bprm; ts->info = parent_ts->info; + ts->signal_mask = parent_ts->signal_mask; nptl_flags = flags; flags &= ~CLONE_NPTL_FLAGS2; @@ -7482,9 +7483,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { sigset_t cur_set; abi_ulong target_set; - do_sigprocmask(0, NULL, &cur_set); - host_to_target_old_sigset(&target_set, &cur_set); - ret = target_set; + ret = do_sigprocmask(0, NULL, &cur_set); + if (!ret) { + host_to_target_old_sigset(&target_set, &cur_set); + ret = target_set; + } } break; #endif @@ -7493,12 +7496,20 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, { sigset_t set, oset, cur_set; abi_ulong target_set = arg1; - do_sigprocmask(0, NULL, &cur_set); + /* We only have one word of the new mask so we must read + * the rest of it with do_sigprocmask() and OR in this word. + * We are guaranteed that a do_sigprocmask() that only queries + * the signal mask will not fail. + */ + ret = do_sigprocmask(0, NULL, &cur_set); + assert(!ret); target_to_host_old_sigset(&set, &target_set); sigorset(&set, &set, &cur_set); - do_sigprocmask(SIG_SETMASK, &set, &oset); - host_to_target_old_sigset(&target_set, &oset); - ret = target_set; + ret = do_sigprocmask(SIG_SETMASK, &set, &oset); + if (!ret) { + host_to_target_old_sigset(&target_set, &oset); + ret = target_set; + } } break; #endif @@ -7527,7 +7538,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, mask = arg2; target_to_host_old_sigset(&set, &mask); - ret = get_errno(do_sigprocmask(how, &set, &oldset)); + ret = do_sigprocmask(how, &set, &oldset); if (!is_error(ret)) { host_to_target_old_sigset(&mask, &oldset); ret = mask; @@ -7561,7 +7572,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, how = 0; set_ptr = NULL; } - ret = get_errno(do_sigprocmask(how, set_ptr, &oldset)); + ret = do_sigprocmask(how, set_ptr, &oldset); if (!is_error(ret) && arg3) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0))) goto efault; @@ -7601,7 +7612,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, how = 0; set_ptr = NULL; } - ret = get_errno(do_sigprocmask(how, set_ptr, &oldset)); + ret = do_sigprocmask(how, set_ptr, &oldset); if (!is_error(ret) && arg3) { if (!(p = lock_user(VERIFY_WRITE, arg3, sizeof(target_sigset_t), 0))) goto efault; @@ -7639,28 +7650,36 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef TARGET_NR_sigsuspend case TARGET_NR_sigsuspend: { - sigset_t set; + TaskState *ts = cpu->opaque; #if defined(TARGET_ALPHA) abi_ulong mask = arg1; - target_to_host_old_sigset(&set, &mask); + target_to_host_old_sigset(&ts->sigsuspend_mask, &mask); #else if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1))) goto efault; - target_to_host_old_sigset(&set, p); + target_to_host_old_sigset(&ts->sigsuspend_mask, p); unlock_user(p, arg1, 0); #endif - ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE)); + ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask, + SIGSET_T_SIZE)); + if (ret != -TARGET_ERESTARTSYS) { + ts->in_sigsuspend = 1; + } } break; #endif case TARGET_NR_rt_sigsuspend: { - sigset_t set; + TaskState *ts = cpu->opaque; if (!(p = lock_user(VERIFY_READ, arg1, sizeof(target_sigset_t), 1))) goto efault; - target_to_host_sigset(&set, p); + target_to_host_sigset(&ts->sigsuspend_mask, p); unlock_user(p, arg1, 0); - ret = get_errno(safe_rt_sigsuspend(&set, SIGSET_T_SIZE)); + ret = get_errno(safe_rt_sigsuspend(&ts->sigsuspend_mask, + SIGSET_T_SIZE)); + if (ret != -TARGET_ERESTARTSYS) { + ts->in_sigsuspend = 1; + } } break; case TARGET_NR_rt_sigtimedwait: @@ -7706,11 +7725,19 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, break; #ifdef TARGET_NR_sigreturn case TARGET_NR_sigreturn: - ret = do_sigreturn(cpu_env); + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + } else { + ret = do_sigreturn(cpu_env); + } break; #endif case TARGET_NR_rt_sigreturn: - ret = do_rt_sigreturn(cpu_env); + if (block_signals()) { + ret = -TARGET_ERESTARTSYS; + } else { + ret = do_rt_sigreturn(cpu_env); + } break; case TARGET_NR_sethostname: if (!(p = lock_user_string(arg1))) @@ -9764,9 +9791,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, } mask = arg2; target_to_host_old_sigset(&set, &mask); - do_sigprocmask(how, &set, &oldset); - host_to_target_old_sigset(&mask, &oldset); - ret = mask; + ret = do_sigprocmask(how, &set, &oldset); + if (!ret) { + host_to_target_old_sigset(&mask, &oldset); + ret = mask; + } } break; #endif -- GitLab