diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index af88ef6fffff7e972fcee7a26032424b4318dbb7..a14d63e945f4749dd78c0f8e15f2046dbd914f8d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -262,7 +262,7 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address, struct userfaultfd_ctx *ctx; struct userfaultfd_wait_queue uwq; int ret; - bool must_wait; + bool must_wait, return_to_userland; BUG_ON(!rwsem_is_locked(&mm->mmap_sem)); @@ -327,6 +327,9 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address, uwq.msg = userfault_msg(address, flags, reason); uwq.ctx = ctx; + return_to_userland = (flags & (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE)) == + (FAULT_FLAG_USER|FAULT_FLAG_KILLABLE); + spin_lock(&ctx->fault_pending_wqh.lock); /* * After the __add_wait_queue the uwq is visible to userland @@ -338,14 +341,16 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address, * following the spin_unlock to happen before the list_add in * __add_wait_queue. */ - set_current_state(TASK_KILLABLE); + set_current_state(return_to_userland ? TASK_INTERRUPTIBLE : + TASK_KILLABLE); spin_unlock(&ctx->fault_pending_wqh.lock); must_wait = userfaultfd_must_wait(ctx, address, flags, reason); up_read(&mm->mmap_sem); if (likely(must_wait && !ACCESS_ONCE(ctx->released) && - !fatal_signal_pending(current))) { + (return_to_userland ? !signal_pending(current) : + !fatal_signal_pending(current)))) { wake_up_poll(&ctx->fd_wqh, POLLIN); schedule(); ret |= VM_FAULT_MAJOR; @@ -353,6 +358,30 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address, __set_current_state(TASK_RUNNING); + if (return_to_userland) { + if (signal_pending(current) && + !fatal_signal_pending(current)) { + /* + * If we got a SIGSTOP or SIGCONT and this is + * a normal userland page fault, just let + * userland return so the signal will be + * handled and gdb debugging works. The page + * fault code immediately after we return from + * this function is going to release the + * mmap_sem and it's not depending on it + * (unlike gup would if we were not to return + * VM_FAULT_RETRY). + * + * If a fatal signal is pending we still take + * the streamlined VM_FAULT_RETRY failure path + * and there's no need to retake the mmap_sem + * in such case. + */ + down_read(&mm->mmap_sem); + ret = 0; + } + } + /* * Here we race with the list_del; list_add in * userfaultfd_ctx_read(), however because we don't ever run