diff --git a/include/linux/mm.h b/include/linux/mm.h index 3cccd053850fe650cde60e6d3fecf217d9dbfef0..3172a1c0f08e96faf072fda7b4327a9626ddf611 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -988,6 +988,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); struct page *get_dump_page(unsigned long addr); +extern int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, + unsigned long address, unsigned int fault_flags); extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned long offset); diff --git a/kernel/futex.c b/kernel/futex.c index 3fbc76cbb9aa77621b91f6b351bec811472f4fe5..0a308970c24ac4ce1ef00256df7f8afcf2018044 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -355,8 +355,8 @@ static int fault_in_user_writeable(u32 __user *uaddr) int ret; down_read(&mm->mmap_sem); - ret = get_user_pages(current, mm, (unsigned long)uaddr, - 1, 1, 0, NULL, NULL); + ret = fixup_user_fault(current, mm, (unsigned long)uaddr, + FAULT_FLAG_WRITE); up_read(&mm->mmap_sem); return ret < 0 ? ret : 0; diff --git a/mm/memory.c b/mm/memory.c index 3c9f3aa8332ef12ed4ac3cbdc3a25e63b43459ef..a56e3ba816b21e52016a1a46be99479afe45ad08 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1805,7 +1805,63 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(__get_user_pages); -/** +/* + * fixup_user_fault() - manually resolve a user page fault + * @tsk: the task_struct to use for page fault accounting, or + * NULL if faults are not to be recorded. + * @mm: mm_struct of target mm + * @address: user address + * @fault_flags:flags to pass down to handle_mm_fault() + * + * This is meant to be called in the specific scenario where for locking reasons + * we try to access user memory in atomic context (within a pagefault_disable() + * section), this returns -EFAULT, and we want to resolve the user fault before + * trying again. + * + * Typically this is meant to be used by the futex code. + * + * The main difference with get_user_pages() is that this function will + * unconditionally call handle_mm_fault() which will in turn perform all the + * necessary SW fixup of the dirty and young bits in the PTE, while + * handle_mm_fault() only guarantees to update these in the struct page. + * + * This is important for some architectures where those bits also gate the + * access permission to the page because they are maintained in software. On + * such architectures, gup() will not be enough to make a subsequent access + * succeed. + * + * This should be called with the mm_sem held for read. + */ +int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, + unsigned long address, unsigned int fault_flags) +{ + struct vm_area_struct *vma; + int ret; + + vma = find_extend_vma(mm, address); + if (!vma || address < vma->vm_start) + return -EFAULT; + + ret = handle_mm_fault(mm, vma, address, fault_flags); + if (ret & VM_FAULT_ERROR) { + if (ret & VM_FAULT_OOM) + return -ENOMEM; + if (ret & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) + return -EHWPOISON; + if (ret & VM_FAULT_SIGBUS) + return -EFAULT; + BUG(); + } + if (tsk) { + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + } + return 0; +} + +/* * get_user_pages() - pin user pages in memory * @tsk: the task_struct to use for page fault accounting, or * NULL if faults are not to be recorded.