diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 188fc9256baf16824431cb273f588daa219b2521..0ea6b40223cc0f890e899f95a708ae0d28ff12f7 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -169,7 +169,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index 4e8143de32e70107e733ae197c095c27ac443423..66db88cb8c62786b41953c69e1684c6b68be64d8 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -168,7 +168,6 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index a9ee0d9dc740a0582028a447d969d9ff0e39afbb..f49b996aebdb12cce83800fdefeb40090331706b 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -344,9 +344,6 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) regs, addr); } if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index db0df8418b135fb983e6db9bc3d6b085dfccab7a..6cd448d9835c163fb95ea111952d577591faea80 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -547,12 +547,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, return 0; } - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of - * starvation. - */ if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; mm_flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index eb263e61daf40d5e238e63ef1432d2460d4ec5aa..38190cfb105e15a1c38558c225b23c4d36d54e59 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -115,7 +115,6 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; } diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index a9d55ad8d67be8e1bb4ef0c0010cda5742637a43..49bb7f9885e8e42c8cb447956093fb3716131091 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -189,7 +189,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 9b6163c05a754f8d86e6508eba4fd77483f71892..ec7728cc8e38892592cbc2109515ba0621ef957c 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -162,9 +162,6 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index 202ad6a494f595cf9185b12a9aadd887c9bdab96..ed7e518d94652de7d0077adea59d062ed941f770 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -236,7 +236,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 73d8a0f0b810c669d7d541704ab417a386e2cb74..be6aa573cbf0f95ae249e23141f2a9ffc20439b9 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -178,7 +178,6 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, tsk->min_flt++; } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index b740534b152c1dbf340d11db9dd1650e4228a11c..0202c970bb00b7e03b01bee5f3f8912da0112eed 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -237,7 +237,6 @@ void do_page_fault(unsigned long entry, unsigned long addr, else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index 24fd84cf6006cf2a7ac285152918db0668af69da..3cb796e465223171cc6244ff3b2c67d3d0741149 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -158,9 +158,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index dc4dbafc1d83254066687dce319aaf3b16c2d752..ae72d50e91a8f010338c62fdcbad8726b651b301 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -185,7 +185,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index c8e8b7c0555837144d3427daaf395b6317d06aa6..c10bb2bdba777e34611994d8f7a8c9e3c44b2ce4 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -327,14 +327,12 @@ void do_page_fault(struct pt_regs *regs, unsigned long code, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; - /* * No need to up_read(&mm->mmap_sem) as we would * have already released it in __lock_page_or_retry * in mm/filemap.c. */ - + flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4eed84f8e919697317f6ed8a630a6f947928b575..5b04d029f8249d0b330c42b4c7f7e37a4c03579b 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -564,13 +564,7 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address, * case. */ if (unlikely(fault & VM_FAULT_RETRY)) { - /* We retry only once */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. - */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; if (!fatal_signal_pending(current)) goto retry; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 523dbfbac03ddedac0709a15102bc4e460b93d1f..de529e7a8aee160b29745a441d91161bbf87bf76 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -155,11 +155,6 @@ asmlinkage void do_page_fault(struct pt_regs *regs) 1, regs, addr); } if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. - */ - flags &= ~(FAULT_FLAG_ALLOW_RETRY); flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a6e3c7022245d4ac4e8f52acb1cefdbddff54705..c28a970cb1b9eb9f831105a3b7e68320a9e3f422 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -537,10 +537,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) fault = VM_FAULT_PFAULT; goto out_up; } - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~(FAULT_FLAG_ALLOW_RETRY | - FAULT_FLAG_RETRY_NOWAIT); + flags &= ~FAULT_FLAG_RETRY_NOWAIT; flags |= FAULT_FLAG_TRIED; down_read(&mm->mmap_sem); goto retry; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 6defd2c6d9b1d95698670cbd65e7426de56b5c15..2b83a00afdd9fbff1bc245a765e78859b938c787 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -498,7 +498,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, regs, address); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index b0440b0edd97b4af739c6aa53d49402bc8de2656..6e8e6ea00836e039dbf7288c991b76fcadd859bd 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -261,7 +261,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write, 1, regs, address); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 8f8a604c130023fb238408cedbb3f5a2a30c0a26..73399ef2e25558817b7e59133b7d556382779de7 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -459,7 +459,6 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs) 1, regs, address); } if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index cced829460427180091b494e7eb2c9315dd66d73..eacf4fe7ae4d7b406571b577bc8891760dd8cc78 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -96,7 +96,6 @@ int handle_page_fault(unsigned long address, unsigned long ip, else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; goto retry; diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index 8f12a5b50a42bffaa14efa03c04c8f6f67f8b1b1..14144b9e774a21dc5c1a7f2c216f332e2049419b 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -268,9 +268,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs) else tsk->min_flt++; if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index c61acf63529f025841694dab1e5806174b3e8f4e..52c8cbbd5a65b953652c0773fced29b4ca1602df 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -1418,9 +1418,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, * that we made any progress. Handle this case first. */ if (unlikely(fault & VM_FAULT_RETRY)) { - /* Retry at most once */ if (flags & FAULT_FLAG_ALLOW_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; if (!fatal_signal_pending(tsk)) goto retry; diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 2ab0e0dcd1668bce6fb9ea6f9035d99d13458ed3..deb2cd8ac7b4d0c041f0430dd7850fab7e27c33f 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -128,7 +128,6 @@ void do_page_fault(struct pt_regs *regs) else current->min_flt++; if (fault & VM_FAULT_RETRY) { - flags &= ~FAULT_FLAG_ALLOW_RETRY; flags |= FAULT_FLAG_TRIED; /* No need to up_read(&mm->mmap_sem) as we would diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 185655f22f8986e8708ee3a9fbddbb1258d6bac4..2119ae715962013e1629c0ee53d361c7da637fb6 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -61,9 +61,10 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, /* * If possible, avoid waiting for GPU with mmap_sem - * held. + * held. We only do this if the fault allows retry and this + * is the first attempt. */ - if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault_flag_allow_retry_first(vmf->flags)) { ret = VM_FAULT_RETRY; if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) goto out_unlock; @@ -136,7 +137,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) if (err != -EBUSY) return VM_FAULT_NOPAGE; - if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { + /* + * If the fault allows retry and this is the first + * fault attempt, we try to release the mmap_sem + * before waiting + */ + if (fault_flag_allow_retry_first(vmf->flags)) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { ttm_bo_get(bo); up_read(&vmf->vma->vm_mm->mmap_sem); diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ea591ec38c4371f5144a8c98522045a2b5c1c20..073295cc94f30c8082b599e94c994095d41d4a4c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -327,16 +327,55 @@ extern unsigned int kobjsize(const void *objp); */ extern pgprot_t protection_map[16]; +/* + * About @FAULT_FLAG_ALLOW_RETRY and @FAULT_FLAG_TRIED: we can specify + * whether we would allow page faults to retry by specifying these two + * fault flags correctly. Currently there can be three legal combinations: + * + * (a) ALLOW_RETRY and !TRIED: this means the page fault allows retry, and + * this is the first try + * + * (b) ALLOW_RETRY and TRIED: this means the page fault allows retry, and + * we've already tried at least once + * + * (c) !ALLOW_RETRY and !TRIED: this means the page fault does not allow retry + * + * The unlisted combination (!ALLOW_RETRY && TRIED) is illegal and should never + * be used. Note that page faults can be allowed to retry for multiple times, + * in which case we'll have an initial fault with flags (a) then later on + * continuous faults with flags (b). We should always try to detect pending + * signals before a retry to make sure the continuous page faults can still be + * interrupted if necessary. + */ + #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_MKWRITE 0x02 /* Fault was mkwrite of existing pte */ #define FAULT_FLAG_ALLOW_RETRY 0x04 /* Retry fault if blocking */ #define FAULT_FLAG_RETRY_NOWAIT 0x08 /* Don't drop mmap_sem and wait when retrying */ #define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ -#define FAULT_FLAG_TRIED 0x20 /* Second try */ +#define FAULT_FLAG_TRIED 0x20 /* We've tried once */ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ #define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ +/** + * fault_flag_allow_retry_first - check ALLOW_RETRY the first time + * + * This is mostly used for places where we want to try to avoid taking + * the mmap_sem for too long a time when waiting for another condition + * to change, in which case we can try to be polite to release the + * mmap_sem in the first round to avoid potential starvation of other + * processes that would also want the mmap_sem. + * + * Return: true if the page fault allows retry and this is the first + * attempt of the fault handling; false otherwise. + */ +static inline bool fault_flag_allow_retry_first(unsigned int flags) +{ + return (flags & FAULT_FLAG_ALLOW_RETRY) && + (!(flags & FAULT_FLAG_TRIED)); +} + #define FAULT_FLAG_TRACE \ { FAULT_FLAG_WRITE, "WRITE" }, \ { FAULT_FLAG_MKWRITE, "MKWRITE" }, \ diff --git a/mm/filemap.c b/mm/filemap.c index b4f919d487d154c2a7ffee1221ffe862b338dcca..dd02515475bc04f2ff5ab46c713c96ae0a78ea80 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1405,7 +1405,7 @@ EXPORT_SYMBOL_GPL(__lock_page_killable); int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags) { - if (flags & FAULT_FLAG_ALLOW_RETRY) { + if (fault_flag_allow_retry_first(flags)) { /* * CAUTION! In this case, mmap_sem is not released * even though return 0. diff --git a/mm/internal.h b/mm/internal.h index 47ae16a79b6cbe4b814f8050a5317a47493b31ec..91ffb1dced61d8077e31f5b6e2034db9f67eb3e9 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -365,10 +365,10 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf, /* * FAULT_FLAG_RETRY_NOWAIT means we don't want to wait on page locks or * anything, so we only pin the file and drop the mmap_sem if only - * FAULT_FLAG_ALLOW_RETRY is set. + * FAULT_FLAG_ALLOW_RETRY is set, while this is the first attempt. */ - if ((flags & (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT)) == - FAULT_FLAG_ALLOW_RETRY) { + if (fault_flag_allow_retry_first(flags) && + !(flags & FAULT_FLAG_RETRY_NOWAIT)) { fpin = get_file(vmf->vma->vm_file); up_read(&vmf->vma->vm_mm->mmap_sem); }