diff --git a/include/linux/mm.h b/include/linux/mm.h index 040a04a889967eadc6a1fc469fc8172c8f603e4a..2c8ed8a894c83b73ac34431a7657bc8e15b07039 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1968,8 +1968,14 @@ void anon_vma_interval_tree_verify(struct anon_vma_chain *node); /* mmap.c */ extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin); -extern int vma_adjust(struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert); +extern int __vma_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, + struct vm_area_struct *expand); +static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) +{ + return __vma_adjust(vma, start, end, pgoff, insert, NULL); +} extern struct vm_area_struct *vma_merge(struct mm_struct *, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t, diff --git a/mm/mmap.c b/mm/mmap.c index 183694b80bcc85f1ae7ec050135648046133c4c2..e53637f8ac428566dd42b0469f45f2b14339669e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -601,14 +601,24 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma) mm->map_count++; } -static inline void -__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, - struct vm_area_struct *prev) +static __always_inline void __vma_unlink_common(struct mm_struct *mm, + struct vm_area_struct *vma, + struct vm_area_struct *prev, + bool has_prev) { struct vm_area_struct *next; vma_rb_erase(vma, &mm->mm_rb); - prev->vm_next = next = vma->vm_next; + next = vma->vm_next; + if (has_prev) + prev->vm_next = next; + else { + prev = vma->vm_prev; + if (prev) + prev->vm_next = next; + else + mm->mmap = next; + } if (next) next->vm_prev = prev; @@ -616,6 +626,19 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, vmacache_invalidate(mm); } +static inline void __vma_unlink_prev(struct mm_struct *mm, + struct vm_area_struct *vma, + struct vm_area_struct *prev) +{ + __vma_unlink_common(mm, vma, prev, true); +} + +static inline void __vma_unlink(struct mm_struct *mm, + struct vm_area_struct *vma) +{ + __vma_unlink_common(mm, vma, NULL, false); +} + /* * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that * is already present in an i_mmap tree without adjusting the tree. @@ -623,11 +646,12 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma, * are necessary. The "insert" vma (if any) is to be inserted * before we drop the necessary locks. */ -int vma_adjust(struct vm_area_struct *vma, unsigned long start, - unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert) +int __vma_adjust(struct vm_area_struct *vma, unsigned long start, + unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert, + struct vm_area_struct *expand) { struct mm_struct *mm = vma->vm_mm; - struct vm_area_struct *next = vma->vm_next; + struct vm_area_struct *next = vma->vm_next, *orig_vma = vma; struct address_space *mapping = NULL; struct rb_root *root = NULL; struct anon_vma *anon_vma = NULL; @@ -643,9 +667,38 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, /* * vma expands, overlapping all the next, and * perhaps the one after too (mprotect case 6). + * The only two other cases that gets here are + * case 1, case 7 and case 8. */ - remove_next = 1 + (end > next->vm_end); - end = next->vm_end; + if (next == expand) { + /* + * The only case where we don't expand "vma" + * and we expand "next" instead is case 8. + */ + VM_WARN_ON(end != next->vm_end); + /* + * remove_next == 3 means we're + * removing "vma" and that to do so we + * swapped "vma" and "next". + */ + remove_next = 3; + VM_WARN_ON(file != next->vm_file); + swap(vma, next); + } else { + VM_WARN_ON(expand != vma); + /* + * case 1, 6, 7, remove_next == 2 is case 6, + * remove_next == 1 is case 1 or 7. + */ + remove_next = 1 + (end > next->vm_end); + VM_WARN_ON(remove_next == 2 && + end != next->vm_next->vm_end); + VM_WARN_ON(remove_next == 1 && + end != next->vm_end); + /* trim end to next, for case 6 first pass */ + end = next->vm_end; + } + exporter = next; importer = vma; @@ -664,6 +717,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, adjust_next = (end - next->vm_start) >> PAGE_SHIFT; exporter = next; importer = vma; + VM_WARN_ON(expand != importer); } else if (end < vma->vm_end) { /* * vma shrinks, and !insert tells it's not @@ -673,6 +727,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, adjust_next = -((vma->vm_end - end) >> PAGE_SHIFT); exporter = vma; importer = next; + VM_WARN_ON(expand != importer); } /* @@ -690,7 +745,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, } } again: - vma_adjust_trans_huge(vma, start, end, adjust_next); + vma_adjust_trans_huge(orig_vma, start, end, adjust_next); if (file) { mapping = file->f_mapping; @@ -716,8 +771,8 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, if (!anon_vma && adjust_next) anon_vma = next->anon_vma; if (anon_vma) { - VM_BUG_ON_VMA(adjust_next && next->anon_vma && - anon_vma != next->anon_vma, next); + VM_WARN_ON(adjust_next && next->anon_vma && + anon_vma != next->anon_vma); anon_vma_lock_write(anon_vma); anon_vma_interval_tree_pre_update_vma(vma); if (adjust_next) @@ -757,7 +812,11 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, * vma_merge has merged next into vma, and needs * us to remove next before dropping the locks. */ - __vma_unlink(mm, next, vma); + if (remove_next != 3) + __vma_unlink_prev(mm, next, vma); + else + /* vma is not before next if they've been swapped */ + __vma_unlink(mm, next); if (file) __remove_shared_vm_struct(next, file, mapping); } else if (insert) { @@ -809,7 +868,27 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, * we must remove another next too. It would clutter * up the code too much to do both in one go. */ - next = vma->vm_next; + if (remove_next != 3) { + /* + * If "next" was removed and vma->vm_end was + * expanded (up) over it, in turn + * "next->vm_prev->vm_end" changed and the + * "vma->vm_next" gap must be updated. + */ + next = vma->vm_next; + } else { + /* + * For the scope of the comment "next" and + * "vma" considered pre-swap(): if "vma" was + * removed, next->vm_start was expanded (down) + * over it and the "next" gap must be updated. + * Because of the swap() the post-swap() "vma" + * actually points to pre-swap() "next" + * (post-swap() "next" as opposed is now a + * dangling pointer). + */ + next = vma; + } if (remove_next == 2) { remove_next = 1; end = next->vm_end; @@ -958,13 +1037,24 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, * cannot merge might become might become might become * PPNNNNNNNNNN PPPPPPPPPPNN PPPPPPPPPPPP 6 or * mmap, brk or case 4 below case 5 below PPPPPPPPXXXX 7 or - * mremap move: PPPPNNNNNNNN 8 + * mremap move: PPPPXXXXXXXX 8 * AAAA * PPPP NNNN PPPPPPPPPPPP PPPPPPPPNNNN PPPPNNNNNNNN * might become case 1 below case 2 below case 3 below * - * Odd one out? Case 8, because it extends NNNN but needs flags of XXXX: - * mprotect_fixup updates vm_flags & vm_page_prot on successful return. + * It is important for case 8 that the the vma NNNN overlapping the + * region AAAA is never going to extended over XXXX. Instead XXXX must + * be extended in region AAAA and NNNN must be removed. This way in + * all cases where vma_merge succeeds, the moment vma_adjust drops the + * rmap_locks, the properties of the merged vma will be already + * correct for the whole merged range. Some of those properties like + * vm_page_prot/vm_flags may be accessed by rmap_walks and they must + * be correct for the whole merged range immediately after the + * rmap_locks are released. Otherwise if XXXX would be removed and + * NNNN would be extended over the XXXX range, remove_migration_ptes + * or other rmap walkers (if working on addresses beyond the "end" + * parameter) may establish ptes with the wrong permissions of NNNN + * instead of the right permissions of XXXX. */ struct vm_area_struct *vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, @@ -989,9 +1079,14 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, else next = mm->mmap; area = next; - if (next && next->vm_end == end) /* cases 6, 7, 8 */ + if (area && area->vm_end == end) /* cases 6, 7, 8 */ next = next->vm_next; + /* verify some invariant that must be enforced by the caller */ + VM_WARN_ON(prev && addr <= prev->vm_start); + VM_WARN_ON(area && end > area->vm_end); + VM_WARN_ON(addr >= end); + /* * Can it merge with the predecessor? */ @@ -1012,11 +1107,12 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) { /* cases 1, 6 */ - err = vma_adjust(prev, prev->vm_start, - next->vm_end, prev->vm_pgoff, NULL); + err = __vma_adjust(prev, prev->vm_start, + next->vm_end, prev->vm_pgoff, NULL, + prev); } else /* cases 2, 5, 7 */ - err = vma_adjust(prev, prev->vm_start, - end, prev->vm_pgoff, NULL); + err = __vma_adjust(prev, prev->vm_start, + end, prev->vm_pgoff, NULL, prev); if (err) return NULL; khugepaged_enter_vma_merge(prev, vm_flags); @@ -1032,11 +1128,18 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, anon_vma, file, pgoff+pglen, vm_userfaultfd_ctx)) { if (prev && addr < prev->vm_end) /* case 4 */ - err = vma_adjust(prev, prev->vm_start, - addr, prev->vm_pgoff, NULL); - else /* cases 3, 8 */ - err = vma_adjust(area, addr, next->vm_end, - next->vm_pgoff - pglen, NULL); + err = __vma_adjust(prev, prev->vm_start, + addr, prev->vm_pgoff, NULL, next); + else { /* cases 3, 8 */ + err = __vma_adjust(area, addr, next->vm_end, + next->vm_pgoff - pglen, NULL, next); + /* + * In case 3 area is already equal to next and + * this is a noop, but in case 8 "area" has + * been removed and next was expanded over it. + */ + area = next; + } if (err) return NULL; khugepaged_enter_vma_merge(area, vm_flags); diff --git a/mm/mprotect.c b/mm/mprotect.c index 063bbed22c7b67a24f114567e027292dc88982ee..ec91dfd3f9007353acc9bc238faacac59991de3d 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -304,6 +304,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, vma->vm_userfaultfd_ctx); if (*pprev) { vma = *pprev; + VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); goto success; }