diff --git a/mm/mmap.c b/mm/mmap.c index 75557c639ad419c49aff37f1a57de548077f702d..f90ea92f755aed2a9d7fa8dac940443dd86240f8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, struct address_space *mapping = NULL; struct prio_tree_root *root = NULL; struct file *file = vma->vm_file; - struct anon_vma *anon_vma = NULL; long adjust_next = 0; int remove_next = 0; if (next && !insert) { + struct vm_area_struct *exporter = NULL; + if (end >= next->vm_end) { /* * vma expands, overlapping all the next, and @@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, */ again: remove_next = 1 + (end > next->vm_end); end = next->vm_end; - anon_vma = next->anon_vma; + exporter = next; importer = vma; } else if (end > next->vm_start) { /* @@ -527,7 +528,7 @@ again: remove_next = 1 + (end > next->vm_end); * mprotect case 5 shifting the boundary up. */ adjust_next = (end - next->vm_start) >> PAGE_SHIFT; - anon_vma = next->anon_vma; + exporter = next; importer = vma; } else if (end < vma->vm_end) { /* @@ -536,28 +537,19 @@ again: remove_next = 1 + (end > next->vm_end); * mprotect case 4 shifting the boundary down. */ adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT); - anon_vma = next->anon_vma; + exporter = vma; importer = next; } - } - /* - * When changing only vma->vm_end, we don't really need anon_vma lock. - */ - if (vma->anon_vma && (insert || importer || start != vma->vm_start)) - anon_vma = vma->anon_vma; - if (anon_vma) { /* * Easily overlooked: when mprotect shifts the boundary, * make sure the expanding vma has anon_vma set if the * shrinking vma had, to cover any anon pages imported. */ - if (importer && !importer->anon_vma) { - /* Block reverse map lookups until things are set up. */ - if (anon_vma_clone(importer, vma)) { + if (exporter && exporter->anon_vma && !importer->anon_vma) { + if (anon_vma_clone(importer, exporter)) return -ENOMEM; - } - importer->anon_vma = anon_vma; + importer->anon_vma = exporter->anon_vma; } } @@ -824,6 +816,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, return NULL; } +/* + * Rough compatbility check to quickly see if it's even worth looking + * at sharing an anon_vma. + * + * They need to have the same vm_file, and the flags can only differ + * in things that mprotect may change. + * + * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that + * we can merge the two vma's. For example, we refuse to merge a vma if + * there is a vm_ops->close() function, because that indicates that the + * driver is doing some kind of reference counting. But that doesn't + * really matter for the anon_vma sharing case. + */ +static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b) +{ + return a->vm_end == b->vm_start && + mpol_equal(vma_policy(a), vma_policy(b)) && + a->vm_file == b->vm_file && + !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) && + b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); +} + +/* + * Do some basic sanity checking to see if we can re-use the anon_vma + * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be + * the same as 'old', the other will be the new one that is trying + * to share the anon_vma. + * + * NOTE! This runs with mm_sem held for reading, so it is possible that + * the anon_vma of 'old' is concurrently in the process of being set up + * by another page fault trying to merge _that_. But that's ok: if it + * is being set up, that automatically means that it will be a singleton + * acceptable for merging, so we can do all of this optimistically. But + * we do that ACCESS_ONCE() to make sure that we never re-load the pointer. + * + * IOW: that the "list_is_singular()" test on the anon_vma_chain only + * matters for the 'stable anon_vma' case (ie the thing we want to avoid + * is to return an anon_vma that is "complex" due to having gone through + * a fork). + * + * We also make sure that the two vma's are compatible (adjacent, + * and with the same memory policies). That's all stable, even with just + * a read lock on the mm_sem. + */ +static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b) +{ + if (anon_vma_compatible(a, b)) { + struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma); + + if (anon_vma && list_is_singular(&old->anon_vma_chain)) + return anon_vma; + } + return NULL; +} + /* * find_mergeable_anon_vma is used by anon_vma_prepare, to check * neighbouring vmas for a suitable anon_vma, before it goes off @@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, */ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) { + struct anon_vma *anon_vma; struct vm_area_struct *near; - unsigned long vm_flags; near = vma->vm_next; if (!near) goto try_prev; - /* - * Since only mprotect tries to remerge vmas, match flags - * which might be mprotected into each other later on. - * Neither mlock nor madvise tries to remerge at present, - * so leave their flags as obstructing a merge. - */ - vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); - vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC); - - if (near->anon_vma && vma->vm_end == near->vm_start && - mpol_equal(vma_policy(vma), vma_policy(near)) && - can_vma_merge_before(near, vm_flags, - NULL, vma->vm_file, vma->vm_pgoff + - ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))) - return near->anon_vma; + anon_vma = reusable_anon_vma(near, vma, near); + if (anon_vma) + return anon_vma; try_prev: /* * It is potentially slow to have to call find_vma_prev here. @@ -868,14 +903,9 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) if (!near) goto none; - vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); - vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC); - - if (near->anon_vma && near->vm_end == vma->vm_start && - mpol_equal(vma_policy(near), vma_policy(vma)) && - can_vma_merge_after(near, vm_flags, - NULL, vma->vm_file, vma->vm_pgoff)) - return near->anon_vma; + anon_vma = reusable_anon_vma(near, near, vma); + if (anon_vma) + return anon_vma; none: /* * There's no absolute need to look only at touching neighbours: diff --git a/mm/rmap.c b/mm/rmap.c index eaa7a09eb72e85b8f48911c8b3bf35e38d0e213c..4bad3267537a8c7826d313afa661a03f51b2df26 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -182,7 +182,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) { struct anon_vma_chain *avc, *pavc; - list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) { + list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { avc = anon_vma_chain_alloc(); if (!avc) goto enomem_failure; @@ -734,9 +734,20 @@ void page_move_anon_rmap(struct page *page, static void __page_set_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { - struct anon_vma *anon_vma = vma->anon_vma; + struct anon_vma_chain *avc; + struct anon_vma *anon_vma; + + BUG_ON(!vma->anon_vma); + + /* + * We must use the _oldest_ possible anon_vma for the page mapping! + * + * So take the last AVC chain entry in the vma, which is the deepest + * ancestor, and use the anon_vma from that. + */ + avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); + anon_vma = avc->anon_vma; - BUG_ON(!anon_vma); anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; page->index = linear_page_index(vma, address);