Commit a8fb5618 authored by Hugh Dickins, committed by Linus Torvalds

[PATCH] mm: unlink_file_vma, remove_vma

Divide remove_vm_struct into two parts: first anon_vma_unlink plus
unlink_file_vma, to unlink the vma from the list and tree by which rmap or
vmtruncate might find it; then remove_vma to close, fput and free.

The intention here is to do the anon_vma_unlink and unlink_file_vma earlier,
in free_pgtables before freeing any page tables: so we can be sure that any
page tables traversed by rmap and vmtruncate are stable (and other, ordinary
cases are stabilized by holding mmap_sem).

This will be crucial to traversing pgd,pud,pmd without page_table_lock.  But
testing the split-out patch showed that lifting the page_table_lock is
symbiotically necessary to make this change - the lock ordering is wrong to
move those unlinks into free_pgtables while it's under ptlock.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Parent 2c0b3814
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -834,6 +834,7 @@ extern int split_vma(struct mm_struct *,
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
 	unsigned long addr, unsigned long len, pgoff_t pgoff);
 extern void exit_mmap(struct mm_struct *);
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -181,26 +181,44 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 }
 
 /*
- * Remove one vm structure and free it.
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
  */
-static void remove_vm_struct(struct vm_area_struct *vma)
+void unlink_file_vma(struct vm_area_struct *vma)
 {
 	struct file *file = vma->vm_file;
 
-	might_sleep();
 	if (file) {
 		struct address_space *mapping = file->f_mapping;
 		spin_lock(&mapping->i_mmap_lock);
 		__remove_shared_vm_struct(vma, file, mapping);
 		spin_unlock(&mapping->i_mmap_lock);
 	}
+}
+
+/*
+ * Close a vm structure and free it, returning the next.
+ */
+static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+{
+	struct vm_area_struct *next = vma->vm_next;
+
+	/*
+	 * Hide vma from rmap and vmtruncate before freeing page tables:
+	 * to be moved into free_pgtables once page_table_lock is lifted
+	 * from it, but until then lock ordering forbids that move.
+	 */
+	anon_vma_unlink(vma);
+	unlink_file_vma(vma);
+
+	might_sleep();
 	if (vma->vm_ops && vma->vm_ops->close)
 		vma->vm_ops->close(vma);
-	if (file)
-		fput(file);
-	anon_vma_unlink(vma);
+	if (vma->vm_file)
+		fput(vma->vm_file);
 	mpol_free(vma_policy(vma));
 	kmem_cache_free(vm_area_cachep, vma);
+	return next;
 }
 
 asmlinkage unsigned long sys_brk(unsigned long brk)
@@ -1612,15 +1630,13 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	do {
-		struct vm_area_struct *next = vma->vm_next;
 		long nrpages = vma_pages(vma);
 
 		mm->total_vm -= nrpages;
 		if (vma->vm_flags & VM_LOCKED)
 			mm->locked_vm -= nrpages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
-		remove_vm_struct(vma);
-		vma = next;
+		vma = remove_vma(vma);
 	} while (vma);
 	validate_mm(mm);
 }
@@ -1944,11 +1960,8 @@ void exit_mmap(struct mm_struct *mm)
 	 * Walk the list again, actually closing and freeing it
 	 * without holding any MM locks.
 	 */
-	while (vma) {
-		struct vm_area_struct *next = vma->vm_next;
-		remove_vm_struct(vma);
-		vma = next;
-	}
+	while (vma)
+		vma = remove_vma(vma);
 
 	BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
 }
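The pattern above is small but easy to get backwards, so here is a minimal user-space sketch of the two-phase teardown this patch sets up: first hide the vma from the structures through which rmap or vmtruncate could find it, then close and free it. It is an illustration only; the struct, the helpers and the list-building code are stand-ins invented for this sketch, not the kernel's vm_area_struct, anon_vma_unlink, unlink_file_vma or remove_vma themselves.

/* User-space model of the teardown pattern; all names here are
 * illustrative stand-ins, not kernel code. */
#include <stdio.h>
#include <stdlib.h>

struct vma {
	struct vma *next;
	int linked;	/* stands in for the prio_tree/anon_vma links */
};

/* Phase 1: hide the vma from lookup (the kernel's anon_vma_unlink
 * plus unlink_file_vma).  The commit message notes these calls are
 * meant to move even earlier, into free_pgtables, once lock
 * ordering allows. */
static void unlink_vma(struct vma *v)
{
	v->linked = 0;
}

/* Phase 2: close and free the vma, returning the next one so the
 * caller can simply write "while (v) v = remove_vma(v);", as the
 * patch does in remove_vma_list() and exit_mmap(). */
static struct vma *remove_vma(struct vma *v)
{
	struct vma *next = v->next;
	unlink_vma(v);		/* hide before freeing */
	free(v);
	return next;
}

int main(void)
{
	struct vma *head = NULL;
	for (int i = 0; i < 3; i++) {	/* build a short vma list */
		struct vma *v = calloc(1, sizeof(*v));
		v->linked = 1;
		v->next = head;
		head = v;
	}
	while (head)			/* the loop shape used by the patch */
		head = remove_vma(head);
	puts("teardown complete");
	return 0;
}

Returning the next vma from remove_vma is what lets remove_vma_list() and exit_mmap() collapse their walks into "vma = remove_vma(vma)", dropping the local next variable at each call site.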