diff --git a/mm/madvise.c b/mm/madvise.c
index 603c5257ed6efecaadde1e4047ec60ff1f22dfaa..e75096b5a6d3eb4bb8eb4875d5f4bf9fd167c80e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -11,6 +11,24 @@
 #include <linux/mempolicy.h>
 #include <linux/hugetlb.h>
 
+/*
+ * Any behaviour which results in changes to the vma->vm_flags needs to
+ * take mmap_sem for writing. Others, which simply traverse vmas, need
+ * to only take it for reading.
+ */
+static int madvise_need_mmap_write(int behavior)
+{
+	switch (behavior) {
+	case MADV_REMOVE:
+	case MADV_WILLNEED:
+	case MADV_DONTNEED:
+		return 0;
+	default:
+		/* be safe, default to 1. list exceptions explicitly */
+		return 1;
+	}
+}
+
 /*
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
@@ -183,9 +201,9 @@ static long madvise_remove(struct vm_area_struct *vma,
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
 	/* vmtruncate_range needs to take i_mutex and i_alloc_sem */
-	up_write(&current->mm->mmap_sem);
+	up_read(&current->mm->mmap_sem);
 	error = vmtruncate_range(mapping->host, offset, endoff);
-	down_write(&current->mm->mmap_sem);
+	down_read(&current->mm->mmap_sem);
 	return error;
 }
 
@@ -270,7 +288,10 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 	int error = -EINVAL;
 	size_t len;
 
-	down_write(&current->mm->mmap_sem);
+	if (madvise_need_mmap_write(behavior))
+		down_write(&current->mm->mmap_sem);
+	else
+		down_read(&current->mm->mmap_sem);
 
 	if (start & ~PAGE_MASK)
 		goto out;
@@ -332,6 +353,10 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
 		vma = find_vma(current->mm, start);
 	}
 out:
-	up_write(&current->mm->mmap_sem);
+	if (madvise_need_mmap_write(behavior))
+		up_write(&current->mm->mmap_sem);
+	else
+		up_read(&current->mm->mmap_sem);
+
 	return error;
 }