提交 cab350af 编写于 作者: M Mike Kravetz 提交者: Linus Torvalds

userfaultfd: hugetlbfs: allow registration of ranges containing huge pages

Expand the userfaultfd_register/unregister routines to allow VM_HUGETLB
vmas.  Huge page alignment checking is performed after a VM_HUGETLB vma
is encountered.

Also, since there is no UFFDIO_ZEROPAGE support for huge pages, do not
return it as a valid ioctl method for huge page ranges.

Link: http://lkml.kernel.org/r/20161216144821.5183-22-aarcange@redhat.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Michael Rapoport <RAPOPORT@il.ibm.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 1a1aad8a
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/mempolicy.h> #include <linux/mempolicy.h>
#include <linux/ioctl.h> #include <linux/ioctl.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/hugetlb.h>
static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly; static struct kmem_cache *userfaultfd_ctx_cachep __read_mostly;
...@@ -1058,6 +1059,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1058,6 +1059,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
struct uffdio_register __user *user_uffdio_register; struct uffdio_register __user *user_uffdio_register;
unsigned long vm_flags, new_flags; unsigned long vm_flags, new_flags;
bool found; bool found;
bool huge_pages;
unsigned long start, end, vma_end; unsigned long start, end, vma_end;
user_uffdio_register = (struct uffdio_register __user *) arg; user_uffdio_register = (struct uffdio_register __user *) arg;
...@@ -1108,6 +1110,17 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1108,6 +1110,17 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
if (vma->vm_start >= end) if (vma->vm_start >= end)
goto out_unlock; goto out_unlock;
/*
* If the first vma contains huge pages, make sure start address
* is aligned to huge page size.
*/
if (is_vm_hugetlb_page(vma)) {
unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
if (start & (vma_hpagesize - 1))
goto out_unlock;
}
/* /*
* Search for not compatible vmas. * Search for not compatible vmas.
* *
...@@ -1116,6 +1129,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1116,6 +1129,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* on anonymous vmas). * on anonymous vmas).
*/ */
found = false; found = false;
huge_pages = false;
for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) { for (cur = vma; cur && cur->vm_start < end; cur = cur->vm_next) {
cond_resched(); cond_resched();
...@@ -1124,8 +1138,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1124,8 +1138,21 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
/* check not compatible vmas */ /* check not compatible vmas */
ret = -EINVAL; ret = -EINVAL;
if (!vma_is_anonymous(cur)) if (!vma_is_anonymous(cur) && !is_vm_hugetlb_page(cur))
goto out_unlock;
/*
* If this vma contains ending address, and huge pages
* check alignment.
*/
if (is_vm_hugetlb_page(cur) && end <= cur->vm_end &&
end > cur->vm_start) {
unsigned long vma_hpagesize = vma_kernel_pagesize(cur);
ret = -EINVAL;
if (end & (vma_hpagesize - 1))
goto out_unlock; goto out_unlock;
}
/* /*
* Check that this vma isn't already owned by a * Check that this vma isn't already owned by a
...@@ -1138,6 +1165,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1138,6 +1165,12 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
cur->vm_userfaultfd_ctx.ctx != ctx) cur->vm_userfaultfd_ctx.ctx != ctx)
goto out_unlock; goto out_unlock;
/*
* Note vmas containing huge pages
*/
if (is_vm_hugetlb_page(cur))
huge_pages = true;
found = true; found = true;
} }
BUG_ON(!found); BUG_ON(!found);
...@@ -1149,7 +1182,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1149,7 +1182,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
do { do {
cond_resched(); cond_resched();
BUG_ON(!vma_is_anonymous(vma)); BUG_ON(!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma));
BUG_ON(vma->vm_userfaultfd_ctx.ctx && BUG_ON(vma->vm_userfaultfd_ctx.ctx &&
vma->vm_userfaultfd_ctx.ctx != ctx); vma->vm_userfaultfd_ctx.ctx != ctx);
...@@ -1207,7 +1240,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, ...@@ -1207,7 +1240,8 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
* userland which ioctls methods are guaranteed to * userland which ioctls methods are guaranteed to
* succeed on this range. * succeed on this range.
*/ */
if (put_user(UFFD_API_RANGE_IOCTLS, if (put_user(huge_pages ? UFFD_API_RANGE_IOCTLS_HPAGE :
UFFD_API_RANGE_IOCTLS,
&user_uffdio_register->ioctls)) &user_uffdio_register->ioctls))
ret = -EFAULT; ret = -EFAULT;
} }
...@@ -1253,6 +1287,17 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, ...@@ -1253,6 +1287,17 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
if (vma->vm_start >= end) if (vma->vm_start >= end)
goto out_unlock; goto out_unlock;
/*
* If the first vma contains huge pages, make sure start address
* is aligned to huge page size.
*/
if (is_vm_hugetlb_page(vma)) {
unsigned long vma_hpagesize = vma_kernel_pagesize(vma);
if (start & (vma_hpagesize - 1))
goto out_unlock;
}
/* /*
* Search for not compatible vmas. * Search for not compatible vmas.
* *
...@@ -1275,7 +1320,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, ...@@ -1275,7 +1320,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
* provides for more strict behavior to notice * provides for more strict behavior to notice
* unregistration errors. * unregistration errors.
*/ */
if (!vma_is_anonymous(cur)) if (!vma_is_anonymous(cur) && !is_vm_hugetlb_page(cur))
goto out_unlock; goto out_unlock;
found = true; found = true;
...@@ -1289,7 +1334,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, ...@@ -1289,7 +1334,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
do { do {
cond_resched(); cond_resched();
BUG_ON(!vma_is_anonymous(vma)); BUG_ON(!vma_is_anonymous(vma) && !is_vm_hugetlb_page(vma));
/* /*
* Nothing to do: this vma is already registered into this * Nothing to do: this vma is already registered into this
......
...@@ -29,6 +29,9 @@ ...@@ -29,6 +29,9 @@
((__u64)1 << _UFFDIO_WAKE | \ ((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \ (__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE) (__u64)1 << _UFFDIO_ZEROPAGE)
/*
 * Range ioctls reported back by UFFDIO_REGISTER when the registered range
 * contains a hugetlb vma: the same set as UFFD_API_RANGE_IOCTLS minus
 * _UFFDIO_ZEROPAGE, since there is no UFFDIO_ZEROPAGE support for huge
 * pages.
 */
#define UFFD_API_RANGE_IOCTLS_HPAGE \
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY)
/* /*
* Valid ioctl command number range with this API is from 0x00 to * Valid ioctl command number range with this API is from 0x00 to
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部