From e32905e57358fdfb82f9de024534f205b3af7dac Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Sat, 22 May 2021 17:42:11 -0700 Subject: [PATCH] userfaultfd: hugetlbfs: fix new flag usage in error path In commit d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags") the use of PagePrivate to indicate a reservation count should be restored at free time was changed to the hugetlb specific flag HPageRestoreReserve. Changes to a userfaultfd error path as well as a VM_BUG_ON() in remove_inode_hugepages() were overlooked. Users could see incorrect hugetlb reserve counts if they experience an error with a UFFDIO_COPY operation. Specifically, this would be the result of an unlikely copy_huge_page_from_user error. There is not an increased chance of hitting the VM_BUG_ON. Link: https://lkml.kernel.org/r/20210521233952.236434-1-mike.kravetz@oracle.com Fixes: d6995da31122 ("hugetlb: use page.private for hugetlb specific page flags") Signed-off-by: Mike Kravetz Reviewed-by: Mina Almasry Cc: Oscar Salvador Cc: Michal Hocko Cc: Muchun Song Cc: Naoya Horiguchi Cc: David Hildenbrand Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mina Almasry Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- mm/userfaultfd.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 9d9e0097c1d3..55efd3dd04f6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -529,7 +529,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, * the subpool and global reserve usage count can need * to be adjusted. */ - VM_BUG_ON(PagePrivate(page)); + VM_BUG_ON(HPageRestoreReserve(page)); remove_huge_page(page); freed++; if (!truncate_op) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index e14b3820c6a8..63a73e164d55 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -360,38 +360,38 @@ static __always_inline ssize_t __mcopy_atomic_hugetlb(struct mm_struct *dst_mm, * If a reservation for the page existed in the reservation * map of a private mapping, the map was modified to indicate * the reservation was consumed when the page was allocated. - * We clear the PagePrivate flag now so that the global + * We clear the HPageRestoreReserve flag now so that the global * reserve count will not be incremented in free_huge_page. * The reservation map will still indicate the reservation * was consumed and possibly prevent later page allocation. * This is better than leaking a global reservation. If no - * reservation existed, it is still safe to clear PagePrivate - * as no adjustments to reservation counts were made during - * allocation. + * reservation existed, it is still safe to clear + * HPageRestoreReserve as no adjustments to reservation counts + * were made during allocation. * * The reservation map for shared mappings indicates which * pages have reservations. When a huge page is allocated * for an address with a reservation, no change is made to - * the reserve map. In this case PagePrivate will be set - * to indicate that the global reservation count should be + * the reserve map. In this case HPageRestoreReserve will be + * set to indicate that the global reservation count should be * incremented when the page is freed. This is the desired * behavior. However, when a huge page is allocated for an * address without a reservation a reservation entry is added - * to the reservation map, and PagePrivate will not be set. - * When the page is freed, the global reserve count will NOT - * be incremented and it will appear as though we have leaked - * reserved page. In this case, set PagePrivate so that the - * global reserve count will be incremented to match the - * reservation map entry which was created. + * to the reservation map, and HPageRestoreReserve will not be + * set. When the page is freed, the global reserve count will + * NOT be incremented and it will appear as though we have + * leaked reserved page. In this case, set HPageRestoreReserve + * so that the global reserve count will be incremented to + * match the reservation map entry which was created. * * Note that vm_alloc_shared is based on the flags of the vma * for which the page was originally allocated. dst_vma could * be different or NULL on error. */ if (vm_alloc_shared) - SetPagePrivate(page); + SetHPageRestoreReserve(page); else - ClearPagePrivate(page); + ClearHPageRestoreReserve(page); put_page(page); } BUG_ON(copied < 0); -- GitLab