diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b11351579e7ae7cbb416b4da177b7ab91bd42229..b54559058d30683ade94518497ae5ac71d7ed34f 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2373,6 +2373,34 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) return !(vma->vm_flags & VM_NO_THP); } +/* + * If mmap_sem temporarily dropped, revalidate vma + * before taking mmap_sem. + * Return 0 if succeeds, otherwise return none-zero + * value (scan code). + */ + +static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address) +{ + struct vm_area_struct *vma; + unsigned long hstart, hend; + + if (unlikely(khugepaged_test_exit(mm))) + return SCAN_ANY_PROCESS; + + vma = find_vma(mm, address); + if (!vma) + return SCAN_VMA_NULL; + + hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; + hend = vma->vm_end & HPAGE_PMD_MASK; + if (address < hstart || address + HPAGE_PMD_SIZE > hend) + return SCAN_ADDRESS_RANGE; + if (!hugepage_vma_check(vma)) + return SCAN_VMA_CHECK; + return 0; +} + /* * Bring missing pages in from swap, to complete THP collapse. * Only done if khugepaged_scan_pmd believes it is worthwhile. @@ -2381,7 +2409,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma) * but with mmap_sem held to protect against vma changes. */ -static void __collapse_huge_page_swapin(struct mm_struct *mm, +static bool __collapse_huge_page_swapin(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pmd_t *pmd) { @@ -2397,11 +2425,18 @@ static void __collapse_huge_page_swapin(struct mm_struct *mm, continue; swapped_in++; ret = do_swap_page(mm, vma, _address, pte, pmd, - FAULT_FLAG_ALLOW_RETRY|FAULT_FLAG_RETRY_NOWAIT, + FAULT_FLAG_ALLOW_RETRY, pteval); + /* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */ + if (ret & VM_FAULT_RETRY) { + down_read(&mm->mmap_sem); + /* vma is no longer available, don't continue to swapin */ + if (hugepage_vma_revalidate(mm, address)) + return false; + } if (ret & VM_FAULT_ERROR) { trace_mm_collapse_huge_page_swapin(mm, swapped_in, 0); - return; + return false; } /* pte is unmapped now, we need to map it */ pte = pte_offset_map(pmd, _address); @@ -2409,6 +2444,7 @@ static void __collapse_huge_page_swapin(struct mm_struct *mm, pte--; pte_unmap(pte); trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1); + return true; } static void collapse_huge_page(struct mm_struct *mm, @@ -2423,7 +2459,6 @@ static void collapse_huge_page(struct mm_struct *mm, struct page *new_page; spinlock_t *pmd_ptl, *pte_ptl; int isolated = 0, result = 0; - unsigned long hstart, hend; struct mem_cgroup *memcg; unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */ @@ -2446,39 +2481,37 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - /* - * Prevent all access to pagetables with the exception of - * gup_fast later hanlded by the ptep_clear_flush and the VM - * handled by the anon_vma lock + PG_lock. - */ - down_write(&mm->mmap_sem); - if (unlikely(khugepaged_test_exit(mm))) { - result = SCAN_ANY_PROCESS; + down_read(&mm->mmap_sem); + result = hugepage_vma_revalidate(mm, address); + if (result) goto out; - } - vma = find_vma(mm, address); - if (!vma) { - result = SCAN_VMA_NULL; - goto out; - } - hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; - hend = vma->vm_end & HPAGE_PMD_MASK; - if (address < hstart || address + HPAGE_PMD_SIZE > hend) { - result = SCAN_ADDRESS_RANGE; - goto out; - } - if (!hugepage_vma_check(vma)) { - result = SCAN_VMA_CHECK; - goto out; - } pmd = mm_find_pmd(mm, address); if (!pmd) { result = SCAN_PMD_NULL; goto out; } - __collapse_huge_page_swapin(mm, vma, address, pmd); + /* + * __collapse_huge_page_swapin always returns with mmap_sem locked. + * If it fails, release mmap_sem and jump directly out. + * Continuing to collapse causes inconsistency. + */ + if (!__collapse_huge_page_swapin(mm, vma, address, pmd)) { + up_read(&mm->mmap_sem); + goto out; + } + + up_read(&mm->mmap_sem); + /* + * Prevent all access to pagetables with the exception of + * gup_fast later handled by the ptep_clear_flush and the VM + * handled by the anon_vma lock + PG_lock. + */ + down_write(&mm->mmap_sem); + result = hugepage_vma_revalidate(mm, address); + if (result) + goto out; anon_vma_lock_write(vma->anon_vma);