diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 197ced1eaaa0932d59d214a469e25f53b79fa483..2d9df40446f62dfabdaa68aab4b285b0010f684e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -101,8 +101,6 @@ extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
 extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
					     pgtable_t pgtable);
 extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
-					  unsigned long address, pmd_t *pmdp);
 extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long addr, pmd_t *pmdp);
 extern int hash__has_transparent_hugepage(void);
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 8d40cf03cb67d10a691bc2d0b92071dbccc0c8e8..cb46d1034f335e16039d4ed47a4b8cd2d48dc43f 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -203,8 +203,6 @@ extern pmd_t hash__pmdp_collapse_flush(struct vm_area_struct *vma,
 extern void hash__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
					     pgtable_t pgtable);
 extern pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-extern void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
-					  unsigned long address, pmd_t *pmdp);
 extern pmd_t hash__pmdp_huge_get_and_clear(struct mm_struct *mm,
					    unsigned long addr, pmd_t *pmdp);
 extern int hash__has_transparent_hugepage(void);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ee19d5bbee06adac1edd3fd41c8bc74cbe68e36b..6ca1208cedcb7c9f65a138bf18b61975342aae98 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1140,15 +1140,6 @@ static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
 extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			     pmd_t *pmdp);
 
-#define __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
-static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
-					   unsigned long address, pmd_t *pmdp)
-{
-	if (radix_enabled())
-		return radix__pmdp_huge_split_prepare(vma, address, pmdp);
-	return hash__pmdp_huge_split_prepare(vma, address, pmdp);
-}
-
 #define pmd_move_must_withdraw pmd_move_must_withdraw
 struct spinlock;
 static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 19c44e1495aefe8b1170385f87e0877dedb92341..365010f665708495243cce6a607f5286c24716f9 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -269,12 +269,6 @@ static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
		return __pmd(pmd_val(pmd) | _PAGE_PTE | R_PAGE_LARGE);
	return __pmd(pmd_val(pmd) | _PAGE_PTE);
 }
-static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
-					  unsigned long address, pmd_t *pmdp)
-{
-	/* Nothing to do for radix. */
-	return;
-}
 
 extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
						 pmd_t *pmdp, unsigned long clr,
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c
index ec277913e01b4e06b1034531818b96c253004863..469808e77e58b271e716d54241a016cb711f0836 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/pgtable-hash64.c
@@ -296,28 +296,6 @@ pgtable_t hash__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
	return pgtable;
 }
 
-void hash__pmdp_huge_split_prepare(struct vm_area_struct *vma,
-				   unsigned long address, pmd_t *pmdp)
-{
-	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-	VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
-	VM_BUG_ON(pmd_devmap(*pmdp));
-
-	/*
-	 * We can't mark the pmd none here, because that will cause a race
-	 * against exit_mmap. We need to continue mark pmd TRANS HUGE, while
-	 * we spilt, but at the same time we wan't rest of the ppc64 code
-	 * not to insert hash pte on this, because we will be modifying
-	 * the deposited pgtable in the caller of this function. Hence
-	 * clear the _PAGE_USER so that we move the fault handling to
-	 * higher level function and that will serialize against ptl.
-	 * We need to flush existing hash pte entries here even though,
-	 * the translation is still valid, because we will withdraw
-	 * pgtable_t after this.
-	 */
-	pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
-}
-
 /*
  * A linux hugepage PMD was changed and the corresponding hash table entries
  * neesd to be flushed.
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 51eebd7546b296d988b0dc367f075e384dd63d92..2cfa3075d148b60196c2d63ede83ea4e182ab1fe 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -329,14 +329,6 @@ extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
			     pmd_t *pmdp);
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_HUGE_SPLIT_PREPARE
-static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
-					   unsigned long address, pmd_t *pmdp)
-{
-
-}
-#endif
-
 #ifndef __HAVE_ARCH_PTE_SAME
 static inline int pte_same(pte_t pte_a, pte_t pte_b)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2a79a6b7d19b6df1d8eed199c74e3d20df54be94..87ab9b8f56b53dae6875e19533b5e1e171d2533c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2063,7 +2063,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	pgtable_t pgtable;
-	pmd_t old, _pmd;
+	pmd_t old_pmd, _pmd;
	bool young, write, soft_dirty, pmd_migration = false;
	unsigned long addr;
	int i;
@@ -2106,23 +2106,50 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
		return __split_huge_zero_page_pmd(vma, haddr, pmd);
	}
 
+	/*
+	 * Up to this point the pmd is present and huge and userland has the
+	 * whole access to the hugepage during the split (which happens in
+	 * place). If we overwrite the pmd with the not-huge version pointing
+	 * to the pte here (which of course we could if all CPUs were bug
+	 * free), userland could trigger a small page size TLB miss on the
+	 * small sized TLB while the hugepage TLB entry is still established in
+	 * the huge TLB. Some CPU doesn't like that.
+	 * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
+	 * 383 on page 93. Intel should be safe but is also warns that it's
+	 * only safe if the permission and cache attributes of the two entries
+	 * loaded in the two TLB is identical (which should be the case here).
+	 * But it is generally safer to never allow small and huge TLB entries
+	 * for the same virtual address to be loaded simultaneously. So instead
+	 * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
+	 * current pmd notpresent (atomically because here the pmd_trans_huge
+	 * must remain set at all times on the pmd until the split is complete
+	 * for this pmd), then we flush the SMP TLB and finally we write the
+	 * non-huge version of the pmd entry with pmd_populate.
+	 */
+	old_pmd = pmdp_invalidate(vma, haddr, pmd);
+
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
-	pmd_migration = is_pmd_migration_entry(*pmd);
+	pmd_migration = is_pmd_migration_entry(old_pmd);
	if (pmd_migration) {
		swp_entry_t entry;
 
-		entry = pmd_to_swp_entry(*pmd);
+		entry = pmd_to_swp_entry(old_pmd);
		page = pfn_to_page(swp_offset(entry));
	} else
 #endif
-		page = pmd_page(*pmd);
+		page = pmd_page(old_pmd);
	VM_BUG_ON_PAGE(!page_count(page), page);
	page_ref_add(page, HPAGE_PMD_NR - 1);
-	write = pmd_write(*pmd);
-	young = pmd_young(*pmd);
-	soft_dirty = pmd_soft_dirty(*pmd);
+	if (pmd_dirty(old_pmd))
+		SetPageDirty(page);
+	write = pmd_write(old_pmd);
+	young = pmd_young(old_pmd);
+	soft_dirty = pmd_soft_dirty(old_pmd);
 
-	pmdp_huge_split_prepare(vma, haddr, pmd);
+	/*
+	 * Withdraw the table only after we mark the pmd entry invalid.
+	 * This's critical for some architectures (Power).
+	 */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	pmd_populate(mm, &_pmd, pgtable);
 
@@ -2176,35 +2203,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
	}
	smp_wmb(); /* make pte visible before pmd */
 
-	/*
-	 * Up to this point the pmd is present and huge and userland has the
-	 * whole access to the hugepage during the split (which happens in
-	 * place). If we overwrite the pmd with the not-huge version pointing
-	 * to the pte here (which of course we could if all CPUs were bug
-	 * free), userland could trigger a small page size TLB miss on the
-	 * small sized TLB while the hugepage TLB entry is still established in
-	 * the huge TLB. Some CPU doesn't like that.
-	 * See http://support.amd.com/us/Processor_TechDocs/41322.pdf, Erratum
-	 * 383 on page 93. Intel should be safe but is also warns that it's
-	 * only safe if the permission and cache attributes of the two entries
-	 * loaded in the two TLB is identical (which should be the case here).
-	 * But it is generally safer to never allow small and huge TLB entries
-	 * for the same virtual address to be loaded simultaneously. So instead
-	 * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
-	 * current pmd notpresent (atomically because here the pmd_trans_huge
-	 * must remain set at all times on the pmd until the split is complete
-	 * for this pmd), then we flush the SMP TLB and finally we write the
-	 * non-huge version of the pmd entry with pmd_populate.
-	 */
-	old = pmdp_invalidate(vma, haddr, pmd);
-
-	/*
-	 * Transfer dirty bit using value returned by pmd_invalidate() to be
-	 * sure we don't race with CPU that can set the bit under us.
-	 */
-	if (pmd_dirty(old))
-		SetPageDirty(page);
-
	pmd_populate(mm, pmd, pgtable);
 
	if (freeze) {
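
For reference, the sketch below (not part of the patch) illustrates the ordering that __split_huge_pmd_locked() relies on after this change: the huge pmd is invalidated first, state is read from the returned old pmd, the deposited page table is withdrawn only afterwards, and the non-huge pmd is written last. Helper names are the ones used in the diff above; the pte fill loop, locking and THP migration handling are omitted, so this is a simplified illustration rather than the actual kernel function.

/*
 * Simplified sketch of the post-patch ordering in __split_huge_pmd_locked().
 * Illustration only: locking, the pte fill loop and THP migration handling
 * are left out.
 */
#include <linux/mm.h>
#include <asm/pgtable.h>

static void split_ordering_sketch(struct vm_area_struct *vma,
				  unsigned long haddr, pmd_t *pmd)
{
	struct mm_struct *mm = vma->vm_mm;
	pgtable_t pgtable;
	pmd_t old_pmd, _pmd;

	/* 1. Atomically mark the huge pmd invalid and flush the TLB. */
	old_pmd = pmdp_invalidate(vma, haddr, pmd);

	/* 2. Dirty/young/write state is taken from the returned old pmd. */
	if (pmd_dirty(old_pmd))
		SetPageDirty(pmd_page(old_pmd));

	/* 3. Only now withdraw the deposited page table (critical on Power). */
	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
	pmd_populate(mm, &_pmd, pgtable);

	/* ... fill the ptes under _pmd using the bits saved from old_pmd ... */

	/* 4. Finally install the non-huge pmd pointing at the pte page. */
	smp_wmb();	/* make ptes visible before the pmd */
	pmd_populate(mm, pmd, pgtable);
}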