提交 6a1ea362 编写于 作者: A Aneesh Kumar K.V 提交者: Michael Ellerman

powerpc/mm: THP is only available on hash64 as of now

Only code movement in this patch. No functionality change.
Signed-off-by: NAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: NMichael Ellerman <mpe@ellerman.id.au>
上级 c0a6c719
master alk-4.19.24 alk-4.19.30 alk-4.19.34 alk-4.19.36 alk-4.19.43 alk-4.19.48 alk-4.19.57 ck-4.19.67 ck-4.19.81 ck-4.19.91 github/fork/deepanshu1422/fix-typo-in-comment github/fork/haosdent/fix-typo linux-next v4.19.91 v4.19.90 v4.19.89 v4.19.88 v4.19.87 v4.19.86 v4.19.85 v4.19.84 v4.19.83 v4.19.82 v4.19.81 v4.19.80 v4.19.79 v4.19.78 v4.19.77 v4.19.76 v4.19.75 v4.19.74 v4.19.73 v4.19.72 v4.19.71 v4.19.70 v4.19.69 v4.19.68 v4.19.67 v4.19.66 v4.19.65 v4.19.64 v4.19.63 v4.19.62 v4.19.61 v4.19.60 v4.19.59 v4.19.58 v4.19.57 v4.19.56 v4.19.55 v4.19.54 v4.19.53 v4.19.52 v4.19.51 v4.19.50 v4.19.49 v4.19.48 v4.19.47 v4.19.46 v4.19.45 v4.19.44 v4.19.43 v4.19.42 v4.19.41 v4.19.40 v4.19.39 v4.19.38 v4.19.37 v4.19.36 v4.19.35 v4.19.34 v4.19.33 v4.19.32 v4.19.31 v4.19.30 v4.19.29 v4.19.28 v4.19.27 v4.19.26 v4.19.25 v4.19.24 v4.19.23 v4.19.22 v4.19.21 v4.19.20 v4.19.19 v4.19.18 v4.19.17 v4.19.16 v4.19.15 v4.19.14 v4.19.13 v4.19.12 v4.19.11 v4.19.10 v4.19.9 v4.19.8 v4.19.7 v4.19.6 v4.19.5 v4.19.4 v4.19.3 v4.19.2 v4.19.1 v4.19 v4.19-rc8 v4.19-rc7 v4.19-rc6 v4.19-rc5 v4.19-rc4 v4.19-rc3 v4.19-rc2 v4.19-rc1 ck-release-21 ck-release-20 ck-release-19.2 ck-release-19.1 ck-release-19 ck-release-18 ck-release-17.2 ck-release-17.1 ck-release-17 ck-release-16 ck-release-15.1 ck-release-15 ck-release-14 ck-release-13.2 ck-release-13 ck-release-12 ck-release-11 ck-release-10 ck-release-9 ck-release-7 alk-release-15 alk-release-14 alk-release-13.2 alk-release-13 alk-release-12 alk-release-11 alk-release-10 alk-release-9 alk-release-7
无相关合并请求
......@@ -777,18 +777,6 @@ static inline void vmemmap_remove_mapping(unsigned long start,
#endif
struct page *realmode_pfn_to_page(unsigned long pfn);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
extern int has_transparent_hugepage(void);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
static inline pte_t pmd_pte(pmd_t pmd)
{
return __pte(pmd_val(pmd));
......@@ -803,7 +791,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
{
return (pte_t *)pmd;
}
#define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd))
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
......@@ -830,6 +817,16 @@ static inline int pmd_protnone(pmd_t pmd)
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) pte_write(pmd_pte(pmd))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd);
extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
extern int has_transparent_hugepage(void);
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
......@@ -878,5 +875,6 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
*/
return true;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
......@@ -15,6 +15,9 @@
#include "mmu_decl.h"
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
* On hash-based CPUs, the vmemmap is bolted in the hash table.
......@@ -93,3 +96,358 @@ int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flag
smp_wmb();
return 0;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
* update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
* handled those two for us, we additionally deal with missing execute
* permission here on some processors
*/
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp, pmd_t entry, int dirty)
{
int changed;
#ifdef CONFIG_DEBUG_VM
WARN_ON(!pmd_trans_huge(*pmdp));
assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
/*
* Since we are not supporting SW TLB systems, we don't
* have any thing similar to flush_tlb_page_nohash()
*/
}
return changed;
}
unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long clr,
unsigned long set)
{
__be64 old_be, tmp;
unsigned long old;
#ifdef CONFIG_DEBUG_VM
WARN_ON(!pmd_trans_huge(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
__asm__ __volatile__(
"1: ldarx %0,0,%3\n\
and. %1,%0,%6\n\
bne- 1b \n\
andc %1,%0,%4 \n\
or %1,%1,%7\n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
: "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
"r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
: "cc" );
old = be64_to_cpu(old_be);
trace_hugepage_update(addr, old, clr, set);
if (old & H_PAGE_HASHPTE)
hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
}
pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
pmd = *pmdp;
pmd_clear(pmdp);
/*
* Wait for all pending hash_page to finish. This is needed
* in case of subpage collapse. When we collapse normal pages
* to hugepage, we first clear the pmd, then invalidate all
* the PTE entries. The assumption here is that any low level
* page fault will see a none pmd and take the slow path that
* will wait on mmap_sem. But we could very well be in a
* hash_page with local ptep pointer value. Such a hash page
* can result in adding new HPTE entries for normal subpages.
* That means we could be modifying the page content as we
* copy them to a huge page. So wait for parallel hash_page
* to finish before invalidating HPTE entries. We can do this
* by sending an IPI to all the cpus and executing a dummy
* function there.
*/
kick_all_cpus_sync();
/*
* Now invalidate the hpte entries in the range
* covered by pmd. This make sure we take a
* fault and will find the pmd as none, which will
* result in a major fault which takes mmap_sem and
* hence wait for collapse to complete. Without this
* the __collapse_huge_page_copy can result in copying
* the old content.
*/
flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
return pmd;
}
/*
* We currently remove entries from the hashtable regardless of whether
* the entry was young or dirty.
*
* We should be more intelligent about this but for the moment we override
* these functions and force a tlb flush unconditionally
*/
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}
/*
* We want to put the pgtable in pmd and use pgtable for tracking
* the base page size hptes
*/
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{
pgtable_t *pgtable_slot;
assert_spin_locked(&mm->page_table_lock);
/*
* we store the pgtable in the second half of PMD
*/
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
*pgtable_slot = pgtable;
/*
* expose the deposited pgtable to other cpus.
* before we set the hugepage PTE at pmd level
* hash fault code looks at the deposted pgtable
* to store hash index values.
*/
smp_wmb();
}
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
pgtable_t pgtable;
pgtable_t *pgtable_slot;
assert_spin_locked(&mm->page_table_lock);
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
pgtable = *pgtable_slot;
/*
* Once we withdraw, mark the entry NULL.
*/
*pgtable_slot = NULL;
/*
* We store HPTE information in the deposited PTE fragment.
* zero out the content on withdraw.
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
return pgtable;
}
void pmdp_huge_split_prepare(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
/*
* We can't mark the pmd none here, because that will cause a race
* against exit_mmap. We need to continue mark pmd TRANS HUGE, while
* we spilt, but at the same time we wan't rest of the ppc64 code
* not to insert hash pte on this, because we will be modifying
* the deposited pgtable in the caller of this function. Hence
* clear the _PAGE_USER so that we move the fault handling to
* higher level function and that will serialize against ptl.
* We need to flush existing hash pte entries here even though,
* the translation is still valid, because we will withdraw
* pgtable_t after this.
*/
pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
}
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
*/
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
/*
* We use this to invalidate a pmdp entry before switching from a
* hugepte to regular pmd entry.
*/
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
/*
* This ensures that generic code that rely on IRQ disabling
* to prevent a parallel THP split work as expected.
*/
kick_all_cpus_sync();
}
/*
* A linux hugepage PMD was changed and the corresponding hash table entries
* neesd to be flushed.
*/
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long old_pmd)
{
int ssize;
unsigned int psize;
unsigned long vsid;
unsigned long flags = 0;
const struct cpumask *tmp;
/* get the base page size,vsid and segment size */
#ifdef CONFIG_DEBUG_VM
psize = get_slice_psize(mm, addr);
BUG_ON(psize == MMU_PAGE_16M);
#endif
if (old_pmd & H_PAGE_COMBO)
psize = MMU_PAGE_4K;
else
psize = MMU_PAGE_64K;
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
vsid = get_vsid(mm->context.id, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
}
tmp = cpumask_of(smp_processor_id());
if (cpumask_equal(mm_cpumask(mm), tmp))
flags |= HPTE_LOCAL_UPDATE;
return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
}
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}
pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
unsigned long pmdv;
pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
return pmd_set_protbits(__pmd(pmdv), pgprot);
}
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
return pfn_pmd(page_to_pfn(page), pgprot);
}
pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
unsigned long pmdv;
pmdv = pmd_val(pmd);
pmdv &= _HPAGE_CHG_MASK;
return pmd_set_protbits(__pmd(pmdv), newprot);
}
/*
* This is called at the end of handling a user page fault, when the
* fault has been handled by updating a HUGE PMD entry in the linux page tables.
* We use it to preload an HPTE into the hash table corresponding to
* the updated linux HUGE PMD entry.
*/
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
return;
}
pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old_pmd;
pgtable_t pgtable;
unsigned long old;
pgtable_t *pgtable_slot;
old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
old_pmd = __pmd(old);
/*
* We have pmd == none and we are holding page_table_lock.
* So we can safely go and clear the pgtable hash
* index info.
*/
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
pgtable = *pgtable_slot;
/*
* Let's zero out old valid and hash index details
* hash fault look at them.
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
/*
* Serialize against find_linux_pte_or_hugepte which does lock-less
* lookup in page tables with local interrupts disabled. For huge pages
* it casts pmd_t to pte_t. Since format of pte_t is different from
* pmd_t we want to prevent transit from pmd pointing to page table
* to pmd pointing to huge page (and back) while interrupts are disabled.
* We clear pmd to possibly replace it with page table pointer in
* different code paths. So make sure we wait for the parallel
* find_linux_pte_or_hugepage to finish.
*/
kick_all_cpus_sync();
return old_pmd;
}
int has_transparent_hugepage(void)
{
if (!mmu_has_feature(MMU_FTR_16M_PAGE))
return 0;
/*
* We support THP only if PMD_SIZE is 16MB.
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
return 0;
/*
* We need to make sure that we support 16MB hugepage in a segement
* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
* of 64K.
*/
/*
* If we have 64K HPTE, we will be using that by default
*/
if (mmu_psize_defs[MMU_PAGE_64K].shift &&
(mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
return 0;
/*
* Ok we only have 4K HPTE
*/
if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
return 0;
return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
......@@ -55,9 +55,6 @@
#include "mmu_decl.h"
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
#ifdef CONFIG_PPC_STD_MMU_64
#if TASK_SIZE_USER64 > (1UL << (ESID_BITS + SID_SHIFT))
#error TASK_SIZE_USER64 exceeds user VSID range
......@@ -435,359 +432,3 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
}
}
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* This is called when relaxing access to a hugepage. It's also called in the page
* fault path when we don't hit any of the major fault cases, ie, a minor
* update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
* handled those two for us, we additionally deal with missing execute
* permission here on some processors
*/
int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp, pmd_t entry, int dirty)
{
int changed;
#ifdef CONFIG_DEBUG_VM
WARN_ON(!pmd_trans_huge(*pmdp));
assert_spin_locked(&vma->vm_mm->page_table_lock);
#endif
changed = !pmd_same(*(pmdp), entry);
if (changed) {
__ptep_set_access_flags(pmdp_ptep(pmdp), pmd_pte(entry));
/*
* Since we are not supporting SW TLB systems, we don't
* have any thing similar to flush_tlb_page_nohash()
*/
}
return changed;
}
unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long clr,
unsigned long set)
{
__be64 old_be, tmp;
unsigned long old;
#ifdef CONFIG_DEBUG_VM
WARN_ON(!pmd_trans_huge(*pmdp));
assert_spin_locked(&mm->page_table_lock);
#endif
__asm__ __volatile__(
"1: ldarx %0,0,%3\n\
and. %1,%0,%6\n\
bne- 1b \n\
andc %1,%0,%4 \n\
or %1,%1,%7\n\
stdcx. %1,0,%3 \n\
bne- 1b"
: "=&r" (old_be), "=&r" (tmp), "=m" (*pmdp)
: "r" (pmdp), "r" (cpu_to_be64(clr)), "m" (*pmdp),
"r" (cpu_to_be64(H_PAGE_BUSY)), "r" (cpu_to_be64(set))
: "cc" );
old = be64_to_cpu(old_be);
trace_hugepage_update(addr, old, clr, set);
if (old & H_PAGE_HASHPTE)
hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
}
pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_t pmd;
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(pmd_trans_huge(*pmdp));
pmd = *pmdp;
pmd_clear(pmdp);
/*
* Wait for all pending hash_page to finish. This is needed
* in case of subpage collapse. When we collapse normal pages
* to hugepage, we first clear the pmd, then invalidate all
* the PTE entries. The assumption here is that any low level
* page fault will see a none pmd and take the slow path that
* will wait on mmap_sem. But we could very well be in a
* hash_page with local ptep pointer value. Such a hash page
* can result in adding new HPTE entries for normal subpages.
* That means we could be modifying the page content as we
* copy them to a huge page. So wait for parallel hash_page
* to finish before invalidating HPTE entries. We can do this
* by sending an IPI to all the cpus and executing a dummy
* function there.
*/
kick_all_cpus_sync();
/*
* Now invalidate the hpte entries in the range
* covered by pmd. This make sure we take a
* fault and will find the pmd as none, which will
* result in a major fault which takes mmap_sem and
* hence wait for collapse to complete. Without this
* the __collapse_huge_page_copy can result in copying
* the old content.
*/
flush_tlb_pmd_range(vma->vm_mm, &pmd, address);
return pmd;
}
/*
* We currently remove entries from the hashtable regardless of whether
* the entry was young or dirty.
*
* We should be more intelligent about this but for the moment we override
* these functions and force a tlb flush unconditionally
*/
int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
return __pmdp_test_and_clear_young(vma->vm_mm, address, pmdp);
}
/*
* We want to put the pgtable in pmd and use pgtable for tracking
* the base page size hptes
*/
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{
pgtable_t *pgtable_slot;
assert_spin_locked(&mm->page_table_lock);
/*
* we store the pgtable in the second half of PMD
*/
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
*pgtable_slot = pgtable;
/*
* expose the deposited pgtable to other cpus.
* before we set the hugepage PTE at pmd level
* hash fault code looks at the deposted pgtable
* to store hash index values.
*/
smp_wmb();
}
pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
pgtable_t pgtable;
pgtable_t *pgtable_slot;
assert_spin_locked(&mm->page_table_lock);
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
pgtable = *pgtable_slot;
/*
* Once we withdraw, mark the entry NULL.
*/
*pgtable_slot = NULL;
/*
* We store HPTE information in the deposited PTE fragment.
* zero out the content on withdraw.
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
return pgtable;
}
void pmdp_huge_split_prepare(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp)
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
VM_BUG_ON(REGION_ID(address) != USER_REGION_ID);
/*
* We can't mark the pmd none here, because that will cause a race
* against exit_mmap. We need to continue mark pmd TRANS HUGE, while
* we spilt, but at the same time we wan't rest of the ppc64 code
* not to insert hash pte on this, because we will be modifying
* the deposited pgtable in the caller of this function. Hence
* clear the _PAGE_USER so that we move the fault handling to
* higher level function and that will serialize against ptl.
* We need to flush existing hash pte entries here even though,
* the translation is still valid, because we will withdraw
* pgtable_t after this.
*/
pmd_hugepage_update(vma->vm_mm, address, pmdp, 0, _PAGE_PRIVILEGED);
}
/*
* set a new huge pmd. We should not be called for updating
* an existing pmd entry. That should go via pmd_hugepage_update.
*/
void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
/*
* We use this to invalidate a pmdp entry before switching from a
* hugepte to regular pmd entry.
*/
void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
/*
* This ensures that generic code that rely on IRQ disabling
* to prevent a parallel THP split work as expected.
*/
kick_all_cpus_sync();
}
/*
* A linux hugepage PMD was changed and the corresponding hash table entries
* neesd to be flushed.
*/
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long old_pmd)
{
int ssize;
unsigned int psize;
unsigned long vsid;
unsigned long flags = 0;
const struct cpumask *tmp;
/* get the base page size,vsid and segment size */
#ifdef CONFIG_DEBUG_VM
psize = get_slice_psize(mm, addr);
BUG_ON(psize == MMU_PAGE_16M);
#endif
if (old_pmd & H_PAGE_COMBO)
psize = MMU_PAGE_4K;
else
psize = MMU_PAGE_64K;
if (!is_kernel_addr(addr)) {
ssize = user_segment_size(addr);
vsid = get_vsid(mm->context.id, addr, ssize);
WARN_ON(vsid == 0);
} else {
vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
ssize = mmu_kernel_ssize;
}
tmp = cpumask_of(smp_processor_id());
if (cpumask_equal(mm_cpumask(mm), tmp))
flags |= HPTE_LOCAL_UPDATE;
return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags);
}
static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
{
return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
}
pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
{
unsigned long pmdv;
pmdv = (pfn << PAGE_SHIFT) & PTE_RPN_MASK;
return pmd_set_protbits(__pmd(pmdv), pgprot);
}
pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
{
return pfn_pmd(page_to_pfn(page), pgprot);
}
pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
unsigned long pmdv;
pmdv = pmd_val(pmd);
pmdv &= _HPAGE_CHG_MASK;
return pmd_set_protbits(__pmd(pmdv), newprot);
}
/*
* This is called at the end of handling a user page fault, when the
* fault has been handled by updating a HUGE PMD entry in the linux page tables.
* We use it to preload an HPTE into the hash table corresponding to
* the updated linux HUGE PMD entry.
*/
void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd)
{
return;
}
pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
{
pmd_t old_pmd;
pgtable_t pgtable;
unsigned long old;
pgtable_t *pgtable_slot;
old = pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);
old_pmd = __pmd(old);
/*
* We have pmd == none and we are holding page_table_lock.
* So we can safely go and clear the pgtable hash
* index info.
*/
pgtable_slot = (pgtable_t *)pmdp + PTRS_PER_PMD;
pgtable = *pgtable_slot;
/*
* Let's zero out old valid and hash index details
* hash fault look at them.
*/
memset(pgtable, 0, PTE_FRAG_SIZE);
/*
* Serialize against find_linux_pte_or_hugepte which does lock-less
* lookup in page tables with local interrupts disabled. For huge pages
* it casts pmd_t to pte_t. Since format of pte_t is different from
* pmd_t we want to prevent transit from pmd pointing to page table
* to pmd pointing to huge page (and back) while interrupts are disabled.
* We clear pmd to possibly replace it with page table pointer in
* different code paths. So make sure we wait for the parallel
* find_linux_pte_or_hugepage to finish.
*/
kick_all_cpus_sync();
return old_pmd;
}
int has_transparent_hugepage(void)
{
if (!mmu_has_feature(MMU_FTR_16M_PAGE))
return 0;
/*
* We support THP only if PMD_SIZE is 16MB.
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift != PMD_SHIFT)
return 0;
/*
* We need to make sure that we support 16MB hugepage in a segement
* with base page size 64K or 4K. We only enable THP with a PAGE_SIZE
* of 64K.
*/
/*
* If we have 64K HPTE, we will be using that by default
*/
if (mmu_psize_defs[MMU_PAGE_64K].shift &&
(mmu_psize_defs[MMU_PAGE_64K].penc[MMU_PAGE_16M] == -1))
return 0;
/*
* Ok we only have 4K HPTE
*/
if (mmu_psize_defs[MMU_PAGE_4K].penc[MMU_PAGE_16M] == -1)
return 0;
return 1;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
反馈
建议
客服 返回
顶部