提交 bde3eb62 编写于 作者: A Aneesh Kumar K.V 提交者: Michael Ellerman

powerpc/mm/radix: Add radix THP callbacks

The deposited pgtable_t is a pte fragment, hence we cannot use page->lru
for linking them together. We use the first two 64-bit words of the pte
fragment as a list_head to link all deposited fragments together. On
withdraw we properly zero them out.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
上级 3df33f12
...@@ -71,6 +71,8 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) ...@@ -71,6 +71,8 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
static inline pmd_t pmd_mkhuge(pmd_t pmd) static inline pmd_t pmd_mkhuge(pmd_t pmd)
{ {
if (radix_enabled())
return radix__pmd_mkhuge(pmd);
return hash__pmd_mkhuge(pmd); return hash__pmd_mkhuge(pmd);
} }
......
...@@ -827,6 +827,8 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, ...@@ -827,6 +827,8 @@ extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
extern int hash__has_transparent_hugepage(void); extern int hash__has_transparent_hugepage(void);
static inline int has_transparent_hugepage(void) static inline int has_transparent_hugepage(void)
{ {
if (radix_enabled())
return radix__has_transparent_hugepage();
return hash__has_transparent_hugepage(); return hash__has_transparent_hugepage();
} }
...@@ -834,6 +836,8 @@ static inline unsigned long ...@@ -834,6 +836,8 @@ static inline unsigned long
pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp,
unsigned long clr, unsigned long set) unsigned long clr, unsigned long set)
{ {
if (radix_enabled())
return radix__pmd_hugepage_update(mm, addr, pmdp, clr, set);
return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set); return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
} }
...@@ -885,12 +889,16 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, ...@@ -885,12 +889,16 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp) unsigned long addr, pmd_t *pmdp)
{ {
if (radix_enabled())
return radix__pmdp_huge_get_and_clear(mm, addr, pmdp);
return hash__pmdp_huge_get_and_clear(mm, addr, pmdp); return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
} }
static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp) unsigned long address, pmd_t *pmdp)
{ {
if (radix_enabled())
return radix__pmdp_collapse_flush(vma, address, pmdp);
return hash__pmdp_collapse_flush(vma, address, pmdp); return hash__pmdp_collapse_flush(vma, address, pmdp);
} }
#define pmdp_collapse_flush pmdp_collapse_flush #define pmdp_collapse_flush pmdp_collapse_flush
...@@ -899,6 +907,8 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, ...@@ -899,6 +907,8 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
static inline void pgtable_trans_huge_deposit(struct mm_struct *mm, static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
pmd_t *pmdp, pgtable_t pgtable) pmd_t *pmdp, pgtable_t pgtable)
{ {
if (radix_enabled())
return radix__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable); return hash__pgtable_trans_huge_deposit(mm, pmdp, pgtable);
} }
...@@ -906,6 +916,8 @@ static inline void pgtable_trans_huge_deposit(struct mm_struct *mm, ...@@ -906,6 +916,8 @@ static inline void pgtable_trans_huge_deposit(struct mm_struct *mm,
static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, static inline pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm,
pmd_t *pmdp) pmd_t *pmdp)
{ {
if (radix_enabled())
return radix__pgtable_trans_huge_withdraw(mm, pmdp);
return hash__pgtable_trans_huge_withdraw(mm, pmdp); return hash__pgtable_trans_huge_withdraw(mm, pmdp);
} }
...@@ -917,6 +929,8 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, ...@@ -917,6 +929,8 @@ extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma, static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp) unsigned long address, pmd_t *pmdp)
{ {
if (radix_enabled())
return radix__pmdp_huge_split_prepare(vma, address, pmdp);
return hash__pmdp_huge_split_prepare(vma, address, pmdp); return hash__pmdp_huge_split_prepare(vma, address, pmdp);
} }
...@@ -925,6 +939,8 @@ struct spinlock; ...@@ -925,6 +939,8 @@ struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
struct spinlock *old_pmd_ptl) struct spinlock *old_pmd_ptl)
{ {
if (radix_enabled())
return false;
/* /*
* Archs like ppc64 use pgtable to store per pmd * Archs like ppc64 use pgtable to store per pmd
* specific information. So when we switch the pmd, * specific information. So when we switch the pmd,
......
...@@ -196,6 +196,28 @@ static inline int radix__pmd_trans_huge(pmd_t pmd) ...@@ -196,6 +196,28 @@ static inline int radix__pmd_trans_huge(pmd_t pmd)
return !!(pmd_val(pmd) & _PAGE_PTE); return !!(pmd_val(pmd) & _PAGE_PTE);
} }
/*
 * Return a copy of @pmd with _PAGE_PTE set. The input entry is passed by
 * value and is not modified.
 */
static inline pmd_t radix__pmd_mkhuge(pmd_t pmd)
{
	return __pmd(_PAGE_PTE | pmd_val(pmd));
}
/*
 * Hook invoked before a huge PMD is split. Radix needs no preparation,
 * so this is intentionally an empty stub; all arguments are unused.
 */
static inline void radix__pmdp_huge_split_prepare(struct vm_area_struct *vma,
						  unsigned long address,
						  pmd_t *pmdp)
{
}
/*
 * Out-of-line radix helpers for transparent huge pages. These are only
 * declared here; the generic pmd/pgtable wrappers dispatch to them when
 * radix_enabled() is true.
 */
extern unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, unsigned long clr,
unsigned long set);
extern pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
extern void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable);
extern pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
extern pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp);
extern int radix__has_transparent_hugepage(void);
#endif #endif
extern int __meminit radix__vmemmap_create_mapping(unsigned long start, extern int __meminit radix__vmemmap_create_mapping(unsigned long start,
......
...@@ -69,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, ...@@ -69,7 +69,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp) pmd_t *pmdp)
{ {
pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0); pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/* /*
* This ensures that generic code that rely on IRQ disabling * This ensures that generic code that rely on IRQ disabling
* to prevent a parallel THP split work as expected. * to prevent a parallel THP split work as expected.
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
#include <asm/mmu.h> #include <asm/mmu.h>
#include <asm/firmware.h> #include <asm/firmware.h>
#include <trace/events/thp.h>
static int native_update_partition_table(u64 patb1) static int native_update_partition_table(u64 patb1)
{ {
partition_tb->patb1 = cpu_to_be64(patb1); partition_tb->patb1 = cpu_to_be64(patb1);
...@@ -407,3 +409,118 @@ void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size) ...@@ -407,3 +409,118 @@ void radix__vmemmap_remove_mapping(unsigned long start, unsigned long page_size)
} }
#endif #endif
#endif #endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Update a huge PMD entry through radix__pte_update().
 *
 * @mm:   address space that owns the entry
 * @addr: virtual address the entry maps
 * @pmdp: pointer to the PMD entry, handed to radix__pte_update() as a pte_t *
 * @clr:  bit mask passed to radix__pte_update() as the bits to clear
 * @set:  bit mask passed to radix__pte_update() as the bits to set
 *
 * Returns the value reported by radix__pte_update() (the old entry), after
 * emitting the hugepage_update trace event.
 */
unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
					 pmd_t *pmdp, unsigned long clr,
					 unsigned long set)
{
	unsigned long old;

#ifdef CONFIG_DEBUG_VM
	/* Only huge PMD entries may be updated through this path. */
	WARN_ON(!radix__pmd_trans_huge(*pmdp));
	/*
	 * Assert on the lock that actually covers this PMD, the same way the
	 * deposit/withdraw helpers do, instead of hard-coding
	 * mm->page_table_lock.
	 */
	assert_spin_locked(pmd_lockptr(mm, pmdp));
#endif

	/* NOTE(review): the trailing 1 presumably marks a huge mapping - confirm. */
	old = radix__pte_update(mm, addr, (pte_t *)pmdp, clr, set, 1);
	trace_hugepage_update(addr, old, clr, set);

	return old;
}
/*
 * Clear a PMD that is not a huge mapping and flush the huge-page sized
 * range starting at @address. Per the original note, khugepaged calls this
 * for a normal pmd.
 *
 * @address must be HPAGE_PMD_SIZE aligned and *@pmdp must not be a huge
 * entry; both are checked with VM_BUG_ON().
 *
 * Returns the PMD value that was present before it was cleared.
 */
pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address,
				 pmd_t *pmdp)
{
	pmd_t old_pmd;

	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
	VM_BUG_ON(radix__pmd_trans_huge(*pmdp));

	/* Snapshot the entry before wiping it so it can be handed back. */
	old_pmd = *pmdp;
	pmd_clear(pmdp);

	/* FIXME: verify whether this kick is actually needed. */
	kick_all_cpus_sync();
	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);

	return old_pmd;
}
/*
 * For us pgtable_t is a pte_t *. In order to save the deposited page
 * table, the start of the allocated page table is treated as a
 * struct list_head. On withdraw the memory used for that list_head must
 * be zeroed again.
 *
 * The caller must hold the lock returned by pmd_lockptr(mm, pmdp). After
 * this call pmd_huge_pte(mm, pmdp) points at @pgtable.
 */
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
				       pgtable_t pgtable)
{
	struct list_head *node = (struct list_head *) pgtable;
	pgtable_t head;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	head = pmd_huge_pte(mm, pmdp);
	if (head) {
		/* Something is already deposited: link the new table to it. */
		list_add(node, (struct list_head *) head);
	} else {
		/* First deposit: start a list containing only this table. */
		INIT_LIST_HEAD(node);
	}

	pmd_huge_pte(mm, pmdp) = pgtable;
}
/*
 * Take back the page table currently referenced by pmd_huge_pte(mm, pmdp).
 *
 * The table is unlinked from the deposit list, and its first two pte
 * slots, which were used as the list linkage while deposited, are reset
 * to zero before it is returned.
 *
 * The caller must hold the lock returned by pmd_lockptr(mm, pmdp).
 */
pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
	pgtable_t withdrawn;
	struct list_head *node;
	pte_t *slot;

	assert_spin_locked(pmd_lockptr(mm, pmdp));

	withdrawn = pmd_huge_pte(mm, pmdp);
	node = (struct list_head *) withdrawn;

	if (!list_empty(node)) {
		/* More tables remain: the next one becomes the new head. */
		pmd_huge_pte(mm, pmdp) = (pgtable_t) node->next;
		list_del(node);
	} else {
		/* This was the only deposited table. */
		pmd_huge_pte(mm, pmdp) = NULL;
	}

	/* Wipe the two slots that were borrowed for the list linkage. */
	slot = (pte_t *) withdrawn;
	slot[0] = __pte(0);
	slot[1] = __pte(0);

	return withdrawn;
}
/*
 * Clear a huge PMD entry and return its previous value.
 *
 * The entry is updated through radix__pmd_hugepage_update() with an
 * all-ones clear mask and an empty set mask.
 */
pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
				     unsigned long addr, pmd_t *pmdp)
{
	unsigned long prev = radix__pmd_hugepage_update(mm, addr, pmdp, ~0UL, 0);

	/*
	 * Serialize against find_linux_pte_or_hugepte(), which walks the page
	 * tables locklessly with local interrupts disabled and, for huge
	 * pages, casts the pmd_t to a pte_t. Because the two formats differ,
	 * a pmd must not change from pointing at a page table to pointing at
	 * a huge page (or back) while such a walker has interrupts disabled.
	 * The pmd cleared here may later be replaced with a page table
	 * pointer on other code paths, so wait for any parallel
	 * find_linux_pte_or_hugepte() to finish first.
	 */
	kick_all_cpus_sync();

	return __pmd(prev);
}
/*
 * Report whether transparent huge pages are usable on radix.
 *
 * Returns 1 when the 2M page size's shift equals PMD_SHIFT, i.e. a 2M page
 * sits exactly at the PMD level, and 0 otherwise.
 */
int radix__has_transparent_hugepage(void)
{
	return mmu_psize_defs[MMU_PAGE_2M].shift == PMD_SHIFT;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册