提交 5aaabe83 编写于 作者: N Naoya Horiguchi 提交者: Linus Torvalds

pagemap: avoid splitting thp when reading /proc/pid/pagemap

Thp split is not necessary if we explicitly check whether pmds are mapping
thps or not.  This patch introduces this check and adds code to generate
pagemap entries for pmds mapping thps, which results in less performance
impact of pagemap on thp.
Signed-off-by: NNaoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: NAndi Kleen <ak@linux.intel.com>
Reviewed-by: NKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: NAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
上级 b716ad95
...@@ -608,6 +608,9 @@ struct pagemapread { ...@@ -608,6 +608,9 @@ struct pagemapread {
u64 *buffer; u64 *buffer;
}; };
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
#define PM_ENTRY_BYTES sizeof(u64) #define PM_ENTRY_BYTES sizeof(u64)
#define PM_STATUS_BITS 3 #define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
...@@ -666,6 +669,27 @@ static u64 pte_to_pagemap_entry(pte_t pte) ...@@ -666,6 +669,27 @@ static u64 pte_to_pagemap_entry(pte_t pte)
return pme; return pme;
} }
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
{
u64 pme = 0;
/*
* Currently pmd for thp is always present because thp can not be
* swapped-out, migrated, or HWPOISONed (split in such cases instead.)
* This if-check is just to prepare for future implementation.
*/
if (pmd_present(pmd))
pme = PM_PFRAME(pmd_pfn(pmd) + offset)
| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
return pme;
}
#else
static inline u64 thp_pmd_to_pagemap_entry(pmd_t pmd, int offset)
{
return 0;
}
#endif
static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct mm_walk *walk) struct mm_walk *walk)
{ {
...@@ -673,15 +697,37 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -673,15 +697,37 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct pagemapread *pm = walk->private; struct pagemapread *pm = walk->private;
pte_t *pte; pte_t *pte;
int err = 0; int err = 0;
u64 pfn = PM_NOT_PRESENT;
split_huge_page_pmd(walk->mm, pmd);
if (pmd_trans_unstable(pmd)) if (pmd_trans_unstable(pmd))
return 0; return 0;
/* find the first VMA at or above 'addr' */ /* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr); vma = find_vma(walk->mm, addr);
spin_lock(&walk->mm->page_table_lock);
if (pmd_trans_huge(*pmd)) {
if (pmd_trans_splitting(*pmd)) {
spin_unlock(&walk->mm->page_table_lock);
wait_split_huge_page(vma->anon_vma, pmd);
} else {
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
offset = (addr & ~PAGEMAP_WALK_MASK) >>
PAGE_SHIFT;
pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
err = add_to_pagemap(addr, pfn, pm);
if (err)
break;
}
spin_unlock(&walk->mm->page_table_lock);
return err;
}
} else {
spin_unlock(&walk->mm->page_table_lock);
}
for (; addr != end; addr += PAGE_SIZE) { for (; addr != end; addr += PAGE_SIZE) {
u64 pfn = PM_NOT_PRESENT;
/* check to see if we've left 'vma' behind /* check to see if we've left 'vma' behind
* and need a new, higher one */ * and need a new, higher one */
...@@ -764,8 +810,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, ...@@ -764,8 +810,6 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
* determine which areas of memory are actually mapped and llseek to * determine which areas of memory are actually mapped and llseek to
* skip over unmapped regions. * skip over unmapped regions.
*/ */
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
static ssize_t pagemap_read(struct file *file, char __user *buf, static ssize_t pagemap_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册