Commit 29ad768c authored by Andrea Arcangeli, committed by Linus Torvalds

thp: KSM on THP

This makes KSM fully operational with THP pages.  Subpages are scanned
while the hugepage is still in place and delivering max cpu performance,
and only if there's a match and we're going to deduplicate memory is the
single hugepage containing the matching subpage split.

There will be no false sharing between ksmd and khugepaged.  khugepaged
won't collapse 2M virtual regions with KSM pages inside.  ksmd also should
only split pages when the checksum matches, i.e. when we're likely to
split the hugepage for some long-lived KSM page (the usual KSM heuristic
to avoid sharing pages that get de-COWed).
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Parent: 60ab3244
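The gate described in the commit message, reduced to its essence: a subpage
only triggers a hugepage split once its contents are stable across scans
(checksum unchanged since the previous pass) and an identical page actually
exists. Below is a minimal userspace sketch of that heuristic; the names
should_split(), checksum() and NSUB are toy stand-ins invented here, not
KSM's real cmp_and_merge_page() path.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NSUB 4                  /* subpages per (toy) hugepage */

struct subpage {
	char data[64];
	uint32_t oldchecksum;   /* checksum seen on the previous scan */
};

/* Toy checksum standing in for the hash KSM computes per page. */
static uint32_t checksum(const char *data)
{
	uint32_t h = 0;
	for (size_t i = 0; i < 64; i++)
		h = h * 31 + (unsigned char)data[i];
	return h;
}

/*
 * Model of the gate: a subpage is a merge (and therefore split)
 * candidate only if its contents were stable across two scans and
 * an identical page exists elsewhere.
 */
static int should_split(struct subpage *sp, int content_matches_another_page)
{
	uint32_t sum = checksum(sp->data);
	int stable = (sum == sp->oldchecksum);
	sp->oldchecksum = sum;
	return stable && content_matches_another_page;
}

int main(void)
{
	struct subpage hugepage[NSUB] = { 0 };

	memset(hugepage[2].data, 'x', sizeof(hugepage[2].data));

	/* First scan: the checksum is only recorded, nothing is split. */
	printf("scan 1: split? %d\n", should_split(&hugepage[2], 1));
	/* Second scan: content unchanged and a match exists -> split. */
	printf("scan 2: split? %d\n", should_split(&hugepage[2], 1));
	return 0;
}

The real scanner additionally defers unstable-tree insertion until the
checksum is stable; the sketch keeps only the split decision.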
@@ -412,6 +412,29 @@ static void break_cow(struct rmap_item *rmap_item)
 	up_read(&mm->mmap_sem);
 }
 
+static struct page *page_trans_compound_anon(struct page *page)
+{
+	if (PageTransCompound(page)) {
+		struct page *head;
+		head = compound_head(page);
+		/*
+		 * head may be a dangling pointer.
+		 * __split_huge_page_refcount clears PageTail
+		 * before overwriting first_page, so if
+		 * PageTail is still there it means the head
+		 * pointer isn't dangling.
+		 */
+		if (head != page) {
+			smp_rmb();
+			if (!PageTransCompound(page))
+				return NULL;
+		}
+		if (PageAnon(head))
+			return head;
+	}
+	return NULL;
+}
+
 static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 {
 	struct mm_struct *mm = rmap_item->mm;
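page_trans_compound_anon() above validates a possibly dangling head pointer
without taking any reference: since __split_huge_page_refcount clears
PageTail before overwriting first_page, seeing PageTail still set after an
smp_rmb() proves the head pointer loaded earlier was not dangling. A rough
userspace analogue of that publication order, using C11 atomics (the struct
and function names here are illustrative, not kernel API):

#include <stdatomic.h>
#include <stdio.h>

struct head { int id; };

struct tail {
	_Atomic(struct head *) first;   /* analogue of page->first_page */
	atomic_bool is_tail;            /* analogue of PageTail */
};

/* Splitter side: clear the tail flag *before* letting the head
 * pointer go stale (analogue of __split_huge_page_refcount()). */
static void split(struct tail *t)
{
	atomic_store_explicit(&t->is_tail, 0, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);      /* like smp_wmb() */
	atomic_store_explicit(&t->first, NULL, memory_order_relaxed);
}

/* Reader side: trust the loaded head pointer only if the tail flag
 * is still set after a read barrier. */
static struct head *head_if_still_tail(struct tail *t)
{
	struct head *h = atomic_load_explicit(&t->first, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);      /* like smp_rmb() */
	if (!atomic_load_explicit(&t->is_tail, memory_order_relaxed))
		return NULL;    /* a split ran under us; h may dangle */
	return h;
}

int main(void)
{
	struct head h = { .id = 42 };
	struct tail t;
	atomic_init(&t.first, &h);
	atomic_init(&t.is_tail, 1);

	printf("before split: %s\n",
	       head_if_still_tail(&t) ? "head valid" : "dangling");
	split(&t);
	printf("after split:  %s\n",
	       head_if_still_tail(&t) ? "head valid" : "dangling");
	return 0;
}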
@@ -431,7 +454,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 	page = follow_page(vma, addr, FOLL_GET);
 	if (IS_ERR_OR_NULL(page))
 		goto out;
-	if (PageAnon(page) && !PageTransCompound(page)) {
+	if (PageAnon(page) || page_trans_compound_anon(page)) {
 		flush_anon_page(vma, page, addr);
 		flush_dcache_page(page);
 	} else {
@@ -709,6 +732,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 	if (addr == -EFAULT)
 		goto out;
 
+	BUG_ON(PageTransCompound(page));
 	ptep = page_check_address(page, mm, addr, &ptl, 0);
 	if (!ptep)
 		goto out;
@@ -784,6 +808,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 		goto out;
 
 	pmd = pmd_offset(pud, addr);
+	BUG_ON(pmd_trans_huge(*pmd));
 	if (!pmd_present(*pmd))
 		goto out;
@@ -811,6 +836,33 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	return err;
 }
 
+static int page_trans_compound_anon_split(struct page *page)
+{
+	int ret = 0;
+	struct page *transhuge_head = page_trans_compound_anon(page);
+	if (transhuge_head) {
+		/* Get the reference on the head to split it. */
+		if (get_page_unless_zero(transhuge_head)) {
+			/*
+			 * Recheck we got the reference while the head
+			 * was still anonymous.
+			 */
+			if (PageAnon(transhuge_head))
+				ret = split_huge_page(transhuge_head);
+			else
+				/*
+				 * Retry later if split_huge_page run
+				 * from under us.
+				 */
+				ret = 1;
+			put_page(transhuge_head);
+		} else
+			/* Retry later if split_huge_page run from under us. */
+			ret = 1;
+	}
+	return ret;
+}
+
 /*
  * try_to_merge_one_page - take two pages and merge them into one
  * @vma: the vma that holds the pte pointing to page
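page_trans_compound_anon_split() uses the classic speculative-reference
pattern: get_page_unless_zero() succeeds only while the head page is not
already being freed, and the PageAnon recheck under that reference catches
a split that raced in between. A simplified userspace version of the
"take a reference unless the count already hit zero, then revalidate"
idiom follows; get_ref_unless_zero() is an illustrative name, not a kernel
function.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Take a reference only if the object is not already on its way to
 * being freed, i.e. its refcount has not dropped to zero
 * (userspace analogue of get_page_unless_zero()). */
static bool get_ref_unless_zero(atomic_int *refcount)
{
	int old = atomic_load(refcount);
	while (old != 0) {
		/* on failure, old is reloaded with the current value */
		if (atomic_compare_exchange_weak(refcount, &old, old + 1))
			return true;
	}
	return false;
}

static void put_ref(atomic_int *refcount)
{
	atomic_fetch_sub(refcount, 1);
}

int main(void)
{
	atomic_int live = 2, dying = 0;

	if (get_ref_unless_zero(&live)) {
		/* ... revalidate object state here, as the kernel code
		 * rechecks PageAnon() before calling split_huge_page() ... */
		puts("live object: reference taken");
		put_ref(&live);
	}
	if (!get_ref_unless_zero(&dying))
		puts("dying object: retry later");
	return 0;
}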
@@ -831,6 +883,9 @@ static int try_to_merge_one_page(struct vm_area_struct *vma,
 	if (!(vma->vm_flags & VM_MERGEABLE))
 		goto out;
 
+	if (PageTransCompound(page) && page_trans_compound_anon_split(page))
+		goto out;
+	BUG_ON(PageTransCompound(page));
 	if (!PageAnon(page))
 		goto out;
@@ -1285,14 +1340,8 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
 				cond_resched();
 				continue;
 			}
-			if (PageTransCompound(*page)) {
-				put_page(*page);
-				ksm_scan.address &= HPAGE_PMD_MASK;
-				ksm_scan.address += HPAGE_PMD_SIZE;
-				cond_resched();
-				continue;
-			}
-			if (PageAnon(*page)) {
+			if (PageAnon(*page) ||
+			    page_trans_compound_anon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
 				flush_dcache_page(*page);
 				rmap_item = get_next_rmap_item(slot,
...
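With this commit in place, an application only needs the usual advice calls
for ksmd to scan THP-backed memory; the hugepage is split lazily, and only
if one of its subpages is actually merged. A hedged usage sketch, assuming
a kernel with CONFIG_KSM and CONFIG_TRANSPARENT_HUGEPAGE and ksmd started
via echo 1 > /sys/kernel/mm/ksm/run:

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN (4UL << 20)  /* 4 MiB: room for two 2 MiB hugepages */

int main(void)
{
	/* Anonymous private mapping, eligible for THP. */
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Ask for transparent hugepages and KSM merging on the range. */
	if (madvise(p, LEN, MADV_HUGEPAGE))
		perror("madvise(MADV_HUGEPAGE)");
	if (madvise(p, LEN, MADV_MERGEABLE))
		perror("madvise(MADV_MERGEABLE)");

	/* Fill with duplicate content: ksmd scans the subpages while the
	 * hugepages are still in place, and splits one only on a match. */
	memset(p, 0x5a, LEN);

	/* Give ksmd time to run; progress is visible in
	 * /sys/kernel/mm/ksm/pages_sharing. */
	sleep(10);

	munmap(p, LEN);
	return 0;
}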