提交 a13ea5b7 编写于 作者: H Hugh Dickins 提交者: Linus Torvalds

mm: reinstate ZERO_PAGE

KAMEZAWA Hiroyuki has observed customers of earlier kernels taking
advantage of the ZERO_PAGE: which we stopped do_anonymous_page() from
using in 2.6.24.  And there were a couple of regression reports on LKML.

Following suggestions from Linus, reinstate do_anonymous_page() use of
the ZERO_PAGE; but this time avoid dirtying its struct page cacheline
with (map)count updates - let vm_normal_page() regard it as abnormal.

Use it only on arches which __HAVE_ARCH_PTE_SPECIAL (x86, s390, sh32,
most powerpc): that's not essential, but minimizes additional branches
(keeping them in the unlikely pte_special case); and incidentally
excludes mips (some models of which needed eight colours of ZERO_PAGE
to avoid costly exceptions).

Don't be fanatical about avoiding ZERO_PAGE updates: get_user_pages()
callers won't want to make exceptions for it, so increment its count
there.  Changes to mlock and migration? happily seems not needed.

In most places it's quicker to check pfn than struct page address:
prepare a __read_mostly zero_pfn for that.  Does get_dump_page()
still need its ZERO_PAGE check? probably not, but keep it anyway.
Signed-off-by: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Minchan Kim <minchan.kim@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 1ac0cb5d
...@@ -108,6 +108,17 @@ static int __init disable_randmaps(char *s) ...@@ -108,6 +108,17 @@ static int __init disable_randmaps(char *s)
} }
__setup("norandmaps", disable_randmaps); __setup("norandmaps", disable_randmaps);
static unsigned long zero_pfn __read_mostly;
/*
* CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
*
* init_zero_pfn() caches the page frame number of ZERO_PAGE(0) in the
* file-scope zero_pfn variable, so that code in this file can test
* "is this the zero page?" by comparing pfns rather than struct page
* addresses.  It is registered as a core_initcall and always returns 0.
*/
static int __init init_zero_pfn(void)
{
zero_pfn = page_to_pfn(ZERO_PAGE(0));
return 0;
}
core_initcall(init_zero_pfn);
/* /*
* If a p?d_bad entry is found while walking page tables, report * If a p?d_bad entry is found while walking page tables, report
...@@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, ...@@ -499,7 +510,9 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
if (HAVE_PTE_SPECIAL) { if (HAVE_PTE_SPECIAL) {
if (likely(!pte_special(pte))) if (likely(!pte_special(pte)))
goto check_pfn; goto check_pfn;
if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))) if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
if (pfn != zero_pfn)
print_bad_pte(vma, addr, pte, NULL); print_bad_pte(vma, addr, pte, NULL);
return NULL; return NULL;
} }
...@@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, ...@@ -1144,9 +1157,14 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
goto no_page; goto no_page;
if ((flags & FOLL_WRITE) && !pte_write(pte)) if ((flags & FOLL_WRITE) && !pte_write(pte))
goto unlock; goto unlock;
page = vm_normal_page(vma, address, pte); page = vm_normal_page(vma, address, pte);
if (unlikely(!page)) if (unlikely(!page)) {
goto bad_page; if ((flags & FOLL_DUMP) ||
pte_pfn(pte) != zero_pfn)
goto bad_page;
page = pte_page(pte);
}
if (flags & FOLL_GET) if (flags & FOLL_GET)
get_page(page); get_page(page);
...@@ -2084,10 +2102,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2084,10 +2102,19 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(anon_vma_prepare(vma))) if (unlikely(anon_vma_prepare(vma)))
goto oom; goto oom;
VM_BUG_ON(old_page == ZERO_PAGE(0));
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (pte_pfn(orig_pte) == zero_pfn) {
if (!new_page) new_page = alloc_zeroed_user_highpage_movable(vma, address);
goto oom; if (!new_page)
goto oom;
} else {
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (!new_page)
goto oom;
cow_user_page(new_page, old_page, address, vma);
}
__SetPageUptodate(new_page);
/* /*
* Don't let another task, with possibly unlocked vma, * Don't let another task, with possibly unlocked vma,
* keep the mlocked page. * keep the mlocked page.
...@@ -2097,8 +2124,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2097,8 +2124,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
clear_page_mlock(old_page); clear_page_mlock(old_page);
unlock_page(old_page); unlock_page(old_page);
} }
cow_user_page(new_page, old_page, address, vma);
__SetPageUptodate(new_page);
if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
goto oom_free_new; goto oom_free_new;
...@@ -2639,6 +2664,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2639,6 +2664,15 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
spinlock_t *ptl; spinlock_t *ptl;
pte_t entry; pte_t entry;
if (HAVE_PTE_SPECIAL && !(flags & FAULT_FLAG_WRITE)) {
entry = pte_mkspecial(pfn_pte(zero_pfn, vma->vm_page_prot));
ptl = pte_lockptr(mm, pmd);
spin_lock(ptl);
if (!pte_none(*page_table))
goto unlock;
goto setpte;
}
/* Allocate our own private page. */ /* Allocate our own private page. */
pte_unmap(page_table); pte_unmap(page_table);
...@@ -2662,6 +2696,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2662,6 +2696,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter(mm, anon_rss); inc_mm_counter(mm, anon_rss);
page_add_new_anon_rmap(page, vma, address); page_add_new_anon_rmap(page, vma, address);
setpte:
set_pte_at(mm, address, page_table, entry); set_pte_at(mm, address, page_table, entry);
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册