提交 2e441889 编写于 作者: Hugh Dickins 提交者: Linus Torvalds

swapoff: scan ptes preemptibly

Provided that CONFIG_HIGHPTE is not set, unuse_pte_range can reduce latency
in swapoff by scanning the page table preemptibly: so long as unuse_pte is
careful to recheck that entry under pte lock.

(To tell the truth, this patch was not inspired by any cries for lower
latency here: rather, this restructuring permits a future memory controller
patch to allocate with GFP_KERNEL in unuse_pte, where before it could not.
But it would be wrong to tuck this change away inside a memcgroup patch.)

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Tested-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 8952898b
...@@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free) ...@@ -506,9 +506,19 @@ unsigned int count_swap_pages(int type, int free)
* just let do_wp_page work it out if a write is requested later - to * just let do_wp_page work it out if a write is requested later - to
* force COW, vm_page_prot omits write permission from any private vma. * force COW, vm_page_prot omits write permission from any private vma.
*/ */
static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, swp_entry_t entry, struct page *page) unsigned long addr, swp_entry_t entry, struct page *page)
{ {
spinlock_t *ptl;
pte_t *pte;
int found = 1;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
found = 0;
goto out;
}
inc_mm_counter(vma->vm_mm, anon_rss); inc_mm_counter(vma->vm_mm, anon_rss);
get_page(page); get_page(page);
set_pte_at(vma->vm_mm, addr, pte, set_pte_at(vma->vm_mm, addr, pte,
...@@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte, ...@@ -520,6 +530,9 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
* immediately swapped out again after swapon. * immediately swapped out again after swapon.
*/ */
activate_page(page); activate_page(page);
out:
pte_unmap_unlock(pte, ptl);
return found;
} }
static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
...@@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -528,22 +541,33 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
{ {
pte_t swp_pte = swp_entry_to_pte(entry); pte_t swp_pte = swp_entry_to_pte(entry);
pte_t *pte; pte_t *pte;
spinlock_t *ptl;
int found = 0; int found = 0;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); /*
* We don't actually need pte lock while scanning for swp_pte: since
* we hold page lock and mmap_sem, swp_pte cannot be inserted into the
* page table while we're scanning; though it could get zapped, and on
* some architectures (e.g. x86_32 with PAE) we might catch a glimpse
* of unmatched parts which look like swp_pte, so unuse_pte must
* recheck under pte lock. Scanning without pte lock lets it be
* preemptible whenever CONFIG_PREEMPT but not CONFIG_HIGHPTE.
*/
pte = pte_offset_map(pmd, addr);
do { do {
/* /*
* swapoff spends a _lot_ of time in this loop! * swapoff spends a _lot_ of time in this loop!
* Test inline before going to call unuse_pte. * Test inline before going to call unuse_pte.
*/ */
if (unlikely(pte_same(*pte, swp_pte))) { if (unlikely(pte_same(*pte, swp_pte))) {
unuse_pte(vma, pte++, addr, entry, page); pte_unmap(pte);
found = 1; found = unuse_pte(vma, pmd, addr, entry, page);
break; if (found)
goto out;
pte = pte_offset_map(pmd, addr);
} }
} while (pte++, addr += PAGE_SIZE, addr != end); } while (pte++, addr += PAGE_SIZE, addr != end);
pte_unmap_unlock(pte - 1, ptl); pte_unmap(pte - 1);
out:
return found; return found;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册