提交 c0718806 编写于 作者: H Hugh Dickins 提交者: Linus Torvalds

[PATCH] mm: rmap with inner ptlock

rmap's page_check_address now descends without page_table_lock.  It first
does just pte_offset_map, in case there's no pte present worth locking for,
then takes page_table_lock for the full check, and passes ptl back to the
caller in the same style as pte_offset_map_lock.  __xip_unmap,
page_referenced_one and try_to_unmap_one now use pte_unmap_unlock, as does
try_to_unmap_cluster.

page_check_address is reformatted to avoid progressive indentation.  No use
is made of its one error code, so it now returns NULL when it fails.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
上级 67b02f11
...@@ -95,8 +95,8 @@ int try_to_unmap(struct page *); ...@@ -95,8 +95,8 @@ int try_to_unmap(struct page *);
/* /*
* Called from mm/filemap_xip.c to unmap empty zero page * Called from mm/filemap_xip.c to unmap empty zero page
*/ */
pte_t *page_check_address(struct page *, struct mm_struct *, unsigned long); pte_t *page_check_address(struct page *, struct mm_struct *,
unsigned long, spinlock_t **);
/* /*
* Used by swapoff to help locate where page is expected in vma. * Used by swapoff to help locate where page is expected in vma.
......
...@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping, ...@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping,
unsigned long address; unsigned long address;
pte_t *pte; pte_t *pte;
pte_t pteval; pte_t pteval;
spinlock_t *ptl;
struct page *page; struct page *page;
spin_lock(&mapping->i_mmap_lock); spin_lock(&mapping->i_mmap_lock);
...@@ -183,20 +184,15 @@ __xip_unmap (struct address_space * mapping, ...@@ -183,20 +184,15 @@ __xip_unmap (struct address_space * mapping,
((pgoff - vma->vm_pgoff) << PAGE_SHIFT); ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end); BUG_ON(address < vma->vm_start || address >= vma->vm_end);
page = ZERO_PAGE(address); page = ZERO_PAGE(address);
/* pte = page_check_address(page, mm, address, &ptl);
* We need the page_table_lock to protect us from page faults, if (pte) {
* munmap, fork, etc...
*/
pte = page_check_address(page, mm, address);
if (!IS_ERR(pte)) {
/* Nuke the page table entry. */ /* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte)); flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte); pteval = ptep_clear_flush(vma, address, pte);
page_remove_rmap(page); page_remove_rmap(page);
dec_mm_counter(mm, file_rss); dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval)); BUG_ON(pte_dirty(pteval));
pte_unmap(pte); pte_unmap_unlock(pte, ptl);
spin_unlock(&mm->page_table_lock);
page_cache_release(page); page_cache_release(page);
} }
} }
......
...@@ -247,34 +247,41 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) ...@@ -247,34 +247,41 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
* On success returns with mapped pte and locked mm->page_table_lock. * On success returns with mapped pte and locked mm->page_table_lock.
*/ */
pte_t *page_check_address(struct page *page, struct mm_struct *mm, pte_t *page_check_address(struct page *page, struct mm_struct *mm,
unsigned long address) unsigned long address, spinlock_t **ptlp)
{ {
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte; pte_t *pte;
spinlock_t *ptl;
/*
* We need the page_table_lock to protect us from page faults,
* munmap, fork, etc...
*/
spin_lock(&mm->page_table_lock);
pgd = pgd_offset(mm, address); pgd = pgd_offset(mm, address);
if (likely(pgd_present(*pgd))) { if (!pgd_present(*pgd))
pud = pud_offset(pgd, address); return NULL;
if (likely(pud_present(*pud))) {
pmd = pmd_offset(pud, address); pud = pud_offset(pgd, address);
if (likely(pmd_present(*pmd))) { if (!pud_present(*pud))
pte = pte_offset_map(pmd, address); return NULL;
if (likely(pte_present(*pte) &&
page_to_pfn(page) == pte_pfn(*pte))) pmd = pmd_offset(pud, address);
return pte; if (!pmd_present(*pmd))
pte_unmap(pte); return NULL;
}
} pte = pte_offset_map(pmd, address);
/* Make a quick check before getting the lock */
if (!pte_present(*pte)) {
pte_unmap(pte);
return NULL;
} }
spin_unlock(&mm->page_table_lock);
return ERR_PTR(-ENOENT); ptl = &mm->page_table_lock;
spin_lock(ptl);
if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
*ptlp = ptl;
return pte;
}
pte_unmap_unlock(pte, ptl);
return NULL;
} }
/* /*
...@@ -287,28 +294,28 @@ static int page_referenced_one(struct page *page, ...@@ -287,28 +294,28 @@ static int page_referenced_one(struct page *page,
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
unsigned long address; unsigned long address;
pte_t *pte; pte_t *pte;
spinlock_t *ptl;
int referenced = 0; int referenced = 0;
address = vma_address(page, vma); address = vma_address(page, vma);
if (address == -EFAULT) if (address == -EFAULT)
goto out; goto out;
pte = page_check_address(page, mm, address); pte = page_check_address(page, mm, address, &ptl);
if (!IS_ERR(pte)) { if (!pte)
if (ptep_clear_flush_young(vma, address, pte)) goto out;
referenced++;
/* Pretend the page is referenced if the task has the if (ptep_clear_flush_young(vma, address, pte))
swap token and is in the middle of a page fault. */ referenced++;
if (mm != current->mm && !ignore_token &&
has_swap_token(mm) &&
rwsem_is_locked(&mm->mmap_sem))
referenced++;
(*mapcount)--; /* Pretend the page is referenced if the task has the
pte_unmap(pte); swap token and is in the middle of a page fault. */
spin_unlock(&mm->page_table_lock); if (mm != current->mm && !ignore_token && has_swap_token(mm) &&
} rwsem_is_locked(&mm->mmap_sem))
referenced++;
(*mapcount)--;
pte_unmap_unlock(pte, ptl);
out: out:
return referenced; return referenced;
} }
...@@ -507,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) ...@@ -507,14 +514,15 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
unsigned long address; unsigned long address;
pte_t *pte; pte_t *pte;
pte_t pteval; pte_t pteval;
spinlock_t *ptl;
int ret = SWAP_AGAIN; int ret = SWAP_AGAIN;
address = vma_address(page, vma); address = vma_address(page, vma);
if (address == -EFAULT) if (address == -EFAULT)
goto out; goto out;
pte = page_check_address(page, mm, address); pte = page_check_address(page, mm, address, &ptl);
if (IS_ERR(pte)) if (!pte)
goto out; goto out;
/* /*
...@@ -564,8 +572,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma) ...@@ -564,8 +572,7 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma)
page_cache_release(page); page_cache_release(page);
out_unmap: out_unmap:
pte_unmap(pte); pte_unmap_unlock(pte, ptl);
spin_unlock(&mm->page_table_lock);
out: out:
return ret; return ret;
} }
...@@ -599,19 +606,14 @@ static void try_to_unmap_cluster(unsigned long cursor, ...@@ -599,19 +606,14 @@ static void try_to_unmap_cluster(unsigned long cursor,
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
pmd_t *pmd; pmd_t *pmd;
pte_t *pte, *original_pte; pte_t *pte;
pte_t pteval; pte_t pteval;
spinlock_t *ptl;
struct page *page; struct page *page;
unsigned long address; unsigned long address;
unsigned long end; unsigned long end;
unsigned long pfn; unsigned long pfn;
/*
* We need the page_table_lock to protect us from page faults,
* munmap, fork, etc...
*/
spin_lock(&mm->page_table_lock);
address = (vma->vm_start + cursor) & CLUSTER_MASK; address = (vma->vm_start + cursor) & CLUSTER_MASK;
end = address + CLUSTER_SIZE; end = address + CLUSTER_SIZE;
if (address < vma->vm_start) if (address < vma->vm_start)
...@@ -621,22 +623,22 @@ static void try_to_unmap_cluster(unsigned long cursor, ...@@ -621,22 +623,22 @@ static void try_to_unmap_cluster(unsigned long cursor,
pgd = pgd_offset(mm, address); pgd = pgd_offset(mm, address);
if (!pgd_present(*pgd)) if (!pgd_present(*pgd))
goto out_unlock; return;
pud = pud_offset(pgd, address); pud = pud_offset(pgd, address);
if (!pud_present(*pud)) if (!pud_present(*pud))
goto out_unlock; return;
pmd = pmd_offset(pud, address); pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd)) if (!pmd_present(*pmd))
goto out_unlock; return;
pte = pte_offset_map_lock(mm, pmd, address, &ptl);
/* Update high watermark before we lower rss */ /* Update high watermark before we lower rss */
update_hiwater_rss(mm); update_hiwater_rss(mm);
for (original_pte = pte = pte_offset_map(pmd, address); for (; address < end; pte++, address += PAGE_SIZE) {
address < end; pte++, address += PAGE_SIZE) {
if (!pte_present(*pte)) if (!pte_present(*pte))
continue; continue;
...@@ -669,10 +671,7 @@ static void try_to_unmap_cluster(unsigned long cursor, ...@@ -669,10 +671,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
dec_mm_counter(mm, file_rss); dec_mm_counter(mm, file_rss);
(*mapcount)--; (*mapcount)--;
} }
pte_unmap_unlock(pte - 1, ptl);
pte_unmap(original_pte);
out_unlock:
spin_unlock(&mm->page_table_lock);
} }
static int try_to_unmap_anon(struct page *page) static int try_to_unmap_anon(struct page *page)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册