提交 c4088ebd 编写于 作者: Kirill A. Shutemov 提交者: Linus Torvalds

mm: convert the rest to new page table lock api

Only trivial cases left. Let's convert them all together.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alex Thorlton <athorlton@sgi.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: "Paul E . McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Dave Jones <davej@redhat.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 cb900f41
...@@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -710,6 +710,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct page *page) struct page *page)
{ {
pgtable_t pgtable; pgtable_t pgtable;
spinlock_t *ptl;
VM_BUG_ON(!PageCompound(page)); VM_BUG_ON(!PageCompound(page));
pgtable = pte_alloc_one(mm, haddr); pgtable = pte_alloc_one(mm, haddr);
...@@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -724,9 +725,9 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
*/ */
__SetPageUptodate(page); __SetPageUptodate(page);
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_none(*pmd))) { if (unlikely(!pmd_none(*pmd))) {
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mem_cgroup_uncharge_page(page); mem_cgroup_uncharge_page(page);
put_page(page); put_page(page);
pte_free(mm, pgtable); pte_free(mm, pgtable);
...@@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, ...@@ -739,7 +740,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
set_pmd_at(mm, haddr, pmd, entry); set_pmd_at(mm, haddr, pmd, entry);
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
atomic_long_inc(&mm->nr_ptes); atomic_long_inc(&mm->nr_ptes);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
} }
return 0; return 0;
...@@ -759,6 +760,7 @@ static inline struct page *alloc_hugepage_vma(int defrag, ...@@ -759,6 +760,7 @@ static inline struct page *alloc_hugepage_vma(int defrag,
HPAGE_PMD_ORDER, vma, haddr, nd); HPAGE_PMD_ORDER, vma, haddr, nd);
} }
/* Caller must hold page table lock. */
static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
struct page *zero_page) struct page *zero_page)
...@@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -790,6 +792,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM; return VM_FAULT_OOM;
if (!(flags & FAULT_FLAG_WRITE) && if (!(flags & FAULT_FLAG_WRITE) &&
transparent_hugepage_use_zero_page()) { transparent_hugepage_use_zero_page()) {
spinlock_t *ptl;
pgtable_t pgtable; pgtable_t pgtable;
struct page *zero_page; struct page *zero_page;
bool set; bool set;
...@@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -802,10 +805,10 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK); count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
} }
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
zero_page); zero_page);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
if (!set) { if (!set) {
pte_free(mm, pgtable); pte_free(mm, pgtable);
put_huge_zero_page(); put_huge_zero_page();
...@@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -838,6 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
spinlock_t *dst_ptl, *src_ptl;
struct page *src_page; struct page *src_page;
pmd_t pmd; pmd_t pmd;
pgtable_t pgtable; pgtable_t pgtable;
...@@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -848,8 +852,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
if (unlikely(!pgtable)) if (unlikely(!pgtable))
goto out; goto out;
spin_lock(&dst_mm->page_table_lock); dst_ptl = pmd_lock(dst_mm, dst_pmd);
spin_lock_nested(&src_mm->page_table_lock, SINGLE_DEPTH_NESTING); src_ptl = pmd_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
ret = -EAGAIN; ret = -EAGAIN;
pmd = *src_pmd; pmd = *src_pmd;
...@@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -858,7 +863,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
goto out_unlock; goto out_unlock;
} }
/* /*
* mm->page_table_lock is enough to be sure that huge zero pmd is not * When page table lock is held, the huge zero pmd should not be
* under splitting since we don't split the page itself, only pmd to * under splitting since we don't split the page itself, only pmd to
* a page table. * a page table.
*/ */
...@@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -879,8 +884,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
} }
if (unlikely(pmd_trans_splitting(pmd))) { if (unlikely(pmd_trans_splitting(pmd))) {
/* split huge page running from under us */ /* split huge page running from under us */
spin_unlock(&src_mm->page_table_lock); spin_unlock(src_ptl);
spin_unlock(&dst_mm->page_table_lock); spin_unlock(dst_ptl);
pte_free(dst_mm, pgtable); pte_free(dst_mm, pgtable);
wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */ wait_split_huge_page(vma->anon_vma, src_pmd); /* src_vma */
...@@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -900,8 +905,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
ret = 0; ret = 0;
out_unlock: out_unlock:
spin_unlock(&src_mm->page_table_lock); spin_unlock(src_ptl);
spin_unlock(&dst_mm->page_table_lock); spin_unlock(dst_ptl);
out: out:
return ret; return ret;
} }
...@@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm, ...@@ -912,10 +917,11 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
pmd_t *pmd, pmd_t orig_pmd, pmd_t *pmd, pmd_t orig_pmd,
int dirty) int dirty)
{ {
spinlock_t *ptl;
pmd_t entry; pmd_t entry;
unsigned long haddr; unsigned long haddr;
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_same(*pmd, orig_pmd))) if (unlikely(!pmd_same(*pmd, orig_pmd)))
goto unlock; goto unlock;
...@@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm, ...@@ -925,13 +931,14 @@ void huge_pmd_set_accessed(struct mm_struct *mm,
update_mmu_cache_pmd(vma, address, pmd); update_mmu_cache_pmd(vma, address, pmd);
unlock: unlock:
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
} }
static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address, struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr) pmd_t *pmd, pmd_t orig_pmd, unsigned long haddr)
{ {
spinlock_t *ptl;
pgtable_t pgtable; pgtable_t pgtable;
pmd_t _pmd; pmd_t _pmd;
struct page *page; struct page *page;
...@@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, ...@@ -958,7 +965,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
mmun_end = haddr + HPAGE_PMD_SIZE; mmun_end = haddr + HPAGE_PMD_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_same(*pmd, orig_pmd))) if (unlikely(!pmd_same(*pmd, orig_pmd)))
goto out_free_page; goto out_free_page;
...@@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, ...@@ -985,7 +992,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
} }
smp_wmb(); /* make pte visible before pmd */ smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable); pmd_populate(mm, pmd, pgtable);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
put_huge_zero_page(); put_huge_zero_page();
inc_mm_counter(mm, MM_ANONPAGES); inc_mm_counter(mm, MM_ANONPAGES);
...@@ -995,7 +1002,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm, ...@@ -995,7 +1002,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
out: out:
return ret; return ret;
out_free_page: out_free_page:
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
mem_cgroup_uncharge_page(page); mem_cgroup_uncharge_page(page);
put_page(page); put_page(page);
...@@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ...@@ -1009,6 +1016,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
struct page *page, struct page *page,
unsigned long haddr) unsigned long haddr)
{ {
spinlock_t *ptl;
pgtable_t pgtable; pgtable_t pgtable;
pmd_t _pmd; pmd_t _pmd;
int ret = 0, i; int ret = 0, i;
...@@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ...@@ -1055,7 +1063,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
mmun_end = haddr + HPAGE_PMD_SIZE; mmun_end = haddr + HPAGE_PMD_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_same(*pmd, orig_pmd))) if (unlikely(!pmd_same(*pmd, orig_pmd)))
goto out_free_pages; goto out_free_pages;
VM_BUG_ON(!PageHead(page)); VM_BUG_ON(!PageHead(page));
...@@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ...@@ -1081,7 +1089,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
smp_wmb(); /* make pte visible before pmd */ smp_wmb(); /* make pte visible before pmd */
pmd_populate(mm, pmd, pgtable); pmd_populate(mm, pmd, pgtable);
page_remove_rmap(page); page_remove_rmap(page);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
...@@ -1092,7 +1100,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ...@@ -1092,7 +1100,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
return ret; return ret;
out_free_pages: out_free_pages:
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
mem_cgroup_uncharge_start(); mem_cgroup_uncharge_start();
for (i = 0; i < HPAGE_PMD_NR; i++) { for (i = 0; i < HPAGE_PMD_NR; i++) {
...@@ -1107,17 +1115,19 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, ...@@ -1107,17 +1115,19 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, pmd_t orig_pmd) unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
{ {
spinlock_t *ptl;
int ret = 0; int ret = 0;
struct page *page = NULL, *new_page; struct page *page = NULL, *new_page;
unsigned long haddr; unsigned long haddr;
unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_start; /* For mmu_notifiers */
unsigned long mmun_end; /* For mmu_notifiers */ unsigned long mmun_end; /* For mmu_notifiers */
ptl = pmd_lockptr(mm, pmd);
VM_BUG_ON(!vma->anon_vma); VM_BUG_ON(!vma->anon_vma);
haddr = address & HPAGE_PMD_MASK; haddr = address & HPAGE_PMD_MASK;
if (is_huge_zero_pmd(orig_pmd)) if (is_huge_zero_pmd(orig_pmd))
goto alloc; goto alloc;
spin_lock(&mm->page_table_lock); spin_lock(ptl);
if (unlikely(!pmd_same(*pmd, orig_pmd))) if (unlikely(!pmd_same(*pmd, orig_pmd)))
goto out_unlock; goto out_unlock;
...@@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1133,7 +1143,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out_unlock; goto out_unlock;
} }
get_page(page); get_page(page);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
alloc: alloc:
if (transparent_hugepage_enabled(vma) && if (transparent_hugepage_enabled(vma) &&
!transparent_hugepage_debug_cow()) !transparent_hugepage_debug_cow())
...@@ -1180,11 +1190,11 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1180,11 +1190,11 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
mmun_end = haddr + HPAGE_PMD_SIZE; mmun_end = haddr + HPAGE_PMD_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); spin_lock(ptl);
if (page) if (page)
put_page(page); put_page(page);
if (unlikely(!pmd_same(*pmd, orig_pmd))) { if (unlikely(!pmd_same(*pmd, orig_pmd))) {
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mem_cgroup_uncharge_page(new_page); mem_cgroup_uncharge_page(new_page);
put_page(new_page); put_page(new_page);
goto out_mn; goto out_mn;
...@@ -1206,13 +1216,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1206,13 +1216,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
} }
ret |= VM_FAULT_WRITE; ret |= VM_FAULT_WRITE;
} }
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
out_mn: out_mn:
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
out: out:
return ret; return ret;
out_unlock: out_unlock:
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
return ret; return ret;
} }
...@@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, ...@@ -1224,7 +1234,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct page *page = NULL; struct page *page = NULL;
assert_spin_locked(&mm->page_table_lock); assert_spin_locked(pmd_lockptr(mm, pmd));
if (flags & FOLL_WRITE && !pmd_write(*pmd)) if (flags & FOLL_WRITE && !pmd_write(*pmd))
goto out; goto out;
...@@ -1271,6 +1281,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, ...@@ -1271,6 +1281,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pmd_t pmd, pmd_t *pmdp) unsigned long addr, pmd_t pmd, pmd_t *pmdp)
{ {
spinlock_t *ptl;
struct anon_vma *anon_vma = NULL; struct anon_vma *anon_vma = NULL;
struct page *page; struct page *page;
unsigned long haddr = addr & HPAGE_PMD_MASK; unsigned long haddr = addr & HPAGE_PMD_MASK;
...@@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1280,7 +1291,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
bool migrated = false; bool migrated = false;
int flags = 0; int flags = 0;
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmdp);
if (unlikely(!pmd_same(pmd, *pmdp))) if (unlikely(!pmd_same(pmd, *pmdp)))
goto out_unlock; goto out_unlock;
...@@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1318,7 +1329,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
* relock and check_same as the page may no longer be mapped. * relock and check_same as the page may no longer be mapped.
* As the fault is being retried, do not account for it. * As the fault is being retried, do not account for it.
*/ */
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
wait_on_page_locked(page); wait_on_page_locked(page);
page_nid = -1; page_nid = -1;
goto out; goto out;
...@@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1326,13 +1337,13 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Page is misplaced, serialise migrations and parallel THP splits */ /* Page is misplaced, serialise migrations and parallel THP splits */
get_page(page); get_page(page);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
if (!page_locked) if (!page_locked)
lock_page(page); lock_page(page);
anon_vma = page_lock_anon_vma_read(page); anon_vma = page_lock_anon_vma_read(page);
/* Confirm the PMD did not change while page_table_lock was released */ /* Confirm the PMD did not change while page_table_lock was released */
spin_lock(&mm->page_table_lock); spin_lock(ptl);
if (unlikely(!pmd_same(pmd, *pmdp))) { if (unlikely(!pmd_same(pmd, *pmdp))) {
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
...@@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1344,7 +1355,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
* Migrate the THP to the requested node, returns with page unlocked * Migrate the THP to the requested node, returns with page unlocked
* and pmd_numa cleared. * and pmd_numa cleared.
*/ */
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
migrated = migrate_misplaced_transhuge_page(mm, vma, migrated = migrate_misplaced_transhuge_page(mm, vma,
pmdp, pmd, addr, page, target_nid); pmdp, pmd, addr, page, target_nid);
if (migrated) { if (migrated) {
...@@ -1361,7 +1372,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -1361,7 +1372,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
update_mmu_cache_pmd(vma, addr, pmdp); update_mmu_cache_pmd(vma, addr, pmdp);
unlock_page(page); unlock_page(page);
out_unlock: out_unlock:
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
out: out:
if (anon_vma) if (anon_vma)
...@@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2371,7 +2382,7 @@ static void collapse_huge_page(struct mm_struct *mm,
pte_t *pte; pte_t *pte;
pgtable_t pgtable; pgtable_t pgtable;
struct page *new_page; struct page *new_page;
spinlock_t *ptl; spinlock_t *pmd_ptl, *pte_ptl;
int isolated; int isolated;
unsigned long hstart, hend; unsigned long hstart, hend;
unsigned long mmun_start; /* For mmu_notifiers */ unsigned long mmun_start; /* For mmu_notifiers */
...@@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2414,12 +2425,12 @@ static void collapse_huge_page(struct mm_struct *mm,
anon_vma_lock_write(vma->anon_vma); anon_vma_lock_write(vma->anon_vma);
pte = pte_offset_map(pmd, address); pte = pte_offset_map(pmd, address);
ptl = pte_lockptr(mm, pmd); pte_ptl = pte_lockptr(mm, pmd);
mmun_start = address; mmun_start = address;
mmun_end = address + HPAGE_PMD_SIZE; mmun_end = address + HPAGE_PMD_SIZE;
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); /* probably unnecessary */ pmd_ptl = pmd_lock(mm, pmd); /* probably unnecessary */
/* /*
* After this gup_fast can't run anymore. This also removes * After this gup_fast can't run anymore. This also removes
* any huge TLB entry from the CPU so we won't allow * any huge TLB entry from the CPU so we won't allow
...@@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2427,16 +2438,16 @@ static void collapse_huge_page(struct mm_struct *mm,
* to avoid the risk of CPU bugs in that area. * to avoid the risk of CPU bugs in that area.
*/ */
_pmd = pmdp_clear_flush(vma, address, pmd); _pmd = pmdp_clear_flush(vma, address, pmd);
spin_unlock(&mm->page_table_lock); spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
spin_lock(ptl); spin_lock(pte_ptl);
isolated = __collapse_huge_page_isolate(vma, address, pte); isolated = __collapse_huge_page_isolate(vma, address, pte);
spin_unlock(ptl); spin_unlock(pte_ptl);
if (unlikely(!isolated)) { if (unlikely(!isolated)) {
pte_unmap(pte); pte_unmap(pte);
spin_lock(&mm->page_table_lock); spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd)); BUG_ON(!pmd_none(*pmd));
/* /*
* We can only use set_pmd_at when establishing * We can only use set_pmd_at when establishing
...@@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2444,7 +2455,7 @@ static void collapse_huge_page(struct mm_struct *mm,
* points to regular pagetables. Use pmd_populate for that * points to regular pagetables. Use pmd_populate for that
*/ */
pmd_populate(mm, pmd, pmd_pgtable(_pmd)); pmd_populate(mm, pmd, pmd_pgtable(_pmd));
spin_unlock(&mm->page_table_lock); spin_unlock(pmd_ptl);
anon_vma_unlock_write(vma->anon_vma); anon_vma_unlock_write(vma->anon_vma);
goto out; goto out;
} }
...@@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2455,7 +2466,7 @@ static void collapse_huge_page(struct mm_struct *mm,
*/ */
anon_vma_unlock_write(vma->anon_vma); anon_vma_unlock_write(vma->anon_vma);
__collapse_huge_page_copy(pte, new_page, vma, address, ptl); __collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl);
pte_unmap(pte); pte_unmap(pte);
__SetPageUptodate(new_page); __SetPageUptodate(new_page);
pgtable = pmd_pgtable(_pmd); pgtable = pmd_pgtable(_pmd);
...@@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm, ...@@ -2470,13 +2481,13 @@ static void collapse_huge_page(struct mm_struct *mm,
*/ */
smp_wmb(); smp_wmb();
spin_lock(&mm->page_table_lock); spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd)); BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address); page_add_new_anon_rmap(new_page, vma, address);
pgtable_trans_huge_deposit(mm, pmd, pgtable); pgtable_trans_huge_deposit(mm, pmd, pgtable);
set_pmd_at(mm, address, pmd, _pmd); set_pmd_at(mm, address, pmd, _pmd);
update_mmu_cache_pmd(vma, address, pmd); update_mmu_cache_pmd(vma, address, pmd);
spin_unlock(&mm->page_table_lock); spin_unlock(pmd_ptl);
*hpage = NULL; *hpage = NULL;
...@@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, ...@@ -2805,6 +2816,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd) pmd_t *pmd)
{ {
spinlock_t *ptl;
struct page *page; struct page *page;
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
unsigned long haddr = address & HPAGE_PMD_MASK; unsigned long haddr = address & HPAGE_PMD_MASK;
...@@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address, ...@@ -2817,22 +2829,22 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
mmun_end = haddr + HPAGE_PMD_SIZE; mmun_end = haddr + HPAGE_PMD_SIZE;
again: again:
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_trans_huge(*pmd))) { if (unlikely(!pmd_trans_huge(*pmd))) {
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
return; return;
} }
if (is_huge_zero_pmd(*pmd)) { if (is_huge_zero_pmd(*pmd)) {
__split_huge_zero_page_pmd(vma, haddr, pmd); __split_huge_zero_page_pmd(vma, haddr, pmd);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
return; return;
} }
page = pmd_page(*pmd); page = pmd_page(*pmd);
VM_BUG_ON(!page_count(page)); VM_BUG_ON(!page_count(page));
get_page(page); get_page(page);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
split_huge_page(page); split_huge_page(page);
......
...@@ -550,6 +550,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -550,6 +550,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long address) pmd_t *pmd, unsigned long address)
{ {
spinlock_t *ptl;
pgtable_t new = pte_alloc_one(mm, address); pgtable_t new = pte_alloc_one(mm, address);
int wait_split_huge_page; int wait_split_huge_page;
if (!new) if (!new)
...@@ -570,7 +571,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -570,7 +571,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
*/ */
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
wait_split_huge_page = 0; wait_split_huge_page = 0;
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
atomic_long_inc(&mm->nr_ptes); atomic_long_inc(&mm->nr_ptes);
...@@ -578,7 +579,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -578,7 +579,7 @@ int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
new = NULL; new = NULL;
} else if (unlikely(pmd_trans_splitting(*pmd))) } else if (unlikely(pmd_trans_splitting(*pmd)))
wait_split_huge_page = 1; wait_split_huge_page = 1;
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
if (new) if (new)
pte_free(mm, new); pte_free(mm, new);
if (wait_split_huge_page) if (wait_split_huge_page)
...@@ -1516,20 +1517,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma, ...@@ -1516,20 +1517,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
split_huge_page_pmd(vma, address, pmd); split_huge_page_pmd(vma, address, pmd);
goto split_fallthrough; goto split_fallthrough;
} }
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (likely(pmd_trans_huge(*pmd))) { if (likely(pmd_trans_huge(*pmd))) {
if (unlikely(pmd_trans_splitting(*pmd))) { if (unlikely(pmd_trans_splitting(*pmd))) {
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
wait_split_huge_page(vma->anon_vma, pmd); wait_split_huge_page(vma->anon_vma, pmd);
} else { } else {
page = follow_trans_huge_pmd(vma, address, page = follow_trans_huge_pmd(vma, address,
pmd, flags); pmd, flags);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
*page_mask = HPAGE_PMD_NR - 1; *page_mask = HPAGE_PMD_NR - 1;
goto out; goto out;
} }
} else } else
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
/* fall through */ /* fall through */
} }
split_fallthrough: split_fallthrough:
......
...@@ -1667,6 +1667,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -1667,6 +1667,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
unsigned long address, unsigned long address,
struct page *page, int node) struct page *page, int node)
{ {
spinlock_t *ptl;
unsigned long haddr = address & HPAGE_PMD_MASK; unsigned long haddr = address & HPAGE_PMD_MASK;
pg_data_t *pgdat = NODE_DATA(node); pg_data_t *pgdat = NODE_DATA(node);
int isolated = 0; int isolated = 0;
...@@ -1706,9 +1707,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -1706,9 +1707,9 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
WARN_ON(PageLRU(new_page)); WARN_ON(PageLRU(new_page));
/* Recheck the target PMD */ /* Recheck the target PMD */
spin_lock(&mm->page_table_lock); ptl = pmd_lock(mm, pmd);
if (unlikely(!pmd_same(*pmd, entry))) { if (unlikely(!pmd_same(*pmd, entry))) {
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
/* Reverse changes made by migrate_page_copy() */ /* Reverse changes made by migrate_page_copy() */
if (TestClearPageActive(new_page)) if (TestClearPageActive(new_page))
...@@ -1753,7 +1754,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, ...@@ -1753,7 +1754,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
* before it's fully transferred to the new page. * before it's fully transferred to the new page.
*/ */
mem_cgroup_end_migration(memcg, page, new_page, true); mem_cgroup_end_migration(memcg, page, new_page, true);
spin_unlock(&mm->page_table_lock); spin_unlock(ptl);
unlock_page(new_page); unlock_page(new_page);
unlock_page(page); unlock_page(page);
......
...@@ -151,7 +151,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, ...@@ -151,7 +151,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable) pgtable_t pgtable)
{ {
assert_spin_locked(&mm->page_table_lock); assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */ /* FIFO */
if (!pmd_huge_pte(mm, pmdp)) if (!pmd_huge_pte(mm, pmdp))
...@@ -170,7 +170,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) ...@@ -170,7 +170,7 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{ {
pgtable_t pgtable; pgtable_t pgtable;
assert_spin_locked(&mm->page_table_lock); assert_spin_locked(pmd_lockptr(mm, pmdp));
/* FIFO */ /* FIFO */
pgtable = pmd_huge_pte(mm, pmdp); pgtable = pmd_huge_pte(mm, pmdp);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册