提交 8ac1f832 编写于 作者: Andrea Arcangeli 提交者: Linus Torvalds

thp: pte alloc trans splitting

pte alloc routines must wait for split_huge_page if the pmd is not present
and not null (i.e.  pmd_trans_splitting).  The additional branches are
optimized away at compile time by pmd_trans_splitting if the config option
is off.  However we must pass the vma down in order to know the anon_vma
lock to wait for.

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 64cc6ae0
...@@ -50,7 +50,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) ...@@ -50,7 +50,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (!new_pmd) if (!new_pmd)
goto no_pmd; goto no_pmd;
new_pte = pte_alloc_map(mm, new_pmd, 0); new_pte = pte_alloc_map(mm, NULL, new_pmd, 0);
if (!new_pte) if (!new_pte)
goto no_pte; goto no_pte;
......
...@@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz) ...@@ -38,7 +38,7 @@ huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
if (pud) { if (pud) {
pmd = pmd_alloc(mm, pud, taddr); pmd = pmd_alloc(mm, pud, taddr);
if (pmd) if (pmd)
pte = pte_alloc_map(mm, pmd, taddr); pte = pte_alloc_map(mm, NULL, pmd, taddr);
} }
return pte; return pte;
} }
......
...@@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ...@@ -35,7 +35,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (pud) { if (pud) {
pmd = pmd_alloc(mm, pud, addr); pmd = pmd_alloc(mm, pud, addr);
if (pmd) if (pmd)
pte = pte_alloc_map(mm, pmd, addr); pte = pte_alloc_map(mm, NULL, pmd, addr);
} }
} }
......
...@@ -50,7 +50,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned ...@@ -50,7 +50,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
end = PGDIR_SIZE; end = PGDIR_SIZE;
offset -= address; offset -= address;
do { do {
pte_t * pte = pte_alloc_map(mm, pmd, address); pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
if (!pte) if (!pte)
return -ENOMEM; return -ENOMEM;
io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space); io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
......
...@@ -92,7 +92,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned ...@@ -92,7 +92,7 @@ static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned
end = PGDIR_SIZE; end = PGDIR_SIZE;
offset -= address; offset -= address;
do { do {
pte_t * pte = pte_alloc_map(mm, pmd, address); pte_t *pte = pte_alloc_map(mm, NULL, pmd, address);
if (!pte) if (!pte)
return -ENOMEM; return -ENOMEM;
io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space); io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
......
...@@ -214,7 +214,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ...@@ -214,7 +214,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
if (pud) { if (pud) {
pmd = pmd_alloc(mm, pud, addr); pmd = pmd_alloc(mm, pud, addr);
if (pmd) if (pmd)
pte = pte_alloc_map(mm, pmd, addr); pte = pte_alloc_map(mm, NULL, pmd, addr);
} }
return pte; return pte;
} }
......
...@@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc, ...@@ -31,7 +31,7 @@ static int init_stub_pte(struct mm_struct *mm, unsigned long proc,
if (!pmd) if (!pmd)
goto out_pmd; goto out_pmd;
pte = pte_alloc_map(mm, pmd, proc); pte = pte_alloc_map(mm, NULL, pmd, proc);
if (!pte) if (!pte)
goto out_pte; goto out_pte;
......
...@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn, ...@@ -133,7 +133,7 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
pmd = pmd_alloc(&tboot_mm, pud, vaddr); pmd = pmd_alloc(&tboot_mm, pud, vaddr);
if (!pmd) if (!pmd)
return -1; return -1;
pte = pte_alloc_map(&tboot_mm, pmd, vaddr); pte = pte_alloc_map(&tboot_mm, NULL, pmd, vaddr);
if (!pte) if (!pte)
return -1; return -1;
set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
......
...@@ -1131,7 +1131,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, ...@@ -1131,7 +1131,8 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud,
int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
#endif #endif
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long address);
int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
/* /*
...@@ -1200,16 +1201,18 @@ static inline void pgtable_page_dtor(struct page *page) ...@@ -1200,16 +1201,18 @@ static inline void pgtable_page_dtor(struct page *page)
pte_unmap(pte); \ pte_unmap(pte); \
} while (0) } while (0)
#define pte_alloc_map(mm, pmd, address) \ #define pte_alloc_map(mm, vma, pmd, address) \
((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, vma, \
NULL: pte_offset_map(pmd, address)) pmd, address))? \
NULL: pte_offset_map(pmd, address))
#define pte_alloc_map_lock(mm, pmd, address, ptlp) \ #define pte_alloc_map_lock(mm, pmd, address, ptlp) \
((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \ ((unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, NULL, \
pmd, address))? \
NULL: pte_offset_map_lock(mm, pmd, address, ptlp)) NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
#define pte_alloc_kernel(pmd, address) \ #define pte_alloc_kernel(pmd, address) \
((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \ ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
NULL: pte_offset_kernel(pmd, address)) NULL: pte_offset_kernel(pmd, address))
extern void free_area_init(unsigned long * zones_size); extern void free_area_init(unsigned long * zones_size);
......
...@@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -394,9 +394,11 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
} }
} }
int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) int __pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long address)
{ {
pgtable_t new = pte_alloc_one(mm, address); pgtable_t new = pte_alloc_one(mm, address);
int wait_split_huge_page;
if (!new) if (!new)
return -ENOMEM; return -ENOMEM;
...@@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address) ...@@ -416,14 +418,18 @@ int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */ smp_wmb(); /* Could be smp_wmb__xxx(before|after)_spin_lock */
spin_lock(&mm->page_table_lock); spin_lock(&mm->page_table_lock);
if (!pmd_present(*pmd)) { /* Has another populated it ? */ wait_split_huge_page = 0;
if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
mm->nr_ptes++; mm->nr_ptes++;
pmd_populate(mm, pmd, new); pmd_populate(mm, pmd, new);
new = NULL; new = NULL;
} } else if (unlikely(pmd_trans_splitting(*pmd)))
wait_split_huge_page = 1;
spin_unlock(&mm->page_table_lock); spin_unlock(&mm->page_table_lock);
if (new) if (new)
pte_free(mm, new); pte_free(mm, new);
if (wait_split_huge_page)
wait_split_huge_page(vma->anon_vma, pmd);
return 0; return 0;
} }
...@@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address) ...@@ -436,10 +442,11 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
smp_wmb(); /* See comment in __pte_alloc */ smp_wmb(); /* See comment in __pte_alloc */
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
if (!pmd_present(*pmd)) { /* Has another populated it ? */ if (likely(pmd_none(*pmd))) { /* Has another populated it ? */
pmd_populate_kernel(&init_mm, pmd, new); pmd_populate_kernel(&init_mm, pmd, new);
new = NULL; new = NULL;
} } else
VM_BUG_ON(pmd_trans_splitting(*pmd));
spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock);
if (new) if (new)
pte_free_kernel(&init_mm, new); pte_free_kernel(&init_mm, new);
...@@ -3253,7 +3260,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -3253,7 +3260,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pmd = pmd_alloc(mm, pud, address); pmd = pmd_alloc(mm, pud, address);
if (!pmd) if (!pmd)
return VM_FAULT_OOM; return VM_FAULT_OOM;
pte = pte_alloc_map(mm, pmd, address); pte = pte_alloc_map(mm, vma, pmd, address);
if (!pte) if (!pte)
return VM_FAULT_OOM; return VM_FAULT_OOM;
......
...@@ -47,7 +47,8 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) ...@@ -47,7 +47,8 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
return pmd; return pmd;
} }
static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr)
{ {
pgd_t *pgd; pgd_t *pgd;
pud_t *pud; pud_t *pud;
...@@ -62,7 +63,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) ...@@ -62,7 +63,8 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
if (!pmd) if (!pmd)
return NULL; return NULL;
if (!pmd_present(*pmd) && __pte_alloc(mm, pmd, addr)) VM_BUG_ON(pmd_trans_huge(*pmd));
if (pmd_none(*pmd) && __pte_alloc(mm, vma, pmd, addr))
return NULL; return NULL;
return pmd; return pmd;
...@@ -147,7 +149,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma, ...@@ -147,7 +149,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
old_pmd = get_old_pmd(vma->vm_mm, old_addr); old_pmd = get_old_pmd(vma->vm_mm, old_addr);
if (!old_pmd) if (!old_pmd)
continue; continue;
new_pmd = alloc_new_pmd(vma->vm_mm, new_addr); new_pmd = alloc_new_pmd(vma->vm_mm, vma, new_addr);
if (!new_pmd) if (!new_pmd)
break; break;
next = (new_addr + PMD_SIZE) & PMD_MASK; next = (new_addr + PMD_SIZE) & PMD_MASK;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册