diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 78740716c9e74ece8e162aa0b249cf055a8e62e2..56627fa453a65a7fe3426792730a487c6b6d4d35 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -170,6 +170,26 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #define move_pte(pte, prot, old_addr, new_addr) (pte)
 #endif
 
+/*
+ * A facility to provide lazy MMU batching. This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued. Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window. Note that using this
+ * interface requires that read hazards be removed from the code. A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads through
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date. This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified. In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode() do {} while (0)
+#define arch_leave_lazy_mmu_mode() do {} while (0)
+#endif
+
 /*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier. Although no
diff --git a/mm/memory.c b/mm/memory.c
index 2e754621d3332a288f40c23a58236f4eb5f60527..9cf3f341a28a6cf4c53af58f01959a9cf7c23eb1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -506,6 +506,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	src_pte = pte_offset_map_nested(src_pmd, addr);
 	src_ptl = pte_lockptr(src_mm, src_pmd);
 	spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+	arch_enter_lazy_mmu_mode();
 
 	do {
 		/*
@@ -527,6 +528,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
+	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(src_pte - 1);
 	add_mm_rss(dst_mm, rss[0], rss[1]);
@@ -628,6 +630,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	int anon_rss = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
 		if (pte_none(ptent)) {
@@ -694,6 +697,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
 	add_mm_rss(mm, file_rss, anon_rss);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
 	return addr;
@@ -1109,6 +1113,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
+	arch_enter_lazy_mmu_mode();
 	do {
 		struct page *page = ZERO_PAGE(addr);
 		pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
@@ -1118,6 +1123,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, zero_pte);
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 	return 0;
 }
@@ -1275,11 +1281,13 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
 	if (!pte)
 		return -ENOMEM;
+	arch_enter_lazy_mmu_mode();
 	do {
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
 		pfn++;
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 	return 0;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 955f9d0e38aa3f5f35d09eb2473ce3f565196292..3b8f3c0c63f3b948cda9edca535d7aaa65b4a38b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -34,6 +34,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 	spinlock_t *ptl;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
@@ -70,6 +71,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
 		}
 
 	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 }
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 7c15cf3373ad0d0e94ba6f5b74145339da0052d3..9c769fa29f32aff451ef6ff86ce836c1fc0bf856 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -98,6 +98,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
 				   new_pte++, new_addr += PAGE_SIZE) {
@@ -109,6 +110,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		set_pte_at(mm, new_addr, new_pte, pte);
 	}
 
+	arch_leave_lazy_mmu_mode();
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap_nested(new_pte - 1);
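
For reference, a minimal sketch of what an architecture override of these hooks could look like. This is not part of the patch: hv_start_pte_batch() and hv_flush_pte_batch() are hypothetical placeholders for whatever batching primitive (for example a hypercall queue) the port actually provides.

/* Hypothetical <asm/pgtable.h> fragment -- illustrative only. */

/* Placeholder batching primitives, assumed to be provided elsewhere. */
extern void hv_start_pte_batch(void);
extern void hv_flush_pte_batch(void);

#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE

static inline void arch_enter_lazy_mmu_mode(void)
{
	/*
	 * Called with the relevant page table lock(s) held, so preemption
	 * is disabled (UP) and other CPUs cannot modify these page tables
	 * (SMP) while updates are being queued.
	 */
	hv_start_pte_batch();
}

static inline void arch_leave_lazy_mmu_mode(void)
{
	/*
	 * Issue every PTE write queued since the matching enter call;
	 * only after this are reads through raw PTE pointers guaranteed
	 * to be up to date again.
	 */
	hv_flush_pte_batch();
}

Such a port would also have its set_pte_at() implementation queue updates into the same batch while the mode is active. Architectures that do not define __HAVE_ARCH_ENTER_LAZY_MMU_MODE keep the empty do {} while (0) stubs from the generic header, so the loops instrumented by this patch generate no extra code for them.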