提交 3403e56b 编写于 作者: A Alex Van Brunt 提交者: Will Deacon

arm64: mm: Don't wait for completion of TLB invalidation when page aging

When transitioning a PTE from young to old as part of page aging, we
can avoid waiting for the TLB invalidation to complete and therefore
drop the subsequent DSB instruction. Whilst this opens up a race with
page reclaim, where a PTE in active use via a stale, young TLB entry
does not update the underlying descriptor, the worst thing that happens
is that the page is reclaimed and then immediately faulted back in.

Given that we have a DSB in our context-switch path, the window for a
spurious reclaim is fairly limited and eliding the barrier claims to
boost NVMe/SSD accesses by over 10% on some platforms.

A similar optimisation was made for x86 in commit b13b1d2d ("x86/mm:
In the PTE swapout page reclaim case clear the accessed bit instead of
flushing the TLB").
Signed-off-by: NAlex Van Brunt <avanbrunt@nvidia.com>
Signed-off-by: NAshish Mhetre <amhetre@nvidia.com>
[will: rewrote patch]
Signed-off-by: NWill Deacon <will.deacon@arm.com>
上级 c8ebf64e
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <asm/memory.h> #include <asm/memory.h>
#include <asm/pgtable-hwdef.h> #include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h> #include <asm/pgtable-prot.h>
#include <asm/tlbflush.h>
/* /*
* VMALLOC range. * VMALLOC range.
...@@ -685,6 +686,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, ...@@ -685,6 +686,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
return __ptep_test_and_clear_young(ptep); return __ptep_test_and_clear_young(ptep);
} }
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep)
{
int young = ptep_test_and_clear_young(vma, address, ptep);
if (young) {
/*
* We can elide the trailing DSB here since the worst that can
* happen is that a CPU continues to use the young entry in its
* TLB and we mistakenly reclaim the associated page. The
* window for such an event is bounded by the next
* context-switch, which provides a DSB to complete the TLB
* invalidation.
*/
flush_tlb_page_nosync(vma, address);
}
return young;
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/mm_types.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <asm/cputype.h> #include <asm/cputype.h>
#include <asm/mmu.h> #include <asm/mmu.h>
...@@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm) ...@@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ish); dsb(ish);
} }
static inline void flush_tlb_page(struct vm_area_struct *vma, static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
unsigned long uaddr) unsigned long uaddr)
{ {
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
dsb(ishst); dsb(ishst);
__tlbi(vale1is, addr); __tlbi(vale1is, addr);
__tlbi_user(vale1is, addr); __tlbi_user(vale1is, addr);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
flush_tlb_page_nosync(vma, uaddr);
dsb(ish); dsb(ish);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册