From 0cd4b4743cd5e27c7200f1e6388b3c6a8d09188e Mon Sep 17 00:00:00 2001
From: Alex Van Brunt
Date: Tue, 13 Aug 2019 20:52:02 +0800
Subject: [PATCH] arm64: mm: Don't wait for completion of TLB invalidation when page aging

mainline inclusion
from mainline-4.21
commit: 3403e56b41c176f6531a2a6d77d85b46fa34169c
category: feature
feature: Reduce synchronous TLB invalidation on ARM64
bugzilla: NA
CVE: NA

--------------------------------------------------

When transitioning a PTE from young to old as part of page aging, we can
avoid waiting for the TLB invalidation to complete and therefore drop the
subsequent DSB instruction. Whilst this opens up a race with page reclaim,
where a PTE in active use via a stale, young TLB entry does not update the
underlying descriptor, the worst thing that happens is that the page is
reclaimed and then immediately faulted back in.

Given that we have a DSB in our context-switch path, the window for a
spurious reclaim is fairly limited and eliding the barrier claims to boost
NVMe/SSD accesses by over 10% on some platforms.

A similar optimisation was made for x86 in commit b13b1d2d8692 ("x86/mm:
In the PTE swapout page reclaim case clear the accessed bit instead of
flushing the TLB").

Signed-off-by: Alex Van Brunt
Signed-off-by: Ashish Mhetre
[will: rewrote patch]
Signed-off-by: Will Deacon
Signed-off-by: Hanjun Guo
Reviewed-by: Xuefeng Wang
Signed-off-by: Yang Yingliang
---
 arch/arm64/include/asm/pgtable.h  | 22 ++++++++++++++++++++++
 arch/arm64/include/asm/tlbflush.h | 11 +++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6e71c5734e78..7fb7af66b805 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 
 /*
  * VMALLOC range.
@@ -653,6 +654,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
 	return __ptep_test_and_clear_young(ptep);
 }
 
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{
+	int young = ptep_test_and_clear_young(vma, address, ptep);
+
+	if (young) {
+		/*
+		 * We can elide the trailing DSB here since the worst that can
+		 * happen is that a CPU continues to use the young entry in its
+		 * TLB and we mistakenly reclaim the associated page. The
+		 * window for such an event is bounded by the next
+		 * context-switch, which provides a DSB to complete the TLB
+		 * invalidation.
+		 */
+		flush_tlb_page_nosync(vma, address);
+	}
+
+	return young;
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c3c0387aee18..a629a4067aae 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -21,6 +21,7 @@
 
 #ifndef __ASSEMBLY__
 
+#include
 #include
 #include
 #include
@@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 	dsb(ish);
 }
 
-static inline void flush_tlb_page(struct vm_area_struct *vma,
-				  unsigned long uaddr)
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+					 unsigned long uaddr)
 {
 	unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
 
 	dsb(ishst);
 	__tlbi(vale1is, addr);
 	__tlbi_user(vale1is, addr);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+				  unsigned long uaddr)
+{
+	flush_tlb_page_nosync(vma, uaddr);
 	dsb(ish);
 }
 
-- 
GitLab