Commit 06bae869 authored by openeuler-ci-bot, committed by Gitee

!1852 arm64: support batched/deferred tlb shootdown during page reclamation/migration

Merge Pull Request from: @ci-robot 
 
PR sync from: Jinjiang Tu <tujinjiang@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/EP3I3LIWZJK6QHJPE6SA4KNXJO2ACWCI/ 
Support batched/deferred tlb shootdown during page reclamation/migration
for arm64.
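
For orientation, here is a minimal stand-alone C sketch of the protocol this
series wires up (toy names throughout; only arch_tlbbatch_should_defer(),
arch_tlbbatch_add_pending(), arch_flush_tlb_batched_pending() and
arch_tlbbatch_flush() are the real hooks, everything else is illustrative):
pages are queued for invalidation while the rmap walk runs and the
synchronisation cost is paid once at the end. On arm64 the per-page TLBI is
broadcast by hardware at queue time, so the final flush reduces to a dsb(ish);
on an IPI-based architecture such as x86 the batch instead accumulates a CPU
mask.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy user-space model of deferred TLB shootdown (not kernel code).  The
 * struct mirrors the role of tlbflush_unmap_batch: remember that a flush is
 * still owed and, on IPI-based architectures, which CPUs it must reach.
 */
struct toy_tlb_batch {
	unsigned long pending_cpus;	/* IPI-based arch: CPUs to flush later */
	bool flush_required;
};

/* arm64-like add_pending: the invalidation is broadcast immediately ... */
static void toy_add_pending_broadcast(struct toy_tlb_batch *b, unsigned long uaddr)
{
	printf("tlbi vale1is, va=%#lx  (broadcast by hardware)\n", uaddr);
	b->flush_required = true;
}

/* ... so the final flush only has to wait for those TLBIs to complete */
static void toy_flush_broadcast(struct toy_tlb_batch *b)
{
	if (!b->flush_required)
		return;
	printf("dsb ish  (wait for the invalidations queued above)\n");
	b->flush_required = false;
}

/* IPI-based variant: only record which CPUs must be interrupted later */
static void toy_add_pending_ipi(struct toy_tlb_batch *b, unsigned long cpu_mask)
{
	b->pending_cpus |= cpu_mask;
	b->flush_required = true;
}

static void toy_flush_ipi(struct toy_tlb_batch *b)
{
	if (!b->flush_required)
		return;
	printf("send flush IPIs to cpumask %#lx\n", b->pending_cpus);
	b->pending_cpus = 0;
	b->flush_required = false;
}

int main(void)
{
	struct toy_tlb_batch batch = { 0 };

	/* Reclaim unmaps several pages, deferring the shootdown each time. */
	toy_add_pending_broadcast(&batch, 0x400000UL);
	toy_add_pending_broadcast(&batch, 0x401000UL);
	toy_add_pending_broadcast(&batch, 0x402000UL);

	/* One synchronisation at the end instead of one per unmapped page. */
	toy_flush_broadcast(&batch);

	/* The IPI-based flavour, for contrast. */
	toy_add_pending_ipi(&batch, 0x6UL);	/* CPUs 1 and 2 */
	toy_flush_ipi(&batch);
	return 0;
}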

Changelog:
v6:
 - move kabi fix code into a separate patch.

v5:
 - adjust the tab count for macro DEFINE_TLB_UBC

v4:
 - define macro DEFINE_TLB_UBC

v3:
 - fix kabi breakage for task_struct->tlb_ubc

v2:
 - fix kabi breakage for mm_struct->tlb_flush_batched

Anshuman Khandual (1):
  mm/tlbbatch: introduce arch_tlbbatch_should_defer()

Barry Song (2):
  mm/tlbbatch: rename and extend some functions
  arm64: support batched/deferred tlb shootdown during page
    reclamation/migration

Jinjiang Tu (1):
  mm/tlbbatch: fix kabi change

Yicong Yang (1):
  mm/tlbbatch: introduce arch_flush_tlb_batched_pending()


-- 
2.25.1
 
https://gitee.com/openeuler/kernel/issues/I7U78A 
 
Link: https://gitee.com/openeuler/kernel/pulls/1852

Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
@@ -9,7 +9,7 @@
| alpha: | TODO |
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
| arm64: | ok |
| c6x: | .. |
| csky: | TODO |
| h8300: | .. |
@@ -78,6 +78,7 @@ config ARM64
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ARCH_ARM64_TLBBATCH_H
#define _ARCH_ARM64_TLBBATCH_H
struct arch_tlbflush_unmap_batch {
/*
* For arm64, HW can do tlb shootdown, so we don't
* need to record cpumask for sending IPI
*/
};
#endif /* _ARCH_ARM64_TLBBATCH_H */
@@ -254,17 +254,23 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ish);
}
static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
static inline void __flush_tlb_page_nosync(struct mm_struct *mm,
unsigned long uaddr)
{
unsigned long addr;
dsb(ishst);
addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
addr = __TLBI_VADDR(uaddr, ASID(mm));
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
}
static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
unsigned long uaddr)
{
return __flush_tlb_page_nosync(vma->vm_mm, uaddr);
}
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long uaddr)
{
@@ -272,6 +278,38 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
dsb(ish);
}
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
/*
* TLB flush deferral is not required on systems which are affected by
* ARM64_WORKAROUND_REPEAT_TLBI, as __tlbi()/__tlbi_user() implementation
* will have two consecutive TLBI instructions with a dsb(ish) in between
* defeating the purpose (i.e save overall 'dsb ish' cost).
*/
if (unlikely(cpus_have_const_cap(ARM64_WORKAROUND_REPEAT_TLBI)))
return false;
#endif
return true;
}
static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm,
unsigned long uaddr)
{
__flush_tlb_page_nosync(mm, uaddr);
}
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
dsb(ish);
}
static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
dsb(ish);
}
/*
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
@@ -239,6 +239,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
bool should_defer = false;
/* If remote CPUs need to be flushed then defer batch the flush */
if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
should_defer = true;
put_cpu();
return should_defer;
}
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
/*
@@ -250,13 +262,19 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
return atomic64_inc_return(&mm->context.tlb_gen);
}
static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm)
static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
struct mm_struct *mm,
unsigned long uaddr)
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
}
static inline void arch_flush_tlb_batched_pending(struct mm_struct *mm)
{
flush_tlb_mm(mm);
}
extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
#endif /* !MODULE */
@@ -598,7 +598,7 @@ struct mm_struct {
* moving a PROT_NONE or PROT_NUMA mapped page.
*/
atomic_t tlb_flush_pending;
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
#if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH) && !defined(CONFIG_ARM64)
/* See flush_tlb_batched_pending() */
bool tlb_flush_batched;
#endif
@@ -620,6 +620,8 @@ struct mm_struct {
#if defined(CONFIG_X86_64)
KABI_USE(1, struct mm_struct_extend *mm_extend)
#elif defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH) && defined(CONFIG_ARM64)
KABI_USE(1, bool tlb_flush_batched)
#else
KABI_RESERVE(1)
#endif
@@ -74,11 +74,11 @@ struct page_frag {
/* Track pages that require TLB flushes */
struct tlbflush_unmap_batch {
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
#if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH) && !defined(CONFIG_ARM64)
/*
* The arch code makes the following promise: generic code can modify a
* PTE, then call arch_tlbbatch_add_mm() (which internally provides all
* needed barriers), then call arch_tlbbatch_flush(), and the entries
* PTE, then call arch_tlbbatch_add_pending() (which internally provides
* all needed barriers), then call arch_tlbbatch_flush(), and the entries
* will be flushed on all CPUs by the time that arch_tlbbatch_flush()
* returns.
*/
@@ -96,4 +96,27 @@ struct tlbflush_unmap_batch {
#endif
};
#if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH) && defined(CONFIG_ARM64)
struct tlbflush_unmap_batch_arm64 {
/*
* The arch code makes the following promise: generic code can modify a
* PTE, then call arch_tlbbatch_add_pending() (which internally provides
* all needed barriers), then call arch_tlbbatch_flush(), and the entries
* will be flushed on all CPUs by the time that arch_tlbbatch_flush()
* returns.
*/
struct arch_tlbflush_unmap_batch arch;
/* True if a flush is needed. */
bool flush_required;
/*
* If true then the PTE was dirty when unmapped. The entry must be
* flushed before IO is initiated or a stale TLB entry potentially
* allows an update without redirtying the page.
*/
bool writable;
};
#endif
#endif /* _LINUX_MM_TYPES_TASK_H */
@@ -685,6 +685,9 @@ struct task_struct_resvd {
#ifdef CONFIG_MMU
struct timer_list oom_reaper_timer;
#endif
#if defined(CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH) && defined(CONFIG_ARM64)
struct tlbflush_unmap_batch_arm64 tlb_ubc;
#endif
};
struct task_struct {
@@ -596,6 +596,14 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
}
#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
#ifdef CONFIG_ARM64
#define DEFINE_TLB_UBC(name) struct tlbflush_unmap_batch_arm64 *name = \
&current->_resvd->tlb_ubc
#else
#define DEFINE_TLB_UBC(name) struct tlbflush_unmap_batch *name = &current->tlb_ubc
#endif
/*
* Flush TLB entries for recently unmapped pages from remote CPUs. It is
* important if a PTE was dirty when it was unmapped that it's flushed
@@ -604,7 +612,7 @@ void page_unlock_anon_vma_read(struct anon_vma *anon_vma)
*/
void try_to_unmap_flush(void)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
DEFINE_TLB_UBC(tlb_ubc);
if (!tlb_ubc->flush_required)
return;
@@ -617,17 +625,18 @@ void try_to_unmap_flush(void)
/* Flush iff there are potentially writable TLB entries that can race with IO */
void try_to_unmap_flush_dirty(void)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
DEFINE_TLB_UBC(tlb_ubc);
if (tlb_ubc->writable)
try_to_unmap_flush();
}
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable,
unsigned long uaddr)
{
struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
DEFINE_TLB_UBC(tlb_ubc);
arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
arch_tlbbatch_add_pending(&tlb_ubc->arch, mm, uaddr);
tlb_ubc->flush_required = true;
/*
@@ -652,17 +661,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
*/
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
bool should_defer = false;
if (!(flags & TTU_BATCH_FLUSH))
return false;
/* If remote CPUs need to be flushed then defer batch the flush */
if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
should_defer = true;
put_cpu();
return should_defer;
return arch_tlbbatch_should_defer(mm);
}
/*
@@ -683,7 +685,7 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
void flush_tlb_batched_pending(struct mm_struct *mm)
{
if (data_race(mm->tlb_flush_batched)) {
flush_tlb_mm(mm);
arch_flush_tlb_batched_pending(mm);
/*
* Do not allow the compiler to re-order the clearing of
@@ -694,7 +696,8 @@ void flush_tlb_batched_pending(struct mm_struct *mm)
}
}
#else
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable,
unsigned long uaddr)
{
}
@@ -1573,7 +1576,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
*/
pteval = ptep_get_and_clear(mm, address, pvmw.pte);
set_tlb_ubc_flush_pending(mm, pte_dirty(pteval));
set_tlb_ubc_flush_pending(mm, pte_dirty(pteval), address);
} else {
pteval = ptep_clear_flush(vma, address, pvmw.pte);
}