From ca25518c942bfabf17faf7a7407cb7629ee67088 Mon Sep 17 00:00:00 2001
From: Kemeng Shi
Date: Wed, 21 Jul 2021 17:43:09 +0800
Subject: [PATCH] x86: hugepage: use nt copy hugepage to AEP in x86

euleros inclusion
category: feature
feature: etmem
bugzilla: 48246

-------------------------------------------------

Add a /proc/sys/vm/hugepage_nocache_copy switch. Set it to 1 to copy
hugepages with the movnt SSE instruction if the CPU supports it. Set it
to 0 to copy hugepages as usual.

Signed-off-by: Kemeng Shi
Reviewed-by: louhongxiang
Signed-off-by: Zheng Zengkai
---
 arch/x86/include/asm/page_64.h | 10 +++++
 arch/x86/lib/Makefile          |  1 +
 arch/x86/lib/copy_highpages.c  | 74 ++++++++++++++++++++++++++++++++++
 arch/x86/lib/copy_page_64.S    | 73 +++++++++++++++++++++++++++++++++
 include/linux/highmem.h        | 14 +++++++
 kernel/sysctl.c                | 11 +++++
 mm/migrate.c                   |  6 +--
 7 files changed, 184 insertions(+), 5 deletions(-)
 create mode 100644 arch/x86/lib/copy_highpages.c

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 939b1cff4a7b..9b02293ac617 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -56,6 +56,16 @@ static inline void clear_page(void *page)
 
 void copy_page(void *to, void *from);
 
+void copy_page_nocache(void *to, void *from);
+void copy_page_nocache_barrir(void);
+
+struct page;
+struct ctl_table;
+# define __HAVE_ARCH_COPY_HUGEPAGES 1
+void copy_highpages(struct page *to, struct page *from, int nr_pages);
+int sysctl_hugepage_nocache_copy(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos);
+
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index bad4dee4f0e4..ab0d91b808be 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -70,4 +70,5 @@ else
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o
         lib-y += cmpxchg16b_emu.o
+        lib-y += copy_highpages.o
 endif
diff --git a/arch/x86/lib/copy_highpages.c b/arch/x86/lib/copy_highpages.c
new file mode 100644
index 000000000000..74a94703f09b
--- /dev/null
+++ b/arch/x86/lib/copy_highpages.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * accelerate copying pages to pmem with non-temporal stores
+ */
+#include <linux/capability.h>
+#include <linux/highmem.h>
+#include <linux/sched.h>
+#include <linux/sysctl.h>
+
+DEFINE_STATIC_KEY_FALSE(hugepage_nocache_copy);
+
+static void set_hugepage_nocache_copy(bool enabled)
+{
+	if (enabled)
+		static_branch_enable(&hugepage_nocache_copy);
+	else
+		static_branch_disable(&hugepage_nocache_copy);
+}
+
+int sysctl_hugepage_nocache_copy(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct ctl_table t;
+	int err;
+	int state = static_branch_unlikely(&hugepage_nocache_copy);
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	t = *table;
+	t.data = &state;
+	err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+	if (err < 0)
+		return err;
+	if (write)
+		set_hugepage_nocache_copy(state);
+	return err;
+}
+
+static void copy_highpages_nocache(struct page *to, struct page *from, int nr_pages)
+{
+	char *vfrom, *vto;
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		vfrom = kmap_atomic(from);
+		vto = kmap_atomic(to);
+		copy_page_nocache(vto, vfrom);
+		kunmap_atomic(vto);
+		kunmap_atomic(vfrom);
+		to++;
+		from++;
+	}
+	copy_page_nocache_barrir();
+}
+
+static void copy_highpages_cache(struct page *to, struct page *from, int nr_pages)
+{
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		copy_highpage(to + i, from + i);
+	}
+}
+
+void copy_highpages(struct page *to, struct page *from, int nr_pages)
+{
+	if (static_branch_unlikely(&hugepage_nocache_copy) && is_node_pmem(page_to_nid(to)))
+		return copy_highpages_nocache(to, from, nr_pages);
+
+	return copy_highpages_cache(to, from, nr_pages);
+}
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 2402d4c489d2..16ca196d349b 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -87,3 +87,76 @@ SYM_FUNC_START_LOCAL(copy_page_regs)
 	addq	$2*8, %rsp
 	ret
 SYM_FUNC_END(copy_page_regs)
+
+SYM_FUNC_START(copy_page_nocache)
+	ALTERNATIVE "jmp copy_page", "", X86_FEATURE_XMM2
+	subq	$2*8, %rsp
+	movq	%rbx, (%rsp)
+	movq	%r12, 1*8(%rsp)
+
+	movl	$(4096/64)-5, %ecx
+	.p2align 4
+.LoopNT64:
+	dec	%rcx
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
+
+	prefetcht0 5*64(%rsi)
+
+	movnti	%rax, 0x8*0(%rdi)
+	movnti	%rbx, 0x8*1(%rdi)
+	movnti	%rdx, 0x8*2(%rdi)
+	movnti	%r8,  0x8*3(%rdi)
+	movnti	%r9,  0x8*4(%rdi)
+	movnti	%r10, 0x8*5(%rdi)
+	movnti	%r11, 0x8*6(%rdi)
+	movnti	%r12, 0x8*7(%rdi)
+
+	leaq	64 (%rsi), %rsi
+	leaq	64 (%rdi), %rdi
+
+	jnz	.LoopNT64
+
+	movl	$5, %ecx
+	.p2align 4
+.LoopNT2:
+	decl	%ecx
+
+	movq	0x8*0(%rsi), %rax
+	movq	0x8*1(%rsi), %rbx
+	movq	0x8*2(%rsi), %rdx
+	movq	0x8*3(%rsi), %r8
+	movq	0x8*4(%rsi), %r9
+	movq	0x8*5(%rsi), %r10
+	movq	0x8*6(%rsi), %r11
+	movq	0x8*7(%rsi), %r12
+
+	movnti	%rax, 0x8*0(%rdi)
+	movnti	%rbx, 0x8*1(%rdi)
+	movnti	%rdx, 0x8*2(%rdi)
+	movnti	%r8,  0x8*3(%rdi)
+	movnti	%r9,  0x8*4(%rdi)
+	movnti	%r10, 0x8*5(%rdi)
+	movnti	%r11, 0x8*6(%rdi)
+	movnti	%r12, 0x8*7(%rdi)
+
+	leaq	64(%rdi), %rdi
+	leaq	64(%rsi), %rsi
+	jnz	.LoopNT2
+
+	movq	(%rsp), %rbx
+	movq	1*8(%rsp), %r12
+	addq	$2*8, %rsp
+	ret
+SYM_FUNC_END(copy_page_nocache)
+
+SYM_FUNC_START(copy_page_nocache_barrir)
+	ALTERNATIVE "", "sfence", X86_FEATURE_XMM2
+	ret
+SYM_FUNC_END(copy_page_nocache_barrir)
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 14e6202ce47f..db8fc7a9631e 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -345,4 +345,18 @@ static inline void copy_highpage(struct page *to, struct page *from)
 
 #endif
 
+#ifndef __HAVE_ARCH_COPY_HUGEPAGES
+
+static inline void copy_highpages(struct page *to, struct page *from, int nr_pages)
+{
+	int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		cond_resched();
+		copy_highpage(to + i, from + i);
+	}
+}
+
+#endif /* __HAVE_ARCH_COPY_HUGEPAGES */
+
 #endif /* _LINUX_HIGHMEM_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5aeb9b26aaa0..4fbc106aa195 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -3147,6 +3147,17 @@ static struct ctl_table vm_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+#if defined(CONFIG_X86_64)
+	{
+		.procname	= "hugepage_nocache_copy",
+		.data		= NULL, /* filled in by handler */
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0600,
+		.proc_handler	= sysctl_hugepage_nocache_copy,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
 	{ }
 };
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 7982256a5125..465a227d13e7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -577,7 +577,6 @@ static void __copy_gigantic_page(struct page *dst, struct page *src,
 
 static void copy_huge_page(struct page *dst, struct page *src)
 {
-	int i;
 	int nr_pages;
 
 	if (PageHuge(src)) {
@@ -595,10 +594,7 @@ static void copy_huge_page(struct page *dst, struct page *src)
 		nr_pages = thp_nr_pages(src);
 	}
 
-	for (i = 0; i < nr_pages; i++) {
-		cond_resched();
-		copy_highpage(dst + i, src + i);
-	}
+	copy_highpages(dst, src, nr_pages);
 }
 
 /*
-- 
GitLab
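
For reference, below is a minimal userspace sketch of how the new switch could be exercised. The proc path /proc/sys/vm/hugepage_nocache_copy, the accepted 0/1 range, and the root-only 0600 mode come from the patch above; the helper name and the surrounding program are illustrative assumptions, not part of the patch.

/*
 * Illustrative sketch only: toggle the hugepage_nocache_copy switch
 * added by the patch above. The proc path and accepted values (0 or 1)
 * come from the patch; this helper itself is hypothetical.
 */
#include <stdio.h>

/* hypothetical helper, not part of the kernel patch */
static int toggle_hugepage_nocache_copy(int enable)
{
	/* the file is mode 0600 and the handler checks CAP_SYS_ADMIN, so run as root */
	FILE *f = fopen("/proc/sys/vm/hugepage_nocache_copy", "w");
	int ret;

	if (!f)
		return -1;
	ret = fprintf(f, "%d\n", enable ? 1 : 0) < 0 ? -1 : 0;
	if (fclose(f) != 0)
		ret = -1;
	return ret;
}

int main(void)
{
	/* enable non-temporal hugepage copy before triggering migration to pmem */
	if (toggle_hugepage_nocache_copy(1) != 0) {
		perror("hugepage_nocache_copy");
		return 1;
	}
	return 0;
}

Reading the file back returns the current state (0 or 1), since the handler seeds the temporary ctl_table data from the static key before calling proc_dointvec_minmax().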