From aa316fa10f0d125e2ff88c2cafa2c23ed9b96bb5 Mon Sep 17 00:00:00 2001 From: Wei Li <liwei391@huawei.com> Date: Fri, 2 Jul 2021 11:12:53 +0800 Subject: [PATCH] arm64: clear_page: Add new implementation of clear_page() by STNP hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZN72 CVE: NA --------------------------- Currently, clear_page() clear the page through 'dc zva', while the page may not be used immediately mostly, so the cache flush is in vain. Add an optimized implementation of clear_page() by 'stnp' for performance promotion. It can be switched by the boot cmdline 'mm.use_clearpage_stnp'. In the hugetlb clear test, we gained about 53.7% performance improvement: Set mm.use_clearpage_stnp = 0 | Set mm.use_clearpage_stnp = 1 [root@localhost liwei]# ./a.out 50 20 | [root@localhost liwei]# ./a.out 50 20 size is 50 Gib, test times is 20 | size is 50 Gib, test times is 20 test_time[0] : use 8.438046 sec | test_time[0] : use 3.722682 sec test_time[1] : use 8.028493 sec | test_time[1] : use 3.640274 sec test_time[2] : use 8.646547 sec | test_time[2] : use 4.095052 sec test_time[3] : use 8.122490 sec | test_time[3] : use 3.998446 sec test_time[4] : use 8.053038 sec | test_time[4] : use 4.084259 sec test_time[5] : use 8.843512 sec | test_time[5] : use 3.933871 sec test_time[6] : use 8.308906 sec | test_time[6] : use 3.934334 sec test_time[7] : use 8.093817 sec | test_time[7] : use 3.869142 sec test_time[8] : use 8.303504 sec | test_time[8] : use 3.902916 sec test_time[9] : use 8.178336 sec | test_time[9] : use 3.541885 sec test_time[10] : use 8.003625 sec | test_time[10] : use 3.595554 sec test_time[11] : use 8.163807 sec | test_time[11] : use 3.583813 sec test_time[12] : use 8.267464 sec | test_time[12] : use 3.863033 sec test_time[13] : use 8.055326 sec | test_time[13] : use 3.770953 sec test_time[14] : use 8.246986 sec | test_time[14] : use 3.808006 sec test_time[15] : use 8.546992 sec | test_time[15] : use 3.653194 sec test_time[16] : use 8.727256 sec | test_time[16] : use 3.722395 sec test_time[17] : use 8.288951 sec | test_time[17] : use 3.683508 sec test_time[18] : use 8.019322 sec | test_time[18] : use 4.253087 sec test_time[19] : use 8.250685 sec | test_time[19] : use 4.082845 sec hugetlb test end! | hugetlb test end! Signed-off-by: Wei Li <liwei391@huawei.com> Reviewed-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpufeature.c | 34 ++++++++++++++++++++++++++++++++ arch/arm64/lib/clear_page.S | 21 ++++++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index e7d2bdc92acf..47f8fae7ecdb 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -68,7 +68,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_MPAM 59 #define ARM64_WORKAROUND_HISI_HIP08_RU_PREFETCH 60 +#define ARM64_CLEARPAGE_STNP 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d8e1bdb5abed..15b49edfe2c8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1736,6 +1736,34 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap) return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT); } +static bool use_clearpage_stnp; + +static int __init early_use_clearpage_stnp(char *p) +{ + return strtobool(p, &use_clearpage_stnp); +} +early_param("mm.use_clearpage_stnp", early_use_clearpage_stnp); + +static bool has_mor_nontemporal(const struct arm64_cpu_capabilities *entry) +{ + /* + * List of CPUs which have memory ordering ruled non-temporal + * load and store. + */ + static const struct midr_range cpus[] = { + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + {}, + }; + + return is_midr_in_range_list(read_cpuid_id(), cpus); +} + +static bool can_clearpage_use_stnp(const struct arm64_cpu_capabilities *entry, + int scope) +{ + return use_clearpage_stnp && has_mor_nontemporal(entry); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -2158,6 +2186,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_mte, }, #endif /* CONFIG_ARM64_MTE */ + { + .desc = "Clear Page by STNP", + .capability = ARM64_CLEARPAGE_STNP, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = can_clearpage_use_stnp, + }, {}, }; diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 073acbf02a7c..39ad3ee1860e 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -7,6 +7,25 @@ #include <linux/const.h> #include <asm/assembler.h> #include <asm/page.h> +#include <asm/alternative.h> + +/* + * Clear page @dest + * + * Parameters: + * x0 - dest + */ +SYM_FUNC_START(clear_page_stnp) + .align 6 +1: stnp xzr, xzr, [x0] + stnp xzr, xzr, [x0, #0x10] + stnp xzr, xzr, [x0, #0x20] + stnp xzr, xzr, [x0, #0x30] + add x0, x0, #0x40 + tst x0, #(PAGE_SIZE - 1) + b.ne 1b + ret +SYM_FUNC_END(clear_page_stnp) /* * Clear page @dest @@ -15,6 +34,8 @@ * x0 - dest */ SYM_FUNC_START(clear_page) + ALTERNATIVE("nop", "b clear_page_stnp", ARM64_CLEARPAGE_STNP) + mrs x1, dczid_el0 and w1, w1, #0xf mov x2, #4 -- GitLab