提交 aa316fa1 编写于 作者: W Wei Li 提交者: Zheng Zengkai

arm64: clear_page: Add new implementation of clear_page() by STNP

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I3ZN72
CVE: NA

---------------------------

Currently, clear_page() clear the page through 'dc zva', while the page may
not be used immediately mostly, so the cache flush is in vain.

Add an optimized implementation of clear_page() by 'stnp' for performance
promotion. It can be switched by the boot cmdline 'mm.use_clearpage_stnp'.

In the hugetlb clear test, we gained about 53.7% performance improvement:

Set mm.use_clearpage_stnp = 0          |  Set mm.use_clearpage_stnp = 1
[root@localhost liwei]# ./a.out 50 20  |  [root@localhost liwei]# ./a.out 50 20
size is 50 Gib, test times is 20       |  size is 50 Gib, test times is 20
test_time[0] : use 8.438046 sec        |  test_time[0] : use 3.722682 sec
test_time[1] : use 8.028493 sec        |  test_time[1] : use 3.640274 sec
test_time[2] : use 8.646547 sec        |  test_time[2] : use 4.095052 sec
test_time[3] : use 8.122490 sec        |  test_time[3] : use 3.998446 sec
test_time[4] : use 8.053038 sec        |  test_time[4] : use 4.084259 sec
test_time[5] : use 8.843512 sec        |  test_time[5] : use 3.933871 sec
test_time[6] : use 8.308906 sec        |  test_time[6] : use 3.934334 sec
test_time[7] : use 8.093817 sec        |  test_time[7] : use 3.869142 sec
test_time[8] : use 8.303504 sec        |  test_time[8] : use 3.902916 sec
test_time[9] : use 8.178336 sec        |  test_time[9] : use 3.541885 sec
test_time[10] : use 8.003625 sec       |  test_time[10] : use 3.595554 sec
test_time[11] : use 8.163807 sec       |  test_time[11] : use 3.583813 sec
test_time[12] : use 8.267464 sec       |  test_time[12] : use 3.863033 sec
test_time[13] : use 8.055326 sec       |  test_time[13] : use 3.770953 sec
test_time[14] : use 8.246986 sec       |  test_time[14] : use 3.808006 sec
test_time[15] : use 8.546992 sec       |  test_time[15] : use 3.653194 sec
test_time[16] : use 8.727256 sec       |  test_time[16] : use 3.722395 sec
test_time[17] : use 8.288951 sec       |  test_time[17] : use 3.683508 sec
test_time[18] : use 8.019322 sec       |  test_time[18] : use 4.253087 sec
test_time[19] : use 8.250685 sec       |  test_time[19] : use 4.082845 sec
hugetlb test end!                      |  hugetlb test end!
Signed-off-by: NWei Li <liwei391@huawei.com>
Reviewed-by: NHanjun Guo <guohanjun@huawei.com>
Signed-off-by: NZheng Zengkai <zhengzengkai@huawei.com>
上级 585de8f5
......@@ -68,7 +68,8 @@
#define ARM64_WORKAROUND_1508412 58
#define ARM64_HAS_MPAM 59
#define ARM64_WORKAROUND_HISI_HIP08_RU_PREFETCH 60
#define ARM64_CLEARPAGE_STNP 61
#define ARM64_NCAPS 61
#define ARM64_NCAPS 62
#endif /* __ASM_CPUCAPS_H */
......@@ -1736,6 +1736,34 @@ cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap)
return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT);
}
static bool use_clearpage_stnp;
static int __init early_use_clearpage_stnp(char *p)
{
return strtobool(p, &use_clearpage_stnp);
}
early_param("mm.use_clearpage_stnp", early_use_clearpage_stnp);
static bool has_mor_nontemporal(const struct arm64_cpu_capabilities *entry)
{
/*
* List of CPUs which have memory ordering ruled non-temporal
* load and store.
*/
static const struct midr_range cpus[] = {
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
{},
};
return is_midr_in_range_list(read_cpuid_id(), cpus);
}
static bool can_clearpage_use_stnp(const struct arm64_cpu_capabilities *entry,
int scope)
{
return use_clearpage_stnp && has_mor_nontemporal(entry);
}
static const struct arm64_cpu_capabilities arm64_features[] = {
{
.desc = "GIC system register CPU interface",
......@@ -2158,6 +2186,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_mte,
},
#endif /* CONFIG_ARM64_MTE */
{
.desc = "Clear Page by STNP",
.capability = ARM64_CLEARPAGE_STNP,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = can_clearpage_use_stnp,
},
{},
};
......
......@@ -7,6 +7,25 @@
#include <linux/const.h>
#include <asm/assembler.h>
#include <asm/page.h>
#include <asm/alternative.h>
/*
* Clear page @dest
*
* Parameters:
* x0 - dest
*/
SYM_FUNC_START(clear_page_stnp)
.align 6
1: stnp xzr, xzr, [x0]
stnp xzr, xzr, [x0, #0x10]
stnp xzr, xzr, [x0, #0x20]
stnp xzr, xzr, [x0, #0x30]
add x0, x0, #0x40
tst x0, #(PAGE_SIZE - 1)
b.ne 1b
ret
SYM_FUNC_END(clear_page_stnp)
/*
* Clear page @dest
......@@ -15,6 +34,8 @@
* x0 - dest
*/
SYM_FUNC_START(clear_page)
ALTERNATIVE("nop", "b clear_page_stnp", ARM64_CLEARPAGE_STNP)
mrs x1, dczid_el0
and w1, w1, #0xf
mov x2, #4
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册