Commit c6fa39d7 authored by Sang Yan, committed by Yang Yingliang

arm64: smp: Add support for cpu park

hulk inclusion
category: feature
bugzilla: 48159
CVE: N/A

------------------------------

Introduce a CPU PARK feature to save the time spent taking cpus
down and bringing them back up during kexec: each cpu may cost
about 250ms to go down and 30ms to come up.

As a result, for 128 cores, taking cpus down and up during kexec
costs more than 30 seconds (128 x (250ms + 30ms) is roughly 36s);
with 256 cores or more it only gets worse.

CPU PARK is a state in which a cpu stays powered on, spinning in a
loop and polling for an exit condition, such as a write to its exit
address.

A block of memory is reserved and filled with the cpu park text
section plus an exit address and park-magic flag for each cpu; in
this implementation one park section is reserved per cpu core.

Cpus enter the park state instead of going down in machine_shutdown(),
and leave the park state in smp_init() instead of being brought up.

Layout of one cpu park section in the pre-reserved memory block:
+--------------+
+ exit address +
+--------------+
+ park magic   +
+--------------+
+ park codes   +
+      .       +
+      .       +
+      .       +
+--------------+
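
For reference, this layout maps directly onto the cpu_park_section
struct added by this patch (a sketch of the same layout in C; offsets
follow from the member order: exit at 0, magic at 8, park code after):

struct cpu_park_section {
	unsigned long exit;	/* secondary_entry is written here */
	unsigned long magic;	/* holds PARK_MAGIC while the cpu is parked */
	char text[0];		/* copy of the do_cpu_park code */
};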
Signed-off-by: Sang Yan <sangyan@huawei.com>
Reviewed-by: Wang Xiongfeng <wangxiongfeng2@huawei.com>
Acked-by: Hanjun Guo <guohanjun@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Parent f5da1c16
...@@ -1014,6 +1014,18 @@ config CRASH_DUMP
For more details see Documentation/kdump/kdump.txt
config ARM64_CPU_PARK
bool "Support CPU PARK on kexec"
depends on SMP
depends on KEXEC_CORE
help
This enables support for the CPU PARK feature, which saves
the time of taking cpus down and back up across kexec.
CPU park is a state used during kexec: instead of dying
before jumping to the new kernel, a cpu spins in a loop
and jumps out of it to the new kernel entry in smp_init().
config XEN_DOM0
def_bool y
depends on XEN
......
...@@ -31,6 +31,11 @@
/* 2M alignment for crash kernel regions */
#define CRASH_ALIGN SZ_2M
#ifdef CONFIG_ARM64_CPU_PARK
/* CPU park state flag: "park" */
#define PARK_MAGIC 0x7061726b
#endif
#ifndef __ASSEMBLY__
/**
...@@ -96,6 +101,8 @@ static inline void crash_prepare_suspend(void) {}
static inline void crash_post_resume(void) {}
#endif
void machine_kexec_mask_interrupts(void);
#endif /* __ASSEMBLY__ */
#endif
...@@ -153,6 +153,22 @@ extern bool smp_crash_stop_failed(void);
void ipi_set_nmi_prio(void __iomem *base, u8 prio);
#ifdef CONFIG_ARM64_CPU_PARK
#define PARK_SECTION_SIZE 1024
struct cpu_park_info {
/* Physical address of reserved park memory. */
unsigned long start;
/* Reserved park memory length; should be PARK_SECTION_SIZE * NR_CPUS. */
unsigned long len;
/* Virtual address of reserved park memory. */
unsigned long start_v;
};
extern struct cpu_park_info park_info;
extern void enter_cpu_park(unsigned long text, unsigned long exit);
extern void do_cpu_park(unsigned long exit);
extern int kexec_smp_send_park(void);
#endif
#endif /* ifndef __ASSEMBLY__ */
#endif /* ifndef __ASM_SMP_H */
...@@ -54,6 +54,7 @@ arm64-obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
arm64-obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
arm64-obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o \
cpu-reset.o
arm64-obj-$(CONFIG_ARM64_CPU_PARK) += cpu-park.o
arm64-obj-$(CONFIG_ASCEND_BOOT_CRASH_KERNEL) += kexec_mailbox.o
arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o
arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o
......
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* CPU park routines
*
* Copyright (C) 2020 Huawei Technologies Co., Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kexec.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
.text
.pushsection .idmap.text, "awx"
/* cpu park helper in idmap section */
ENTRY(enter_cpu_park)
/* Clear sctlr_el1 flags. */
mrs x12, sctlr_el1
mov_q x13, SCTLR_ELx_FLAGS
bic x12, x12, x13
pre_disable_mmu_workaround
msr sctlr_el1, x12 /* disable mmu */
isb
mov x18, x0
mov x0, x1 /* secondary_entry addr */
br x18 /* call do_cpu_park of each cpu */
ENDPROC(enter_cpu_park)
.popsection
ENTRY(do_cpu_park)
ldr x18, =PARK_MAGIC /* magic number "park" */
add x1, x0, #8
str x18, [x1] /* set on-park flag */
dc civac, x1 /* flush cache of "park" */
dsb nsh
isb
.Lloop:
wfe
isb
ldr x19, [x0]
cmp x19, #0 /* test secondary_entry */
b.eq .Lloop
ic iallu /* invalidate the local I-cache */
dsb nsh
isb
br x19 /* jump to secondary_entry */
ENDPROC(do_cpu_park)
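
For readers of the assembly above, do_cpu_park behaves roughly like the
following illustrative C (a sketch only: the real routine must be
position-independent, runs from the copied park text with the MMU off,
and performs the cache maintenance elided here):

/* section[0] is the exit address, section[1] the park-magic flag */
static void do_cpu_park_sketch(volatile unsigned long *section)
{
	section[1] = 0x7061726b;		/* PARK_MAGIC: report "parked" */
	while (section[0] == 0)
		;				/* wfe: wait for write_park_exit() + sev() */
	((void (*)(void))section[0])();		/* br x19: jump to secondary_entry */
}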
...@@ -221,7 +221,7 @@ void machine_kexec(struct kimage *kimage)
BUG(); /* Should never get here. */
}
void machine_kexec_mask_interrupts(void)
{
unsigned int i;
struct irq_desc *desc;
......
...@@ -157,6 +157,10 @@ void arch_cpu_idle_dead(void)
*/
void machine_shutdown(void)
{
#ifdef CONFIG_ARM64_CPU_PARK
if (kexec_smp_send_park() == 0)
return;
#endif
disable_nonboot_cpus();
}
......
...@@ -98,6 +98,167 @@ static inline int op_cpu_kill(unsigned int cpu)
}
#endif
#ifdef CONFIG_ARM64_CPU_PARK
struct cpu_park_section {
unsigned long exit; /* exit address of park loop */
unsigned long magic; /* magic representing park state */
char text[0]; /* text section of park */
};
static int mmap_cpu_park_mem(void)
{
if (!park_info.start)
return -ENOMEM;
if (park_info.start_v)
return 0;
park_info.start_v = (unsigned long)__ioremap(park_info.start,
park_info.len,
PAGE_KERNEL_EXEC);
if (!park_info.start_v) {
pr_warn("map park memory failed.");
return -ENOMEM;
}
return 0;
}
static inline unsigned long cpu_park_section_v(unsigned int cpu)
{
return park_info.start_v + PARK_SECTION_SIZE * (cpu - 1);
}
static inline unsigned long cpu_park_section_p(unsigned int cpu)
{
return park_info.start + PARK_SECTION_SIZE * (cpu - 1);
}
/*
* Write the secondary_entry to exit section of park state.
* Then the secondary cpu will jump straight into the kernel
* by the secondary_entry.
*/
static int write_park_exit(unsigned int cpu)
{
struct cpu_park_section *park_section;
unsigned long *park_exit;
unsigned long *park_text;
if (mmap_cpu_park_mem() != 0)
return -EPERM;
park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
park_exit = &park_section->exit;
park_text = (unsigned long *)park_section->text;
pr_debug("park_text 0x%lx : 0x%lx, do_cpu_park text 0x%lx : 0x%lx",
(unsigned long)park_text, *park_text,
(unsigned long)do_cpu_park,
*(unsigned long *)do_cpu_park);
/*
* Test the first 8 bytes to determine
* whether the cpu park exit needs to be written.
*/
if (*park_text == *(unsigned long *)do_cpu_park) {
writeq_relaxed(__pa_symbol(secondary_entry), park_exit);
__flush_dcache_area((__force void *)park_exit,
sizeof(unsigned long));
flush_icache_range((unsigned long)park_exit,
(unsigned long)(park_exit + 1));
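/* Wake the parked cpu out of wfe so it re-reads the exit address. */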
sev();
dsb(sy);
isb();
pr_debug("Write cpu %u secondary entry 0x%lx to 0x%lx.",
cpu, *park_exit, (unsigned long)park_exit);
pr_info("Boot cpu %u from PARK state.", cpu);
return 0;
}
return -EPERM;
}
/* Install the cpu park section for the given cpu. */
static int install_cpu_park(unsigned int cpu)
{
struct cpu_park_section *park_section;
unsigned long *park_exit;
unsigned long *park_magic;
unsigned long park_text_len;
park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
pr_debug("Install cpu park on cpu %u park exit 0x%lx park text 0x%lx",
cpu, (unsigned long)park_section,
(unsigned long)(park_section->text));
park_exit = &park_section->exit;
park_magic = &park_section->magic;
park_text_len = PARK_SECTION_SIZE - sizeof(struct cpu_park_section);
*park_exit = 0UL;
*park_magic = 0UL;
memcpy((void *)park_section->text, do_cpu_park, park_text_len);
__flush_dcache_area((void *)park_section, PARK_SECTION_SIZE);
return 0;
}
static int uninstall_cpu_park(unsigned int cpu)
{
unsigned long park_section;
if (mmap_cpu_park_mem() != 0)
return -EPERM;
park_section = cpu_park_section_v(cpu);
memset((void *)park_section, 0, PARK_SECTION_SIZE);
__flush_dcache_area((void *)park_section, PARK_SECTION_SIZE);
return 0;
}
static int cpu_wait_park(unsigned int cpu)
{
long timeout;
struct cpu_park_section *park_section;
volatile unsigned long *park_magic;
park_section = (struct cpu_park_section *)cpu_park_section_v(cpu);
park_magic = &park_section->magic;
timeout = USEC_PER_SEC;
while (*park_magic != PARK_MAGIC && timeout--)
udelay(1);
if (timeout > 0)
pr_debug("cpu %u park done.", cpu);
else
pr_err("cpu %u park failed.", cpu);
return *park_magic == PARK_MAGIC;
}
static void cpu_park(unsigned int cpu)
{
unsigned long park_section_p;
unsigned long park_exit_phy;
unsigned long do_park;
typeof(enter_cpu_park) *park;
park_section_p = cpu_park_section_p(cpu);
park_exit_phy = park_section_p;
pr_debug("Go to park cpu %u exit address 0x%lx", cpu, park_exit_phy);
do_park = park_section_p + sizeof(struct cpu_park_section);
park = (void *)__pa_symbol(enter_cpu_park);
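/* enter_cpu_park is in .idmap.text; install the idmap and enter it by physical address. */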
cpu_install_idmap();
park(do_park, park_exit_phy);
unreachable();
}
#endif
/*
* Boot a secondary CPU, and assign it the specified idle task.
...@@ -105,6 +266,10 @@ static inline int op_cpu_kill(unsigned int cpu)
*/
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
#ifdef CONFIG_ARM64_CPU_PARK
if (write_park_exit(cpu) == 0)
return 0;
#endif
if (cpu_ops[cpu]->cpu_boot)
return cpu_ops[cpu]->cpu_boot(cpu);
...@@ -153,6 +318,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
return ret;
}
#ifdef CONFIG_ARM64_CPU_PARK
uninstall_cpu_park(cpu);
#endif
secondary_data.task = NULL;
secondary_data.stack = NULL;
status = READ_ONCE(secondary_data.status);
...@@ -863,6 +1032,20 @@ static void ipi_cpu_stop(unsigned int cpu)
local_daif_mask();
sdei_mask_local_cpu();
#ifdef CONFIG_ARM64_CPU_PARK
/*
* Go to the cpu park state if park memory is available.
* Otherwise fall through to cpu die.
*/
if (kexec_in_progress && park_info.start_v) {
machine_kexec_mask_interrupts();
cpu_park(cpu);
if (cpu_ops[cpu]->cpu_die)
cpu_ops[cpu]->cpu_die(cpu);
}
#endif
while (1)
cpu_relax();
}
...@@ -1037,6 +1220,45 @@ void smp_send_stop(void)
sdei_mask_local_cpu();
}
#ifdef CONFIG_ARM64_CPU_PARK
int kexec_smp_send_park(void)
{
unsigned long cpu;
if (WARN_ON(!kexec_in_progress)) {
pr_crit("%s called not in kexec progress.", __func__);
return -EPERM;
}
if (mmap_cpu_park_mem() != 0) {
pr_info("no cpuparkmem, goto normal way.");
return -EPERM;
}
local_irq_disable();
if (num_online_cpus() > 1) {
cpumask_t mask;
cpumask_copy(&mask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &mask);
for_each_cpu(cpu, &mask)
install_cpu_park(cpu);
smp_cross_call(&mask, IPI_CPU_STOP);
/* Wait for other CPUs to park */
for_each_cpu(cpu, &mask)
cpu_wait_park(cpu);
pr_info("smp park other cpus done\n");
}
sdei_mask_local_cpu();
return 0;
}
#endif
#ifdef CONFIG_KEXEC_CORE
void crash_smp_send_stop(void)
{
......
...@@ -515,6 +515,57 @@ static int __init parse_memmap_opt(char *str)
}
early_param("memmap", parse_memmap_opt);
#ifdef CONFIG_ARM64_CPU_PARK
struct cpu_park_info park_info = {
.start = 0,
.len = PARK_SECTION_SIZE * NR_CPUS,
.start_v = 0,
};
static int __init parse_park_mem(char *p)
{
if (!p)
return 0;
park_info.start = PAGE_ALIGN(memparse(p, NULL));
if (park_info.start == 0)
pr_info("cpu park mem params[%s]", p);
return 0;
}
early_param("cpuparkmem", parse_park_mem);
static int __init reserve_park_mem(void)
{
if (park_info.start == 0 || park_info.len == 0)
return 0;
park_info.start = PAGE_ALIGN(park_info.start);
park_info.len = PAGE_ALIGN(park_info.len);
if (!memblock_is_region_memory(park_info.start, park_info.len)) {
pr_warn("cannot reserve park mem: region is not memory!");
goto out;
}
if (memblock_is_region_reserved(park_info.start, park_info.len)) {
pr_warn("cannot reserve park mem: region overlaps reserved memory!");
goto out;
}
memblock_remove(park_info.start, park_info.len);
pr_info("cpu park mem reserved: 0x%016lx - 0x%016lx (%ld MB)",
park_info.start, park_info.start + park_info.len,
park_info.len >> 20);
return 0;
out:
park_info.start = 0;
park_info.len = 0;
return -EINVAL;
}
#endif
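
For illustration, the reserved region is selected with the cpuparkmem=
early parameter parsed above; it takes a physical start address (the
value below is hypothetical), while the length is fixed at
PARK_SECTION_SIZE * NR_CPUS:

cpuparkmem=0x200000000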
void __init arm64_memblock_init(void)
{
const s64 linear_region_size = -(s64)PAGE_OFFSET;
...@@ -633,6 +684,19 @@ void __init arm64_memblock_init(void)
reserve_pin_memory_res();
/*
* Reserve park memory before crashkernel and quick kexec,
* because park memory must be specified by address, while
* crashkernel and quickkexec may be specified by length only,
* in which case a suitable region is found automatically.
*
* Reserving park memory first is therefore better, but it may
* cause the crashkernel or quickkexec reservation to fail.
*/
#ifdef CONFIG_ARM64_CPU_PARK
reserve_park_mem();
#endif
reserve_crashkernel();
reserve_elfcorehdr();
......