提交 48f6e7f6 编写于 作者: Nathan Lynch 提交者: Michael Ellerman

powerpc/pseries: remove cede offline state for CPUs

This effectively reverts commit 3aa565f5 ("powerpc/pseries: Add
hooks to put the CPU into an appropriate offline state"), which added
an offline mode for CPUs which uses the H_CEDE hcall instead of the
architected stop-self RTAS function in order to facilitate "folding"
of dedicated mode processors on PowerVM platforms to achieve energy
savings. This has been the default offline mode since its
introduction.

There's nothing about stop-self that would prevent the hypervisor from
achieving the energy savings available via H_CEDE, so the original
premise of this change appears to be flawed.

I also have encountered the claim that the transition to and from
ceded state is much faster than stop-self/start-cpu. Certainly we
would not want to use stop-self as an *idle* mode. That is what H_CEDE
is for. However, this difference is insignificant in the context of
Linux CPU hotplug, where the latency of an offline or online operation
on current systems is on the order of 100ms, mainly attributable to
all the various subsystems' cpuhp callbacks.

The cede offline mode also prevents accurate accounting, as discussed
before:
https://lore.kernel.org/linuxppc-dev/1571740391-3251-1-git-send-email-ego@linux.vnet.ibm.com/

Unconditionally use stop-self to offline processor threads. This is
the architected method for offlining CPUs on PAPR systems.

The "cede_offline" boot parameter is rendered obsolete.

Removing this code enables the removal of the partition suspend code
which temporarily onlines all present CPUs.

Fixes: 3aa565f5 ("powerpc/pseries: Add hooks to put the CPU into an appropriate offline state")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20200612051238.1007764-2-nathanl@linux.ibm.com
上级 4d24e21c
...@@ -50,13 +50,6 @@ Command Line Switches ...@@ -50,13 +50,6 @@ Command Line Switches
This option is limited to the X86 and S390 architecture. This option is limited to the X86 and S390 architecture.
``cede_offline={"off","on"}``
Use this option to disable/enable putting offlined processors to an extended
``H_CEDE`` state on supported pseries platforms. If nothing is specified,
``cede_offline`` is set to "on".
This option is limited to the PowerPC architecture.
``cpu0_hotplug`` ``cpu0_hotplug``
Allow to shutdown CPU0. Allow to shutdown CPU0.
......
...@@ -35,54 +35,10 @@ ...@@ -35,54 +35,10 @@
#include <asm/topology.h> #include <asm/topology.h>
#include "pseries.h" #include "pseries.h"
#include "offline_states.h"
/* This version can't take the spinlock, because it never returns */ /* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE; static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
static bool cede_offline_enabled __read_mostly = true;
/*
* Enable/disable cede_offline when available.
*/
static int __init setup_cede_offline(char *str)
{
return (kstrtobool(str, &cede_offline_enabled) == 0);
}
__setup("cede_offline=", setup_cede_offline);
enum cpu_state_vals get_cpu_current_state(int cpu)
{
return per_cpu(current_state, cpu);
}
void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
per_cpu(current_state, cpu) = state;
}
enum cpu_state_vals get_preferred_offline_state(int cpu)
{
return per_cpu(preferred_offline_state, cpu);
}
void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
per_cpu(preferred_offline_state, cpu) = state;
}
void set_default_offline_state(int cpu)
{
per_cpu(preferred_offline_state, cpu) = default_offline_state;
}
static void rtas_stop_self(void) static void rtas_stop_self(void)
{ {
static struct rtas_args args; static struct rtas_args args;
...@@ -101,9 +57,7 @@ static void rtas_stop_self(void) ...@@ -101,9 +57,7 @@ static void rtas_stop_self(void)
static void pseries_mach_cpu_die(void) static void pseries_mach_cpu_die(void)
{ {
unsigned int cpu = smp_processor_id();
unsigned int hwcpu = hard_smp_processor_id(); unsigned int hwcpu = hard_smp_processor_id();
u8 cede_latency_hint = 0;
local_irq_disable(); local_irq_disable();
idle_task_exit(); idle_task_exit();
...@@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void) ...@@ -112,49 +66,6 @@ static void pseries_mach_cpu_die(void)
else else
xics_teardown_cpu(); xics_teardown_cpu();
if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
if (ppc_md.suspend_disable_cpu)
ppc_md.suspend_disable_cpu();
cede_latency_hint = 2;
get_lppaca()->idle = 1;
if (!lppaca_shared_proc(get_lppaca()))
get_lppaca()->donate_dedicated_cpu = 1;
while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
while (!prep_irq_for_idle()) {
local_irq_enable();
local_irq_disable();
}
extended_cede_processor(cede_latency_hint);
}
local_irq_disable();
if (!lppaca_shared_proc(get_lppaca()))
get_lppaca()->donate_dedicated_cpu = 0;
get_lppaca()->idle = 0;
if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
unregister_slb_shadow(hwcpu);
hard_irq_disable();
/*
* Call to start_secondary_resume() will not return.
* Kernel stack will be reset and start_secondary()
* will be called to continue the online operation.
*/
start_secondary_resume();
}
}
/* Requested state is CPU_STATE_OFFLINE at this point */
WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);
set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
unregister_slb_shadow(hwcpu); unregister_slb_shadow(hwcpu);
rtas_stop_self(); rtas_stop_self();
...@@ -200,24 +111,13 @@ static void pseries_cpu_die(unsigned int cpu) ...@@ -200,24 +111,13 @@ static void pseries_cpu_die(unsigned int cpu)
int cpu_status = 1; int cpu_status = 1;
unsigned int pcpu = get_hard_smp_processor_id(cpu); unsigned int pcpu = get_hard_smp_processor_id(cpu);
if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) { for (tries = 0; tries < 25; tries++) {
cpu_status = 1; cpu_status = smp_query_cpu_stopped(pcpu);
for (tries = 0; tries < 5000; tries++) { if (cpu_status == QCSS_STOPPED ||
if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) { cpu_status == QCSS_HARDWARE_ERROR)
cpu_status = 0; break;
break; cpu_relax();
}
msleep(1);
}
} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
for (tries = 0; tries < 25; tries++) {
cpu_status = smp_query_cpu_stopped(pcpu);
if (cpu_status == QCSS_STOPPED ||
cpu_status == QCSS_HARDWARE_ERROR)
break;
cpu_relax();
}
} }
if (cpu_status != 0) { if (cpu_status != 0) {
...@@ -359,28 +259,15 @@ static int dlpar_offline_cpu(struct device_node *dn) ...@@ -359,28 +259,15 @@ static int dlpar_offline_cpu(struct device_node *dn)
if (get_hard_smp_processor_id(cpu) != thread) if (get_hard_smp_processor_id(cpu) != thread)
continue; continue;
if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE) if (!cpu_online(cpu))
break; break;
if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) { cpu_maps_update_done();
set_preferred_offline_state(cpu, timed_topology_update(1);
CPU_STATE_OFFLINE); rc = device_offline(get_cpu_device(cpu));
cpu_maps_update_done(); if (rc)
timed_topology_update(1); goto out;
rc = device_offline(get_cpu_device(cpu)); cpu_maps_update_begin();
if (rc)
goto out;
cpu_maps_update_begin();
break;
}
/*
* The cpu is in CPU_STATE_INACTIVE.
* Upgrade it's state to CPU_STATE_OFFLINE.
*/
set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
__cpu_die(cpu);
break; break;
} }
if (cpu == num_possible_cpus()) { if (cpu == num_possible_cpus()) {
...@@ -414,8 +301,6 @@ static int dlpar_online_cpu(struct device_node *dn) ...@@ -414,8 +301,6 @@ static int dlpar_online_cpu(struct device_node *dn)
for_each_present_cpu(cpu) { for_each_present_cpu(cpu) {
if (get_hard_smp_processor_id(cpu) != thread) if (get_hard_smp_processor_id(cpu) != thread)
continue; continue;
BUG_ON(get_cpu_current_state(cpu)
!= CPU_STATE_OFFLINE);
cpu_maps_update_done(); cpu_maps_update_done();
timed_topology_update(1); timed_topology_update(1);
find_and_online_cpu_nid(cpu); find_and_online_cpu_nid(cpu);
...@@ -1013,27 +898,8 @@ static struct notifier_block pseries_smp_nb = { ...@@ -1013,27 +898,8 @@ static struct notifier_block pseries_smp_nb = {
.notifier_call = pseries_smp_notifier, .notifier_call = pseries_smp_notifier,
}; };
#define MAX_CEDE_LATENCY_LEVELS 4
#define CEDE_LATENCY_PARAM_LENGTH 10
#define CEDE_LATENCY_PARAM_MAX_LENGTH \
(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN 45
static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
static int parse_cede_parameters(void)
{
memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
NULL,
CEDE_LATENCY_TOKEN,
__pa(cede_parameters),
CEDE_LATENCY_PARAM_MAX_LENGTH);
}
static int __init pseries_cpu_hotplug_init(void) static int __init pseries_cpu_hotplug_init(void)
{ {
int cpu;
int qcss_tok; int qcss_tok;
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
...@@ -1056,16 +922,8 @@ static int __init pseries_cpu_hotplug_init(void) ...@@ -1056,16 +922,8 @@ static int __init pseries_cpu_hotplug_init(void)
smp_ops->cpu_die = pseries_cpu_die; smp_ops->cpu_die = pseries_cpu_die;
/* Processors can be added/removed only on LPAR */ /* Processors can be added/removed only on LPAR */
if (firmware_has_feature(FW_FEATURE_LPAR)) { if (firmware_has_feature(FW_FEATURE_LPAR))
of_reconfig_notifier_register(&pseries_smp_nb); of_reconfig_notifier_register(&pseries_smp_nb);
cpu_maps_update_begin();
if (cede_offline_enabled && parse_cede_parameters() == 0) {
default_offline_state = CPU_STATE_INACTIVE;
for_each_online_cpu(cpu)
set_default_offline_state(cpu);
}
cpu_maps_update_done();
}
return 0; return 0;
} }
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _OFFLINE_STATES_H_
#define _OFFLINE_STATES_H_
/* Cpu offline states go here */
enum cpu_state_vals {
CPU_STATE_OFFLINE,
CPU_STATE_INACTIVE,
CPU_STATE_ONLINE,
CPU_MAX_OFFLINE_STATES
};
#ifdef CONFIG_HOTPLUG_CPU
extern enum cpu_state_vals get_cpu_current_state(int cpu);
extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
extern void set_default_offline_state(int cpu);
#else
static inline enum cpu_state_vals get_cpu_current_state(int cpu)
{
return CPU_STATE_ONLINE;
}
static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
}
static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
}
static inline void set_default_offline_state(int cpu)
{
}
#endif
extern enum cpu_state_vals get_preferred_offline_state(int cpu);
#endif
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
#include <asm/topology.h> #include <asm/topology.h>
#include "pseries.h" #include "pseries.h"
#include "offline_states.h"
static struct device_node *pmem_node; static struct device_node *pmem_node;
......
...@@ -44,8 +44,6 @@ ...@@ -44,8 +44,6 @@
#include <asm/svm.h> #include <asm/svm.h>
#include "pseries.h" #include "pseries.h"
#include "offline_states.h"
/* /*
* The Primary thread of each non-boot processor was started from the OF client * The Primary thread of each non-boot processor was started from the OF client
...@@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu) ...@@ -108,10 +106,7 @@ static inline int smp_startup_cpu(unsigned int lcpu)
/* Fixup atomic count: it exited inside IRQ handler. */ /* Fixup atomic count: it exited inside IRQ handler. */
task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0; task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count = 0;
#ifdef CONFIG_HOTPLUG_CPU
if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
goto out;
#endif
/* /*
* If the RTAS start-cpu token does not exist then presume the * If the RTAS start-cpu token does not exist then presume the
* cpu is already spinning. * cpu is already spinning.
...@@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu) ...@@ -126,9 +121,6 @@ static inline int smp_startup_cpu(unsigned int lcpu)
return 0; return 0;
} }
#ifdef CONFIG_HOTPLUG_CPU
out:
#endif
return 1; return 1;
} }
...@@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu) ...@@ -143,10 +135,6 @@ static void smp_setup_cpu(int cpu)
vpa_init(cpu); vpa_init(cpu);
cpumask_clear_cpu(cpu, of_spin_mask); cpumask_clear_cpu(cpu, of_spin_mask);
#ifdef CONFIG_HOTPLUG_CPU
set_cpu_current_state(cpu, CPU_STATE_ONLINE);
set_default_offline_state(cpu);
#endif
} }
static int smp_pSeries_kick_cpu(int nr) static int smp_pSeries_kick_cpu(int nr)
...@@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr) ...@@ -163,20 +151,6 @@ static int smp_pSeries_kick_cpu(int nr)
* the processor will continue on to secondary_start * the processor will continue on to secondary_start
*/ */
paca_ptrs[nr]->cpu_start = 1; paca_ptrs[nr]->cpu_start = 1;
#ifdef CONFIG_HOTPLUG_CPU
set_preferred_offline_state(nr, CPU_STATE_ONLINE);
if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
long rc;
unsigned long hcpuid;
hcpuid = get_hard_smp_processor_id(nr);
rc = plpar_hcall_norets(H_PROD, hcpuid);
if (rc != H_SUCCESS)
printk(KERN_ERR "Error: Prod to wake up processor %d "
"Ret= %ld\n", nr, rc);
}
#endif
return 0; return 0;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册