/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <asm/system.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/pSeries_reconfig.h>
#include "xics.h"
#include "plpar_wrappers.h"
#include "offline_states.h"

/* This version can't take the spinlock, because it never returns */
static struct rtas_args rtas_stop_self_args = {
	.token = RTAS_UNKNOWN_SERVICE,
	.nargs = 0,
	.nret = 1,
	.rets = &rtas_stop_self_args.args[0],
};

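/*
 * Per-cpu bookkeeping for the offline path: preferred_offline_state is
 * what the offline operation wants the dying cpu to end up doing (stop-self
 * or an extended cede loop), current_state is what the dying cpu has
 * actually reached, and cede_offline_enabled ("cede_offline=" boot
 * parameter) selects which of the two is the default.
 */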
static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
							CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;

static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;

static int cede_offline_enabled __read_mostly = 1;

/*
 * Enable/disable the use of extended cede for offlined cpus via the
 * "cede_offline={on,off}" kernel command line option.
 */
static int __init setup_cede_offline(char *str)
{
	if (!strcmp(str, "off"))
		cede_offline_enabled = 0;
	else if (!strcmp(str, "on"))
		cede_offline_enabled = 1;
	else
		return 0;
	return 1;
}

__setup("cede_offline=", setup_cede_offline);

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return per_cpu(current_state, cpu);
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(current_state, cpu) = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return per_cpu(preferred_offline_state, cpu);
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(preferred_offline_state, cpu) = state;
}

void set_default_offline_state(int cpu)
{
	per_cpu(preferred_offline_state, cpu) = default_offline_state;
}

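/* Final act of a dying cpu: the stop-self RTAS call does not return. */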
static void rtas_stop_self(void)
{
	struct rtas_args *args = &rtas_stop_self_args;

	local_irq_disable();

	BUG_ON(args->token == RTAS_UNKNOWN_SERVICE);

	printk("cpu %u (hwid %u) Ready to die...\n",
	       smp_processor_id(), hard_smp_processor_id());
	enter_rtas(__pa(args));

	panic("Alas, I survived.\n");
}

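/*
 * ppc_md.cpu_die hook, run on the dying cpu itself.  Depending on the
 * preferred offline state it either parks the cpu in an extended cede
 * loop (CPU_STATE_INACTIVE), from which it can be brought back online
 * via start_secondary_resume(), or tears down and stops itself through
 * RTAS stop-self (CPU_STATE_OFFLINE).
 */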
static void pseries_mach_cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned int hwcpu = hard_smp_processor_id();
	u8 cede_latency_hint = 0;

	local_irq_disable();
	idle_task_exit();
	xics_teardown_cpu();

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
		cede_latency_hint = 2;

		get_lppaca()->idle = 1;
		if (!get_lppaca()->shared_proc)
			get_lppaca()->donate_dedicated_cpu = 1;

		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
			extended_cede_processor(cede_latency_hint);
		}

		if (!get_lppaca()->shared_proc)
			get_lppaca()->donate_dedicated_cpu = 0;
		get_lppaca()->idle = 0;

		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
			unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));

			/*
			 * Call to start_secondary_resume() will not return.
			 * Kernel stack will be reset and start_secondary()
			 * will be called to continue the online operation.
			 */
			start_secondary_resume();
		}
	}

	/* Requested state is CPU_STATE_OFFLINE at this point */
	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);

	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
	unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for(;;);
}

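/*
 * Runs on the cpu being offlined before it goes down: mark it offline,
 * update the vdso processor count, move boot_cpuid off it if necessary
 * and migrate its interrupts to the remaining online cpus.
 */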
static int pseries_cpu_disable(void)
{
	int cpu = smp_processor_id();

	set_cpu_online(cpu, false);
	vdso_data->processorCount--;

	/* fix boot_cpuid here */
	if (cpu == boot_cpuid)
		boot_cpuid = any_online_cpu(cpu_online_map);

	/* FIXME: abstract this to not be platform specific later on */
	xics_migrate_irqs_away();
	return 0;
}

/*
 * pseries_cpu_die: Wait for the cpu to die.
 * @cpu: logical processor id of the CPU whose death we're awaiting.
 *
 * This function is called from the context of the thread which is performing
 * the cpu-offline. Here we wait for long enough to allow the cpu in question
 * to self-destruct so that the cpu-offline thread can send the CPU_DEAD
 * notifications.
 *
 * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
 * self-destruct.
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

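	/*
	 * A cpu parking itself in extended cede reports CPU_STATE_INACTIVE
	 * through current_state from its own context, so just poll for that;
	 * a cpu told to stop itself has to be queried with the RTAS
	 * query-cpu-stopped-state call instead.
	 */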
	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		cpu_status = 1;
		for (tries = 0; tries < 1000; tries++) {
			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
				cpu_status = 0;
				break;
			}
			cpu_relax();
		}
	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {

		for (tries = 0; tries < 25; tries++) {
			cpu_status = smp_query_cpu_stopped(pcpu);
			if (cpu_status == QCSS_STOPPED ||
			    cpu_status == QCSS_HARDWARE_ERROR)
				break;
			cpu_relax();
		}
	}

	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].cpu_start = 0;
}

/*
 * Update cpu_present_map and paca(s) for a new cpu node.  The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case.  We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
 */
static int pseries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_t candidate_map, tmp = CPU_MASK_NONE;
	int err = -ENOSPC, len, nthreads, i;
	const u32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpu_set(i, tmp);

	cpu_maps_update_begin();

	BUG_ON(!cpus_subset(cpu_present_map, cpu_possible_map));

	/* Get a bitmap of unoccupied slots. */
	cpus_xor(candidate_map, cpu_possible_map, cpu_present_map);
	if (cpus_empty(candidate_map)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %s; this system configuration"
		       " supports %d logical cpus.\n", np->full_name,
		       cpus_weight(cpu_possible_map));
		goto out_unlock;
	}

	/* Slide the nthreads-wide block of thread bits through the candidate
	 * map until it lands on a completely unoccupied range.
	 */
	while (!cpus_empty(tmp))
		if (cpus_subset(tmp, candidate_map))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpus_shift_left(tmp, tmp, nthreads);

	if (cpus_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_map for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu_mask(cpu, tmp) {
		BUG_ON(cpu_isset(cpu, cpu_present_map));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, *intserv++);
	}
	err = 0;
out_unlock:
	cpu_maps_update_done();
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const u32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != intserv[i])
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			break;
		}
		if (cpu == NR_CPUS)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", intserv[i]);
	}
	cpu_maps_update_done();
}

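/*
 * Reconfiguration notifier: keeps cpu_present_map and the pacas in sync
 * when cpu device-tree nodes are added or removed at runtime.
 */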
static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *node)
{
	int err = NOTIFY_OK;

	switch (action) {
	case PSERIES_RECONFIG_ADD:
		if (pseries_add_processor(node))
			err = NOTIFY_BAD;
		break;
	case PSERIES_RECONFIG_REMOVE:
		pseries_remove_processor(node);
		break;
	default:
		err = NOTIFY_DONE;
		break;
	}
	return err;
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};

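/*
 * Buffer and token for the "ibm,get-system-parameter" RTAS call used to
 * read the platform's CEDE latency settings (parameter token 45, up to
 * four latency levels of CEDE_LATENCY_PARAM_LENGTH bytes each).  Only
 * the call's return code is used, to decide whether extended cede is
 * available as an offline state.
 */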
#define MAX_CEDE_LATENCY_LEVELS		4
#define	CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

static int parse_cede_parameters(void)
{
	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			 NULL,
			 CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters),
			 CEDE_LATENCY_PARAM_MAX_LENGTH);
}

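/*
 * Wire up the hotplug hooks at boot: bail out on MPIC systems, look up
 * the stop-self and query-cpu-stopped-state RTAS tokens, and, on LPAR,
 * register for device-tree reconfiguration events.  If the firmware
 * exposes CEDE latency settings and "cede_offline" has not been turned
 * off, cpus default to the extended-cede offline state.
 */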
static int __init pseries_cpu_hotplug_init(void)
{
	struct device_node *np;
	const char *typep;
	int cpu;
	int qcss_tok;

	for_each_node_by_name(np, "interrupt-controller") {
		typep = of_get_property(np, "compatible", NULL);
		if (strstr(typep, "open-pic")) {
			of_node_put(np);

			printk(KERN_INFO "CPU Hotplug not supported on "
				"systems using MPIC\n");
			return 0;
		}
	}

	rtas_stop_self_args.token = rtas_token("stop-self");
	qcss_tok = rtas_token("query-cpu-stopped-state");

	if (rtas_stop_self_args.token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		pSeries_reconfig_notifier_register(&pseries_smp_nb);
		cpu_maps_update_begin();
		if (cede_offline_enabled && parse_cede_parameters() == 0) {
			default_offline_state = CPU_STATE_INACTIVE;
			for_each_online_cpu(cpu)
				set_default_offline_state(cpu);
		}
		cpu_maps_update_done();
	}

	return 0;
}
arch_initcall(pseries_cpu_hotplug_init);