/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>	/* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
#include <asm/topology.h>

#include "pseries.h"
#include "offline_states.h"

/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;

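/*
 * A cpu can be taken down in one of two ways: fully stopped via the
 * RTAS stop-self call (CPU_STATE_OFFLINE), or left ceded to the
 * hypervisor in an extended H_CEDE loop (CPU_STATE_INACTIVE), from
 * which it can be woken cheaply with H_PROD.  preferred_offline_state
 * records which of the two a cpu should enter the next time it is
 * offlined.
 */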
static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
							CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;

static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;

static bool cede_offline_enabled __read_mostly = true;

/*
 * Enable/disable cede_offline when available; e.g. booting with
 * "cede_offline=off" makes offlined cpus stop via RTAS stop-self
 * instead of ceding to the hypervisor.
 */
static int __init setup_cede_offline(char *str)
{
	return (kstrtobool(str, &cede_offline_enabled) == 0);
}

__setup("cede_offline=", setup_cede_offline);

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return per_cpu(current_state, cpu);
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(current_state, cpu) = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return per_cpu(preferred_offline_state, cpu);
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(preferred_offline_state, cpu) = state;
}

void set_default_offline_state(int cpu)
{
	per_cpu(preferred_offline_state, cpu) = default_offline_state;
}

static void rtas_stop_self(void)
{
	static struct rtas_args args;

	local_irq_disable();

	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);

	printk("cpu %u (hwid %u) Ready to die...\n",
	       smp_processor_id(), hard_smp_processor_id());

	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);

	panic("Alas, I survived.\n");
}

static void pseries_mach_cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned int hwcpu = hard_smp_processor_id();
	u8 cede_latency_hint = 0;

	local_irq_disable();
	idle_task_exit();
	if (xive_enabled())
		xive_teardown_cpu();
	else
		xics_teardown_cpu();

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
		if (ppc_md.suspend_disable_cpu)
			ppc_md.suspend_disable_cpu();

		/* Tell the hypervisor we can tolerate a high wakeup latency */
		cede_latency_hint = 2;

		get_lppaca()->idle = 1;
		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 1;

		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
			while (!prep_irq_for_idle()) {
				local_irq_enable();
				local_irq_disable();
			}

			extended_cede_processor(cede_latency_hint);
		}

		local_irq_disable();

		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 0;
		get_lppaca()->idle = 0;

		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
			unregister_slb_shadow(hwcpu);

			hard_irq_disable();
			/*
			 * Call to start_secondary_resume() will not return.
			 * Kernel stack will be reset and start_secondary()
			 * will be called to continue the online operation.
			 */
			start_secondary_resume();
		}
	}

	/* Requested state is CPU_STATE_OFFLINE at this point */
	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);

	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
	unregister_slb_shadow(hwcpu);
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for (;;);
}

static int pseries_cpu_disable(void)
{
	int cpu = smp_processor_id();

	set_cpu_online(cpu, false);
	vdso_data->processorCount--;

	/* fix boot_cpuid here */
	if (cpu == boot_cpuid)
		boot_cpuid = cpumask_any(cpu_online_mask);

	/* FIXME: abstract this to not be platform specific later on */
	if (xive_enabled())
		xive_smp_disable_cpu();
	else
		xics_migrate_irqs_away();
	return 0;
}

/*
 * pseries_cpu_die: Wait for the cpu to die.
 * @cpu: logical processor id of the CPU whose death we're awaiting.
 *
 * This function is called from the context of the thread which is performing
 * the cpu-offline. Here we wait long enough to allow the cpu in
 * question to self-destruct so that the cpu-offline thread can send
 * the CPU_DEAD notifications.
 *
 * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
 * self-destruct.
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		cpu_status = 1;
		/* Poll for up to ~5 seconds for the cpu to enter extended cede */
		for (tries = 0; tries < 5000; tries++) {
			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
				cpu_status = 0;
				break;
			}
			msleep(1);
		}
	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {

		/* Busy-wait briefly for the cpu to reach the RTAS stopped state */
		for (tries = 0; tries < 25; tries++) {
			cpu_status = smp_query_cpu_stopped(pcpu);
			if (cpu_status == QCSS_STOPPED ||
			    cpu_status == QCSS_HARDWARE_ERROR)
				break;
			cpu_relax();
		}
	}

	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca_ptrs[cpu]->cpu_start = 0;
}

/*
 * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case.  We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.  For example, the two threads of an SMT
 * core may take logical ids 6 and 7, but never 7 and 8.
 */
static int pseries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_var_t candidate_mask, tmp;
	int err = -ENOSPC, len, nthreads, i;
	const __be32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
	zalloc_cpumask_var(&tmp, GFP_KERNEL);

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpumask_set_cpu(i, tmp);

	cpu_maps_update_begin();

	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));

	/* Get a bitmap of unoccupied slots. */
	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
	if (cpumask_empty(candidate_mask)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
		       " supports %d logical cpus.\n", np,
		       num_possible_cpus());
		goto out_unlock;
	}

	while (!cpumask_empty(tmp))
		if (cpumask_subset(tmp, candidate_mask))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpumask_shift_left(tmp, tmp, nthreads);

	if (cpumask_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_mask for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu(cpu, tmp) {
		BUG_ON(cpu_present(cpu));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
	}
	err = 0;
out_unlock:
	cpu_maps_update_done();
	free_cpumask_var(candidate_mask);
	free_cpumask_var(tmp);
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			update_numa_cpu_lookup_table(cpu, -1);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();
}

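/*
 * Online every hardware thread listed in the cpu node's
 * "ibm,ppc-interrupt-server#s" property.  The cpu maps lock is dropped
 * around device_online(), which takes it again itself while bringing
 * the cpu up.
 */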
static int dlpar_online_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(get_cpu_current_state(cpu)
					!= CPU_STATE_OFFLINE);
			cpu_maps_update_done();
			timed_topology_update(1);
			find_and_online_cpu_nid(cpu);
			rc = device_online(get_cpu_device(cpu));
			if (rc)
				goto out;
			cpu_maps_update_begin();

			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to online "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;
	bool found;
	int rc;

	/* Assume cpu doesn't exist */
	found = false;

	for_each_child_of_node(parent, child) {
		rc = of_property_read_u32(child, "ibm,my-drc-index",
					  &my_drc_index);
		if (rc)
			continue;

		if (my_drc_index == drc_index) {
			of_node_put(child);
			found = true;
			break;
		}
	}

	return found;
}

static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	bool found = false;
	int rc, index;

	/* Entry 0 of ibm,drc-indexes holds the number of entries, not a
	 * drc index; scanning it here is harmless, as it should never
	 * match a valid cpu drc index.
	 */
	index = 0;
	while (!found) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (drc == drc_index)
			found = true;
	}

	return found;
}

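/*
 * Hot-add sequence: validate the drc index against the "/cpus" node,
 * acquire the DRC, build the cpu node via configure-connector, attach
 * it to the device tree, then online its threads.  Each failing step
 * unwinds the steps that preceded it.
 */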
static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Failed to find CPU root node \"/cpus\"\n");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("CPU with drc index %x already exists\n", drc_index);
		return -EINVAL;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
		return -EINVAL;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
			rc, drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		pr_warn("Failed call to configure-connector, drc index: %x\n",
			drc_index);
		dlpar_release_drc(drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	rc = dlpar_attach_node(dn, parent);

	/* Regardless we are done with parent now */
	of_node_put(parent);

	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
		 drc_index);
	return rc;
}

/*
 * Take down every hardware thread of the cpu node @dn.  A thread
 * already in CPU_STATE_INACTIVE (ceded) is upgraded to
 * CPU_STATE_OFFLINE: it is woken from cede with H_PROD and reaped with
 * __cpu_die(), without going through device_offline().
 */
static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
				break;

			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
				set_preferred_offline_state(cpu,
							    CPU_STATE_OFFLINE);
				cpu_maps_update_done();
				timed_topology_update(1);
				rc = device_offline(get_cpu_device(cpu));
				if (rc)
					goto out;
				cpu_maps_update_begin();
				break;

			}

			/*
			 * The cpu is in CPU_STATE_INACTIVE.
			 * Upgrade its state to CPU_STATE_OFFLINE.
			 */
			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
			BUG_ON(plpar_hcall_norets(H_PROD, thread)
								!= H_SUCCESS);
			__cpu_die(cpu);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

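/*
 * Hot-remove sequence: offline all threads, release the DRC, then
 * detach the node from the device tree.  On failure, the steps already
 * completed are rolled back so the cpu remains usable.
 */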
static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug("Attempting to remove CPU %s, drc index: %x\n",
		 dn->name, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
			drc_index, dn->name, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = dlpar_detach_node(dn);
	if (rc) {
		int saved_rc = rc;

		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
	return 0;
}

static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;
	int rc;

	for_each_node_by_type(dn, "cpu") {
		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
		if (rc)
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}

static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = cpu_drc_index_to_dn(drc_index);
	if (!dn) {
		pr_warn("Cannot find CPU (drc index %x) to remove\n",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);
	return rc;
}

static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
	struct device_node *dn;
	int cpus_found = 0;
	int rc;

	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
	 * remove the last CPU.
	 */
	for_each_node_by_type(dn, "cpu") {
		cpus_found++;

		if (cpus_found > cpus_to_remove) {
			of_node_put(dn);
			break;
		}

		/* Note that cpus_found is always 1 ahead of the index
		 * into the cpu_drcs array, so we use cpus_found - 1
		 */
		rc = of_property_read_u32(dn, "ibm,my-drc-index",
					  &cpu_drcs[cpus_found - 1]);
		if (rc) {
			pr_warn("Error occurred getting drc-index for %s\n",
				dn->name);
			of_node_put(dn);
			return -1;
		}
	}

	if (cpus_found < cpus_to_remove) {
		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
			cpus_found, cpus_to_remove);
	} else if (cpus_found == cpus_to_remove) {
		pr_warn("Cannot remove all CPUs\n");
	}

	return cpus_found;
}

static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
{
	u32 *cpu_drcs;
	int cpus_found;
	int cpus_removed = 0;
	int i, rc;

	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);

	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
	if (cpus_found <= cpus_to_remove) {
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_remove; i++) {
		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
		if (rc)
			break;

		cpus_removed++;
	}

	if (cpus_removed != cpus_to_remove) {
		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");

		for (i = 0; i < cpus_removed; i++)
			dlpar_cpu_add(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
{
	struct device_node *parent;
	int cpus_found = 0;
	int index, rc;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Could not find CPU root node in device tree\n");
		/* cpu_drcs is allocated, and freed on failure, by the caller */
		return -1;
	}

	/* Search the ibm,drc-indexes array for possible CPU drcs to
	 * add. Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
	index = 1;
	while (cpus_found < cpus_to_add) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (dlpar_cpu_exists(parent, drc))
			continue;

		cpu_drcs[cpus_found++] = drc;
	}

	of_node_put(parent);
	return cpus_found;
}

static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
	u32 *cpu_drcs;
	int cpus_added = 0;
	int cpus_found;
	int i, rc;

	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
	if (cpus_found < cpus_to_add) {
		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
			cpus_found, cpus_to_add);
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_add; i++) {
		rc = dlpar_cpu_add(cpu_drcs[i]);
		if (rc)
			break;

		cpus_added++;
	}

	if (cpus_added < cpus_to_add) {
		pr_warn("CPU hot-add failed, removing any added CPUs\n");

		for (i = 0; i < cpus_added; i++)
			dlpar_cpu_remove_by_index(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

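/*
 * Remove and immediately re-add a cpu using the same drc index; used
 * e.g. by the numa topology-update code so that the cpu is
 * re-registered with an updated node assignment.
 */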
int dlpar_cpu_readd(int cpu)
{
	struct device_node *dn;
	struct device *dev;
	u32 drc_index;
	int rc;

	dev = get_cpu_device(cpu);
	dn = dev->of_node;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc)
		return rc;

	rc = dlpar_cpu_remove_by_index(drc_index);
	if (!rc)
		rc = dlpar_cpu_add(drc_index);

	return rc;
}

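/*
 * Entry point for cpu DLPAR requests delivered as hotplug error logs,
 * e.g. hypervisor-initiated or written to /sys/kernel/dlpar; dispatch
 * on the action and on whether the request names a count or a specific
 * drc index.
 */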
int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
	u32 count, drc_index;
	int rc;

	count = hp_elog->_drc_u.drc_count;
	drc_index = hp_elog->_drc_u.drc_index;

	lock_device_hotplug();

	switch (hp_elog->action) {
	case PSERIES_HP_ELOG_ACTION_REMOVE:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_remove_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_remove_by_index(drc_index);
		else
			rc = -EINVAL;
		break;
	case PSERIES_HP_ELOG_ACTION_ADD:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_add_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_add(drc_index);
		else
			rc = -EINVAL;
		break;
	default:
		pr_err("Invalid action (%d) specified\n", hp_elog->action);
		rc = -EINVAL;
		break;
	}

	unlock_device_hotplug();
	return rc;
}

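/*
 * With CONFIG_ARCH_CPU_PROBE_RELEASE these hooks back the generic
 * /sys/devices/system/cpu/probe and .../release files: writing a drc
 * index (e.g. "0x10000010", a made-up value) to probe hot-adds that
 * cpu, and writing a cpu node's device tree path to release hot-removes
 * it.
 */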
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE

static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc_index;
	int rc;

	rc = kstrtou32(buf, 0, &drc_index);
	if (rc)
		return -EINVAL;

	rc = dlpar_cpu_add(drc_index);

	return rc ? rc : count;
}

static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *dn;
	u32 drc_index;
	int rc;

	dn = of_find_node_by_path(buf);
	if (!dn)
		return -EINVAL;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc) {
		of_node_put(dn);
		return -EINVAL;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);

	return rc ? rc : count;
}

#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	int err = 0;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		err = pseries_add_processor(rd->dn);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pseries_remove_processor(rd->dn);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};

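/*
 * Buffer for the PAPR "CEDE latency settings information" system
 * parameter (token 45).  A successful ibm,get-system-parameter call is
 * taken to mean extended cede latency hints are supported, which makes
 * CPU_STATE_INACTIVE the default offline state (see
 * pseries_cpu_hotplug_init() below).
 */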
#define MAX_CEDE_LATENCY_LEVELS		4
#define	CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

static int parse_cede_parameters(void)
{
	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			 NULL,
			 CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters),
			 CEDE_LATENCY_PARAM_MAX_LENGTH);
}

static int __init pseries_cpu_hotplug_init(void)
{
	int cpu;
	int qcss_tok;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	rtas_stop_self_token = rtas_token("stop-self");
	qcss_tok = rtas_token("query-cpu-stopped-state");

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		of_reconfig_notifier_register(&pseries_smp_nb);
		cpu_maps_update_begin();
		if (cede_offline_enabled && parse_cede_parameters() == 0) {
			default_offline_state = CPU_STATE_INACTIVE;
			for_each_online_cpu(cpu)
				set_default_offline_state(cpu);
		}
		cpu_maps_update_done();
	}

	return 0;
}
machine_arch_initcall(pseries, pseries_cpu_hotplug_init);