/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>	/* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>
#include <asm/topology.h>

#include "pseries.h"
#include "offline_states.h"

/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;

static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
							CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;

static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;

static bool cede_offline_enabled __read_mostly = true;

/*
 * Enable/disable cede_offline when available.
 */
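/*
 * Example (illustrative): booting with "cede_offline=off" makes offlined
 * CPUs stop via RTAS stop-self instead of ceding to the hypervisor;
 * kstrtobool() accepts the usual on/off, y/n and 1/0 spellings.
 */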
static int __init setup_cede_offline(char *str)
{
	return (kstrtobool(str, &cede_offline_enabled) == 0);
}

__setup("cede_offline=", setup_cede_offline);

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return per_cpu(current_state, cpu);
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(current_state, cpu) = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return per_cpu(preferred_offline_state, cpu);
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(preferred_offline_state, cpu) = state;
}

void set_default_offline_state(int cpu)
{
	per_cpu(preferred_offline_state, cpu) = default_offline_state;
}

static void rtas_stop_self(void)
{
	static struct rtas_args args;

	local_irq_disable();

	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);

	printk("cpu %u (hwid %u) Ready to die...\n",
	       smp_processor_id(), hard_smp_processor_id());

	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);

	panic("Alas, I survived.\n");
}

static void pseries_mach_cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned int hwcpu = hard_smp_processor_id();
	u8 cede_latency_hint = 0;

	local_irq_disable();
	idle_task_exit();
	if (xive_enabled())
		xive_teardown_cpu();
	else
		xics_teardown_cpu();

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
		if (ppc_md.suspend_disable_cpu)
			ppc_md.suspend_disable_cpu();

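		/*
		 * A nonzero cede latency hint asks the hypervisor for a
		 * deeper sleep than the normal idle cede; hint 2 marks this
		 * thread as tolerating the longer wakeup latency of an
		 * offline (CPU_STATE_INACTIVE) processor.
		 */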
		cede_latency_hint = 2;

		get_lppaca()->idle = 1;
		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 1;

		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
			while (!prep_irq_for_idle()) {
				local_irq_enable();
				local_irq_disable();
			}

			extended_cede_processor(cede_latency_hint);
		}

		local_irq_disable();

		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 0;
		get_lppaca()->idle = 0;

		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
			unregister_slb_shadow(hwcpu);

			hard_irq_disable();
			/*
			 * Call to start_secondary_resume() will not return.
			 * Kernel stack will be reset and start_secondary()
			 * will be called to continue the online operation.
			 */
			start_secondary_resume();
		}
	}

	/* Requested state is CPU_STATE_OFFLINE at this point */
	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);

	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
	unregister_slb_shadow(hwcpu);
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for(;;);
}

static int pseries_cpu_disable(void)
{
	int cpu = smp_processor_id();

	set_cpu_online(cpu, false);
	vdso_data->processorCount--;

	/* fix boot_cpuid here */
	if (cpu == boot_cpuid)
		boot_cpuid = cpumask_any(cpu_online_mask);

	/* FIXME: abstract this to not be platform specific later on */
	if (xive_enabled())
		xive_smp_disable_cpu();
	else
		xics_migrate_irqs_away();
	return 0;
}

/*
 * pseries_cpu_die: Wait for the cpu to die.
 * @cpu: logical processor id of the CPU whose death we're awaiting.
 *
 * This function is called from the context of the thread which is performing
 * the cpu-offline. Here we wait for long enough to allow the cpu in question
 * to self-destruct so that the cpu-offline thread can send the CPU_DEAD
 * notifications.
 *
 * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
 * self-destruct.
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		cpu_status = 1;
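		/* Wait up to ~5 seconds (5000 x 1ms) for the cede to happen. */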
		for (tries = 0; tries < 5000; tries++) {
			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
				cpu_status = 0;
				break;
			}
			msleep(1);
		}
	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
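		/* RTAS stop-self should complete quickly; busy-wait briefly. */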

		for (tries = 0; tries < 25; tries++) {
			cpu_status = smp_query_cpu_stopped(pcpu);
			if (cpu_status == QCSS_STOPPED ||
			    cpu_status == QCSS_HARDWARE_ERROR)
				break;
			cpu_relax();
		}
	}

	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].cpu_start = 0;
}

/*
 * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case.  We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
 */
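/*
 * Illustrative example: with nthreads == 2, CPUs 0-3 present and 8
 * possible, candidate_mask below is {4..7}; tmp starts as {0,1} and is
 * shifted left nthreads bits at a time until it lies entirely within
 * candidate_mask, i.e. at {4,5}, keeping sibling threads adjacent.
 */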
static int pseries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_var_t candidate_mask, tmp;
	int err = -ENOSPC, len, nthreads, i;
	const __be32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
	zalloc_cpumask_var(&tmp, GFP_KERNEL);

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpumask_set_cpu(i, tmp);

	cpu_maps_update_begin();

	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));

	/* Get a bitmap of unoccupied slots. */
	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
	if (cpumask_empty(candidate_mask)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
		       " supports %d logical cpus.\n", np,
		       num_possible_cpus());
		goto out_unlock;
	}

	while (!cpumask_empty(tmp))
		if (cpumask_subset(tmp, candidate_mask))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpumask_shift_left(tmp, tmp, nthreads);

	if (cpumask_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_mask for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu(cpu, tmp) {
		BUG_ON(cpu_present(cpu));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
	}
	err = 0;
out_unlock:
	cpu_maps_update_done();
	free_cpumask_var(candidate_mask);
	free_cpumask_var(tmp);
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			update_numa_cpu_lookup_table(cpu, -1);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();
}

extern int find_and_online_cpu_nid(int cpu);

static int dlpar_online_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(get_cpu_current_state(cpu)
					!= CPU_STATE_OFFLINE);
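			/*
			 * device_online() ends up in cpu_up(), which takes
			 * the cpu maps lock itself, so drop it first.
			 */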
			cpu_maps_update_done();
			timed_topology_update(1);
			find_and_online_cpu_nid(cpu);
			rc = device_online(get_cpu_device(cpu));
			if (rc)
				goto out;
			cpu_maps_update_begin();

			break;
		}
		if (cpu == num_possible_cpus())
			printk(KERN_WARNING "Could not find cpu to online "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;
	bool found;
	int rc;

	/* Assume cpu doesn't exist */
	found = false;

	for_each_child_of_node(parent, child) {
		rc = of_property_read_u32(child, "ibm,my-drc-index",
					  &my_drc_index);
		if (rc)
			continue;

		if (my_drc_index == drc_index) {
			of_node_put(child);
			found = true;
			break;
		}
	}

	return found;
}

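/*
 * Note: entry 0 of "ibm,drc-indexes" is the element count, not a drc
 * index (see find_dlpar_cpus_to_add() below); scanning from index 0
 * here is harmless in practice, as the count will not match a real
 * CPU drc index.
 */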
static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	bool found = false;
	int rc, index;

	index = 0;
	while (!found) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (drc == drc_index)
			found = true;
	}

	return found;
}

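/*
 * Hot-add sequence: acquire the DRC, fetch the new cpu node via the
 * configure-connector RTAS call, attach it to the device tree, then
 * online its threads. Each error path below unwinds whatever steps
 * had already succeeded.
 */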
static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Failed to find CPU root node \"/cpus\"\n");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("CPU with drc index %x already exists\n", drc_index);
		return -EINVAL;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
		return -EINVAL;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
			rc, drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		pr_warn("Failed call to configure-connector, drc index: %x\n",
			drc_index);
		dlpar_release_drc(drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	rc = dlpar_attach_node(dn, parent);

	/* Regardless we are done with parent now */
	of_node_put(parent);

	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
		 drc_index);
	return rc;
}

static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
				break;

			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
				set_preferred_offline_state(cpu,
							    CPU_STATE_OFFLINE);
				cpu_maps_update_done();
				timed_topology_update(1);
				rc = device_offline(get_cpu_device(cpu));
				if (rc)
					goto out;
				cpu_maps_update_begin();
				break;

			}

			/*
			 * The cpu is in CPU_STATE_INACTIVE.
			 * Upgrade its state to CPU_STATE_OFFLINE.
			 */
			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
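			/*
			 * H_PROD wakes the thread out of its cede loop in
			 * pseries_mach_cpu_die(); it then notices the new
			 * preferred state and calls RTAS stop-self.
			 */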
			BUG_ON(plpar_hcall_norets(H_PROD, thread)
								!= H_SUCCESS);
			__cpu_die(cpu);
			break;
		}
		if (cpu == num_possible_cpus())
			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug("Attempting to remove CPU %s, drc index: %x\n",
		 dn->name, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
			drc_index, dn->name, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = dlpar_detach_node(dn);
	if (rc) {
		int saved_rc = rc;

		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
	return 0;
}

static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;
	int rc;

	for_each_node_by_type(dn, "cpu") {
		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
		if (rc)
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}

static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = cpu_drc_index_to_dn(drc_index);
	if (!dn) {
		pr_warn("Cannot find CPU (drc index %x) to remove\n",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);
	return rc;
}

static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
	struct device_node *dn;
	int cpus_found = 0;
	int rc;

	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
	 * remove the last CPU.
	 */
	for_each_node_by_type(dn, "cpu") {
		cpus_found++;

		if (cpus_found > cpus_to_remove) {
			of_node_put(dn);
			break;
		}

		/* Note that cpus_found is always 1 ahead of the index
		 * into the cpu_drcs array, so we use cpus_found - 1
		 */
		rc = of_property_read_u32(dn, "ibm,my-drc-index",
					  &cpu_drcs[cpus_found - 1]);
		if (rc) {
			pr_warn("Error occurred getting drc-index for %s\n",
				dn->name);
			of_node_put(dn);
			return -1;
		}
	}

	if (cpus_found < cpus_to_remove) {
		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
			cpus_found, cpus_to_remove);
	} else if (cpus_found == cpus_to_remove) {
		pr_warn("Cannot remove all CPUs\n");
	}

	return cpus_found;
}

static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
{
	u32 *cpu_drcs;
	int cpus_found;
	int cpus_removed = 0;
	int i, rc;

	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);

	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
	if (cpus_found <= cpus_to_remove) {
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_remove; i++) {
		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
		if (rc)
			break;

		cpus_removed++;
	}

	if (cpus_removed != cpus_to_remove) {
		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");

		for (i = 0; i < cpus_removed; i++)
			dlpar_cpu_add(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
{
	struct device_node *parent;
	int cpus_found = 0;
	int index, rc;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Could not find CPU root node in device tree\n");
		/* do not free cpu_drcs here; the caller owns and frees it */
		return -1;
	}

	/* Search the ibm,drc-indexes array for possible CPU drcs to
	 * add. Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
	index = 1;
	while (cpus_found < cpus_to_add) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (dlpar_cpu_exists(parent, drc))
			continue;

		cpu_drcs[cpus_found++] = drc;
	}

	of_node_put(parent);
	return cpus_found;
}

static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
	u32 *cpu_drcs;
	int cpus_added = 0;
	int cpus_found;
	int i, rc;

	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
	if (cpus_found < cpus_to_add) {
		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
			cpus_found, cpus_to_add);
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_add; i++) {
		rc = dlpar_cpu_add(cpu_drcs[i]);
		if (rc)
			break;

		cpus_added++;
	}

	if (cpus_added < cpus_to_add) {
		pr_warn("CPU hot-add failed, removing any added CPUs\n");

		for (i = 0; i < cpus_added; i++)
			dlpar_cpu_remove_by_index(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
	u32 count, drc_index;
	int rc;

	count = hp_elog->_drc_u.drc_count;
	drc_index = hp_elog->_drc_u.drc_index;

	lock_device_hotplug();

	switch (hp_elog->action) {
	case PSERIES_HP_ELOG_ACTION_REMOVE:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_remove_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_remove_by_index(drc_index);
		else
			rc = -EINVAL;
		break;
	case PSERIES_HP_ELOG_ACTION_ADD:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_add_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_add(drc_index);
		else
			rc = -EINVAL;
		break;
	default:
		pr_err("Invalid action (%d) specified\n", hp_elog->action);
		rc = -EINVAL;
		break;
	}

	unlock_device_hotplug();
	return rc;
}

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE

static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc_index;
	int rc;

	rc = kstrtou32(buf, 0, &drc_index);
	if (rc)
		return -EINVAL;

	rc = dlpar_cpu_add(drc_index);

	return rc ? rc : count;
}

static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *dn;
	u32 drc_index;
	int rc;

	dn = of_find_node_by_path(buf);
	if (!dn)
		return -EINVAL;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc) {
		of_node_put(dn);
		return -EINVAL;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);

	return rc ? rc : count;
}

#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

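/*
 * Keep cpu_present_mask and the paca(s) in sync as cpu nodes are
 * attached to or detached from the device tree by DLPAR operations.
 */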
static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	int err = 0;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		err = pseries_add_processor(rd->dn);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pseries_remove_processor(rd->dn);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};

#define MAX_CEDE_LATENCY_LEVELS		4
#define	CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

static int parse_cede_parameters(void)
{
	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
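	/*
	 * Query the firmware's cede latency settings via
	 * ibm,get-system-parameter; the caller treats a zero (success)
	 * status as meaning extended cede offline states are usable.
	 */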
	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			 NULL,
			 CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters),
			 CEDE_LATENCY_PARAM_MAX_LENGTH);
}

static int __init pseries_cpu_hotplug_init(void)
{
	int cpu;
	int qcss_tok;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	rtas_stop_self_token = rtas_token("stop-self");
	qcss_tok = rtas_token("query-cpu-stopped-state");

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		of_reconfig_notifier_register(&pseries_smp_nb);
		cpu_maps_update_begin();
		if (cede_offline_enabled && parse_cede_parameters() == 0) {
			default_offline_state = CPU_STATE_INACTIVE;
			for_each_online_cpu(cpu)
				set_default_offline_state(cpu);
		}
		cpu_maps_update_done();
	}

	return 0;
}
machine_arch_initcall(pseries, pseries_cpu_hotplug_init);