/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>	/* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"
#include "offline_states.h"

/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;

static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
							CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;

static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;

static bool cede_offline_enabled __read_mostly = true;

/*
 * Enable/disable cede_offline when available.
 */
static int __init setup_cede_offline(char *str)
{
	return (kstrtobool(str, &cede_offline_enabled) == 0);
}

__setup("cede_offline=", setup_cede_offline);
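
/*
 * Accessors for the per-CPU offline-state machine: "preferred_offline_state"
 * is the state the hotplug path wants a dying CPU to enter, while
 * "current_state" is the state the CPU itself reports reaching.
 */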

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return per_cpu(current_state, cpu);
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(current_state, cpu) = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return per_cpu(preferred_offline_state, cpu);
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(preferred_offline_state, cpu) = state;
}

void set_default_offline_state(int cpu)
{
	per_cpu(preferred_offline_state, cpu) = default_offline_state;
}
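
/* Ask RTAS to stop this CPU; on success the call never returns. */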
static void rtas_stop_self(void)
{
	static struct rtas_args args;

	local_irq_disable();

	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);

	printk("cpu %u (hwid %u) Ready to die...\n",
	       smp_processor_id(), hard_smp_processor_id());

	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);

	panic("Alas, I survived.\n");
}
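
/*
 * Self-destruct path run on the dying CPU. If the preferred offline state
 * is CPU_STATE_INACTIVE, the CPU cedes itself to the hypervisor and can be
 * prodded back online later; otherwise it stops for good via
 * rtas_stop_self().
 */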
static void pseries_mach_cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned int hwcpu = hard_smp_processor_id();
	u8 cede_latency_hint = 0;

	local_irq_disable();
	idle_task_exit();
	if (xive_enabled())
		xive_teardown_cpu();
	else
		xics_teardown_cpu();

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
		if (ppc_md.suspend_disable_cpu)
			ppc_md.suspend_disable_cpu();

		cede_latency_hint = 2;

		get_lppaca()->idle = 1;
		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 1;

		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
			while (!prep_irq_for_idle()) {
				local_irq_enable();
				local_irq_disable();
			}

			extended_cede_processor(cede_latency_hint);
		}

		local_irq_disable();

		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 0;
		get_lppaca()->idle = 0;

		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
			unregister_slb_shadow(hwcpu);

			hard_irq_disable();
			/*
			 * Call to start_secondary_resume() will not return.
			 * Kernel stack will be reset and start_secondary()
			 * will be called to continue the online operation.
			 */
			start_secondary_resume();
		}
	}

	/* Requested state is CPU_STATE_OFFLINE at this point */
	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);

	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
	unregister_slb_shadow(hwcpu);
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for(;;);
}
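
/*
 * Runs on the CPU being offlined: take it out of the online mask and
 * migrate its interrupts away before pseries_mach_cpu_die() is entered.
 */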
static int pseries_cpu_disable(void)
{
	int cpu = smp_processor_id();

	set_cpu_online(cpu, false);
	vdso_data->processorCount--;

	/*fix boot_cpuid here*/
	if (cpu == boot_cpuid)
		boot_cpuid = cpumask_any(cpu_online_mask);

	/* FIXME: abstract this to not be platform specific later on */
	if (xive_enabled())
		xive_smp_disable_cpu();
	else
		xics_migrate_irqs_away();
	return 0;
}

/*
 * pseries_cpu_die: Wait for the cpu to die.
 * @cpu: logical processor id of the CPU whose death we're awaiting.
 *
 * This function is called from the context of the thread which is performing
 * the cpu-offline. Here we wait for long enough to allow the cpu in question
 * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
 * notifications.
 *
 * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
 * self-destruct.
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		cpu_status = 1;
		for (tries = 0; tries < 5000; tries++) {
			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
				cpu_status = 0;
				break;
			}
			msleep(1);
		}
	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {

		for (tries = 0; tries < 25; tries++) {
			cpu_status = smp_query_cpu_stopped(pcpu);
			if (cpu_status == QCSS_STOPPED ||
			    cpu_status == QCSS_HARDWARE_ERROR)
				break;
			cpu_relax();
		}
	}

	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].cpu_start = 0;
}

/*
 * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case.  We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
 */
static int pseries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_var_t candidate_mask, tmp;
	int err = -ENOSPC, len, nthreads, i;
	const __be32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
	zalloc_cpumask_var(&tmp, GFP_KERNEL);

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpumask_set_cpu(i, tmp);

	cpu_maps_update_begin();

	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));

	/* Get a bitmap of unoccupied slots. */
	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
	if (cpumask_empty(candidate_mask)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
		       " supports %d logical cpus.\n", np,
		       num_possible_cpus());
		goto out_unlock;
	}

	while (!cpumask_empty(tmp))
		if (cpumask_subset(tmp, candidate_mask))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpumask_shift_left(tmp, tmp, nthreads);

	if (cpumask_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_mask for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu(cpu, tmp) {
		BUG_ON(cpu_present(cpu));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
	}
	err = 0;
out_unlock:
	cpu_maps_update_done();
	free_cpumask_var(candidate_mask);
	free_cpumask_var(tmp);
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();
}

extern int find_and_online_cpu_nid(int cpu);
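
/*
 * Online every thread of the given cpu node via device_online(); each
 * thread is expected to be in CPU_STATE_OFFLINE when we get here.
 */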
static int dlpar_online_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(get_cpu_current_state(cpu)
					!= CPU_STATE_OFFLINE);
			cpu_maps_update_done();
			timed_topology_update(1);
			find_and_online_cpu_nid(cpu);
			rc = device_online(get_cpu_device(cpu));
			if (rc)
				goto out;
			cpu_maps_update_begin();

			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to online "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;
	bool found;
	int rc;

	/* Assume cpu doesn't exist */
	found = false;

	for_each_child_of_node(parent, child) {
		rc = of_property_read_u32(child, "ibm,my-drc-index",
					  &my_drc_index);
		if (rc)
			continue;

		if (my_drc_index == drc_index) {
			of_node_put(child);
			found = true;
			break;
		}
	}

	return found;
}
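
/*
 * Check whether drc_index appears in the parent node's "ibm,drc-indexes"
 * property.
 */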
static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	bool found = false;
	int rc, index;

	index = 0;
	while (!found) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (drc == drc_index)
			found = true;
	}

	return found;
}
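
/*
 * Hot-add a single CPU: acquire its DRC, build its device tree nodes with
 * configure-connector, attach them, then online the threads. Each failing
 * step unwinds the ones before it.
 */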
static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Failed to find CPU root node \"/cpus\"\n");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("CPU with drc index %x already exists\n", drc_index);
		return -EINVAL;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
		return -EINVAL;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
			rc, drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		pr_warn("Failed call to configure-connector, drc index: %x\n",
			drc_index);
		dlpar_release_drc(drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	rc = dlpar_attach_node(dn, parent);

	/* Regardless we are done with parent now */
	of_node_put(parent);

	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
		 drc_index);
	return rc;
}
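
/*
 * Offline every thread of the given cpu node. Threads that have merely
 * ceded (CPU_STATE_INACTIVE) are prodded awake with H_PROD so they can
 * stop themselves for good.
 */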
static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
				break;

			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
				set_preferred_offline_state(cpu,
							    CPU_STATE_OFFLINE);
				cpu_maps_update_done();
				timed_topology_update(1);
				rc = device_offline(get_cpu_device(cpu));
				if (rc)
					goto out;
				cpu_maps_update_begin();
				break;

			}

			/*
			 * The cpu is in CPU_STATE_INACTIVE.
			 * Upgrade its state to CPU_STATE_OFFLINE.
			 */
			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
			BUG_ON(plpar_hcall_norets(H_PROD, thread)
								!= H_SUCCESS);
			__cpu_die(cpu);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}
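
/*
 * Hot-remove a single CPU: offline its threads, release its DRC, then
 * detach its device tree node, rolling back on failure.
 */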
static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug("Attempting to remove CPU %s, drc index: %x\n",
		 dn->name, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
			drc_index, dn->name, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = dlpar_detach_node(dn);
	if (rc) {
		int saved_rc = rc;

		pr_warn("Failed to detach CPU %s, rc: %d\n", dn->name, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
	return 0;
}

static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;
	int rc;

	for_each_node_by_type(dn, "cpu") {
		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
		if (rc)
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}

static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = cpu_drc_index_to_dn(drc_index);
	if (!dn) {
		pr_warn("Cannot find CPU (drc index %x) to remove\n",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);
	return rc;
}

static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
	struct device_node *dn;
	int cpus_found = 0;
	int rc;

	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
	 * remove the last CPU.
	 */
	for_each_node_by_type(dn, "cpu") {
		cpus_found++;

		if (cpus_found > cpus_to_remove) {
			of_node_put(dn);
			break;
		}

		/* Note that cpus_found is always 1 ahead of the index
		 * into the cpu_drcs array, so we use cpus_found - 1
		 */
		rc = of_property_read_u32(dn, "ibm,my-drc-index",
					  &cpu_drcs[cpus_found - 1]);
		if (rc) {
			pr_warn("Error occurred getting drc-index for %s\n",
				dn->name);
			of_node_put(dn);
			return -1;
		}
	}

	if (cpus_found < cpus_to_remove) {
		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
			cpus_found, cpus_to_remove);
	} else if (cpus_found == cpus_to_remove) {
		pr_warn("Cannot remove all CPUs\n");
	}

	return cpus_found;
}

static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
{
	u32 *cpu_drcs;
	int cpus_found;
	int cpus_removed = 0;
	int i, rc;

	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);

	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
	if (cpus_found <= cpus_to_remove) {
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_remove; i++) {
		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
		if (rc)
			break;

		cpus_removed++;
	}

	if (cpus_removed != cpus_to_remove) {
		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");

		for (i = 0; i < cpus_removed; i++)
			dlpar_cpu_add(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
{
	struct device_node *parent;
	int cpus_found = 0;
	int index, rc;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Could not find CPU root node in device tree\n");
		/* cpu_drcs is freed by the caller on failure */
		return -1;
	}

	/* Search the ibm,drc-indexes array for possible CPU drcs to
	 * add. Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
	index = 1;
	while (cpus_found < cpus_to_add) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (dlpar_cpu_exists(parent, drc))
			continue;

		cpu_drcs[cpus_found++] = drc;
	}

	of_node_put(parent);
	return cpus_found;
}

static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
	u32 *cpu_drcs;
	int cpus_added = 0;
	int cpus_found;
	int i, rc;

	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
	if (cpus_found < cpus_to_add) {
		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
			cpus_found, cpus_to_add);
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_add; i++) {
		rc = dlpar_cpu_add(cpu_drcs[i]);
		if (rc)
			break;

		cpus_added++;
	}

	if (cpus_added < cpus_to_add) {
		pr_warn("CPU hot-add failed, removing any added CPUs\n");

		for (i = 0; i < cpus_added; i++)
			dlpar_cpu_remove_by_index(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}
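
/*
 * Top-level handler for CPU DLPAR requests: dispatch the hotplug
 * error-log action to the add/remove paths, by drc count or drc index.
 */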
int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
	u32 count, drc_index;
	int rc;

	count = hp_elog->_drc_u.drc_count;
	drc_index = hp_elog->_drc_u.drc_index;

	lock_device_hotplug();

	switch (hp_elog->action) {
	case PSERIES_HP_ELOG_ACTION_REMOVE:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_remove_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_remove_by_index(drc_index);
		else
			rc = -EINVAL;
		break;
	case PSERIES_HP_ELOG_ACTION_ADD:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_add_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_add(drc_index);
		else
			rc = -EINVAL;
		break;
	default:
		pr_err("Invalid action (%d) specified\n", hp_elog->action);
		rc = -EINVAL;
		break;
	}

	unlock_device_hotplug();
	return rc;
}

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE

static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc_index;
	int rc;

	rc = kstrtou32(buf, 0, &drc_index);
	if (rc)
		return -EINVAL;

	rc = dlpar_cpu_add(drc_index);

	return rc ? rc : count;
}
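
/*
 * Example usage (the drc index shown is hypothetical):
 *   echo 0x10000001 > /sys/devices/system/cpu/probe
 * The matching "release" file takes a device tree path rather than a drc
 * index; both attributes are provided by the generic cpu subsystem when
 * CONFIG_ARCH_CPU_PROBE_RELEASE is set.
 */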

static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *dn;
	u32 drc_index;
	int rc;

	dn = of_find_node_by_path(buf);
	if (!dn)
		return -EINVAL;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc) {
		of_node_put(dn);
		return -EINVAL;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);

	return rc ? rc : count;
}

#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
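
/*
 * Device tree reconfig notifier: keep cpu_present_mask and the pacas in
 * sync as cpu nodes are attached and detached.
 */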
static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	int err = 0;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		err = pseries_add_processor(rd->dn);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pseries_remove_processor(rd->dn);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};
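
/*
 * CEDE latency parameter block, read from firmware with the
 * ibm,get-system-parameter RTAS call (parameter token 45). A successful
 * query here is what allows the cede-offline default below.
 */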
#define MAX_CEDE_LATENCY_LEVELS		4
#define	CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

static int parse_cede_parameters(void)
{
	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			 NULL,
			 CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters),
			 CEDE_LATENCY_PARAM_MAX_LENGTH);
}
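
/*
 * Wire up the pseries hotplug callbacks; on LPAR, also register the
 * device tree reconfig notifier and, when extended cede is available,
 * make CPU_STATE_INACTIVE the default offline state.
 */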
static int __init pseries_cpu_hotplug_init(void)
{
	int cpu;
	int qcss_tok;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	rtas_stop_self_token = rtas_token("stop-self");
	qcss_tok = rtas_token("query-cpu-stopped-state");

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		of_reconfig_notifier_register(&pseries_smp_nb);
		cpu_maps_update_begin();
		if (cede_offline_enabled && parse_cede_parameters() == 0) {
			default_offline_state = CPU_STATE_INACTIVE;
			for_each_online_cpu(cpu)
				set_default_offline_state(cpu);
		}
		cpu_maps_update_done();
	}

	return 0;
}
machine_arch_initcall(pseries, pseries_cpu_hotplug_init);