/*
 * pseries CPU Hotplug infrastructure.
 *
 * Split out from arch/powerpc/platforms/pseries/setup.c
 *  arch/powerpc/kernel/rtas.c, and arch/powerpc/platforms/pseries/smp.c
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 * Dave Engebretsen, Peter Bergner, and
 * Mike Corrigan {engebret|bergner|mikec}@us.ibm.com
 * Plus various changes from other IBM teams...
 *
 * Copyright (C) 2006 Michael Ellerman, IBM Corporation
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/sched.h>	/* for idle_task_exit */
#include <linux/sched/hotplug.h>
#include <linux/cpu.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <asm/prom.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/vdso_datapage.h>
#include <asm/xics.h>
#include <asm/xive.h>
#include <asm/plpar_wrappers.h>

#include "pseries.h"
#include "offline_states.h"

/* This version can't take the spinlock, because it never returns */
static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;

static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
							CPU_STATE_OFFLINE;
static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;

static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;

static bool cede_offline_enabled __read_mostly = true;

/*
 * Enable/disable cede_offline when available.
 */
static int __init setup_cede_offline(char *str)
{
	return (kstrtobool(str, &cede_offline_enabled) == 0);
}

__setup("cede_offline=", setup_cede_offline);

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return per_cpu(current_state, cpu);
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(current_state, cpu) = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return per_cpu(preferred_offline_state, cpu);
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	per_cpu(preferred_offline_state, cpu) = state;
}

void set_default_offline_state(int cpu)
{
	per_cpu(preferred_offline_state, cpu) = default_offline_state;
}

static void rtas_stop_self(void)
{
	static struct rtas_args args;

	local_irq_disable();

	BUG_ON(rtas_stop_self_token == RTAS_UNKNOWN_SERVICE);

	printk("cpu %u (hwid %u) Ready to die...\n",
	       smp_processor_id(), hard_smp_processor_id());

	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);

	panic("Alas, I survived.\n");
}

static void pseries_mach_cpu_die(void)
{
	unsigned int cpu = smp_processor_id();
	unsigned int hwcpu = hard_smp_processor_id();
	u8 cede_latency_hint = 0;

	local_irq_disable();
	idle_task_exit();
	if (xive_enabled())
		xive_teardown_cpu();
	else
		xics_teardown_cpu();

	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
		if (ppc_md.suspend_disable_cpu)
			ppc_md.suspend_disable_cpu();

		cede_latency_hint = 2;

		get_lppaca()->idle = 1;
		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 1;

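		/*
		 * Cede to the hypervisor until someone changes our
		 * preferred state.  prep_irq_for_idle() can fail if an
		 * interrupt is already pending, so briefly re-enable
		 * interrupts to let it be serviced before retrying.
		 */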
		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
			while (!prep_irq_for_idle()) {
				local_irq_enable();
				local_irq_disable();
			}

			extended_cede_processor(cede_latency_hint);
		}

		local_irq_disable();

		if (!lppaca_shared_proc(get_lppaca()))
			get_lppaca()->donate_dedicated_cpu = 0;
		get_lppaca()->idle = 0;

		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
			unregister_slb_shadow(hwcpu);

			hard_irq_disable();
			/*
			 * Call to start_secondary_resume() will not return.
			 * Kernel stack will be reset and start_secondary()
			 * will be called to continue the online operation.
			 */
			start_secondary_resume();
		}
	}

	/* Requested state is CPU_STATE_OFFLINE at this point */
	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);

	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
	unregister_slb_shadow(hwcpu);
	rtas_stop_self();

	/* Should never get here... */
	BUG();
	for(;;);
}

static int pseries_cpu_disable(void)
{
	int cpu = smp_processor_id();

	set_cpu_online(cpu, false);
	vdso_data->processorCount--;

	/* fix boot_cpuid here */
	if (cpu == boot_cpuid)
		boot_cpuid = cpumask_any(cpu_online_mask);

	/* FIXME: abstract this to not be platform specific later on */
	if (xive_enabled())
		xive_smp_disable_cpu();
	else
		xics_migrate_irqs_away();
	return 0;
}

/*
 * pseries_cpu_die: Wait for the cpu to die.
 * @cpu: logical processor id of the CPU whose death we're awaiting.
 *
 * This function is called from the context of the thread which is performing
 * the cpu-offline. Here we wait for long enough to allow the cpu in question
 * to self-destruct so that the cpu-offline thread can send the CPU_DEAD
 * notifications.
 *
 * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
 * self-destruct.
 */
static void pseries_cpu_die(unsigned int cpu)
{
	int tries;
	int cpu_status = 1;
	unsigned int pcpu = get_hard_smp_processor_id(cpu);

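	/*
	 * Two wait strategies, matching the two offline modes: a ceded
	 * (inactive) CPU is polled via its current_state for up to ~5
	 * seconds, while a stopped CPU is checked with the RTAS
	 * query-cpu-stopped-state call in a short busy-wait loop.
	 */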
	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		cpu_status = 1;
		for (tries = 0; tries < 5000; tries++) {
			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
				cpu_status = 0;
				break;
			}
			msleep(1);
		}
	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {

		for (tries = 0; tries < 25; tries++) {
			cpu_status = smp_query_cpu_stopped(pcpu);
			if (cpu_status == QCSS_STOPPED ||
			    cpu_status == QCSS_HARDWARE_ERROR)
				break;
			cpu_relax();
		}
	}

	if (cpu_status != 0) {
		printk("Querying DEAD? cpu %i (%i) shows %i\n",
		       cpu, pcpu, cpu_status);
	}

	/* Isolation and deallocation are definitely done by
	 * drslot_chrp_cpu.  If they were not they would be
	 * done here.  Change isolate state to Isolate and
	 * change allocation-state to Unusable.
	 */
	paca[cpu].cpu_start = 0;
}

/*
 * Update cpu_present_mask and paca(s) for a new cpu node.  The wrinkle
 * here is that a cpu device node may represent up to two logical cpus
 * in the SMT case.  We must honor the assumption in other code that
 * the logical ids for sibling SMT threads x and y are adjacent, such
 * that x^1 == y and y^1 == x.
 */
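/*
 * For example (illustrative): a 2-thread SMT core whose threads get
 * logical ids 6 and 7 satisfies 6^1 == 7 and 7^1 == 6, which is why the
 * search below slides the candidate window by whole cores (nthreads at
 * a time).
 */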
static int pseries_add_processor(struct device_node *np)
{
	unsigned int cpu;
	cpumask_var_t candidate_mask, tmp;
	int err = -ENOSPC, len, nthreads, i;
	const __be32 *intserv;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return 0;

	zalloc_cpumask_var(&candidate_mask, GFP_KERNEL);
	zalloc_cpumask_var(&tmp, GFP_KERNEL);

	nthreads = len / sizeof(u32);
	for (i = 0; i < nthreads; i++)
		cpumask_set_cpu(i, tmp);

	cpu_maps_update_begin();

	BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));

	/* Get a bitmap of unoccupied slots. */
	cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
	if (cpumask_empty(candidate_mask)) {
		/* If we get here, it most likely means that NR_CPUS is
		 * less than the partition's max processors setting.
		 */
		printk(KERN_ERR "Cannot add cpu %pOF; this system configuration"
		       " supports %d logical cpus.\n", np,
		       num_possible_cpus());
		goto out_unlock;
	}

	while (!cpumask_empty(tmp))
		if (cpumask_subset(tmp, candidate_mask))
			/* Found a range where we can insert the new cpu(s) */
			break;
		else
			cpumask_shift_left(tmp, tmp, nthreads);

	if (cpumask_empty(tmp)) {
		printk(KERN_ERR "Unable to find space in cpu_present_mask for"
		       " processor %s with %d thread(s)\n", np->name,
		       nthreads);
		goto out_unlock;
	}

	for_each_cpu(cpu, tmp) {
		BUG_ON(cpu_present(cpu));
		set_cpu_present(cpu, true);
		set_hard_smp_processor_id(cpu, be32_to_cpu(*intserv++));
	}
	err = 0;
out_unlock:
	cpu_maps_update_done();
	free_cpumask_var(candidate_mask);
	free_cpumask_var(tmp);
	return err;
}

/*
 * Update the present map for a cpu node which is going away, and set
 * the hard id in the paca(s) to -1 to be consistent with boot time
 * convention for non-present cpus.
 */
static void pseries_remove_processor(struct device_node *np)
{
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(np, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(cpu_online(cpu));
			set_cpu_present(cpu, false);
			set_hard_smp_processor_id(cpu, -1);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to remove "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();
}

static int dlpar_online_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;
			BUG_ON(get_cpu_current_state(cpu)
					!= CPU_STATE_OFFLINE);
			cpu_maps_update_done();
			rc = device_online(get_cpu_device(cpu));
			if (rc)
				goto out;
			cpu_maps_update_begin();

			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to online "
			       "with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
{
	struct device_node *child = NULL;
	u32 my_drc_index;
	bool found;
	int rc;

	/* Assume cpu doesn't exist */
	found = false;

	for_each_child_of_node(parent, child) {
		rc = of_property_read_u32(child, "ibm,my-drc-index",
					  &my_drc_index);
		if (rc)
			continue;

		if (my_drc_index == drc_index) {
			of_node_put(child);
			found = true;
			break;
		}
	}

	return found;
}

static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
{
	bool found = false;
	int rc, index;

	index = 0;
	while (!found) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (drc == drc_index)
			found = true;
	}

	return found;
}

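/*
 * Hot-add sequence: acquire the DRC for the new CPU, ask firmware to
 * configure the connector (which yields the new device tree nodes),
 * attach those nodes to the tree, and finally online the threads.
 * Each step unwinds the previous ones on failure.
 */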
static ssize_t dlpar_cpu_add(u32 drc_index)
{
	struct device_node *dn, *parent;
	int rc, saved_rc;

	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Failed to find CPU root node \"/cpus\"\n");
		return -ENODEV;
	}

	if (dlpar_cpu_exists(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("CPU with drc index %x already exists\n", drc_index);
		return -EINVAL;
	}

	if (!valid_cpu_drc_index(parent, drc_index)) {
		of_node_put(parent);
		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
		return -EINVAL;
	}

	rc = dlpar_acquire_drc(drc_index);
	if (rc) {
		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
			rc, drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
	if (!dn) {
		pr_warn("Failed call to configure-connector, drc index: %x\n",
			drc_index);
		dlpar_release_drc(drc_index);
		of_node_put(parent);
		return -EINVAL;
	}

	rc = dlpar_attach_node(dn, parent);

	/* Regardless we are done with parent now */
	of_node_put(parent);

	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_release_drc(drc_index);
		if (!rc)
			dlpar_free_cc_nodes(dn);

		return saved_rc;
	}

	rc = dlpar_online_cpu(dn);
	if (rc) {
		saved_rc = rc;
		pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
			dn->name, rc, drc_index);

		rc = dlpar_detach_node(dn);
		if (!rc)
			dlpar_release_drc(drc_index);

		return saved_rc;
	}

	pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
		 drc_index);
	return rc;
}

static int dlpar_offline_cpu(struct device_node *dn)
{
	int rc = 0;
	unsigned int cpu;
	int len, nthreads, i;
	const __be32 *intserv;
	u32 thread;

	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
	if (!intserv)
		return -EINVAL;

	nthreads = len / sizeof(u32);

	cpu_maps_update_begin();
	for (i = 0; i < nthreads; i++) {
		thread = be32_to_cpu(intserv[i]);
		for_each_present_cpu(cpu) {
			if (get_hard_smp_processor_id(cpu) != thread)
				continue;

			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
				break;

			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
				set_preferred_offline_state(cpu,
							    CPU_STATE_OFFLINE);
				cpu_maps_update_done();
				rc = device_offline(get_cpu_device(cpu));
				if (rc)
					goto out;
				cpu_maps_update_begin();
				break;

			}

			/*
			 * The cpu is in CPU_STATE_INACTIVE.
			 * Upgrade its state to CPU_STATE_OFFLINE.
			 */
			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
			BUG_ON(plpar_hcall_norets(H_PROD, thread)
								!= H_SUCCESS);
			__cpu_die(cpu);
			break;
		}
		if (cpu >= nr_cpu_ids)
			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
	}
	cpu_maps_update_done();

out:
	return rc;

}

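/*
 * Hot-remove is the mirror of dlpar_cpu_add(): offline the threads,
 * release the DRC back to firmware, then detach the device tree node,
 * undoing the earlier steps if a later one fails.
 */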
static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
{
	int rc;

	pr_debug("Attempting to remove CPU %s, drc index: %x\n",
		 dn->name, drc_index);

	rc = dlpar_offline_cpu(dn);
	if (rc) {
		pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
		return -EINVAL;
	}

	rc = dlpar_release_drc(drc_index);
	if (rc) {
		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
			drc_index, dn->name, rc);
		dlpar_online_cpu(dn);
		return rc;
	}

	rc = dlpar_detach_node(dn);
	if (rc) {
		int saved_rc = rc;

		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);

		rc = dlpar_acquire_drc(drc_index);
		if (!rc)
			dlpar_online_cpu(dn);

		return saved_rc;
	}

	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
	return 0;
}

static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
{
	struct device_node *dn;
	u32 my_index;
	int rc;

	for_each_node_by_type(dn, "cpu") {
		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
		if (rc)
			continue;

		if (my_index == drc_index)
			break;
	}

	return dn;
}

static int dlpar_cpu_remove_by_index(u32 drc_index)
{
	struct device_node *dn;
	int rc;

	dn = cpu_drc_index_to_dn(drc_index);
	if (!dn) {
		pr_warn("Cannot find CPU (drc index %x) to remove\n",
			drc_index);
		return -ENODEV;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);
	return rc;
}

static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
{
	struct device_node *dn;
	int cpus_found = 0;
	int rc;

	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
	 * remove the last CPU.
	 */
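	/* E.g. a request to remove 2 CPUs from a 2-CPU partition finds
	 * only 2 (== cpus_to_remove); the caller rejects any result that
	 * is not strictly greater than cpus_to_remove.
	 */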
	for_each_node_by_type(dn, "cpu") {
		cpus_found++;

		if (cpus_found > cpus_to_remove) {
			of_node_put(dn);
			break;
		}

		/* Note that cpus_found is always 1 ahead of the index
		 * into the cpu_drcs array, so we use cpus_found - 1
		 */
		rc = of_property_read_u32(dn, "ibm,my-drc-index",
					  &cpu_drcs[cpus_found - 1]);
		if (rc) {
			pr_warn("Error occurred getting drc-index for %s\n",
				dn->name);
			of_node_put(dn);
			return -1;
		}
	}

	if (cpus_found < cpus_to_remove) {
		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
			cpus_found, cpus_to_remove);
	} else if (cpus_found == cpus_to_remove) {
		pr_warn("Cannot remove all CPUs\n");
	}

	return cpus_found;
}

static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
{
	u32 *cpu_drcs;
	int cpus_found;
	int cpus_removed = 0;
	int i, rc;

	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);

	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
	if (cpus_found < 0 || (u32)cpus_found <= cpus_to_remove) {
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_remove; i++) {
		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
		if (rc)
			break;

		cpus_removed++;
	}

	if (cpus_removed != cpus_to_remove) {
		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");

		for (i = 0; i < cpus_removed; i++)
			dlpar_cpu_add(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
{
	struct device_node *parent;
	int cpus_found = 0;
	int index, rc;

	parent = of_find_node_by_path("/cpus");
	if (!parent) {
		pr_warn("Could not find CPU root node in device tree\n");
		/* Caller owns and frees cpu_drcs; don't free it here. */
		return -1;
	}

	/* Search the ibm,drc-indexes array for possible CPU drcs to
	 * add. Note that the format of the ibm,drc-indexes array is
	 * the number of entries in the array followed by the array
	 * of drc values so we start looking at index = 1.
	 */
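	/* Illustrative layout: ibm,drc-indexes = <4 drc0 drc1 drc2 drc3>,
	 * where the leading cell (index 0) is the entry count.
	 */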
	index = 1;
	while (cpus_found < cpus_to_add) {
		u32 drc;

		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
						index++, &drc);
		if (rc)
			break;

		if (dlpar_cpu_exists(parent, drc))
			continue;

		cpu_drcs[cpus_found++] = drc;
	}

	of_node_put(parent);
	return cpus_found;
}

static int dlpar_cpu_add_by_count(u32 cpus_to_add)
{
	u32 *cpu_drcs;
	int cpus_added = 0;
	int cpus_found;
	int i, rc;

	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);

	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
	if (!cpu_drcs)
		return -EINVAL;

	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
	if (cpus_found < 0 || (u32)cpus_found < cpus_to_add) {
		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
			cpus_found, cpus_to_add);
		kfree(cpu_drcs);
		return -EINVAL;
	}

	for (i = 0; i < cpus_to_add; i++) {
		rc = dlpar_cpu_add(cpu_drcs[i]);
		if (rc)
			break;

		cpus_added++;
	}

	if (cpus_added < cpus_to_add) {
		pr_warn("CPU hot-add failed, removing any added CPUs\n");

		for (i = 0; i < cpus_added; i++)
			dlpar_cpu_remove_by_index(cpu_drcs[i]);

		rc = -EINVAL;
	} else {
		rc = 0;
	}

	kfree(cpu_drcs);
	return rc;
}

int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
{
	u32 count, drc_index;
	int rc;

	count = hp_elog->_drc_u.drc_count;
	drc_index = hp_elog->_drc_u.drc_index;

	lock_device_hotplug();

	switch (hp_elog->action) {
	case PSERIES_HP_ELOG_ACTION_REMOVE:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_remove_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_remove_by_index(drc_index);
		else
			rc = -EINVAL;
		break;
	case PSERIES_HP_ELOG_ACTION_ADD:
		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
			rc = dlpar_cpu_add_by_count(count);
		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
			rc = dlpar_cpu_add(drc_index);
		else
			rc = -EINVAL;
		break;
	default:
		pr_err("Invalid action (%d) specified\n", hp_elog->action);
		rc = -EINVAL;
		break;
	}

	unlock_device_hotplug();
	return rc;
}

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE

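/*
 * Back-ends for the sysfs cpu probe/release interface (assumption: the
 * /sys/devices/system/cpu/probe and .../release files created when
 * CONFIG_ARCH_CPU_PROBE_RELEASE is enabled).  probe takes a drc index;
 * release takes the CPU's device tree path.
 */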
static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
	u32 drc_index;
	int rc;

	rc = kstrtou32(buf, 0, &drc_index);
	if (rc)
		return -EINVAL;

	rc = dlpar_cpu_add(drc_index);

	return rc ? rc : count;
}

static ssize_t dlpar_cpu_release(const char *buf, size_t count)
{
	struct device_node *dn;
	u32 drc_index;
	int rc;

	dn = of_find_node_by_path(buf);
	if (!dn)
		return -EINVAL;

	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
	if (rc) {
		of_node_put(dn);
		return -EINVAL;
	}

	rc = dlpar_cpu_remove(dn, drc_index);
	of_node_put(dn);

	return rc ? rc : count;
}

#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

static int pseries_smp_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct of_reconfig_data *rd = data;
	int err = 0;

	switch (action) {
	case OF_RECONFIG_ATTACH_NODE:
		err = pseries_add_processor(rd->dn);
		break;
	case OF_RECONFIG_DETACH_NODE:
		pseries_remove_processor(rd->dn);
		break;
	}
	return notifier_from_errno(err);
}

static struct notifier_block pseries_smp_nb = {
	.notifier_call = pseries_smp_notifier,
};

#define MAX_CEDE_LATENCY_LEVELS		4
#define	CEDE_LATENCY_PARAM_LENGTH	10
#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
#define CEDE_LATENCY_TOKEN		45

static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];

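/*
 * Fetch the CEDE latency settings with the ibm,get-system-parameter RTAS
 * call (parameter token 45).  The returned buffer layout is defined by
 * PAPR; this code only checks the call's return status and keeps the
 * raw bytes in cede_parameters.
 */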
static int parse_cede_parameters(void)
{
	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
			 NULL,
			 CEDE_LATENCY_TOKEN,
			 __pa(cede_parameters),
			 CEDE_LATENCY_PARAM_MAX_LENGTH);
}

static int __init pseries_cpu_hotplug_init(void)
{
	int cpu;
	int qcss_tok;

#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
	ppc_md.cpu_probe = dlpar_cpu_probe;
	ppc_md.cpu_release = dlpar_cpu_release;
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */

	rtas_stop_self_token = rtas_token("stop-self");
	qcss_tok = rtas_token("query-cpu-stopped-state");

	if (rtas_stop_self_token == RTAS_UNKNOWN_SERVICE ||
			qcss_tok == RTAS_UNKNOWN_SERVICE) {
		printk(KERN_INFO "CPU Hotplug not supported by firmware "
				"- disabling.\n");
		return 0;
	}

	ppc_md.cpu_die = pseries_mach_cpu_die;
	smp_ops->cpu_disable = pseries_cpu_disable;
	smp_ops->cpu_die = pseries_cpu_die;

	/* Processors can be added/removed only on LPAR */
	if (firmware_has_feature(FW_FEATURE_LPAR)) {
		of_reconfig_notifier_register(&pseries_smp_nb);
		cpu_maps_update_begin();
		if (cede_offline_enabled && parse_cede_parameters() == 0) {
			default_offline_state = CPU_STATE_INACTIVE;
			for_each_online_cpu(cpu)
				set_default_offline_state(cpu);
		}
		cpu_maps_update_done();
	}

	return 0;
}
machine_arch_initcall(pseries, pseries_cpu_hotplug_init);