/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/sched/smt.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/nmi.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>

#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>

#include "smpboot.h"

/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done_up:	Signal completion to the issuer of the task for cpu-up
 * @done_down:	Signal completion to the issuer of the task for cpu-down
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
	enum cpuhp_state	fail;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	bool			booted_once;
	struct hlist_node	*node;
	struct hlist_node	*last;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done_up;
	struct completion	done_down;
#endif
};

static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = {
	.fail = CPUHP_INVALID,
};

#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lockdep_map cpuhp_state_up_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map);
static struct lockdep_map cpuhp_state_down_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map);


static inline void cpuhp_lock_acquire(bool bringup)
{
	lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}

static inline void cpuhp_lock_release(bool bringup)
{
	lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map);
}
#else

static inline void cpuhp_lock_acquire(bool bringup) { }
static inline void cpuhp_lock_release(bool bringup) { }

#endif

/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @teardown:	Teardown function of the step
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			cant_stop;
	bool			multi_instance;
};

static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_hp_states[];

static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	return cpuhp_hp_states + state;
}

/**
 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
 * @state:	The state to do callbacks for
 * @bringup:	True if the bringup callback should be invoked
 * @node:	For multi-instance, do a single entry callback for install/remove
 * @lastp:	For multi-instance rollback, remember how far we got
 *
 * Called from cpu hotplug and from the state register machinery.
 */
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
				 bool bringup, struct hlist_node *node,
				 struct hlist_node **lastp)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct cpuhp_step *step = cpuhp_get_step(state);
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

	if (st->fail == state) {
		st->fail = CPUHP_INVALID;

		if (!(bringup ? step->startup.single : step->teardown.single))
			return 0;

		return -EAGAIN;
	}

	if (!step->multi_instance) {
		WARN_ON_ONCE(lastp && *lastp);
		cb = bringup ? step->startup.single : step->teardown.single;
		if (!cb)
			return 0;
		trace_cpuhp_enter(cpu, st->target, state, cb);
		ret = cb(cpu);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}
	cbm = bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
		WARN_ON_ONCE(lastp && *lastp);
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
		if (lastp && node == *lastp)
			break;

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		if (ret) {
			if (!lastp)
				goto err;

			*lastp = node;
			return ret;
		}
		cnt++;
	}
	if (lastp)
		*lastp = NULL;
	return 0;
err:
	/* Rollback the instances if one failed */
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return ret;

	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;

		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		/*
		 * Rollback must not fail!
		 */
		WARN_ON_ONCE(ret);
	}
	return ret;
}

#ifdef CONFIG_SMP
static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	struct completion *done = bringup ? &st->done_up : &st->done_down;
	wait_for_completion(done);
}

static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup)
{
	struct completion *done = bringup ? &st->done_up : &st->done_down;
	complete(done);
}

/*
 * The former STARTING/DYING states run with IRQs disabled and must not fail.
 */
static bool cpuhp_is_atomic_state(enum cpuhp_state state)
{
	return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE;
}

/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);

/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

#ifdef CONFIG_HOTPLUG_CPU

DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);

void cpus_read_lock(void)
{
	percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);

int cpus_read_trylock(void)
{
	return percpu_down_read_trylock(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_trylock);

void cpus_read_unlock(void)
{
	percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_unlock);
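/*
 * Usage sketch (illustrative, not part of this file): a typical reader
 * side pins the online mask across an iteration.  Only the
 * cpus_read_lock()/cpus_read_unlock() pair defined above and the standard
 * for_each_online_cpu() iterator are assumed.
 *
 *	static void walk_online_cpus(void)
 *	{
 *		unsigned int cpu;
 *
 *		cpus_read_lock();
 *		for_each_online_cpu(cpu)
 *			pr_info("CPU%u is online\n", cpu);
 *		cpus_read_unlock();
 *	}
 */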

void cpus_write_lock(void)
{
	percpu_down_write(&cpu_hotplug_lock);
}

void cpus_write_unlock(void)
{
	percpu_up_write(&cpu_hotplug_lock);
}

void lockdep_assert_cpus_held(void)
{
	/*
	 * We can't have hotplug operations before userspace starts running,
	 * and some init codepaths will knowingly not take the hotplug lock.
	 * This is all valid, so mute lockdep until it makes sense to report
	 * unheld locks.
	 */
	if (system_state < SYSTEM_RUNNING)
		return;

	percpu_rwsem_assert_held(&cpu_hotplug_lock);
}

/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);

static void __cpu_hotplug_enable(void)
{
	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		return;
	cpu_hotplug_disabled--;
}

void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
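/*
 * Usage sketch (illustrative): cpu_hotplug_disable() waits for a hotplug
 * operation already in flight and then keeps further ones out, so pairing
 * it with cpu_hotplug_enable() brackets a section that must not race with
 * sysfs-initiated hotplug.  resync_state() is a hypothetical placeholder
 * for the caller's own work.
 *
 *	static void resync_with_hotplug_blocked(void)
 *	{
 *		cpu_hotplug_disable();
 *		resync_state();
 *		cpu_hotplug_enable();
 *	}
 */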
#endif	/* CONFIG_HOTPLUG_CPU */

/*
 * Architectures that need SMT-specific errata handling during SMT hotplug
 * should override this.
 */
void __weak arch_smt_update(void) { }

#ifdef CONFIG_HOTPLUG_SMT
enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;

void __init cpu_smt_disable(bool force)
{
	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED ||
		cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
		return;

	if (force) {
		pr_info("SMT: Force disabled\n");
		cpu_smt_control = CPU_SMT_FORCE_DISABLED;
	} else {
		pr_info("SMT: disabled\n");
		cpu_smt_control = CPU_SMT_DISABLED;
	}
}

/*
 * The decision whether SMT is supported can only be done after the full
 * CPU identification. Called from architecture code.
 */
void __init cpu_smt_check_topology(void)
{
	if (!topology_smt_supported())
		cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
}

static int __init smt_cmdline_disable(char *str)
{
	cpu_smt_disable(str && !strcmp(str, "force"));
	return 0;
}
early_param("nosmt", smt_cmdline_disable);

static inline bool cpu_smt_allowed(unsigned int cpu)
{
	if (cpu_smt_control == CPU_SMT_ENABLED)
		return true;

	if (topology_is_primary_thread(cpu))
		return true;

	/*
	 * On x86 it's required to boot all logical CPUs at least once so
	 * that the init code can get a chance to set CR4.MCE on each
	 * CPU. Otherwise, a broadcast MCE observing CR4.MCE=0b on any
	 * core will shut down the machine.
	 */
	return !per_cpu(cpuhp_state, cpu).booted_once;
}
#else
static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
#endif

static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;

	st->rollback = false;
	st->last = NULL;

	st->target = target;
	st->single = false;
	st->bringup = st->state < target;

	return prev_state;
}

static inline void
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
{
	st->rollback = true;

	/*
	 * If we have st->last we need to undo partial multi_instance of this
	 * state first. Otherwise start undo at the previous state.
	 */
	if (!st->last) {
		if (st->bringup)
			st->state--;
		else
			st->state++;
	}

	st->target = prev_state;
	st->bringup = !st->bringup;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st)
{
	if (!st->single && st->state == st->target)
		return;

	st->result = 0;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_ap_thread(st, st->bringup);
}

static int cpuhp_kick_ap(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{
	enum cpuhp_state prev_state;
	int ret;

	prev_state = cpuhp_set_state(st, target);
	__cpuhp_kick_ap(st);
	if ((ret = st->result)) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

	return ret;
}

static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
	wait_for_ap_thread(st, true);
	if (WARN_ON_ONCE((!cpu_online(cpu))))
		return -ECANCELED;

	/* Unpark the stopper thread and the hotplug thread of the target cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

	/*
	 * SMT soft disabling on X86 requires bringing the CPU out of the
	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit.  The
	 * CPU marked itself as booted_once in notify_cpu_starting() so the
	 * cpu_smt_allowed() check will now return false if this is not the
	 * primary sibling.
	 */
	if (!cpu_smt_allowed(cpu))
		return -ECANCELED;

	if (st->target <= CPUHP_AP_ONLINE_IDLE)
		return 0;

	return cpuhp_kick_ap(st, st->target);
}

static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	return bringup_wait_for_ap(cpu);
}

/*
 * Hotplug state machine related functions
 */

static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state--; st->state > st->target; st->state--)
		cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
}

static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
{
	if (IS_ENABLED(CONFIG_HOTPLUG_CPU))
		return true;
	/*
	 * When CPU hotplug is disabled, then taking the CPU down is not
	 * possible because takedown_cpu() and the architecture and
	 * subsystem specific mechanisms are not available. So the CPU
	 * which would be completely unplugged again needs to stay around
	 * in the current state.
	 */
	return st->state <= CPUHP_BRINGUP_CPU;
}

static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		if (ret) {
			if (can_rollback_cpu(st)) {
				st->target = prev_state;
				undo_cpu_up(cpu, st);
			}
			break;
		}
	}
	return ret;
}

/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done_up);
	init_completion(&st->done_down);
}

static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 *
 * Each invocation of this function by the smpboot thread does a single AP
 * state callback.
 *
 * It has 3 modes of operation:
 *  - single: runs st->cb_state
 *  - up:     runs ++st->state, while st->state < st->target
 *  - down:   runs st->state--, while st->state > st->target
 *
 * When complete or on error, should_run is cleared and the completion is fired.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	bool bringup = st->bringup;
	enum cpuhp_state state;

	if (WARN_ON_ONCE(!st->should_run))
		return;

	/*
	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
	 * that if we see ->should_run we also see the rest of the state.
	 */
	smp_mb();

	cpuhp_lock_acquire(bringup);

	if (st->single) {
		state = st->cb_state;
		st->should_run = false;
	} else {
		if (bringup) {
			st->state++;
			state = st->state;
			st->should_run = (st->state < st->target);
			WARN_ON_ONCE(st->state > st->target);
		} else {
			state = st->state;
			st->state--;
			st->should_run = (st->state > st->target);
			WARN_ON_ONCE(st->state < st->target);
		}
	}

	WARN_ON_ONCE(!cpuhp_is_ap_state(state));

	if (cpuhp_is_atomic_state(state)) {
		local_irq_disable();
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
		local_irq_enable();

		/*
		 * STARTING/DYING must not fail!
		 */
		WARN_ON_ONCE(st->result);
	} else {
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
	}

	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without a
		 * paddle; no way forward, no way back. We lose, thanks for
		 * playing.
		 */
		WARN_ON_ONCE(st->rollback);
		st->should_run = false;
	}

	cpuhp_lock_release(bringup);

	if (!st->should_run)
		complete_ap_thread(st, bringup);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int ret;

	if (!cpu_online(cpu))
		return 0;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);

	st->rollback = false;
	st->last = NULL;

	st->node = node;
	st->bringup = bringup;
	st->cb_state = state;
	st->single = true;

	__cpuhp_kick_ap(st);

	/*
	 * If we failed and did a partial, do a rollback.
	 */
	if ((ret = st->result) && st->last) {
		st->rollback = true;
		st->bringup = !bringup;

		__cpuhp_kick_ap(st);
	}

	/*
	 * Clean up the leftovers so the next hotplug operation won't use stale
	 * data.
	 */
	st->node = st->last = NULL;
	return ret;
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state prev_state = st->state;
	int ret;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
}

static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

void __init cpuhp_threads_init(void)
{
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

#ifdef CONFIG_HOTPLUG_CPU
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();
	int ret;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
	/* Invoke the former CPU_DYING callbacks */
	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		/*
		 * DYING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}

	/* Give up timekeeping duties */
	tick_handover_do_timer();
	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
	 * all runnable tasks from the CPU, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	wait_for_ap_thread(st, false);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	tick_cleanup_dead_cpu(cpu);
	rcutree_migrate_callbacks(cpu);
	return 0;
}

static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete_ap_thread(st, false);
}

void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	rcu_report_dead(smp_processor_id());
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++)
		cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
}

static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
		if (ret) {
			st->target = prev_state;
			if (st->state < prev_state)
				undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpus_write_lock();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = cpuhp_set_state(st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;

		st->target = target;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
		cpuhp_reset_state(st, prev_state);
		__cpuhp_kick_ap(st);
	}

out:
	cpus_write_unlock();
	/*
	 * Do post unplug cleanup. This is still protected against
	 * concurrent CPU hotplug via cpu_add_remove_lock.
	 */
	lockup_detector_cleanup();
	arch_smt_update();
	return ret;
}

static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
	if (cpu_hotplug_disabled)
		return -EBUSY;
	return _cpu_down(cpu, 0, target);
}

static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int err;

	cpu_maps_update_begin();
	err = cpu_down_maps_locked(cpu, target);
	cpu_maps_update_done();
	return err;
}

int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);

#else
#define takedown_cpu		NULL
#endif /*CONFIG_HOTPLUG_CPU*/

/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);
	int ret;

	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	st->booted_once = true;
	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
		/*
		 * STARTING must not fail!
		 */
		WARN_ON_ONCE(ret);
	}
}

/*
 * Called from the idle task. Wake up the controlling task which brings the
 * stopper and the hotplug thread of the upcoming CPU up and then delegates
 * the rest of the online bringup to the hotplug thread.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;
	complete_ap_thread(st, true);
}

/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpus_write_lock();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
	}

	cpuhp_tasks_frozen = tasks_frozen;

	cpuhp_set_state(st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpus_write_unlock();
	arch_smt_update();
	return ret;
}

static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
#if defined(CONFIG_IA64)
		pr_err("please check additional_cpus= boot parameter\n");
#endif
		return -EINVAL;
	}

	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}
	if (!cpu_smt_allowed(cpu)) {
		err = -EPERM;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

int cpu_up(unsigned int cpu)
{
	return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);
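/*
 * Usage sketch (illustrative): cpu_down()/cpu_up() drive a CPU all the
 * way to CPUHP_OFFLINE/CPUHP_ONLINE and serialize internally via
 * cpu_maps_update_begin(), so a caller only has to check the return value.
 *
 *	static int recycle_cpu(unsigned int cpu)
 *	{
 *		int ret = cpu_down(cpu);
 *
 *		return ret ? ret : cpu_up(cpu);
 *	}
 */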

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;

	cpu_maps_update_begin();
	if (!cpu_online(primary))
		primary = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);

	pr_info("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == primary)
			continue;
		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			pr_err("Error taking CPU%d down: %d\n", cpu, error);
			break;
		}
	}

	if (!error)
		BUG_ON(num_online_cpus() > 1);
	else
		pr_err("Non-boot CPUs are not disabled\n");

	/*
	 * Make sure the CPUs won't be enabled by someone else. We need to do
	 * this even in case of failure as all disable_nonboot_cpus() users are
	 * supposed to do enable_nonboot_cpus() on the failure path.
	 */
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
	return error;
}

void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

void enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	if (cpumask_empty(frozen_cpus))
		goto out;

	pr_info("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;
		}
		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}

static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 *
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable();
		break;

	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}


static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

int __boot_cpu_id;

#endif /* CONFIG_SMP */

/* Boot processor state steps */
static struct cpuhp_step cpuhp_hp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {
		.name			= "threads:prepare",
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_PERF_PREPARE] = {
		.name			= "perf:prepare",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_WORKQUEUE_PREP] = {
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_HRTIMERS_PREPARE] = {
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= hrtimers_dead_cpu,
	},
	[CPUHP_SMPCFD_PREPARE] = {
		.name			= "smpcfd:prepare",
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
	},
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_SLAB_PREPARE] = {
		.name			= "slab:prepare",
		.startup.single		= slab_prepare_cpu,
		.teardown.single	= slab_dead_cpu,
	},
	[CPUHP_RCUTREE_PREP] = {
		.name			= "RCU/tree:prepare",
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
	},
	/*
	 * On the tear-down path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_PREPARE] = {
		.name			= "timers:prepare",
		.startup.single		= timers_prepare_cpu,
		.teardown.single	= timers_dead_cpu,
	},
	/* Kicks the plugged cpu into life */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= bringup_cpu,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
	},
	[CPUHP_AP_RCUTREE_DYING] = {
		.name			= "RCU/tree:dying",
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
	},
	[CPUHP_AP_SMPCFD_DYING] = {
		.name			= "smpcfd:dying",
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
	},
	/* Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronization */
	[CPUHP_AP_ONLINE] = {
		.name			= "ap:online",
	},
	/*
	 * Handled on the control processor until the plugged processor manages
	 * this itself.
	 */
	[CPUHP_TEARDOWN_CPU] = {
		.name			= "cpu:teardown",
		.startup.single		= NULL,
		.teardown.single	= takedown_cpu,
		.cant_stop		= true,
	},
	/* Handle smpboot threads park/unpark */
	[CPUHP_AP_SMPBOOT_THREADS] = {
		.name			= "smpboot/threads:online",
		.startup.single		= smpboot_unpark_threads,
		.teardown.single	= smpboot_park_threads,
	},
	[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
		.name			= "irq/affinity:online",
		.startup.single		= irq_affinity_online_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_AP_PERF_ONLINE] = {
		.name			= "perf:online",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_AP_WATCHDOG_ONLINE] = {
		.name			= "lockup_detector:online",
		.startup.single		= lockup_detector_online_cpu,
		.teardown.single	= lockup_detector_offline_cpu,
	},
	[CPUHP_AP_WORKQUEUE_ONLINE] = {
		.name			= "workqueue:online",
		.startup.single		= workqueue_online_cpu,
		.teardown.single	= workqueue_offline_cpu,
	},
	[CPUHP_AP_RCUTREE_ONLINE] = {
		.name			= "RCU/tree:online",
		.startup.single		= rcutree_online_cpu,
		.teardown.single	= rcutree_offline_cpu,
	},
#endif
	/*
	 * The dynamically registered state space is here
	 */

#ifdef CONFIG_SMP
	/* Last state is scheduler control setting the cpu active */
	[CPUHP_AP_ACTIVE] = {
		.name			= "sched:active",
		.startup.single		= sched_cpu_activate,
		.teardown.single	= sched_cpu_deactivate,
	},
#endif

	/* CPU is fully up and running. */
	[CPUHP_ONLINE] = {
		.name			= "online",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
};

/* Sanity check for callbacks */
static int cpuhp_cb_check(enum cpuhp_state state)
{
	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
		return -EINVAL;
	return 0;
}

/*
 * Returns a free slot for dynamic state assignment of the Online state. The states
 * are protected by the cpuhp_slot_states mutex and an empty slot is identified
 * by having no name assigned.
 */
static int cpuhp_reserve_state(enum cpuhp_state state)
{
	enum cpuhp_state i, end;
	struct cpuhp_step *step;

	switch (state) {
	case CPUHP_AP_ONLINE_DYN:
		step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN;
		end = CPUHP_AP_ONLINE_DYN_END;
		break;
	case CPUHP_BP_PREPARE_DYN:
		step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN;
		end = CPUHP_BP_PREPARE_DYN_END;
		break;
	default:
		return -EINVAL;
	}

	for (i = state; i <= end; i++, step++) {
		if (!step->name)
			return i;
	}
	WARN(1, "No more dynamic states available for CPU hotplug\n");
	return -ENOSPC;
}

static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
				 int (*startup)(unsigned int cpu),
				 int (*teardown)(unsigned int cpu),
				 bool multi_instance)
{
	/* (Un)Install the callbacks for further cpu hotplug operations */
	struct cpuhp_step *sp;
	int ret = 0;

	/*
	 * If name is NULL, then the state gets removed.
	 *
	 * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on
	 * the first allocation from these dynamic ranges, so the removal
	 * would trigger a new allocation and clear the wrong (already
	 * empty) state, leaving the callbacks of the to be cleared state
	 * dangling, which causes wreckage on the next hotplug operation.
	 */
	if (name && (state == CPUHP_AP_ONLINE_DYN ||
		     state == CPUHP_BP_PREPARE_DYN)) {
		ret = cpuhp_reserve_state(state);
		if (ret < 0)
			return ret;
		state = ret;
	}
	sp = cpuhp_get_step(state);
	if (name && sp->name)
		return -EBUSY;

	sp->startup.single = startup;
	sp->teardown.single = teardown;
	sp->name = name;
	sp->multi_instance = multi_instance;
	INIT_HLIST_HEAD(&sp->list);
	return ret;
}

static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
{
	return cpuhp_get_step(state)->teardown.single;
}

/*
 * Call the startup/teardown function for a step either on the AP or
 * on the current CPU.
 */
static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
			    struct hlist_node *node)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int ret;

	/*
	 * If there's nothing to do, we're done.
	 * Relies on the union for multi_instance.
	 */
	if ((bringup && !sp->startup.single) ||
	    (!bringup && !sp->teardown.single))
		return 0;
	/*
	 * The non AP bound callbacks can fail on bringup. On teardown
	 * e.g. module removal we crash for now.
	 */
#ifdef CONFIG_SMP
	if (cpuhp_is_ap_state(state))
		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
	else
		ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
#else
	ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL);
#endif
	BUG_ON(ret && !bringup);
	return ret;
}

/*
 * Called from __cpuhp_setup_state on a recoverable failure.
 *
 * Note: The teardown callbacks for rollback are not allowed to fail!
 */
static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
				   struct hlist_node *node)
{
	int cpu;

	/* Roll back the already executed steps on the other cpus */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpu >= failedcpu)
			break;

		/* Did we invoke the startup call on that cpu ? */
		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}
}

int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
					  struct hlist_node *node,
					  bool invoke)
{
	struct cpuhp_step *sp;
	int cpu;
	int ret;

	lockdep_assert_cpus_held();

	sp = cpuhp_get_step(state);
	if (sp->multi_instance == false)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	if (!invoke || !sp->startup.multi)
		goto add_node;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, node);
		if (ret) {
			if (sp->teardown.multi)
				cpuhp_rollback_install(cpu, state, node);
			goto unlock;
		}
	}
add_node:
	ret = 0;
	hlist_add_head(node, &sp->list);
unlock:
	mutex_unlock(&cpuhp_state_mutex);
	return ret;
}

int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
			       bool invoke)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
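/*
 * Usage sketch (illustrative): a multi-instance state embeds a
 * struct hlist_node in each per-instance object and passes it in here.
 * struct my_instance and my_state are hypothetical; the convenience
 * wrapper cpuhp_state_add_instance() from <linux/cpuhotplug.h> is assumed
 * to call __cpuhp_state_add_instance() with invoke == true.
 *
 *	struct my_instance {
 *		struct hlist_node node;
 *		void *priv;
 *	};
 *
 *	static int my_add(enum cpuhp_state my_state, struct my_instance *inst)
 *	{
 *		return cpuhp_state_add_instance(my_state, &inst->node);
 *	}
 */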

/**
 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
 * @state:		The state to setup
 * @invoke:		If true, the startup function is invoked for cpus where
 *			cpu state >= @state
 * @startup:		startup callback function
 * @teardown:		teardown callback function
 * @multi_instance:	State is set up for multiple instances which get
 *			added afterwards.
 *
 * The caller needs to hold cpus read locked while calling this function.
 * Returns:
 *   On success:
 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
 *      0 for all other states
 *   On failure: proper (negative) error code
 */
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
				   const char *name, bool invoke,
				   int (*startup)(unsigned int cpu),
				   int (*teardown)(unsigned int cpu),
				   bool multi_instance)
{
	int cpu, ret = 0;
	bool dynstate;

	lockdep_assert_cpus_held();

	if (cpuhp_cb_check(state) || !name)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	ret = cpuhp_store_callbacks(state, name, startup, teardown,
				    multi_instance);

	dynstate = state == CPUHP_AP_ONLINE_DYN;
	if (ret > 0 && dynstate) {
		state = ret;
		ret = 0;
	}

	if (ret || !invoke || !startup)
		goto out;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, NULL);
		if (ret) {
			if (teardown)
				cpuhp_rollback_install(cpu, state, NULL);
			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
			goto out;
		}
	}
out:
	mutex_unlock(&cpuhp_state_mutex);
	/*
	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
	 * dynamically allocated state in case of success.
	 */
	if (!ret && dynstate)
		return state;
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);

int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
					     teardown, multi_instance);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state);
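/*
 * Usage sketch (illustrative): registering a dynamically allocated online
 * state.  cpuhp_setup_state() is assumed to be the cpus_read_lock() taking
 * wrapper from <linux/cpuhotplug.h>; my_online()/my_offline() are
 * hypothetical callbacks.  A positive return value is the dynamically
 * allocated state number described in the kernel-doc above.
 *
 *	static int my_online(unsigned int cpu)  { return 0; }
 *	static int my_offline(unsigned int cpu) { return 0; }
 *
 *	static int __init my_subsys_init(void)
 *	{
 *		int state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 *					      "subsys/my:online",
 *					      my_online, my_offline);
 *		return state < 0 ? state : 0;
 *	}
 */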

int __cpuhp_state_remove_instance(enum cpuhp_state state,
				  struct hlist_node *node, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	if (!sp->multi_instance)
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&cpuhp_state_mutex);

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;
	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}

remove:
	hlist_del(node);
	mutex_unlock(&cpuhp_state_mutex);
	cpus_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);

/**
 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
 * @state:	The state to remove
 * @invoke:	If true, the teardown function is invoked for cpus where
 *		cpu state >= @state
 *
 * The caller needs to hold cpus read locked while calling this function.
 * The teardown callback is currently not allowed to fail. Think
 * about module removal!
 */
void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	lockdep_assert_cpus_held();

	mutex_lock(&cpuhp_state_mutex);
	if (sp->multi_instance) {
		WARN(!hlist_empty(&sp->list),
		     "Error: Removing state %d which has instances left.\n",
		     state);
		goto remove;
	}

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;

	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, NULL);
	}
remove:
	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
	mutex_unlock(&cpuhp_state_mutex);
}
EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);

void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
	cpus_read_lock();
	__cpuhp_remove_state_cpuslocked(state, invoke);
	cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);

#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
static ssize_t show_cpuhp_state(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->state);
}
static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);

static ssize_t write_cpuhp_target(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *sp;
	int target, ret;

	ret = kstrtoint(buf, 10, &target);
	if (ret)
		return ret;

#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
		return -EINVAL;
#else
	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
		return -EINVAL;
#endif

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	mutex_lock(&cpuhp_state_mutex);
	sp = cpuhp_get_step(target);
	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		goto out;

	if (st->state < target)
		ret = do_cpu_up(dev->id, target);
	else
		ret = do_cpu_down(dev->id, target);
out:
	unlock_device_hotplug();
	return ret ? ret : count;
}

static ssize_t show_cpuhp_target(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->target);
}
static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);


static ssize_t write_cpuhp_fail(struct device *dev,
				struct device_attribute *attr,
				const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *sp;
	int fail, ret;

	ret = kstrtoint(buf, 10, &fail);
	if (ret)
		return ret;

	if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
		return -EINVAL;

	/*
	 * Cannot fail STARTING/DYING callbacks.
	 */
	if (cpuhp_is_atomic_state(fail))
		return -EINVAL;

	/*
	 * Cannot fail anything that doesn't have callbacks.
	 */
	mutex_lock(&cpuhp_state_mutex);
	sp = cpuhp_get_step(fail);
	if (!sp->startup.single && !sp->teardown.single)
		ret = -EINVAL;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		return ret;

	st->fail = fail;

	return count;
}

static ssize_t show_cpuhp_fail(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->fail);
}

static DEVICE_ATTR(fail, 0644, show_cpuhp_fail, write_cpuhp_fail);
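/*
 * Editorial note (illustrative, not from the original source): "fail" is a
 * test hook for the state machine.  Writing a state number arms st->fail for
 * that CPU, and the next hotplug operation that reaches this state has its
 * callback invocation fail, which exercises the rollback paths.  The checks
 * above reject states outside CPUHP_OFFLINE..CPUHP_ONLINE, atomic
 * (STARTING/DYING) states and states without callbacks.
 */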

static struct attribute *cpuhp_cpu_attrs[] = {
	&dev_attr_state.attr,
	&dev_attr_target.attr,
	&dev_attr_fail.attr,
	NULL
};

static const struct attribute_group cpuhp_cpu_attr_group = {
	.attrs = cpuhp_cpu_attrs,
	.name = "hotplug",
	NULL
};

static ssize_t show_cpuhp_states(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	ssize_t cur, res = 0;
	int i;

	mutex_lock(&cpuhp_state_mutex);
	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
		struct cpuhp_step *sp = cpuhp_get_step(i);

		if (sp->name) {
			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
			buf += cur;
			res += cur;
		}
	}
	mutex_unlock(&cpuhp_state_mutex);
	return res;
}
static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);

static struct attribute *cpuhp_cpu_root_attrs[] = {
	&dev_attr_states.attr,
	NULL
};

static const struct attribute_group cpuhp_cpu_root_attr_group = {
	.attrs = cpuhp_cpu_root_attrs,
	.name = "hotplug",
	NULL
};

#ifdef CONFIG_HOTPLUG_SMT

static const char *smt_states[] = {
	[CPU_SMT_ENABLED]		= "on",
	[CPU_SMT_DISABLED]		= "off",
	[CPU_SMT_FORCE_DISABLED]	= "forceoff",
	[CPU_SMT_NOT_SUPPORTED]		= "notsupported",
};

static ssize_t
show_smt_control(struct device *dev, struct device_attribute *attr, char *buf)
{
	return snprintf(buf, PAGE_SIZE - 2, "%s\n", smt_states[cpu_smt_control]);
}

static void cpuhp_offline_cpu_device(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	dev->offline = true;
	/* Tell user space about the state change */
	kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
}

static void cpuhp_online_cpu_device(unsigned int cpu)
{
	struct device *dev = get_cpu_device(cpu);

	dev->offline = false;
	/* Tell user space about the state change */
	kobject_uevent(&dev->kobj, KOBJ_ONLINE);
}

int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
{
	int cpu, ret = 0;

	cpu_maps_update_begin();
	for_each_online_cpu(cpu) {
		if (topology_is_primary_thread(cpu))
			continue;
		ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
		if (ret)
			break;
		/*
		 * As this needs to hold the cpu maps lock it's impossible
		 * to call device_offline() because that ends up calling
		 * cpu_down() which takes cpu maps lock. cpu maps lock
		 * needs to be held as this might race against in kernel
		 * abusers of the hotplug machinery (thermal management).
		 *
		 * So nothing would update device:offline state. That would
		 * leave the sysfs entry stale and prevent onlining after
		 * smt control has been changed to 'off' again. This is
		 * called under the sysfs hotplug lock, so it is properly
		 * serialized against the regular offline usage.
		 */
		cpuhp_offline_cpu_device(cpu);
	}
	if (!ret) {
		cpu_smt_control = ctrlval;
		arch_smt_update();
	}
	cpu_maps_update_done();
	return ret;
}

int cpuhp_smt_enable(void)
{
	int cpu, ret = 0;

	cpu_maps_update_begin();
	cpu_smt_control = CPU_SMT_ENABLED;
	arch_smt_update();
	for_each_present_cpu(cpu) {
		/* Skip online CPUs and CPUs on offline nodes */
		if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
			continue;
		ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
		if (ret)
			break;
		/* See comment in cpuhp_smt_disable() */
		cpuhp_online_cpu_device(cpu);
	}
	cpu_maps_update_done();
	return ret;
}

static ssize_t
store_smt_control(struct device *dev, struct device_attribute *attr,
		  const char *buf, size_t count)
{
	int ctrlval, ret;

	if (sysfs_streq(buf, "on"))
		ctrlval = CPU_SMT_ENABLED;
	else if (sysfs_streq(buf, "off"))
		ctrlval = CPU_SMT_DISABLED;
	else if (sysfs_streq(buf, "forceoff"))
		ctrlval = CPU_SMT_FORCE_DISABLED;
	else
		return -EINVAL;

	if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
		return -EPERM;

	if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
		return -ENODEV;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	if (ctrlval != cpu_smt_control) {
		switch (ctrlval) {
		case CPU_SMT_ENABLED:
			ret = cpuhp_smt_enable();
			break;
		case CPU_SMT_DISABLED:
		case CPU_SMT_FORCE_DISABLED:
			ret = cpuhp_smt_disable(ctrlval);
			break;
		}
	}

	unlock_device_hotplug();
	return ret ? ret : count;
}
static DEVICE_ATTR(control, 0644, show_smt_control, store_smt_control);
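/*
 * Editorial note (illustrative, not from the original source): this group is
 * registered as /sys/devices/system/cpu/smt/ by cpu_smt_state_init() below.
 * Writing "on", "off" or "forceoff" to smt/control selects one of the
 * smt_states[] above; "forceoff" is sticky and cannot be undone at runtime
 * (see the -EPERM check in store_smt_control()), while "active" roughly
 * reports whether sibling threads are currently available.
 */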

static ssize_t
show_smt_active(struct device *dev, struct device_attribute *attr, char *buf)
{
	bool active = topology_max_smt_threads() > 1;

	return snprintf(buf, PAGE_SIZE - 2, "%d\n", active);
}
static DEVICE_ATTR(active, 0444, show_smt_active, NULL);

static struct attribute *cpuhp_smt_attrs[] = {
	&dev_attr_control.attr,
	&dev_attr_active.attr,
	NULL
};

static const struct attribute_group cpuhp_smt_attr_group = {
	.attrs = cpuhp_smt_attrs,
	.name = "smt",
	NULL
};

static int __init cpu_smt_state_init(void)
{
	return sysfs_create_group(&cpu_subsys.dev_root->kobj,
				  &cpuhp_smt_attr_group);
}

#else
static inline int cpu_smt_state_init(void) { return 0; }
#endif

static int __init cpuhp_sysfs_init(void)
{
	int cpu, ret;

	ret = cpu_smt_state_init();
	if (ret)
		return ret;

	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
				 &cpuhp_cpu_root_attr_group);
	if (ret)
		return ret;

	for_each_possible_cpu(cpu) {
		struct device *dev = get_cpu_device(cpu);

		if (!dev)
			continue;
		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
		if (ret)
			return ret;
	}
	return 0;
}
device_initcall(cpuhp_sysfs_init);
#endif

/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */

/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
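/*
 * Editorial note (illustrative): cpumask_of() resolves a CPU number into
 * this table roughly as get_cpu_mask() in include/linux/cpumask.h does:
 *
 *	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *
 *	p -= cpu / BITS_PER_LONG;
 *	return to_cpumask(p);
 *
 * Backing the pointer up by whole longs is what makes the empty first row
 * necessary.
 */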

const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

#ifdef CONFIG_INIT_ALL_POSSIBLE
struct cpumask __cpu_possible_mask __read_mostly
	= {CPU_BITS_ALL};
#else
struct cpumask __cpu_possible_mask __read_mostly;
#endif
EXPORT_SYMBOL(__cpu_possible_mask);

struct cpumask __cpu_online_mask __read_mostly;
EXPORT_SYMBOL(__cpu_online_mask);

struct cpumask __cpu_present_mask __read_mostly;
EXPORT_SYMBOL(__cpu_present_mask);

struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);

void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(&__cpu_present_mask, src);
}

void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(&__cpu_possible_mask, src);
}

void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(&__cpu_online_mask, src);
}
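/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * architecture startup code seeds these masks before bringing up secondary
 * CPUs, typically along these lines (the function name and the way the CPU
 * count is obtained are hypothetical):
 *
 *	void __init my_arch_init_cpus(unsigned int nr_cpus_from_firmware)
 *	{
 *		unsigned int cpu;
 *
 *		for (cpu = 0; cpu < nr_cpus_from_firmware; cpu++) {
 *			set_cpu_possible(cpu, true);
 *			set_cpu_present(cpu, true);
 *		}
 *	}
 *
 * init_cpu_possible()/init_cpu_present() above provide the same thing in
 * bulk when a ready-made cpumask is available.
 */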

/*
 * Activate the first processor.
 */
void __init boot_cpu_init(void)
{
	int cpu = smp_processor_id();

	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
	set_cpu_online(cpu, true);
	set_cpu_active(cpu, true);
	set_cpu_present(cpu, true);
	set_cpu_possible(cpu, true);

#ifdef CONFIG_SMP
	__boot_cpu_id = cpu;
#endif
}

/*
 * Must be called _AFTER_ setting up the per_cpu areas
 */
void __init boot_cpu_hotplug_init(void)
{
#ifdef CONFIG_SMP
	this_cpu_write(cpuhp_state.booted_once, true);
#endif
	this_cpu_write(cpuhp_state.state, CPUHP_ONLINE);
}

/*
 * These are used for a global "mitigations=" cmdline option for toggling
 * optional CPU mitigations.
 */
enum cpu_mitigations {
	CPU_MITIGATIONS_OFF,
	CPU_MITIGATIONS_AUTO,
	CPU_MITIGATIONS_AUTO_NOSMT,
};

static enum cpu_mitigations cpu_mitigations __ro_after_init =
	CPU_MITIGATIONS_AUTO;

static int __init mitigations_parse_cmdline(char *arg)
{
	if (!strcmp(arg, "off"))
		cpu_mitigations = CPU_MITIGATIONS_OFF;
	else if (!strcmp(arg, "auto"))
		cpu_mitigations = CPU_MITIGATIONS_AUTO;
	else if (!strcmp(arg, "auto,nosmt"))
		cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
	else
		pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
			arg);

	return 0;
}
early_param("mitigations", mitigations_parse_cmdline);
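/*
 * Editorial note (illustrative): the recognized command line values are
 * "mitigations=off", "mitigations=auto" (the default) and
 * "mitigations=auto,nosmt"; any other value triggers the pr_crit() above
 * and leaves the default in place.
 */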

/* mitigations=off */
bool cpu_mitigations_off(void)
{
	return cpu_mitigations == CPU_MITIGATIONS_OFF;
}
EXPORT_SYMBOL_GPL(cpu_mitigations_off);

/* mitigations=auto,nosmt */
bool cpu_mitigations_auto_nosmt(void)
{
	return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
}
EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
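/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * architecture mitigation selection typically consults these helpers along
 * the following lines.  my_select_mitigation() and enable_foo_mitigation()
 * are hypothetical; cpu_smt_disable() refers to the helper this file
 * provides when CONFIG_HOTPLUG_SMT is enabled.
 *
 *	static void __init my_select_mitigation(void)
 *	{
 *		if (cpu_mitigations_off())
 *			return;
 *
 *		enable_foo_mitigation();
 *
 *		if (cpu_mitigations_auto_nosmt())
 *			cpu_smt_disable(false);
 *	}
 */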