/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>

#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>

#include "smpboot.h"

/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @node:	Remote CPU node for multi-instance callbacks
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done:	Signal completion to the issuer of the task
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	struct hlist_node	*node;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done;
#endif
};

static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);

/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @teardown:	Teardown function of the step
 * @skip_onerr:	Do not invoke the functions on error rollback
 *		Will go away once the notifiers	are gone
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
};

static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_bp_states[];
static struct cpuhp_step cpuhp_ap_states[];

static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	struct cpuhp_step *sp;

	sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
	return sp + state;
}

/**
 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
 * @state:	The state to run the callbacks for
 * @bringup:	True if the bringup callback should be invoked
 * @node:	For multi-instance, do a single entry callback for install/remove
 *
 * Called from cpu hotplug and from the state register machinery.
 */
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
				 bool bringup, struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct cpuhp_step *step = cpuhp_get_step(state);
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

	if (!step->multi_instance) {
		cb = bringup ? step->startup.single : step->teardown.single;
		if (!cb)
			return 0;
		trace_cpuhp_enter(cpu, st->target, state, cb);
		ret = cb(cpu);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}
	cbm = bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		if (ret)
			goto err;
		cnt++;
	}
	return 0;
err:
	/* Rollback the instances if one failed */
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return ret;

	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;
		cbm(cpu, node);
	}
	return ret;
}

#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);

/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

#ifdef CONFIG_HOTPLUG_CPU

static struct {
	struct task_struct *active_writer;
	/* wait queue to wake up the active_writer */
	wait_queue_head_t wq;
	/* verifies that no writer will get active while readers are active */
	struct mutex lock;
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	atomic_t refcount;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} cpu_hotplug = {
	.active_writer = NULL,
	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
#endif
};

/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)


void cpus_read_lock(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	cpuhp_lock_acquire_read();
	mutex_lock(&cpu_hotplug.lock);
	atomic_inc(&cpu_hotplug.refcount);
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);

void cpus_read_unlock(void)
{
	int refcount;

	if (cpu_hotplug.active_writer == current)
		return;

	refcount = atomic_dec_return(&cpu_hotplug.refcount);
	if (WARN_ON(refcount < 0)) /* try to fix things up */
		atomic_inc(&cpu_hotplug.refcount);

	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
		wake_up(&cpu_hotplug.wq);

	cpuhp_lock_release();

}
EXPORT_SYMBOL_GPL(cpus_read_unlock);
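
/*
 * Illustrative usage sketch (do_something_per_cpu() is just a placeholder,
 * not a real function): a reader that needs a stable view of cpu_online_mask
 * brackets its walk with the read lock:
 *
 *	cpus_read_lock();
 *	for_each_online_cpu(cpu)
 *		do_something_per_cpu(cpu);
 *	cpus_read_unlock();
 *
 * No CPU can be unplugged while the read lock is held, so the set of online
 * CPUs cannot shrink underneath the loop.
 */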

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
 * Since cpus_write_lock() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock, finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * cpus_read_lock() is not an API which is called all that often.
 */
void cpus_write_lock(void)
{
	DEFINE_WAIT(wait);

	cpu_hotplug.active_writer = current;
	cpuhp_lock_acquire();

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
		if (likely(!atomic_read(&cpu_hotplug.refcount)))
				break;
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
	finish_wait(&cpu_hotplug.wq, &wait);
}

void cpus_write_unlock(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
	cpuhp_lock_release();
}

/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);

static void __cpu_hotplug_enable(void)
{
	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		return;
	cpu_hotplug_disabled--;
}

void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
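
/*
 * Illustrative usage sketch (error handling elided): code that must not race
 * with CPUs coming or going, but does not want to hold the hotplug lock for
 * the whole duration, brackets the critical section with:
 *
 *	cpu_hotplug_disable();
 *	... work that must not race with CPU hotplug ...
 *	cpu_hotplug_enable();
 *
 * While disabled, cpu_up()/cpu_down() return -EBUSY. The PM notifier at the
 * bottom of this file uses exactly this pairing around suspend/hibernate.
 */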
#endif	/* CONFIG_HOTPLUG_CPU */

/* Notifier wrappers for transitioning to state machine */

static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	wait_for_completion(&st->done);
	return st->result;
}

static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	ret = bringup_wait_for_ap(cpu);
	BUG_ON(!cpu_online(cpu));
	return ret;
}

/*
 * Hotplug state machine related functions
 */
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, true, NULL);
	}
}

static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}

static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state--; st->state > st->target; st->state--) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, false, NULL);
	}
}

static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_up(cpu, st);
			break;
		}
	}
	return ret;
}

/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done);
}

static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);

	return cpuhp_down_callbacks(cpu, st, target);
}

/* Execute the online startup callbacks. Used to be CPU_ONLINE */
static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	return cpuhp_up_callbacks(cpu, st, st->target);
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	int ret = 0;

	/*
	 * Paired with the mb() in cpuhp_kick_ap_work and
	 * cpuhp_invoke_ap_callback, so the work set is consistently visible.
	 */
	smp_mb();
	if (!st->should_run)
		return;

	st->should_run = false;

	/* Single callback invocation for [un]install ? */
	if (st->single) {
		if (st->cb_state < CPUHP_AP_ONLINE) {
			local_irq_disable();
			ret = cpuhp_invoke_callback(cpu, st->cb_state,
						    st->bringup, st->node);
			local_irq_enable();
		} else {
			ret = cpuhp_invoke_callback(cpu, st->cb_state,
						    st->bringup, st->node);
		}
	} else if (st->rollback) {
		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);

		undo_cpu_down(cpu, st);
		st->rollback = false;
	} else {
		/* Cannot happen .... */
		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);

		/* Regular hotplug work */
		if (st->state < st->target)
			ret = cpuhp_ap_online(cpu, st);
		else if (st->state > st->target)
			ret = cpuhp_ap_offline(cpu, st);
	}
	st->result = ret;
	complete(&st->done);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	if (!cpu_online(cpu))
		return 0;

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node);

	st->cb_state = state;
	st->single = true;
	st->bringup = bringup;
	st->node = node;

	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_completion(&st->done);
	return st->result;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
{
	st->result = 0;
	st->single = false;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state state = st->state;

	trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
	__cpuhp_kick_ap_work(st);
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590
	wait_for_completion(&st->done);
	trace_cpuhp_exit(cpu, st->state, state, st->result);
	return st->result;
}

static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

void __init cpuhp_threads_init(void)
{
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

#ifdef CONFIG_HOTPLUG_CPU
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

static inline void check_for_tasks(int dead_cpu)
{
	struct task_struct *g, *p;

	read_lock(&tasklist_lock);
	for_each_process_thread(g, p) {
		if (!p->on_rq)
			continue;
		/*
		 * We do the check with unlocked task_rq(p)->lock.
		 * Order the reading so we do not warn about a task
		 * which was running on this cpu in the past and has
		 * just been woken on another cpu.
		 */
		rmb();
		if (task_cpu(p) != dead_cpu)
			continue;

		pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
			p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
	}
	read_unlock(&tasklist_lock);
}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
	/* Invoke the former CPU_DYING callbacks */
	for (; st->state > target; st->state--)
		cpuhp_invoke_callback(cpu, st->state, false, NULL);

	/* Give up timekeeping duties */
	tick_handover_do_timer();
	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
	smpboot_park_threads(cpu);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
	err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	wait_for_completion(&st->done);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	tick_cleanup_dead_cpu(cpu);
	return 0;
}

static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete(&st->done);
}

void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	rcu_report_dead(smp_processor_id());
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

#else
#define takedown_cpu		NULL
#endif

#ifdef CONFIG_HOTPLUG_CPU

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpus_write_lock();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = st->state;
	st->target = target;
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
		st->target = prev_state;
		st->rollback = true;
		cpuhp_kick_ap_work(cpu);
	}

out:
	cpus_write_unlock();
	return ret;
}

static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_down(cpu, 0, target);

out:
	cpu_maps_update_done();
	return err;
}
int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/

/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);

	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	while (st->state < target) {
		st->state++;
		cpuhp_invoke_callback(cpu, st->state, true, NULL);
	}
}

/*
 * Called from the idle task. We need to set active here, so we can kick off
 * the stopper thread and unpark the smpboot threads. If the target state is
 * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
 * cpu further.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	unsigned int cpu = smp_processor_id();

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;

	/* Unpark the stopper thread and the hotplug thread of this cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

	/* Should we go further up ? */
	if (st->target > CPUHP_AP_ONLINE_IDLE)
		__cpuhp_kick_ap_work(st);
	else
		complete(&st->done);
}

/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpus_write_lock();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
	}

	cpuhp_tasks_frozen = tasks_frozen;

	st->target = target;
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpus_write_unlock();
	return ret;
}

static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
#if defined(CONFIG_IA64)
		pr_err("please check additional_cpus= boot parameter\n");
#endif
		return -EINVAL;
	}

	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

int cpu_up(unsigned int cpu)
{
	return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);
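
/*
 * Illustrative sketch of the exported entry points (3 is an arbitrary CPU
 * number used only for the example): built-in code or modules take a CPU
 * down and bring it back with
 *
 *	ret = cpu_down(3);
 *	...
 *	ret = cpu_up(3);
 *
 * Both return 0 on success, -EBUSY while hotplug is disabled and -EINVAL for
 * a CPU that is not present/possible; they take cpu_add_remove_lock via
 * cpu_maps_update_begin()/done() internally.
 */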

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;

	cpu_maps_update_begin();
	if (!cpu_online(primary))
		primary = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);

	pr_info("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == primary)
			continue;
		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			pr_err("Error taking CPU%d down: %d\n", cpu, error);
			break;
		}
	}

	if (!error)
		BUG_ON(num_online_cpus() > 1);
	else
		pr_err("Non-boot CPUs are not disabled\n");

	/*
	 * Make sure the CPUs won't be enabled by someone else. We need to do
	 * this even in case of failure as all disable_nonboot_cpus() users are
	 * supposed to do enable_nonboot_cpus() on the failure path.
	 */
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
	return error;
}

void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

void enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	if (cpumask_empty(frozen_cpus))
		goto out;

	pr_info("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;
		}
		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}

static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 *
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable();
		break;

	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}


static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

int __boot_cpu_id;

#endif /* CONFIG_SMP */

/* Boot processor state steps */
static struct cpuhp_step cpuhp_bp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]	= {
		.name			= "threads:prepare",
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_PERF_PREPARE] = {
		.name			= "perf:prepare",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_WORKQUEUE_PREP] = {
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_HRTIMERS_PREPARE] = {
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= hrtimers_dead_cpu,
	},
	[CPUHP_SMPCFD_PREPARE] = {
		.name			= "smpcfd:prepare",
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
	},
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_SLAB_PREPARE] = {
		.name			= "slab:prepare",
		.startup.single		= slab_prepare_cpu,
		.teardown.single	= slab_dead_cpu,
	},
	[CPUHP_RCUTREE_PREP] = {
		.name			= "RCU/tree:prepare",
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
	},
	/*
	 * On the tear-down path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_DEAD] = {
		.name			= "timers:dead",
		.startup.single		= NULL,
		.teardown.single	= timers_dead_cpu,
	},
	/* Kicks the plugged cpu into life */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= bringup_cpu,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_AP_SMPCFD_DYING] = {
		.name			= "smpcfd:dying",
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
	},
	/*
	 * Handled on control processor until the plugged processor manages
	 * this itself.
	 */
	[CPUHP_TEARDOWN_CPU] = {
		.name			= "cpu:teardown",
		.startup.single		= NULL,
		.teardown.single	= takedown_cpu,
		.cant_stop		= true,
	},
#else
	[CPUHP_BRINGUP_CPU] = { },
#endif
};

/* Application processor state steps */
static struct cpuhp_step cpuhp_ap_states[] = {
#ifdef CONFIG_SMP
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
	},
	[CPUHP_AP_RCUTREE_DYING] = {
		.name			= "RCU/tree:dying",
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
	},
	/* Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronization */
	[CPUHP_AP_ONLINE] = {
		.name			= "ap:online",
	},
	/* Handle smpboot threads park/unpark */
	[CPUHP_AP_SMPBOOT_THREADS] = {
		.name			= "smpboot/threads:online",
		.startup.single		= smpboot_unpark_threads,
		.teardown.single	= NULL,
	},
	[CPUHP_AP_PERF_ONLINE] = {
		.name			= "perf:online",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_AP_WORKQUEUE_ONLINE] = {
		.name			= "workqueue:online",
		.startup.single		= workqueue_online_cpu,
		.teardown.single	= workqueue_offline_cpu,
	},
	[CPUHP_AP_RCUTREE_ONLINE] = {
		.name			= "RCU/tree:online",
		.startup.single		= rcutree_online_cpu,
		.teardown.single	= rcutree_offline_cpu,
	},
#endif
	/*
	 * The dynamically registered state space is here
	 */

#ifdef CONFIG_SMP
	/* Last state is scheduler control setting the cpu active */
	[CPUHP_AP_ACTIVE] = {
		.name			= "sched:active",
		.startup.single		= sched_cpu_activate,
		.teardown.single	= sched_cpu_deactivate,
	},
#endif

	/* CPU is fully up and running. */
	[CPUHP_ONLINE] = {
		.name			= "online",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
};

/* Sanity check for callbacks */
static int cpuhp_cb_check(enum cpuhp_state state)
{
	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
		return -EINVAL;
	return 0;
}

/*
 * Return a free slot for dynamic state assignment. The states are protected
 * by the cpuhp_state_mutex and an empty slot is identified by having no
 * name assigned.
 */
static int cpuhp_reserve_state(enum cpuhp_state state)
{
	enum cpuhp_state i, end;
	struct cpuhp_step *step;

	switch (state) {
	case CPUHP_AP_ONLINE_DYN:
		step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN;
		end = CPUHP_AP_ONLINE_DYN_END;
		break;
	case CPUHP_BP_PREPARE_DYN:
		step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN;
		end = CPUHP_BP_PREPARE_DYN_END;
		break;
	default:
		return -EINVAL;
	}

	for (i = state; i <= end; i++, step++) {
		if (!step->name)
			return i;
	}
	WARN(1, "No more dynamic states available for CPU hotplug\n");
	return -ENOSPC;
}

static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
				 int (*startup)(unsigned int cpu),
				 int (*teardown)(unsigned int cpu),
				 bool multi_instance)
{
	/* (Un)Install the callbacks for further cpu hotplug operations */
	struct cpuhp_step *sp;
	int ret = 0;

	if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
		ret = cpuhp_reserve_state(state);
		if (ret < 0)
			return ret;
		state = ret;
	}
	sp = cpuhp_get_step(state);
	if (name && sp->name)
		return -EBUSY;

	sp->startup.single = startup;
	sp->teardown.single = teardown;
	sp->name = name;
	sp->multi_instance = multi_instance;
	INIT_HLIST_HEAD(&sp->list);
	return ret;
}

static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
{
	return cpuhp_get_step(state)->teardown.single;
}

/*
 * Call the startup/teardown function for a step either on the AP or
 * on the current CPU.
 */
static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
			    struct hlist_node *node)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int ret;

	if ((bringup && !sp->startup.single) ||
	    (!bringup && !sp->teardown.single))
		return 0;
	/*
	 * The non AP bound callbacks can fail on bringup. On teardown
	 * e.g. module removal we crash for now.
	 */
#ifdef CONFIG_SMP
	if (cpuhp_is_ap_state(state))
		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
	else
		ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#else
	ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#endif
	BUG_ON(ret && !bringup);
	return ret;
}

/*
 * Called from __cpuhp_setup_state on a recoverable failure.
 *
 * Note: The teardown callbacks for rollback are not allowed to fail!
 */
static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
				   struct hlist_node *node)
{
	int cpu;

	/* Roll back the already executed steps on the other cpus */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpu >= failedcpu)
			break;

		/* Did we invoke the startup call on that cpu ? */
		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}
}

int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
					  struct hlist_node *node,
					  bool invoke)
{
	struct cpuhp_step *sp;
	int cpu;
	int ret;

	lockdep_assert_cpus_held();

	sp = cpuhp_get_step(state);
	if (sp->multi_instance == false)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	if (!invoke || !sp->startup.multi)
		goto add_node;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, node);
		if (ret) {
			if (sp->teardown.multi)
				cpuhp_rollback_install(cpu, state, node);
			goto unlock;
		}
	}
add_node:
	ret = 0;
	hlist_add_head(node, &sp->list);
unlock:
	mutex_unlock(&cpuhp_state_mutex);
	return ret;
}

int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
			       bool invoke)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
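
/*
 * Illustrative sketch of the multi-instance flavour (struct foo_ctx and its
 * members are hypothetical): a driver that set up a multi-instance state with
 * the *_multi() helpers from <linux/cpuhotplug.h> embeds the hlist_node in
 * its per-device context and registers each device as one instance:
 *
 *	struct foo_ctx {
 *		struct hlist_node node;
 *		...
 *	};
 *
 *	ret = cpuhp_state_add_instance(foo_hp_state, &ctx->node);
 *
 * That wrapper ends up here with invoke == true, so the startup.multi
 * callback runs for &ctx->node on every CPU that has already reached the
 * state.
 */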

/**
 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
 * @state:		The state to setup
 * @invoke:		If true, the startup function is invoked for cpus where
 *			cpu state >= @state
 * @startup:		startup callback function
 * @teardown:		teardown callback function
 * @multi_instance:	State is set up for multiple instances which get
 *			added afterwards.
 *
 * The caller needs to hold cpus read locked while calling this function.
 * Returns:
 *   On success:
 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
 *      0 for all other states
 *   On failure: proper (negative) error code
 */
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
				   const char *name, bool invoke,
				   int (*startup)(unsigned int cpu),
				   int (*teardown)(unsigned int cpu),
				   bool multi_instance)
{
	int cpu, ret = 0;
	bool dynstate;

	lockdep_assert_cpus_held();

	if (cpuhp_cb_check(state) || !name)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	ret = cpuhp_store_callbacks(state, name, startup, teardown,
				    multi_instance);

	dynstate = state == CPUHP_AP_ONLINE_DYN;
	if (ret > 0 && dynstate) {
		state = ret;
		ret = 0;
	}

	if (ret || !invoke || !startup)
		goto out;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, NULL);
		if (ret) {
			if (teardown)
				cpuhp_rollback_install(cpu, state, NULL);
			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
			goto out;
		}
	}
out:
	mutex_unlock(&cpuhp_state_mutex);
	/*
	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
	 * dynamically allocated state in case of success.
	 */
	if (!ret && dynstate)
		return state;
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);

int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
					     teardown, multi_instance);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state);
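
/*
 * Illustrative registration sketch (the foo_* names are hypothetical): most
 * callers go through the cpuhp_setup_state() wrapper in <linux/cpuhotplug.h>,
 * which passes invoke = true and multi_instance = false down to
 * __cpuhp_setup_state():
 *
 *	static int foo_online(unsigned int cpu)
 *	{
 *		return 0;	-- may fail, which triggers rollback
 *	}
 *
 *	static int foo_offline(unsigned int cpu)
 *	{
 *		return 0;	-- must not fail
 *	}
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys/foo:online",
 *				foo_online, foo_offline);
 *	if (ret < 0)
 *		return ret;
 *	foo_hp_state = ret;	-- dynamic states hand back the reserved slot
 *
 * For a fixed (non-dynamic) state the return value is 0 on success, as
 * documented above for __cpuhp_setup_state_cpuslocked().
 */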

int __cpuhp_state_remove_instance(enum cpuhp_state state,
				  struct hlist_node *node, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	if (!sp->multi_instance)
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&cpuhp_state_mutex);

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;
	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}

remove:
	hlist_del(node);
	mutex_unlock(&cpuhp_state_mutex);
	cpus_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);

/**
 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
 * @state:	The state to remove
 * @invoke:	If true, the teardown function is invoked for cpus where
 *		cpu state >= @state
 *
 * The caller needs to hold cpus read locked while calling this function.
 * The teardown callback is currently not allowed to fail. Think
 * about module removal!
 */
void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	lockdep_assert_cpus_held();

	mutex_lock(&cpuhp_state_mutex);
	if (sp->multi_instance) {
		WARN(!hlist_empty(&sp->list),
		     "Error: Removing state %d which has instances left.\n",
		     state);
		goto remove;
	}

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;

	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, NULL);
	}
remove:
	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
	mutex_unlock(&cpuhp_state_mutex);
}
EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);

void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
	cpus_read_lock();
	__cpuhp_remove_state_cpuslocked(state, invoke);
	cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
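
/*
 * Illustrative teardown sketch (foo_hp_state is the hypothetical value saved
 * in the registration example above): on module unload the state is dropped
 * with the cpuhp_remove_state() wrapper from <linux/cpuhotplug.h>, which
 * invokes the teardown callback on every CPU that had reached the state
 * before the callbacks are uninstalled:
 *
 *	cpuhp_remove_state(foo_hp_state);
 */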

#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
static ssize_t show_cpuhp_state(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->state);
}
static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);

static ssize_t write_cpuhp_target(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *sp;
	int target, ret;

	ret = kstrtoint(buf, 10, &target);
	if (ret)
		return ret;

#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
		return -EINVAL;
#else
	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
		return -EINVAL;
#endif

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	mutex_lock(&cpuhp_state_mutex);
	sp = cpuhp_get_step(target);
	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		return ret;

	if (st->state < target)
		ret = do_cpu_up(dev->id, target);
	else
		ret = do_cpu_down(dev->id, target);

	unlock_device_hotplug();
	return ret ? ret : count;
}
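
/*
 * Illustrative sketch of how the attributes created below are exercised from
 * userspace (the state number 140 is just an example and depends on the
 * kernel configuration):
 *
 *	cat  /sys/devices/system/cpu/cpu1/hotplug/state
 *	cat  /sys/devices/system/cpu/hotplug/states
 *	echo 140 > /sys/devices/system/cpu/cpu1/hotplug/target
 *
 * Without CONFIG_CPU_HOTPLUG_STATE_CONTROL only CPUHP_OFFLINE and
 * CPUHP_ONLINE are accepted, so the target file then behaves much like the
 * classic "online" attribute.
 */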

static ssize_t show_cpuhp_target(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->target);
}
static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);

static struct attribute *cpuhp_cpu_attrs[] = {
	&dev_attr_state.attr,
	&dev_attr_target.attr,
	NULL
};

static struct attribute_group cpuhp_cpu_attr_group = {
	.attrs = cpuhp_cpu_attrs,
	.name = "hotplug",
	NULL
};

static ssize_t show_cpuhp_states(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	ssize_t cur, res = 0;
	int i;

	mutex_lock(&cpuhp_state_mutex);
	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
		struct cpuhp_step *sp = cpuhp_get_step(i);

		if (sp->name) {
			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
			buf += cur;
			res += cur;
		}
	}
	mutex_unlock(&cpuhp_state_mutex);
	return res;
}
static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);

static struct attribute *cpuhp_cpu_root_attrs[] = {
	&dev_attr_states.attr,
	NULL
};

static struct attribute_group cpuhp_cpu_root_attr_group = {
	.attrs = cpuhp_cpu_root_attrs,
	.name = "hotplug",
	NULL
};

static int __init cpuhp_sysfs_init(void)
{
	int cpu, ret;

	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
				 &cpuhp_cpu_root_attr_group);
	if (ret)
		return ret;

	for_each_possible_cpu(cpu) {
		struct device *dev = get_cpu_device(cpu);

		if (!dev)
			continue;
		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
		if (ret)
			return ret;
	}
	return 0;
}
device_initcall(cpuhp_sysfs_init);
#endif

/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */

/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
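
/*
 * Illustrative sketch of how the table above is consumed (get_cpu_mask() in
 * <linux/cpumask.h> is the consumer at the time of writing; the arithmetic
 * below is a worked example, not a definition): the lookup picks row
 * 1 + cpu % BITS_PER_LONG, which has exactly that bit set in word 0, and
 * then steps the pointer back by cpu / BITS_PER_LONG words so the bit lands
 * in the right word of the resulting mask.  On a 64-bit build:
 *
 *	cpu = 3		row 4, offset 0  ->  bit 3 of word 0
 *	cpu = 67	row 4, offset 1  ->  bit 3 of word 1
 *
 * Backing the pointer up is only safe because row 0 is left empty, as the
 * comment above notes.
 */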

const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

#ifdef CONFIG_INIT_ALL_POSSIBLE
struct cpumask __cpu_possible_mask __read_mostly
	= {CPU_BITS_ALL};
#else
struct cpumask __cpu_possible_mask __read_mostly;
#endif
EXPORT_SYMBOL(__cpu_possible_mask);

struct cpumask __cpu_online_mask __read_mostly;
EXPORT_SYMBOL(__cpu_online_mask);

struct cpumask __cpu_present_mask __read_mostly;
EXPORT_SYMBOL(__cpu_present_mask);

struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);

void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(&__cpu_present_mask, src);
}

void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(&__cpu_possible_mask, src);
}

void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(&__cpu_online_mask, src);
}

/*
 * Activate the first processor.
 */
void __init boot_cpu_init(void)
{
	int cpu = smp_processor_id();

	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
	set_cpu_online(cpu, true);
	set_cpu_active(cpu, true);
	set_cpu_present(cpu, true);
	set_cpu_possible(cpu, true);

#ifdef CONFIG_SMP
	__boot_cpu_id = cpu;
#endif
}

/*
 * Must be called _AFTER_ setting up the per_cpu areas
 */
void __init boot_cpu_state_init(void)
{
	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
}