cpu.c 44.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
10
#include <linux/sched/signal.h>
L
Linus Torvalds 已提交
11 12
#include <linux/unistd.h>
#include <linux/cpu.h>
13 14
#include <linux/oom.h>
#include <linux/rcupdate.h>
15
#include <linux/export.h>
16
#include <linux/bug.h>
L
Linus Torvalds 已提交
17 18
#include <linux/kthread.h>
#include <linux/stop_machine.h>
19
#include <linux/mutex.h>
20
#include <linux/gfp.h>
21
#include <linux/suspend.h>
22
#include <linux/lockdep.h>
23
#include <linux/tick.h>
24
#include <linux/irq.h>
25
#include <linux/smpboot.h>
26
#include <linux/relay.h>
27
#include <linux/slab.h>
28

29
#include <trace/events/power.h>
30 31
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>
L
Linus Torvalds 已提交
32

33 34
#include "smpboot.h"

35 36 37 38
/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @node:	Instance node passed to a single multi-instance callback
 *		invocation (NULL means all instances)
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done:	Signal completion to the issuer of the task
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	struct hlist_node	*node;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done;
#endif
};

static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);

/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step (single or multi-instance)
 * @teardown:	Teardown function of the step (single or multi-instance)
 * @list:	List head for the instances of a multi-instance state
 * @skip_onerr:	Do not invoke the functions on error rollback
 *		Will go away once the notifiers	are gone
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 * @multi_instance:	State works on multiple instances (see @list)
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
};

93
static DEFINE_MUTEX(cpuhp_state_mutex);
94
static struct cpuhp_step cpuhp_bp_states[];
95
static struct cpuhp_step cpuhp_ap_states[];
96

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

/* Look up the step descriptor for @state in the BP or AP state table. */
static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	if (cpuhp_is_ap_state(state))
		return &cpuhp_ap_states[state];

	return &cpuhp_bp_states[state];
}

114 115 116 117
/**
 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
 * @state:	The step in the state machine
 * @bringup:	True if the bringup callback should be invoked
 * @node:	For multi-instance states: invoke the callback only for this
 *		instance. NULL means invoke it for all registered instances.
 *
 * Called from cpu hotplug and from the state register machinery.
 *
 * Returns 0 on success or the error code of the failing callback.
 */
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
				 bool bringup, struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct cpuhp_step *step = cpuhp_get_step(state);
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

	if (!step->multi_instance) {
		cb = bringup ? step->startup.single : step->teardown.single;
		if (!cb)
			return 0;
		trace_cpuhp_enter(cpu, st->target, state, cb);
		ret = cb(cpu);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}
	cbm = bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		if (ret)
			goto err;
		cnt++;
	}
	return 0;
err:
	/* Rollback the instances if one failed */
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return ret;

	/* Invoke the opposite callback on the @cnt instances already done. */
	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;
		cbm(cpu, node);
	}
	return ret;
}

177
#ifdef CONFIG_SMP
178
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
179
static DEFINE_MUTEX(cpu_add_remove_lock);
180 181
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);
L
Linus Torvalds 已提交
182

183
/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}
L
Linus Torvalds 已提交
196

197 198 199 200 201
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

202 203
#ifdef CONFIG_HOTPLUG_CPU

204 205
/*
 * Reader/writer exclusion for CPU hotplug: readers are counted in
 * @refcount, the single writer (the hotplug operation) waits on @wq
 * until the refcount drops to zero.
 */
static struct {
	struct task_struct *active_writer;
	/* wait queue to wake up the active_writer */
	wait_queue_head_t wq;
	/* verifies that no writer will get active while readers are active */
	struct mutex lock;
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	atomic_t refcount;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} cpu_hotplug = {
	.active_writer = NULL,
	.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
#endif
};
227

228 229
/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
				  lock_map_acquire_tryread(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire()      lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release()      lock_map_release(&cpu_hotplug.dep_map)

235

236
/* Take a reader reference that blocks CPU hotplug while held. May sleep. */
void get_online_cpus(void)
{
	might_sleep();
	/* The writer itself may re-enter; don't deadlock on our own lock. */
	if (cpu_hotplug.active_writer == current)
		return;
	cpuhp_lock_acquire_read();
	mutex_lock(&cpu_hotplug.lock);
	atomic_inc(&cpu_hotplug.refcount);
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);
247

248
/* Drop the reader reference taken by get_online_cpus(). */
void put_online_cpus(void)
{
	int refcount;

	if (cpu_hotplug.active_writer == current)
		return;

	refcount = atomic_dec_return(&cpu_hotplug.refcount);
	if (WARN_ON(refcount < 0)) /* try to fix things up */
		atomic_inc(&cpu_hotplug.refcount);

	/* Last reader out wakes a writer waiting in cpu_hotplug_begin(). */
	if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
		wake_up(&cpu_hotplug.wq);

	cpuhp_lock_release();

}
EXPORT_SYMBOL_GPL(put_online_cpus);
266

267 268 269 270 271 272 273
/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * get_online_cpus() not an api which is called all that often.
 *
 */
void cpu_hotplug_begin(void)
{
	DEFINE_WAIT(wait);

	cpu_hotplug.active_writer = current;
	cpuhp_lock_acquire();

	/* Sleep until there are no active readers; leave with the lock held. */
	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
		if (likely(!atomic_read(&cpu_hotplug.refcount)))
				break;
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
	finish_wait(&cpu_hotplug.wq, &wait);
}

307
/* End the write-side critical section opened by cpu_hotplug_begin(). */
void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
	cpuhp_lock_release();
}
313

314 315 316 317 318 319 320 321 322 323
/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);
328

329 330 331 332 333 334 335
/* Decrement the disable count; caller must hold cpu_add_remove_lock. */
static void __cpu_hotplug_enable(void)
{
	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		return;
	cpu_hotplug_disabled--;
}

336 337 338
/* Re-enable CPU hotplug; pairs with cpu_hotplug_disable(). */
void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
343
#endif	/* CONFIG_HOTPLUG_CPU */
344

345 346
/* Notifier wrappers for transitioning to state machine */

/* Wait until the freshly booted AP signals completion of its bringup. */
static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	wait_for_completion(&st->done);
	return st->result;
}

355 356 357 358 359
/* Boot a CPU via the arch code and wait for it to come online. */
static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	ret = bringup_wait_for_ap(cpu);
	BUG_ON(!cpu_online(cpu));
	return ret;
}

377 378 379
/*
 * Hotplug state machine related functions
 */

/* Roll back a failed down operation by re-running the bringup callbacks. */
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, true, NULL);
	}
}

/*
 * Walk the state machine downwards from the current state to @target,
 * invoking the teardown callbacks. Rolls back on the first failure.
 */
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}

407
/* Roll back a failed up operation by re-running the teardown callbacks. */
static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state--; st->state > st->target; st->state--) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, false, NULL);
	}
}

/*
 * Walk the state machine upwards from the current state to @target,
 * invoking the startup callbacks. Rolls back on the first failure.
 */
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_up(cpu, st);
			break;
		}
	}
	return ret;
}

435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done);
}

/* smpboot hook: run the thread function only when work was queued. */
static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	/* The AP never goes below CPUHP_TEARDOWN_CPU; the BP does the rest. */
	enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);

	return cpuhp_down_callbacks(cpu, st, target);
}

/* Execute the online startup callbacks. Used to be CPU_ONLINE */
static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	return cpuhp_up_callbacks(cpu, st, st->target);
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	int ret = 0;

	/*
	 * Paired with the mb() in cpuhp_kick_ap_work and
	 * cpuhp_invoke_ap_callback, so the work set is consistent visible.
	 */
	smp_mb();
	if (!st->should_run)
		return;

	st->should_run = false;

	/* Single callback invocation for [un]install ? */
	if (st->single) {
		/* States below CPUHP_AP_ONLINE are invoked with irqs off. */
		if (st->cb_state < CPUHP_AP_ONLINE) {
			local_irq_disable();
			ret = cpuhp_invoke_callback(cpu, st->cb_state,
						    st->bringup, st->node);
			local_irq_enable();
		} else {
			ret = cpuhp_invoke_callback(cpu, st->cb_state,
						    st->bringup, st->node);
		}
	} else if (st->rollback) {
		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);

		undo_cpu_down(cpu, st);
		st->rollback = false;
	} else {
		/* Cannot happen .... */
		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);

		/* Regular hotplug work */
		if (st->state < st->target)
			ret = cpuhp_ap_online(cpu, st);
		else if (st->state > st->target)
			ret = cpuhp_ap_offline(cpu, st);
	}
	st->result = ret;
	complete(&st->done);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	if (!cpu_online(cpu))
		return 0;

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node);

	/* Describe the work for the hotplug thread ... */
	st->cb_state = state;
	st->single = true;
	st->bringup = bringup;
	st->node = node;

	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	/* ... and wait synchronously for it to finish. */
	wait_for_completion(&st->done);
	return st->result;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
{
	st->result = 0;
	st->single = false;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
}

/* Kick the AP hotplug thread of @cpu and wait for the result. */
static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state state = st->state;

	trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
	__cpuhp_kick_ap_work(st);
	wait_for_completion(&st->done);
	trace_cpuhp_exit(cpu, st->state, state, st->result);
	return st->result;
}

/* Per-cpu "cpuhp/%u" hotplug thread descriptor; threads start parked. */
static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

void __init cpuhp_threads_init(void)
{
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	/* Only the boot cpu's thread is unparked at init time. */
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

589
#ifdef CONFIG_HOTPLUG_CPU
590 591 592 593 594 595 596 597 598 599 600 601
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so its not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

K
Kirill Tkhai 已提交
631
/* Warn about any runnable task still bound to the dead cpu. */
static inline void check_for_tasks(int dead_cpu)
{
	struct task_struct *g, *p;

	read_lock(&tasklist_lock);
	for_each_process_thread(g, p) {
		if (!p->on_rq)
			continue;
		/*
		 * We do the check with unlocked task_rq(p)->lock.
		 * Order the reading to do not warn about a task,
		 * which was running on this cpu in the past, and
		 * it's just been woken on another cpu.
		 */
		rmb();
		if (task_cpu(p) != dead_cpu)
			continue;

		pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
			p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
	}
	read_unlock(&tasklist_lock);
}

/* Take this CPU down. Runs on the dying cpu in stop_machine() context. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
	/* Invoke the former CPU_DYING callbacks */
	for (; st->state > target; st->state--)
		cpuhp_invoke_callback(cpu, st->state, false, NULL);

	/* Give up timekeeping duties */
	tick_handover_do_timer();
	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

684
/* Drive the actual teardown of @cpu from the BP side (CPUHP_TEARDOWN_CPU). */
static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
	smpboot_park_threads(cpu);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
	err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	wait_for_completion(&st->done);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	tick_cleanup_dead_cpu(cpu);
	return 0;
}
L
Linus Torvalds 已提交
732

733 734 735 736 737 738 739
/* IPI target: complete the dying cpu's completion from an online cpu. */
static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete(&st->done);
}

740 741 742 743 744
/* Called from the dying cpu's idle task as its final act. */
void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	rcu_report_dead(smp_processor_id());
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

755 756 757 758 759 760
#else
#define takedown_cpu		NULL
#endif

#ifdef CONFIG_HOTPLUG_CPU

761
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	/* Refuse to take the last online cpu down. */
	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = st->state;
	st->target = target;
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
		st->target = prev_state;
		st->rollback = true;
		cpuhp_kick_ap_work(cpu);
	}

out:
	cpu_hotplug_done();
	return ret;
}

816
/* Serialized wrapper around _cpu_down(); honours cpu_hotplug_disabled. */
static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_down(cpu, 0, target);

out:
	cpu_maps_update_done();
	return err;
}
int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);
L
Linus Torvalds 已提交
838 839
#endif /*CONFIG_HOTPLUG_CPU*/

840
/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);

	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	while (st->state < target) {
		st->state++;
		cpuhp_invoke_callback(cpu, st->state, true, NULL);
	}
}

859 860
/*
 * Called from the idle task. We need to set active here, so we can kick off
 * the stopper thread and unpark the smpboot threads. If the target state is
 * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
 * cpu further.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	unsigned int cpu = smp_processor_id();

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;

	/* Unpark the stopper thread and the hotplug thread of this cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

	/* Should we go further up ? */
	if (st->target > CPUHP_AP_ONLINE_IDLE)
		__cpuhp_kick_ap_work(st);
	else
		complete(&st->done);
}

887
/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpu_hotplug_begin();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
	}

	cpuhp_tasks_frozen = tasks_frozen;

	st->target = target;
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpu_hotplug_done();
	return ret;
}

946
/* Serialized wrapper around _cpu_up(); validates @cpu and its memory node. */
static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
#if defined(CONFIG_IA64)
		pr_err("please check additional_cpus= boot parameter\n");
#endif
		return -EINVAL;
	}

	/* Make sure the cpu's memory node is online before onlining the cpu. */
	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

int cpu_up(unsigned int cpu)
{
	return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);
981

982
#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
983
static cpumask_var_t frozen_cpus;
984

985
/*
 * Take all cpus except @primary down for suspend/hibernate, remembering
 * them in frozen_cpus so enable_nonboot_cpus() can bring them back.
 */
int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;

	cpu_maps_update_begin();
	if (!cpu_online(primary))
		primary = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);

	pr_info("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == primary)
			continue;
		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			pr_err("Error taking CPU%d down: %d\n", cpu, error);
			break;
		}
	}

	if (!error)
		BUG_ON(num_online_cpus() > 1);
	else
		pr_err("Non-boot CPUs are not disabled\n");

	/*
	 * Make sure the CPUs won't be enabled by someone else. We need to do
	 * this even in case of failure as all disable_nonboot_cpus() users are
	 * supposed to do enable_nonboot_cpus() on the failure path.
	 */
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
	return error;
}

1029 1030 1031 1032 1033 1034 1035 1036
/* Weak arch hooks around re-enabling non-boot cpus; default to no-ops. */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

1037
/* Bring the cpus recorded in frozen_cpus back online after resume. */
void enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	if (cpumask_empty(frozen_cpus))
		goto out;

	pr_info("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;
		}
		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
R
Rusty Russell 已提交
1068

1069
/* Allocate the cpumask used to record CPUs taken down for suspend. */
static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);
1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 *
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
/*
 * PM notifier: disable CPU hotplug while a suspend/hibernate transition
 * is in flight and re-enable it afterwards (see comment block above).
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable();
		break;

	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}


1112
/* Register cpu_hotplug_pm_callback with the PM notifier chain. */
static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

1124
#endif /* CONFIG_PM_SLEEP_SMP */
1125 1126

#endif /* CONFIG_SMP */
1127

1128 1129 1130 1131
/* Boot processor state steps */
static struct cpuhp_step cpuhp_bp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
1132 1133
		.startup.single		= NULL,
		.teardown.single	= NULL,
1134 1135 1136
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {
1137
		.name			= "threads:prepare",
1138 1139
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
1140
		.cant_stop		= true,
1141
	},
1142
	[CPUHP_PERF_PREPARE] = {
1143 1144 1145
		.name			= "perf:prepare",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
1146
	},
1147
	[CPUHP_WORKQUEUE_PREP] = {
1148 1149 1150
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
1151
	},
1152
	[CPUHP_HRTIMERS_PREPARE] = {
1153 1154 1155
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= hrtimers_dead_cpu,
1156
	},
1157
	[CPUHP_SMPCFD_PREPARE] = {
1158
		.name			= "smpcfd:prepare",
1159 1160
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
1161
	},
1162 1163 1164 1165 1166
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
1167 1168 1169 1170
	[CPUHP_SLAB_PREPARE] = {
		.name			= "slab:prepare",
		.startup.single		= slab_prepare_cpu,
		.teardown.single	= slab_dead_cpu,
1171
	},
1172
	[CPUHP_RCUTREE_PREP] = {
1173
		.name			= "RCU/tree:prepare",
1174 1175
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
1176
	},
1177 1178 1179 1180 1181 1182
	/*
	 * On the tear-down path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_DEAD] = {
1183 1184 1185
		.name			= "timers:dead",
		.startup.single		= NULL,
		.teardown.single	= timers_dead_cpu,
1186
	},
1187
	/* Kicks the plugged cpu into life */
1188 1189
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
1190 1191
		.startup.single		= bringup_cpu,
		.teardown.single	= NULL,
1192
		.cant_stop		= true,
1193
	},
1194
	[CPUHP_AP_SMPCFD_DYING] = {
1195
		.name			= "smpcfd:dying",
1196 1197
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
1198
	},
1199 1200 1201 1202
	/*
	 * Handled on controll processor until the plugged processor manages
	 * this itself.
	 */
1203 1204
	[CPUHP_TEARDOWN_CPU] = {
		.name			= "cpu:teardown",
1205 1206
		.startup.single		= NULL,
		.teardown.single	= takedown_cpu,
1207
		.cant_stop		= true,
1208
	},
1209 1210
#else
	[CPUHP_BRINGUP_CPU] = { },
1211 1212 1213
#endif
};

1214 1215 1216
/* Application processor state steps */
static struct cpuhp_step cpuhp_ap_states[] = {
#ifdef CONFIG_SMP
1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
1229 1230 1231
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
1232 1233
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
1234
	},
1235
	[CPUHP_AP_RCUTREE_DYING] = {
1236
		.name			= "RCU/tree:dying",
1237 1238
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
1239
	},
1240 1241 1242 1243 1244 1245
	/* Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronsization */
	[CPUHP_AP_ONLINE] = {
		.name			= "ap:online",
	},
	/* Handle smpboot threads park/unpark */
1246
	[CPUHP_AP_SMPBOOT_THREADS] = {
1247
		.name			= "smpboot/threads:online",
1248 1249
		.startup.single		= smpboot_unpark_threads,
		.teardown.single	= NULL,
1250
	},
1251
	[CPUHP_AP_PERF_ONLINE] = {
1252 1253 1254
		.name			= "perf:online",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
1255
	},
1256
	[CPUHP_AP_WORKQUEUE_ONLINE] = {
1257 1258 1259
		.name			= "workqueue:online",
		.startup.single		= workqueue_online_cpu,
		.teardown.single	= workqueue_offline_cpu,
1260
	},
1261
	[CPUHP_AP_RCUTREE_ONLINE] = {
1262
		.name			= "RCU/tree:online",
1263 1264
		.startup.single		= rcutree_online_cpu,
		.teardown.single	= rcutree_offline_cpu,
1265
	},
1266
#endif
1267 1268 1269 1270
	/*
	 * The dynamically registered state space is here
	 */

1271 1272 1273 1274
#ifdef CONFIG_SMP
	/* Last state is scheduler control setting the cpu active */
	[CPUHP_AP_ACTIVE] = {
		.name			= "sched:active",
1275 1276
		.startup.single		= sched_cpu_activate,
		.teardown.single	= sched_cpu_deactivate,
1277 1278 1279
	},
#endif

1280
	/* CPU is fully up and running. */
1281 1282
	[CPUHP_ONLINE] = {
		.name			= "online",
1283 1284
		.startup.single		= NULL,
		.teardown.single	= NULL,
1285 1286 1287
	},
};

1288 1289 1290 1291 1292 1293 1294 1295
/*
 * Sanity check for callbacks: only states strictly between CPUHP_OFFLINE
 * and CPUHP_ONLINE may carry install/uninstall callbacks.
 */
static int cpuhp_cb_check(enum cpuhp_state state)
{
	if (state > CPUHP_OFFLINE && state < CPUHP_ONLINE)
		return 0;
	return -EINVAL;
}

1296 1297 1298 1299 1300 1301 1302
/*
 * Returns a free for dynamic slot assignment of the Online state. The states
 * are protected by the cpuhp_slot_states mutex and an empty slot is identified
 * by having no name assigned.
 */
static int cpuhp_reserve_state(enum cpuhp_state state)
{
1303 1304
	enum cpuhp_state i, end;
	struct cpuhp_step *step;
1305

1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320
	switch (state) {
	case CPUHP_AP_ONLINE_DYN:
		step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN;
		end = CPUHP_AP_ONLINE_DYN_END;
		break;
	case CPUHP_BP_PREPARE_DYN:
		step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN;
		end = CPUHP_BP_PREPARE_DYN_END;
		break;
	default:
		return -EINVAL;
	}

	for (i = state; i <= end; i++, step++) {
		if (!step->name)
1321 1322 1323 1324 1325 1326 1327 1328 1329 1330
			return i;
	}
	WARN(1, "No more dynamic states available for CPU hotplug\n");
	return -ENOSPC;
}

/*
 * (Un)Install the callbacks for further cpu hotplug operations.
 * With name == NULL the slot is cleared (uninstall). Dynamic state
 * requests are resolved to a concrete slot via cpuhp_reserve_state().
 * Returns the (possibly reserved) state number or a negative errno;
 * -EBUSY when installing over an already-occupied named slot.
 */
static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
				 int (*startup)(unsigned int cpu),
				 int (*teardown)(unsigned int cpu),
				 bool multi_instance)
{
	struct cpuhp_step *sp;
	int ret = 0;

	mutex_lock(&cpuhp_state_mutex);

	if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
		ret = cpuhp_reserve_state(state);
		if (ret < 0)
			goto out;
		state = ret;
	}
	sp = cpuhp_get_step(state);
	if (name && sp->name) {
		ret = -EBUSY;
		goto out;
	}
	sp->startup.single = startup;
	sp->teardown.single = teardown;
	sp->name = name;
	sp->multi_instance = multi_instance;
	INIT_HLIST_HEAD(&sp->list);
out:
	mutex_unlock(&cpuhp_state_mutex);
	return ret;
}

/* Return the single-instance teardown callback installed for @state. */
static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
{
	return cpuhp_get_step(state)->teardown.single;
}

/*
 * Call the startup/teardown function for a step either on the AP or
 * on the current CPU.
 */
1368 1369
/*
 * Invoke the startup (@bringup == true) or teardown callback of @state
 * for @cpu, either via the per-cpu hotplug thread for AP-bound states
 * or directly on the current CPU. Returns 0 when the state has no
 * matching callback installed.
 */
static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
			    struct hlist_node *node)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int ret;

	if ((bringup && !sp->startup.single) ||
	    (!bringup && !sp->teardown.single))
		return 0;
	/*
	 * The non AP bound callbacks can fail on bringup. On teardown
	 * e.g. module removal we crash for now.
	 */
#ifdef CONFIG_SMP
	if (cpuhp_is_ap_state(state))
		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
	else
		ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#else
	ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#endif
	BUG_ON(ret && !bringup);
	return ret;
}

/*
 * Called from __cpuhp_setup_state on a recoverable failure.
 *
 * Note: The teardown callbacks for rollback are not allowed to fail!
 */
static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
1399
				   struct hlist_node *node)
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412
{
	int cpu;

	/* Roll back the already executed steps on the other cpus */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpu >= failedcpu)
			break;

		/* Did we invoke the startup call on that cpu ? */
		if (cpustate >= state)
1413
			cpuhp_issue_call(cpu, state, false, node);
1414 1415 1416
	}
}

1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429
/*
 * Add an instance to a multi-instance state. When @invoke is true the
 * multi-startup callback is run for the new instance on every present
 * CPU that has already reached @state; on failure the already-started
 * CPUs are rolled back and the instance is not added.
 */
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
			       bool invoke)
{
	struct cpuhp_step *sp;
	int cpu;
	int ret;

	sp = cpuhp_get_step(state);
	if (!sp->multi_instance)
		return -EINVAL;

	get_online_cpus();

	if (!invoke || !sp->startup.multi)
		goto add_node;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, node);
		if (ret) {
			if (sp->teardown.multi)
				cpuhp_rollback_install(cpu, state, node);
			goto err;
		}
	}
add_node:
	ret = 0;
	mutex_lock(&cpuhp_state_mutex);
	hlist_add_head(node, &sp->list);
	mutex_unlock(&cpuhp_state_mutex);

err:
	put_online_cpus();
	return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);

1463 1464
/**
 * __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
 * @state:		The state to setup
 * @name:		Name of the state (shown in sysfs; must not be NULL)
 * @invoke:		If true, the startup function is invoked for cpus where
 *			cpu state >= @state
 * @startup:		startup callback function
 * @teardown:		teardown callback function
 * @multi_instance:	State is set up for multiple instances which get
 *			added afterwards.
 *
 * Returns:
 *   On success:
 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
 *      0 for all other states
 *   On failure: proper (negative) error code
 */
int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int cpu, ret = 0;
	bool dynstate;

	if (cpuhp_cb_check(state) || !name)
		return -EINVAL;

	get_online_cpus();

	ret = cpuhp_store_callbacks(state, name, startup, teardown,
				    multi_instance);

	/* A dynamic request returns the reserved slot via ret > 0 */
	dynstate = state == CPUHP_AP_ONLINE_DYN;
	if (ret > 0 && dynstate) {
		state = ret;
		ret = 0;
	}

	if (ret || !invoke || !startup)
		goto out;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, NULL);
		if (ret) {
			if (teardown)
				cpuhp_rollback_install(cpu, state, NULL);
			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
			goto out;
		}
	}
out:
	put_online_cpus();
	/*
	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
	 * dynamically allocated state in case of success.
	 */
	if (!ret && dynstate)
		return state;
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state);

1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571
/*
 * Remove an instance from a multi-instance state. When @invoke is true
 * the teardown callback is run for this instance on every present CPU
 * that has reached @state before the node is unlinked.
 * Teardown is not allowed to fail.
 */
int __cpuhp_state_remove_instance(enum cpuhp_state state,
				  struct hlist_node *node, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	if (!sp->multi_instance)
		return -EINVAL;

	/* Hold off CPU hotplug while tearing down and unlinking */
	get_online_cpus();
	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;
	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}

remove:
	mutex_lock(&cpuhp_state_mutex);
	hlist_del(node);
	mutex_unlock(&cpuhp_state_mutex);
	put_online_cpus();

	return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582
/**
 * __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
 * @state:	The state to remove
 * @invoke:	If true, the teardown function is invoked for cpus where
 *		cpu state >= @state
 *
 * The teardown callback is currently not allowed to fail. Think
 * about module removal!
 */
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	get_online_cpus();

	/* Multi-instance states must have all instances removed first */
	if (sp->multi_instance) {
		WARN(!hlist_empty(&sp->list),
		     "Error: Removing state %d which has instances left.\n",
		     state);
		goto remove;
	}

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;

	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, NULL);
	}
remove:
	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
	put_online_cpus();
}
EXPORT_SYMBOL(__cpuhp_remove_state);

1618 1619 1620 1621 1622 1623 1624 1625 1626 1627
#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
static ssize_t show_cpuhp_state(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->state);
}
static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);

1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667
/*
 * sysfs: drive this CPU to the requested hotplug target state.
 * The target must be a valid named state that can be stopped at
 * (without CONFIG_CPU_HOTPLUG_STATE_CONTROL only OFFLINE/ONLINE).
 *
 * Fix: the original returned straight after the step validation
 * failure, leaking the lock taken by lock_device_hotplug_sysfs().
 * All exits past that point now go through unlock_device_hotplug().
 */
static ssize_t write_cpuhp_target(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *sp;
	int target, ret;

	ret = kstrtoint(buf, 10, &target);
	if (ret)
		return ret;

#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
		return -EINVAL;
#else
	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
		return -EINVAL;
#endif

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	mutex_lock(&cpuhp_state_mutex);
	sp = cpuhp_get_step(target);
	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		goto out;

	if (st->state < target)
		ret = do_cpu_up(dev->id, target);
	else
		ret = do_cpu_down(dev->id, target);
out:
	unlock_device_hotplug();
	return ret ? ret : count;
}

1668 1669 1670 1671 1672 1673 1674
static ssize_t show_cpuhp_target(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->target);
}
1675
static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);
1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695

/* Per-CPU sysfs attributes under .../cpuN/hotplug/ */
static struct attribute *cpuhp_cpu_attrs[] = {
	&dev_attr_state.attr,
	&dev_attr_target.attr,
	NULL
};

static struct attribute_group cpuhp_cpu_attr_group = {
	.attrs = cpuhp_cpu_attrs,
	.name = "hotplug",
	/* stray positional NULL initializer removed */
};

static ssize_t show_cpuhp_states(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	ssize_t cur, res = 0;
	int i;

	mutex_lock(&cpuhp_state_mutex);
1696
	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743
		struct cpuhp_step *sp = cpuhp_get_step(i);

		if (sp->name) {
			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
			buf += cur;
			res += cur;
		}
	}
	mutex_unlock(&cpuhp_state_mutex);
	return res;
}
static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);

/* System-wide sysfs attributes under .../cpu/hotplug/ */
static struct attribute *cpuhp_cpu_root_attrs[] = {
	&dev_attr_states.attr,
	NULL
};

static struct attribute_group cpuhp_cpu_root_attr_group = {
	.attrs = cpuhp_cpu_root_attrs,
	.name = "hotplug",
	/* stray positional NULL initializer removed */
};

/*
 * Create the hotplug sysfs groups: one "hotplug" group on the cpu
 * subsystem root and one per possible CPU device. CPUs without a
 * registered device are skipped.
 */
static int __init cpuhp_sysfs_init(void)
{
	int cpu, ret;

	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
				 &cpuhp_cpu_root_attr_group);
	if (ret)
		return ret;

	for_each_possible_cpu(cpu) {
		struct device *dev = get_cpu_device(cpu);

		if (!dev)
			continue;
		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
		if (ret)
			return ret;
	}
	return 0;
}
device_initcall(cpuhp_sysfs_init);
#endif

1744 1745 1746 1747
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
R
Rusty Russell 已提交
1748
 * It is used by cpumask_of() to get a constant address to a CPU
1749 1750
 * mask value that has a single bit set only.
 */
1751

1752
/* cpu_bit_bitmap[0] is empty - so we can back into it */
1753
#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
1754 1755 1756
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
1757

1758 1759 1760 1761 1762 1763 1764
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
1765 1766
#endif
};
1767
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
1768 1769 1770

/* Constant bitmap initialized from CPU_BITS_ALL (see cpumask.h). */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
1771 1772

#ifdef CONFIG_INIT_ALL_POSSIBLE
1773
struct cpumask __cpu_possible_mask __read_mostly
1774
	= {CPU_BITS_ALL};
1775
#else
1776
struct cpumask __cpu_possible_mask __read_mostly;
1777
#endif
1778
EXPORT_SYMBOL(__cpu_possible_mask);
1779

1780 1781
struct cpumask __cpu_online_mask __read_mostly;
EXPORT_SYMBOL(__cpu_online_mask);
1782

1783 1784
struct cpumask __cpu_present_mask __read_mostly;
EXPORT_SYMBOL(__cpu_present_mask);
1785

1786 1787
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);
1788 1789 1790

void init_cpu_present(const struct cpumask *src)
{
1791
	cpumask_copy(&__cpu_present_mask, src);
1792 1793 1794 1795
}

void init_cpu_possible(const struct cpumask *src)
{
1796
	cpumask_copy(&__cpu_possible_mask, src);
1797 1798 1799 1800
}

void init_cpu_online(const struct cpumask *src)
{
1801
	cpumask_copy(&__cpu_online_mask, src);
1802
}
1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824

/*
 * Activate the first processor.
 */
void __init boot_cpu_init(void)
{
	int cpu = smp_processor_id();

	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
	set_cpu_online(cpu, true);
	set_cpu_active(cpu, true);
	set_cpu_present(cpu, true);
	set_cpu_possible(cpu, true);
}

/*
 * Must be called _AFTER_ setting up the per_cpu areas
 */
void __init boot_cpu_state_init(void)
{
	/* Boot CPU is already fully up - record CPUHP_ONLINE for it */
	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
}