/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched/signal.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/rcupdate.h>
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>
#include <linux/lockdep.h>
#include <linux/tick.h>
#include <linux/irq.h>
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>

#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
#include <trace/events/cpuhp.h>

#include "smpboot.h"

/**
 * cpuhp_cpu_state - Per cpu hotplug state storage
 * @state:	The current cpu state
 * @target:	The target state
 * @thread:	Pointer to the hotplug thread
 * @should_run:	Thread should execute
 * @rollback:	Perform a rollback
 * @single:	Single callback invocation
 * @bringup:	Single callback bringup or teardown selector
 * @cb_state:	The state for a single callback (install/uninstall)
 * @result:	Result of the operation
 * @done:	Signal completion to the issuer of the task
 */
struct cpuhp_cpu_state {
	enum cpuhp_state	state;
	enum cpuhp_state	target;
#ifdef CONFIG_SMP
	struct task_struct	*thread;
	bool			should_run;
	bool			rollback;
	bool			single;
	bool			bringup;
	struct hlist_node	*node;
	enum cpuhp_state	cb_state;
	int			result;
	struct completion	done;
#endif
};

static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);

#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lock_class_key cpuhp_state_key;
static struct lockdep_map cpuhp_state_lock_map =
	STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
#endif

/**
 * cpuhp_step - Hotplug state machine step
 * @name:	Name of the step
 * @startup:	Startup function of the step
 * @teardown:	Teardown function of the step
 * @skip_onerr:	Do not invoke the functions on error rollback
 *		Will go away once the notifiers	are gone
 * @cant_stop:	Bringup/teardown can't be stopped at this step
 */
struct cpuhp_step {
	const char		*name;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} startup;
	union {
		int		(*single)(unsigned int cpu);
		int		(*multi)(unsigned int cpu,
					 struct hlist_node *node);
	} teardown;
	struct hlist_head	list;
	bool			skip_onerr;
	bool			cant_stop;
	bool			multi_instance;
};

static DEFINE_MUTEX(cpuhp_state_mutex);
static struct cpuhp_step cpuhp_bp_states[];
static struct cpuhp_step cpuhp_ap_states[];

static bool cpuhp_is_ap_state(enum cpuhp_state state)
{
	/*
	 * The extra check for CPUHP_TEARDOWN_CPU is only for documentation
	 * purposes as that state is handled explicitly in cpu_down.
	 */
	return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU;
}

static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
{
	struct cpuhp_step *sp;

	sp = cpuhp_is_ap_state(state) ? cpuhp_ap_states : cpuhp_bp_states;
	return sp + state;
}

/**
 * cpuhp_invoke_callback - Invoke the callbacks for a given state
 * @cpu:	The cpu for which the callback should be invoked
 * @state:	The step in the state machine
 * @bringup:	True if the bringup callback should be invoked
 * @node:	For multi-instance, do a single entry callback for install/remove
 *
 * Called from cpu hotplug and from the state register machinery.
 */
static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
				 bool bringup, struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct cpuhp_step *step = cpuhp_get_step(state);
	int (*cbm)(unsigned int cpu, struct hlist_node *node);
	int (*cb)(unsigned int cpu);
	int ret, cnt;

	if (!step->multi_instance) {
		cb = bringup ? step->startup.single : step->teardown.single;
		if (!cb)
			return 0;
		trace_cpuhp_enter(cpu, st->target, state, cb);
		ret = cb(cpu);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}
	cbm = bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return 0;

	/* Single invocation for instance add/remove */
	if (node) {
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		return ret;
	}

	/* State transition. Invoke on all instances */
	cnt = 0;
	hlist_for_each(node, &step->list) {
		trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node);
		ret = cbm(cpu, node);
		trace_cpuhp_exit(cpu, st->state, state, ret);
		if (ret)
			goto err;
		cnt++;
	}
	return 0;
err:
	/* Rollback the instances if one failed */
	cbm = !bringup ? step->startup.multi : step->teardown.multi;
	if (!cbm)
		return ret;

	hlist_for_each(node, &step->list) {
		if (!cnt--)
			break;
		cbm(cpu, node);
	}
	return ret;
}

#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);
bool cpuhp_tasks_frozen;
EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen);

/*
 * The following two APIs (cpu_maps_update_begin/done) must be used when
 * attempting to serialize the updates to cpu_online_mask & cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

#ifdef CONFIG_HOTPLUG_CPU

DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);

void cpus_read_lock(void)
{
	percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_lock);

void cpus_read_unlock(void)
{
	percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(cpus_read_unlock);

void cpus_write_lock(void)
{
	percpu_down_write(&cpu_hotplug_lock);
}

void cpus_write_unlock(void)
{
	percpu_up_write(&cpu_hotplug_lock);
}

void lockdep_assert_cpus_held(void)
{
	percpu_rwsem_assert_held(&cpu_hotplug_lock);
}

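/*
 * Example (illustrative only, not used by this file): code that must see a
 * stable set of online CPUs while it walks them holds the read side of
 * cpu_hotplug_lock via the helpers above. foo_count_online() is a made-up
 * name for the sketch:
 *
 *	static unsigned int foo_count_online(void)
 *	{
 *		unsigned int cpu, cnt = 0;
 *
 *		cpus_read_lock();
 *		for_each_online_cpu(cpu)
 *			cnt++;
 *		cpus_read_unlock();
 *		return cnt;
 *	}
 */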
/*
 * Wait for currently running CPU hotplug operations to complete (if any) and
 * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects
 * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the
 * hotplug path before performing hotplug operations. So acquiring that lock
 * guarantees mutual exclusion from any currently running hotplug operations.
 */
void cpu_hotplug_disable(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled++;
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_disable);

static void __cpu_hotplug_enable(void)
{
	if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n"))
		return;
	cpu_hotplug_disabled--;
}

void cpu_hotplug_enable(void)
{
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	cpu_maps_update_done();
}
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
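/*
 * Example (illustrative only): a caller that must keep the set of online CPUs
 * stable over a longer section, without holding the hotplug lock itself,
 * pairs the two helpers above. While the counter is elevated, cpu_up() and
 * cpu_down() return -EBUSY:
 *
 *	cpu_hotplug_disable();
 *	... no CPU can be hotplugged (e.g. from sysfs) here ...
 *	cpu_hotplug_enable();
 */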
#endif	/* CONFIG_HOTPLUG_CPU */

static int bringup_wait_for_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	wait_for_completion(&st->done);
	return st->result;
}

static int bringup_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
	 */
	irq_lock_sparse();

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	irq_unlock_sparse();
	if (ret)
		return ret;
	ret = bringup_wait_for_ap(cpu);
	BUG_ON(!cpu_online(cpu));
	return ret;
}

/*
 * Hotplug state machine related functions
 */
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state++; st->state < st->target; st->state++) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, true, NULL);
	}
}

static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	for (; st->state > target; st->state--) {
		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_down(cpu, st);
			break;
		}
	}
	return ret;
}

static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	for (st->state--; st->state > st->target; st->state--) {
		struct cpuhp_step *step = cpuhp_get_step(st->state);

		if (!step->skip_onerr)
			cpuhp_invoke_callback(cpu, st->state, false, NULL);
	}
}

static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	while (st->state < target) {
		st->state++;
		ret = cpuhp_invoke_callback(cpu, st->state, true, NULL);
		if (ret) {
			st->target = prev_state;
			undo_cpu_up(cpu, st);
			break;
		}
	}
	return ret;
}

/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */
static void cpuhp_create(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	init_completion(&st->done);
}

static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/* Execute the teardown callbacks. Used to be CPU_DOWN_PREPARE */
static int cpuhp_ap_offline(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	enum cpuhp_state target = max((int)st->target, CPUHP_TEARDOWN_CPU);

	return cpuhp_down_callbacks(cpu, st, target);
}

/* Execute the online startup callbacks. Used to be CPU_ONLINE */
static int cpuhp_ap_online(unsigned int cpu, struct cpuhp_cpu_state *st)
{
	return cpuhp_up_callbacks(cpu, st, st->target);
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	int ret = 0;

	/*
	 * Paired with the mb() in cpuhp_kick_ap_work and
	 * cpuhp_invoke_ap_callback, so the work set is consistently visible.
	 */
	smp_mb();
	if (!st->should_run)
		return;

	st->should_run = false;

	lock_map_acquire(&cpuhp_state_lock_map);
	/* Single callback invocation for [un]install ? */
	if (st->single) {
		if (st->cb_state < CPUHP_AP_ONLINE) {
			local_irq_disable();
			ret = cpuhp_invoke_callback(cpu, st->cb_state,
						    st->bringup, st->node);
			local_irq_enable();
		} else {
			ret = cpuhp_invoke_callback(cpu, st->cb_state,
						    st->bringup, st->node);
		}
	} else if (st->rollback) {
		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);

		undo_cpu_down(cpu, st);
		st->rollback = false;
	} else {
		/* Cannot happen .... */
		BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);

		/* Regular hotplug work */
		if (st->state < st->target)
			ret = cpuhp_ap_online(cpu, st);
		else if (st->state > st->target)
			ret = cpuhp_ap_offline(cpu, st);
	}
	lock_map_release(&cpuhp_state_lock_map);
	st->result = ret;
	complete(&st->done);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	if (!cpu_online(cpu))
		return 0;

	lock_map_acquire(&cpuhp_state_lock_map);
	lock_map_release(&cpuhp_state_lock_map);

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node);

	st->cb_state = state;
	st->single = true;
	st->bringup = bringup;
	st->node = node;

	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_completion(&st->done);
	return st->result;
}

/* Regular hotplug invocation of the AP hotplug thread */
static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st)
{
	st->result = 0;
	st->single = false;
	/*
	 * Make sure the above stores are visible before should_run becomes
	 * true. Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
}

static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state state = st->state;

	trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
	lock_map_acquire(&cpuhp_state_lock_map);
	lock_map_release(&cpuhp_state_lock_map);
	__cpuhp_kick_ap_work(st);
	wait_for_completion(&st->done);
	trace_cpuhp_exit(cpu, st->state, state, st->result);
	return st->result;
}

static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.create			= &cpuhp_create,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

void __init cpuhp_threads_init(void)
{
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

#ifdef CONFIG_HOTPLUG_CPU
/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so it's not like new tasks will ever get this cpu set in
	 * their mm mask. -- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
		task_unlock(t);
	}
	rcu_read_unlock();
}

static inline void check_for_tasks(int dead_cpu)
{
	struct task_struct *g, *p;

	read_lock(&tasklist_lock);
	for_each_process_thread(g, p) {
		if (!p->on_rq)
			continue;
		/*
		 * We do the check with unlocked task_rq(p)->lock.
		 * Order the reading so that we do not warn about a task
		 * which was running on this cpu in the past and has
		 * just been woken on another cpu.
		 */
		rmb();
		if (task_cpu(p) != dead_cpu)
			continue;

		pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
			p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
	}
	read_unlock(&tasklist_lock);
}

/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * We get here while we are in CPUHP_TEARDOWN_CPU state and we must not
	 * do this step again.
	 */
	WARN_ON(st->state != CPUHP_TEARDOWN_CPU);
	st->state--;
	/* Invoke the former CPU_DYING callbacks */
	for (; st->state > target; st->state--)
		cpuhp_invoke_callback(cpu, st->state, false, NULL);

	/* Give up timekeeping duties */
	tick_handover_do_timer();
	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(per_cpu_ptr(&cpuhp_state, cpu)->thread);
	smpboot_park_threads(cpu);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	/*
	 * So now all preempt/rcu users must observe !cpu_active().
	 */
	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The CPUHP_AP_SCHED_MIGRATE_DYING callback will have removed all
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	wait_for_completion(&st->done);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	tick_cleanup_dead_cpu(cpu);
	return 0;
}

static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete(&st->done);
}

void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	rcu_report_dead(smp_processor_id());
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcu_report_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

#else
#define takedown_cpu		NULL
#endif

#ifdef CONFIG_HOTPLUG_CPU

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpus_write_lock();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = st->state;
	st->target = target;
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
		st->target = prev_state;
		st->rollback = true;
		cpuhp_kick_ap_work(cpu);
	}

out:
	cpus_write_unlock();
	return ret;
}

static int do_cpu_down(unsigned int cpu, enum cpuhp_state target)
{
	int err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_down(cpu, 0, target);

out:
	cpu_maps_update_done();
	return err;
}
int cpu_down(unsigned int cpu)
{
	return do_cpu_down(cpu, CPUHP_OFFLINE);
}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/

/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);

	rcu_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	while (st->state < target) {
		st->state++;
		cpuhp_invoke_callback(cpu, st->state, true, NULL);
	}
}

/*
 * Called from the idle task. We need to set active here, so we can kick off
 * the stopper thread and unpark the smpboot threads. If the target state is
 * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the
 * cpu further.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	unsigned int cpu = smp_processor_id();

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	st->state = CPUHP_AP_ONLINE_IDLE;

	/* Unpark the stopper thread and the hotplug thread of this cpu */
	stop_machine_unpark(cpu);
	kthread_unpark(st->thread);

	/* Should we go further up ? */
	if (st->target > CPUHP_AP_ONLINE_IDLE)
		__cpuhp_kick_ap_work(st);
	else
		complete(&st->done);
}

/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpus_write_lock();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of do_cpu_up might have raced with another
	 * caller. Ignore it for now.
	 */
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}
	}

	cpuhp_tasks_frozen = tasks_frozen;

	st->target = target;
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpus_write_unlock();
	return ret;
}

static int do_cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
#if defined(CONFIG_IA64)
		pr_err("please check additional_cpus= boot parameter\n");
#endif
		return -EINVAL;
	}

	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

int cpu_up(unsigned int cpu)
{
	return do_cpu_up(cpu, CPUHP_ONLINE);
}
EXPORT_SYMBOL_GPL(cpu_up);

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;

	cpu_maps_update_begin();
	if (!cpu_online(primary))
		primary = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);

	pr_info("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == primary)
			continue;
		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			pr_err("Error taking CPU%d down: %d\n", cpu, error);
			break;
		}
	}

	if (!error)
		BUG_ON(num_online_cpus() > 1);
	else
		pr_err("Non-boot CPUs are not disabled\n");

	/*
	 * Make sure the CPUs won't be enabled by someone else. We need to do
	 * this even in case of failure as all disable_nonboot_cpus() users are
	 * supposed to do enable_nonboot_cpus() on the failure path.
	 */
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
	return error;
}

void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

void enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	if (cpumask_empty(frozen_cpus))
		goto out;

	pr_info("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;
		}
		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}

static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 *
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable();
		break;

	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}


static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

int __boot_cpu_id;

#endif /* CONFIG_SMP */

/* Boot processor state steps */
static struct cpuhp_step cpuhp_bp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {
		.name			= "threads:prepare",
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_PERF_PREPARE] = {
		.name			= "perf:prepare",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_WORKQUEUE_PREP] = {
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_HRTIMERS_PREPARE] = {
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= hrtimers_dead_cpu,
	},
	[CPUHP_SMPCFD_PREPARE] = {
		.name			= "smpcfd:prepare",
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
	},
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_SLAB_PREPARE] = {
		.name			= "slab:prepare",
		.startup.single		= slab_prepare_cpu,
		.teardown.single	= slab_dead_cpu,
	},
	[CPUHP_RCUTREE_PREP] = {
		.name			= "RCU/tree:prepare",
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
	},
	/*
	 * On the tear-down path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_DEAD] = {
		.name			= "timers:dead",
		.startup.single		= NULL,
		.teardown.single	= timers_dead_cpu,
	},
	/* Kicks the plugged cpu into life */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= bringup_cpu,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_AP_SMPCFD_DYING] = {
		.name			= "smpcfd:dying",
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
	},
	/*
	 * Handled on control processor until the plugged processor manages
	 * this itself.
	 */
	[CPUHP_TEARDOWN_CPU] = {
		.name			= "cpu:teardown",
		.startup.single		= NULL,
		.teardown.single	= takedown_cpu,
		.cant_stop		= true,
	},
#else
	[CPUHP_BRINGUP_CPU] = { },
#endif
};

/* Application processor state steps */
static struct cpuhp_step cpuhp_ap_states[] = {
#ifdef CONFIG_SMP
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
	},
	[CPUHP_AP_RCUTREE_DYING] = {
		.name			= "RCU/tree:dying",
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
	},
	/* Entry state on starting. Interrupts enabled from here on. Transient
	 * state for synchronization */
	[CPUHP_AP_ONLINE] = {
		.name			= "ap:online",
	},
	/* Handle smpboot threads park/unpark */
	[CPUHP_AP_SMPBOOT_THREADS] = {
		.name			= "smpboot/threads:online",
		.startup.single		= smpboot_unpark_threads,
		.teardown.single	= NULL,
	},
	[CPUHP_AP_PERF_ONLINE] = {
		.name			= "perf:online",
		.startup.single		= perf_event_init_cpu,
		.teardown.single	= perf_event_exit_cpu,
	},
	[CPUHP_AP_WORKQUEUE_ONLINE] = {
		.name			= "workqueue:online",
		.startup.single		= workqueue_online_cpu,
		.teardown.single	= workqueue_offline_cpu,
	},
	[CPUHP_AP_RCUTREE_ONLINE] = {
		.name			= "RCU/tree:online",
		.startup.single		= rcutree_online_cpu,
		.teardown.single	= rcutree_offline_cpu,
	},
#endif
	/*
	 * The dynamically registered state space is here
	 */

#ifdef CONFIG_SMP
	/* Last state is scheduler control setting the cpu active */
	[CPUHP_AP_ACTIVE] = {
		.name			= "sched:active",
		.startup.single		= sched_cpu_activate,
		.teardown.single	= sched_cpu_deactivate,
	},
#endif

	/* CPU is fully up and running. */
	[CPUHP_ONLINE] = {
		.name			= "online",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
};

/* Sanity check for callbacks */
static int cpuhp_cb_check(enum cpuhp_state state)
{
	if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE)
		return -EINVAL;
	return 0;
}

/*
 * Return a free slot for dynamic state assignment. The states are protected
 * by the cpuhp_state_mutex and an empty slot is identified by having no name
 * assigned.
 */
static int cpuhp_reserve_state(enum cpuhp_state state)
{
	enum cpuhp_state i, end;
	struct cpuhp_step *step;

	switch (state) {
	case CPUHP_AP_ONLINE_DYN:
		step = cpuhp_ap_states + CPUHP_AP_ONLINE_DYN;
		end = CPUHP_AP_ONLINE_DYN_END;
		break;
	case CPUHP_BP_PREPARE_DYN:
		step = cpuhp_bp_states + CPUHP_BP_PREPARE_DYN;
		end = CPUHP_BP_PREPARE_DYN_END;
		break;
	default:
		return -EINVAL;
	}

	for (i = state; i <= end; i++, step++) {
		if (!step->name)
			return i;
	}
	WARN(1, "No more dynamic states available for CPU hotplug\n");
	return -ENOSPC;
}

static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name,
				 int (*startup)(unsigned int cpu),
				 int (*teardown)(unsigned int cpu),
				 bool multi_instance)
{
	/* (Un)Install the callbacks for further cpu hotplug operations */
	struct cpuhp_step *sp;
	int ret = 0;

	if (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN) {
		ret = cpuhp_reserve_state(state);
		if (ret < 0)
			return ret;
		state = ret;
	}
	sp = cpuhp_get_step(state);
	if (name && sp->name)
		return -EBUSY;

	sp->startup.single = startup;
	sp->teardown.single = teardown;
	sp->name = name;
	sp->multi_instance = multi_instance;
	INIT_HLIST_HEAD(&sp->list);
	return ret;
}

static void *cpuhp_get_teardown_cb(enum cpuhp_state state)
{
	return cpuhp_get_step(state)->teardown.single;
}

/*
 * Call the startup/teardown function for a step either on the AP or
 * on the current CPU.
 */
static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
			    struct hlist_node *node)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int ret;

	if ((bringup && !sp->startup.single) ||
	    (!bringup && !sp->teardown.single))
		return 0;
	/*
	 * The non AP bound callbacks can fail on bringup. On teardown
	 * e.g. module removal we crash for now.
	 */
#ifdef CONFIG_SMP
	if (cpuhp_is_ap_state(state))
		ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node);
	else
		ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#else
	ret = cpuhp_invoke_callback(cpu, state, bringup, node);
#endif
	BUG_ON(ret && !bringup);
	return ret;
}

/*
 * Called from __cpuhp_setup_state on a recoverable failure.
 *
 * Note: The teardown callbacks for rollback are not allowed to fail!
 */
static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
				   struct hlist_node *node)
{
	int cpu;

	/* Roll back the already executed steps on the other cpus */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpu >= failedcpu)
			break;

		/* Did we invoke the startup call on that cpu ? */
		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}
}

int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
					  struct hlist_node *node,
					  bool invoke)
{
	struct cpuhp_step *sp;
	int cpu;
	int ret;

	lockdep_assert_cpus_held();

	sp = cpuhp_get_step(state);
	if (sp->multi_instance == false)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	if (!invoke || !sp->startup.multi)
		goto add_node;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, node);
		if (ret) {
			if (sp->teardown.multi)
				cpuhp_rollback_install(cpu, state, node);
			goto unlock;
		}
	}
add_node:
	ret = 0;
	hlist_add_head(node, &sp->list);
unlock:
	mutex_unlock(&cpuhp_state_mutex);
	return ret;
}

int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
			       bool invoke)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
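/*
 * Example (illustrative sketch, not part of this file): the usual entry
 * points for the multi-instance variant are the cpuhp_setup_state_multi()
 * and cpuhp_state_add_instance() wrappers from <linux/cpuhotplug.h>; the
 * "foo" names below are invented. Each device embeds a hlist_node and the
 * callbacks get that node back:
 *
 *	struct foo_dev {
 *		struct hlist_node node;
 *	};
 *
 *	static int foo_online(unsigned int cpu, struct hlist_node *node)
 *	{
 *		struct foo_dev *foo = hlist_entry(node, struct foo_dev, node);
 *
 *		... bring this instance up on @cpu ...
 *		return 0;
 *	}
 *
 *	state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
 *					foo_online, foo_offline);
 *	...
 *	ret = cpuhp_state_add_instance(state, &foo->node);
 *
 * (foo_offline is the analogous teardown callback, omitted here.)
 */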

/**
 * __cpuhp_setup_state_cpuslocked - Setup the callbacks for a hotplug machine state
 * @state:		The state to setup
 * @invoke:		If true, the startup function is invoked for cpus where
 *			cpu state >= @state
 * @startup:		startup callback function
 * @teardown:		teardown callback function
 * @multi_instance:	State is set up for multiple instances which get
 *			added afterwards.
 *
 * The caller needs to hold cpus read locked while calling this function.
 * Returns:
 *   On success:
 *      Positive state number if @state is CPUHP_AP_ONLINE_DYN
 *      0 for all other states
 *   On failure: proper (negative) error code
 */
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
				   const char *name, bool invoke,
				   int (*startup)(unsigned int cpu),
				   int (*teardown)(unsigned int cpu),
				   bool multi_instance)
{
	int cpu, ret = 0;
	bool dynstate;

	lockdep_assert_cpus_held();

	if (cpuhp_cb_check(state) || !name)
		return -EINVAL;

	mutex_lock(&cpuhp_state_mutex);

	ret = cpuhp_store_callbacks(state, name, startup, teardown,
				    multi_instance);

	dynstate = state == CPUHP_AP_ONLINE_DYN;
	if (ret > 0 && dynstate) {
		state = ret;
		ret = 0;
	}

	if (ret || !invoke || !startup)
		goto out;

	/*
	 * Try to call the startup callback for each present cpu
	 * depending on the hotplug state of the cpu.
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate < state)
			continue;

		ret = cpuhp_issue_call(cpu, state, true, NULL);
		if (ret) {
			if (teardown)
				cpuhp_rollback_install(cpu, state, NULL);
			cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
			goto out;
		}
	}
out:
	mutex_unlock(&cpuhp_state_mutex);
	/*
	 * If the requested state is CPUHP_AP_ONLINE_DYN, return the
	 * dynamically allocated state in case of success.
	 */
	if (!ret && dynstate)
		return state;
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);

int __cpuhp_setup_state(enum cpuhp_state state,
			const char *name, bool invoke,
			int (*startup)(unsigned int cpu),
			int (*teardown)(unsigned int cpu),
			bool multi_instance)
{
	int ret;

	cpus_read_lock();
	ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
					     teardown, multi_instance);
	cpus_read_unlock();
	return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state);
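/*
 * Example (illustrative sketch): most users go through the cpuhp_setup_state()
 * wrapper from <linux/cpuhotplug.h> rather than calling __cpuhp_setup_state()
 * directly; the "foo" names are invented. Asking for CPUHP_AP_ONLINE_DYN
 * returns the dynamically allocated state number, which is needed later for
 * cpuhp_remove_state():
 *
 *	static enum cpuhp_state foo_hp_state;
 *
 *	static int foo_cpu_online(unsigned int cpu)
 *	{
 *		... allocate/enable per-cpu resources of "foo" ...
 *		return 0;
 *	}
 *
 *	static int foo_cpu_offline(unsigned int cpu)
 *	{
 *		... undo foo_cpu_online() ...
 *		return 0;
 *	}
 *
 *	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "foo:online",
 *				foo_cpu_online, foo_cpu_offline);
 *	if (ret < 0)
 *		return ret;
 *	foo_hp_state = ret;
 *	...
 *	cpuhp_remove_state(foo_hp_state);
 */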

int __cpuhp_state_remove_instance(enum cpuhp_state state,
				  struct hlist_node *node, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	if (!sp->multi_instance)
		return -EINVAL;

	cpus_read_lock();
	mutex_lock(&cpuhp_state_mutex);

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;
	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, node);
	}

remove:
	hlist_del(node);
	mutex_unlock(&cpuhp_state_mutex);
	cpus_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);

/**
 * __cpuhp_remove_state_cpuslocked - Remove the callbacks for a hotplug machine state
 * @state:	The state to remove
 * @invoke:	If true, the teardown function is invoked for cpus where
 *		cpu state >= @state
 *
 * The caller needs to hold cpus read locked while calling this function.
 * The teardown callback is currently not allowed to fail. Think
 * about module removal!
 */
void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
	struct cpuhp_step *sp = cpuhp_get_step(state);
	int cpu;

	BUG_ON(cpuhp_cb_check(state));

	lockdep_assert_cpus_held();

	mutex_lock(&cpuhp_state_mutex);
	if (sp->multi_instance) {
		WARN(!hlist_empty(&sp->list),
		     "Error: Removing state %d which has instances left.\n",
		     state);
		goto remove;
	}

	if (!invoke || !cpuhp_get_teardown_cb(state))
		goto remove;

	/*
	 * Call the teardown callback for each present cpu depending
	 * on the hotplug state of the cpu. This function is not
	 * allowed to fail currently!
	 */
	for_each_present_cpu(cpu) {
		struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
		int cpustate = st->state;

		if (cpustate >= state)
			cpuhp_issue_call(cpu, state, false, NULL);
	}
remove:
	cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
	mutex_unlock(&cpuhp_state_mutex);
}
EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);

void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
	cpus_read_lock();
	__cpuhp_remove_state_cpuslocked(state, invoke);
	cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);

#if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU)
static ssize_t show_cpuhp_state(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->state);
}
static DEVICE_ATTR(state, 0444, show_cpuhp_state, NULL);

static ssize_t write_cpuhp_target(struct device *dev,
				  struct device_attribute *attr,
				  const char *buf, size_t count)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);
	struct cpuhp_step *sp;
	int target, ret;

	ret = kstrtoint(buf, 10, &target);
	if (ret)
		return ret;

#ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL
	if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE)
		return -EINVAL;
#else
	if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE)
		return -EINVAL;
#endif

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	mutex_lock(&cpuhp_state_mutex);
	sp = cpuhp_get_step(target);
	ret = !sp->name || sp->cant_stop ? -EINVAL : 0;
	mutex_unlock(&cpuhp_state_mutex);
	if (ret)
		return ret;

	if (st->state < target)
		ret = do_cpu_up(dev->id, target);
	else
		ret = do_cpu_down(dev->id, target);

	unlock_device_hotplug();
	return ret ? ret : count;
}

static ssize_t show_cpuhp_target(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id);

	return sprintf(buf, "%d\n", st->target);
}
static DEVICE_ATTR(target, 0644, show_cpuhp_target, write_cpuhp_target);

static struct attribute *cpuhp_cpu_attrs[] = {
	&dev_attr_state.attr,
	&dev_attr_target.attr,
	NULL
};

static struct attribute_group cpuhp_cpu_attr_group = {
	.attrs = cpuhp_cpu_attrs,
	.name = "hotplug",
	NULL
};

static ssize_t show_cpuhp_states(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	ssize_t cur, res = 0;
	int i;

	mutex_lock(&cpuhp_state_mutex);
	for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) {
		struct cpuhp_step *sp = cpuhp_get_step(i);

		if (sp->name) {
			cur = sprintf(buf, "%3d: %s\n", i, sp->name);
			buf += cur;
			res += cur;
		}
	}
	mutex_unlock(&cpuhp_state_mutex);
	return res;
}
static DEVICE_ATTR(states, 0444, show_cpuhp_states, NULL);

static struct attribute *cpuhp_cpu_root_attrs[] = {
	&dev_attr_states.attr,
	NULL
};

static struct attribute_group cpuhp_cpu_root_attr_group = {
	.attrs = cpuhp_cpu_root_attrs,
	.name = "hotplug",
	NULL
};

static int __init cpuhp_sysfs_init(void)
{
	int cpu, ret;

	ret = sysfs_create_group(&cpu_subsys.dev_root->kobj,
				 &cpuhp_cpu_root_attr_group);
	if (ret)
		return ret;

	for_each_possible_cpu(cpu) {
		struct device *dev = get_cpu_device(cpu);

		if (!dev)
			continue;
		ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group);
		if (ret)
			return ret;
	}
	return 0;
}
device_initcall(cpuhp_sysfs_init);
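/*
 * Note on usage (illustrative): the attribute groups above appear under
 * /sys/devices/system/cpu/ - "hotplug/states" on the subsystem root lists the
 * numeric state values, and each cpuN directory gets "hotplug/state" and
 * "hotplug/target". Writing a state number to "target" drives that CPU to the
 * requested state; unless CONFIG_CPU_HOTPLUG_STATE_CONTROL is enabled, only
 * the CPUHP_OFFLINE and CPUHP_ONLINE values are accepted, as checked in
 * write_cpuhp_target() above.
 */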
#endif

/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */

/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);

const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

#ifdef CONFIG_INIT_ALL_POSSIBLE
struct cpumask __cpu_possible_mask __read_mostly
	= {CPU_BITS_ALL};
#else
struct cpumask __cpu_possible_mask __read_mostly;
#endif
EXPORT_SYMBOL(__cpu_possible_mask);

struct cpumask __cpu_online_mask __read_mostly;
EXPORT_SYMBOL(__cpu_online_mask);

struct cpumask __cpu_present_mask __read_mostly;
EXPORT_SYMBOL(__cpu_present_mask);

struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);

void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(&__cpu_present_mask, src);
}

void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(&__cpu_possible_mask, src);
}

void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(&__cpu_online_mask, src);
}

/*
 * Activate the first processor.
 */
void __init boot_cpu_init(void)
{
	int cpu = smp_processor_id();

	/* Mark the boot cpu "present", "online" etc for SMP and UP case */
	set_cpu_online(cpu, true);
	set_cpu_active(cpu, true);
	set_cpu_present(cpu, true);
	set_cpu_possible(cpu, true);

#ifdef CONFIG_SMP
	__boot_cpu_id = cpu;
#endif
}

/*
 * Must be called _AFTER_ setting up the per_cpu areas
 */
void __init boot_cpu_state_init(void)
{
	per_cpu_ptr(&cpuhp_state, smp_processor_id())->state = CPUHP_ONLINE;
}