cpu.c 13.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
16
#include <linux/mutex.h>
17
#include <linux/gfp.h>
L
Linus Torvalds 已提交
18

19
#ifdef CONFIG_SMP
20
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
21
/* Taken by cpu_maps_update_begin() and released by cpu_maps_update_done(). */
static DEFINE_MUTEX(cpu_add_remove_lock);
L
Linus Torvalds 已提交
22

23 24 25 26 27 28 29 30 31 32 33 34 35 36
/*
 * The following two API's must be used when attempting
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
 */
/*
 * Acquire cpu_add_remove_lock.  Must be paired with a later call to
 * cpu_maps_update_done(), which releases it.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

/*
 * Release cpu_add_remove_lock, which the caller acquired through
 * cpu_maps_update_begin().
 */
void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

37
/*
 * Notifier chain walked by __cpu_notify().  Callbacks are added by
 * register_cpu_notifier() while cpu_add_remove_lock is held.
 */
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
L
Linus Torvalds 已提交
38

39 40 41 42 43
/*
 * If set, cpu_up() and cpu_down() return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock.
 */
static int cpu_hotplug_disabled;

44 45
#ifdef CONFIG_HOTPLUG_CPU

46 47 48 49 50 51 52 53
/*
 * State shared between hotplug readers (get_online_cpus() /
 * put_online_cpus()) and the hotplug writer (cpu_hotplug_begin() /
 * cpu_hotplug_done()).
 */
static struct {
	/* Task running a hotplug operation; NULL when there is none. */
	struct task_struct *active_writer;
	/*
	 * Synchronizes accesses to refcount.  Also blocks the new
	 * readers during an ongoing cpu hotplug operation.
	 */
	struct mutex lock;
	/* Raised by get_online_cpus(), dropped by put_online_cpus(). */
	int refcount;
} cpu_hotplug = {
	.active_writer	= NULL,
	.lock		= __MUTEX_INITIALIZER(cpu_hotplug.lock),
	.refcount	= 0,
};
59

60
void get_online_cpus(void)
61
{
62 63
	might_sleep();
	if (cpu_hotplug.active_writer == current)
64
		return;
65 66 67 68
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

69
}
70
EXPORT_SYMBOL_GPL(get_online_cpus);
71

72
void put_online_cpus(void)
73
{
74
	if (cpu_hotplug.active_writer == current)
75
		return;
76
	mutex_lock(&cpu_hotplug.lock);
77 78
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
79 80
	mutex_unlock(&cpu_hotplug.lock);

81
}
82
EXPORT_SYMBOL_GPL(put_online_cpus);
83

84 85 86 87 88 89 90
/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
91 92
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
93 94 95 96 97 98 99 100 101 102
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
103
 * get_online_cpus() not an api which is called all that often.
104 105 106 107 108
 *
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;
109 110 111 112 113 114

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
115 116 117 118 119 120 121 122 123 124
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

/*
 * End a hotplug operation: clear the recorded writer and release
 * cpu_hotplug.lock, which cpu_hotplug_begin() returned holding.
 */
static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
125 126 127 128 129 130

#else /* !CONFIG_HOTPLUG_CPU */
/* Without CONFIG_HOTPLUG_CPU the begin/done hooks are empty stubs. */
static void cpu_hotplug_begin(void) {}
static void cpu_hotplug_done(void) {}
#endif	/* #else #ifdef CONFIG_HOTPLUG_CPU */

L
Linus Torvalds 已提交
131
/* Need to know about CPUs going up/down? */
132
int __ref register_cpu_notifier(struct notifier_block *nb)
L
Linus Torvalds 已提交
133
{
134
	int ret;
135
	cpu_maps_update_begin();
136
	ret = raw_notifier_chain_register(&cpu_chain, nb);
137
	cpu_maps_update_done();
138
	return ret;
L
Linus Torvalds 已提交
139
}
140

141 142 143
/*
 * Deliver event @val with argument @v to cpu_chain.
 *
 * @nr_to_call and @nr_calls are handed straight to
 * __raw_notifier_call_chain(); the chain's result is converted with
 * notifier_to_errno() before being returned.
 */
static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
			int *nr_calls)
{
	int chain_ret = __raw_notifier_call_chain(&cpu_chain, val, v,
						  nr_to_call, nr_calls);

	return notifier_to_errno(chain_ret);
}

/*
 * Deliver event @val with argument @v to cpu_chain through
 * __cpu_notify(), passing nr_to_call = -1 (presumably "no limit" --
 * confirm against __raw_notifier_call_chain()) and no call counter.
 * Returns __cpu_notify()'s result.
 */
static int cpu_notify(unsigned long val, void *v)
{
	return __cpu_notify(val, v, -1, NULL);
}

/*
 * Like cpu_notify(), for events whose notifiers must not fail:
 * a non-zero result triggers BUG_ON().
 */
static void cpu_notify_nofail(unsigned long val, void *v)
{
	int failed = cpu_notify(val, v);

	BUG_ON(failed);
}

165 166
#ifdef CONFIG_HOTPLUG_CPU

L
Linus Torvalds 已提交
167 168
/* This export sits inside the CONFIG_HOTPLUG_CPU section of the file. */
EXPORT_SYMBOL(register_cpu_notifier);

169
/*
 * Remove @nb from cpu_chain.  The removal runs with
 * cpu_add_remove_lock held; the result of
 * raw_notifier_chain_unregister() is not reported to the caller.
 */
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();

	raw_notifier_chain_unregister(&cpu_chain, nb);

	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);

/*
 * Walk every process under tasklist_lock and print a warning for each
 * one that is still assigned to @cpu, is in TASK_RUNNING state and has
 * a non-zero utime or stime.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *tsk;

	write_lock_irq(&tasklist_lock);
	for_each_process(tsk) {
		if (task_cpu(tsk) != cpu || tsk->state != TASK_RUNNING)
			continue;

		/* Skip tasks that have accumulated no CPU time at all. */
		if (cputime_eq(tsk->utime, cputime_zero) &&
		    cputime_eq(tsk->stime, cputime_zero))
			continue;

		printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
			"(state = %ld, flags = %x)\n",
			tsk->comm, task_pid_nr(tsk), cpu,
			tsk->state, tsk->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

A
Avi Kivity 已提交
194
/* Argument bundle that _cpu_down() passes to take_cpu_down(). */
struct take_cpu_down_param {
	/* Task that called _cpu_down() (set from current). */
	struct task_struct *caller;
	/* CPU_TASKS_FROZEN or 0; OR-ed into the notifier event. */
	unsigned long mod;
	/* CPU number cast to a pointer, as passed to the notifiers. */
	void *hcpu;
};

L
Linus Torvalds 已提交
200
/* Take this CPU down. */
201
static int __ref take_cpu_down(void *_param)
L
Linus Torvalds 已提交
202
{
A
Avi Kivity 已提交
203
	struct take_cpu_down_param *param = _param;
204
	unsigned int cpu = (unsigned long)param->hcpu;
L
Linus Torvalds 已提交
205 206 207 208 209
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
Z
Zwane Mwaikambo 已提交
210
		return err;
L
Linus Torvalds 已提交
211

212
	cpu_notify(CPU_DYING | param->mod, param->hcpu);
213

214 215
	if (task_cpu(param->caller) == cpu)
		move_task_off_dead_cpu(cpu, param->caller);
Z
Zwane Mwaikambo 已提交
216 217 218 219
	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
L
Linus Torvalds 已提交
220 221
}

222
/* Requires cpu_add_remove_lock to be held */
223
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
224
{
225 226
	int err, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
227
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
A
Avi Kivity 已提交
228
	struct take_cpu_down_param tcd_param = {
229
		.caller = current,
A
Avi Kivity 已提交
230 231 232
		.mod = mod,
		.hcpu = hcpu,
	};
L
Linus Torvalds 已提交
233

234 235
	if (num_online_cpus() == 1)
		return -EBUSY;
L
Linus Torvalds 已提交
236

237 238
	if (!cpu_online(cpu))
		return -EINVAL;
L
Linus Torvalds 已提交
239

240
	cpu_hotplug_begin();
241
	set_cpu_active(cpu, false);
242
	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
243
	if (err) {
244 245
		set_cpu_active(cpu, true);

246
		nr_calls--;
247
		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
L
Linus Torvalds 已提交
248
		printk("%s: attempt to take down CPU %u failed\n",
249
				__func__, cpu);
250
		goto out_release;
L
Linus Torvalds 已提交
251 252
	}

R
Rusty Russell 已提交
253
	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
254
	if (err) {
255
		set_cpu_active(cpu, true);
L
Linus Torvalds 已提交
256
		/* CPU didn't die: tell everyone.  Can't complain. */
257
		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
L
Linus Torvalds 已提交
258

259
		goto out_release;
260
	}
261
	BUG_ON(cpu_online(cpu));
L
Linus Torvalds 已提交
262 263 264 265 266 267 268 269 270

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
271
	cpu_notify_nofail(CPU_DEAD | mod, hcpu);
L
Linus Torvalds 已提交
272 273 274

	check_for_tasks(cpu);

275
out_release:
276
	cpu_hotplug_done();
277 278
	if (!err)
		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
279 280 281
	return err;
}

282
/*
 * Take @cpu offline.
 *
 * Holds cpu_add_remove_lock around the operation.  Returns -EBUSY while
 * cpu_hotplug_disabled is set, otherwise the result of _cpu_down().
 */
int __ref cpu_down(unsigned int cpu)
{
	int ret;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled)
		ret = -EBUSY;
	else
		ret = _cpu_down(cpu, 0);

	cpu_maps_update_done();
	return ret;
}
EXPORT_SYMBOL(cpu_down);
L
Linus Torvalds 已提交
300 301
#endif /*CONFIG_HOTPLUG_CPU*/

302
/* Requires cpu_add_remove_lock to be held */
303
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
304
{
305
	int ret, nr_calls = 0;
L
Linus Torvalds 已提交
306
	void *hcpu = (void *)(long)cpu;
307
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
L
Linus Torvalds 已提交
308

309 310
	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;
311

312
	cpu_hotplug_begin();
313
	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
314
	if (ret) {
315
		nr_calls--;
L
Linus Torvalds 已提交
316
		printk("%s: attempt to bring up CPU %u failed\n",
317
				__func__, cpu);
L
Linus Torvalds 已提交
318 319 320 321 322 323 324
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
325
	BUG_ON(!cpu_online(cpu));
L
Linus Torvalds 已提交
326

327
	set_cpu_active(cpu, true);
328

L
Linus Torvalds 已提交
329
	/* Now call notifier in preparation. */
330
	cpu_notify(CPU_ONLINE | mod, hcpu);
L
Linus Torvalds 已提交
331 332 333

out_notify:
	if (ret != 0)
334
		__cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
335
	cpu_hotplug_done();
336 337 338 339

	return ret;
}

340
/*
 * Bring @cpu online.
 *
 * Returns -EINVAL if @cpu is not a possible CPU and -EBUSY while
 * cpu_hotplug_disabled is set; otherwise the result of _cpu_up().
 * With CONFIG_MEMORY_HOTPLUG the CPU's memory node is brought online
 * first, and a failure there (or a NULL pgdat, -ENOMEM) is returned
 * before cpu_add_remove_lock is taken.
 */
int __cpuinit cpu_up(unsigned int cpu)
{
	int ret = 0;

#ifdef	CONFIG_MEMORY_HOTPLUG
	int nid;
	pg_data_t	*pgdat;
#endif

	if (!cpu_possible(cpu)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

#ifdef	CONFIG_MEMORY_HOTPLUG
	nid = cpu_to_node(cpu);
	if (!node_online(nid)) {
		ret = mem_online_node(nid);
		if (ret)
			return ret;
	}

	pgdat = NODE_DATA(nid);
	if (!pgdat) {
		printk(KERN_ERR
			"Can't online cpu %d due to NULL pgdat\n", cpu);
		return -ENOMEM;
	}

	/* Build the zonelists if the node's first zoneref is still empty. */
	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL);
		mutex_unlock(&zonelists_mutex);
	}
#endif

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled)
		ret = -EBUSY;
	else
		ret = _cpu_up(cpu, 0);

	cpu_maps_update_done();
	return ret;
}

395
#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
396
/*
 * CPUs that disable_nonboot_cpus() took offline; enable_nonboot_cpus()
 * brings them back and then clears the mask.  Allocated by
 * alloc_frozen_cpus().
 */
static cpumask_var_t frozen_cpus;
397 398 399

int disable_nonboot_cpus(void)
{
400
	int cpu, first_cpu, error;
401

402
	cpu_maps_update_begin();
R
Rusty Russell 已提交
403
	first_cpu = cpumask_first(cpu_online_mask);
404 405
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
406 407
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
R
Rusty Russell 已提交
408
	cpumask_clear(frozen_cpus);
409

410 411 412 413
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
414
		error = _cpu_down(cpu, 1);
415
		if (!error)
R
Rusty Russell 已提交
416
			cpumask_set_cpu(cpu, frozen_cpus);
417
		else {
418 419 420 421 422
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
423

424 425 426 427 428
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
429
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
430
	}
431
	cpu_maps_update_done();
432 433 434
	return error;
}

435 436 437 438 439 440 441 442
/*
 * Weak no-op default.  enable_nonboot_cpus() calls this before it
 * starts bringing the frozen CPUs back up.
 */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}

/*
 * Weak no-op default.  enable_nonboot_cpus() calls this after it has
 * tried to bring every frozen CPU back up.
 */
void __weak arch_enable_nonboot_cpus_end(void)
{
}

443
/*
 * Re-enable CPU hotplug and bring back every CPU recorded in
 * frozen_cpus.  A CPU that fails to come up is reported and skipped;
 * the remaining CPUs are still attempted.  frozen_cpus is cleared
 * afterwards.
 */
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	cpu_maps_update_begin();

	/* Allow everyone to use the CPU hotplug again */
	cpu_hotplug_disabled = 0;

	if (!cpumask_empty(frozen_cpus)) {
		printk("Enabling non-boot CPUs ...\n");

		arch_enable_nonboot_cpus_begin();

		for_each_cpu(cpu, frozen_cpus) {
			error = _cpu_up(cpu, 1);
			if (error)
				printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
			else
				printk("CPU%d is up\n", cpu);
		}

		arch_enable_nonboot_cpus_end();

		cpumask_clear(frozen_cpus);
	}

	cpu_maps_update_done();
}
R
Rusty Russell 已提交
472 473 474 475 476 477 478 479

/*
 * Allocate the zero-filled frozen_cpus mask.  Registered as a core
 * initcall.  Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int alloc_frozen_cpus(void)
{
	return alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO) ?
		0 : -ENOMEM;
}
core_initcall(alloc_frozen_cpus);
480
#endif /* CONFIG_PM_SLEEP_SMP */
481

482 483 484 485 486 487 488 489
/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
A
Al Viro 已提交
490
void __cpuinit notify_cpu_starting(unsigned int cpu)
491 492 493 494
{
	unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
495
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
496 497
		val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
498
	cpu_notify(val, (void *)(long)cpu);
499 500
}

501
#endif /* CONFIG_SMP */
502

503 504 505 506
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
R
Rusty Russell 已提交
507
 * It is used by cpumask_of() to get a constant address to a CPU
508 509
 * mask value that has a single bit set only.
 */
510

511 512 513 514 515
/*
 * cpu_bit_bitmap[0] is empty - so we can back into it.
 *
 * MASK_DECLARE_1(x) is a designated initializer that sets word 0 of
 * row x+1 to 1UL << x.  Each MASK_DECLARE_<2n> expands to two adjacent
 * MASK_DECLARE_<n> groups, so MASK_DECLARE_8(x) covers bits x..x+7.
 */
#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
516

517 518 519 520 521 522 523
/*
 * Row n+1 has only bit n set in its first word; row 0 is left all
 * zero.  Rows 1..32 are always initialized, rows 33..64 only when
 * BITS_PER_LONG is larger than 32.
 */
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
	MASK_DECLARE_8(0),
	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),
	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),
	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),
	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
527 528 529

/* Constant NR_CPUS-bit bitmap initialized with CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550

/*
 * Backing bitmaps for the possible/online/present/active CPU masks.
 * Each bitmap is file-local and writable; the exported *_mask pointer
 * is a const view of it.  Writers in this file go through
 * set_cpu_*() and init_cpu_*().
 */

/* With CONFIG_INIT_ALL_POSSIBLE the possible bitmap starts as CPU_BITS_ALL. */
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

/* Online CPUs; updated by set_cpu_online() and init_cpu_online(). */
static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

/* Present CPUs; updated by set_cpu_present() and init_cpu_present(). */
static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

/* Active CPUs; updated by set_cpu_active(). */
static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597

/* Set (@possible true) or clear (@possible false) @cpu in the possible mask. */
void set_cpu_possible(unsigned int cpu, bool possible)
{
	struct cpumask *mask = to_cpumask(cpu_possible_bits);

	if (!possible) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Set (@present true) or clear (@present false) @cpu in the present mask. */
void set_cpu_present(unsigned int cpu, bool present)
{
	struct cpumask *mask = to_cpumask(cpu_present_bits);

	if (!present) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Set (@online true) or clear (@online false) @cpu in the online mask. */
void set_cpu_online(unsigned int cpu, bool online)
{
	struct cpumask *mask = to_cpumask(cpu_online_bits);

	if (!online) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Set (@active true) or clear (@active false) @cpu in the active mask. */
void set_cpu_active(unsigned int cpu, bool active)
{
	struct cpumask *mask = to_cpumask(cpu_active_bits);

	if (!active) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Overwrite the whole present mask with a copy of @src. */
void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_present_bits), src);
}

/* Overwrite the whole possible mask with a copy of @src. */
void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_possible_bits), src);
}

/* Overwrite the whole online mask with a copy of @src. */
void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_online_bits), src);
}