/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>

#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);

static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

static struct {
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount. */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	int refcount;
} cpu_hotplug = {
	.active_writer = NULL,
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
	.refcount = 0,
};
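
/*
 * cpu_hotplug.refcount counts the readers currently inside
 * get_online_cpus()/put_online_cpus() sections, while active_writer
 * records the task running a hotplug operation so that its own nested
 * get_online_cpus() calls do not deadlock.
 */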

#ifdef CONFIG_HOTPLUG_CPU

void get_online_cpus(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

}
EXPORT_SYMBOL_GPL(get_online_cpus);

void put_online_cpus(void)
{
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
	mutex_unlock(&cpu_hotplug.lock);

}
EXPORT_SYMBOL_GPL(put_online_cpus);
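
/*
 * Usage sketch (illustrative only; do_per_cpu_work() is a hypothetical
 * helper, not something defined in this file):
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_per_cpu_work(cpu);
 *	put_online_cpus();
 *
 * While the reader holds the refcount, a concurrent hotplug writer
 * waits in cpu_hotplug_begin() below.
 */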

#endif	/* CONFIG_HOTPLUG_CPU */

/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock, finds the refcount
 *   non-zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * get_online_cpus() is not an API that is called all that often.
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
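
/*
 * Writer-side pairing, for reference: this is what _cpu_down() and
 * _cpu_up() below actually do.
 *
 *	cpu_maps_update_begin();
 *	cpu_hotplug_begin();
 *	... update the cpu maps ...
 *	cpu_hotplug_done();
 *	cpu_maps_update_done();
 */
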
/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
	int ret;
	cpu_maps_update_begin();
	ret = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU

EXPORT_SYMBOL(register_cpu_notifier);

void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
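
/*
 * Registration sketch (illustrative; my_cpu_callback and
 * my_cpu_notifier are hypothetical names, not part of this file):
 *
 *	static int __cpuinit my_cpu_callback(struct notifier_block *nb,
 *					     unsigned long action, void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action) {
 *		case CPU_ONLINE:
 *		case CPU_ONLINE_FROZEN:
 *			... set up per-cpu state for cpu ...
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_cpu_notifier __cpuinitdata = {
 *		.notifier_call = my_cpu_callback,
 *	};
 *
 *	register_cpu_notifier(&my_cpu_notifier);
 */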

static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
158
				(state = %ld, flags = %x) \n",
159 160
				 p->comm, task_pid_nr(p), cpu,
				 p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

struct take_cpu_down_param {
	unsigned long mod;	/* CPU_TASKS_FROZEN if tasks are frozen, else 0 */
	void *hcpu;		/* cpu number cast to void *, as passed to notifiers */
};

/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);

	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	int err, nr_calls = 0;
	cpumask_var_t old_allowed;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
		return -ENOMEM;

	cpu_hotplug_begin();
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
					hcpu, -1, &nr_calls);
	if (err == NOTIFY_BAD) {
		set_cpu_active(cpu, true);

		nr_calls--;
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
				__func__, cpu);
		err = -EINVAL;
		goto out_release;
	}

	/* Ensure that we are not runnable on dying cpu */
	cpumask_copy(old_allowed, &current->cpus_allowed);
	set_cpus_allowed_ptr(current, cpu_active_mask);

	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
	if (err) {
		set_cpu_active(cpu, true);
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();

		goto out_allowed;
	}
	BUG_ON(cpu_online(cpu));

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_allowed:
	set_cpus_allowed_ptr(current, old_allowed);
out_release:
	cpu_hotplug_done();
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
	free_cpumask_var(old_allowed);
	return err;
}

int __ref cpu_down(unsigned int cpu)
{
	int err;

	err = stop_machine_create();
	if (err)
		return err;
	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	set_cpu_active(cpu, false);

	/*
	 * Make sure all cpus did the reschedule and are not
	 * using a stale version of the cpu_active_mask.
	 * This is not strictly necessary because the stop_machine()
	 * that we run down the line already provides the required
	 * synchronization. But it's really a side effect and we do not
	 * want to depend on the innards of stop_machine here.
	 */
	synchronize_sched();

	err = _cpu_down(cpu, 0);

out:
	cpu_maps_update_done();
	stop_machine_destroy();
	return err;
}
EXPORT_SYMBOL(cpu_down);
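
/*
 * cpu_down() is normally driven from the sysfs "online" attribute,
 * e.g. "echo 0 > /sys/devices/system/cpu/cpu1/online" (see
 * drivers/base/cpu.c).
 */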
#endif /*CONFIG_HOTPLUG_CPU*/

/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	int ret, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
							-1, &nr_calls);
	if (ret == NOTIFY_BAD) {
		nr_calls--;
		printk("%s: attempt to bring up CPU %u failed\n",
				__func__, cpu);
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
	BUG_ON(!cpu_online(cpu));

	set_cpu_active(cpu, true);

	/* Now tell the notifiers that the cpu is online. */
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);

out_notify:
	if (ret != 0)
		__raw_notifier_call_chain(&cpu_chain,
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
	cpu_hotplug_done();

	return ret;
}

int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;
	if (!cpu_possible(cpu)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

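/*
 * Take every CPU except the first online one down, remembering them in
 * frozen_cpus so enable_nonboot_cpus() can bring them back; used on the
 * suspend/hibernate path.
 */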
int disable_nonboot_cpus(void)
{
	int cpu, first_cpu, error;

	error = stop_machine_create();
	if (error)
		return error;
	cpu_maps_update_begin();
	first_cpu = cpumask_first(cpu_online_mask);
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with userspace trying to use CPU hotplug at the same time.
	 */
	cpumask_clear(frozen_cpus);

	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		set_cpu_active(cpu, false);
	}

	synchronize_sched();

	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		error = _cpu_down(cpu, 1);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}

	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
	}
	cpu_maps_update_done();
	stop_machine_destroy();
	return error;
}

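/*
 * Default no-op hooks; an architecture may override these to bracket
 * the re-onlining loop in enable_nonboot_cpus() with arch-specific
 * setup and teardown.
 */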
void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

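/* Bring the CPUs that disable_nonboot_cpus() froze back online. */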
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpumask_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}

static int alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);
#endif /* CONFIG_PM_SLEEP_SMP */

/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void __cpuinit notify_cpu_starting(unsigned int cpu)
{
	unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
		val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
	raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
}

#endif /* CONFIG_SMP */

/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents, for every bit number nr < NR_CPUS, the single-bit
 * binary value 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */
/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
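
/*
 * Illustration (a sketch of how cpumask_of() indexes this table; see
 * get_cpu_mask() in include/linux/cpumask.h):
 *
 *	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *	p -= cpu / BITS_PER_LONG;
 *	return to_cpumask(p);
 *
 * Backing p up by cpu / BITS_PER_LONG longs is exactly what the empty
 * row cpu_bit_bitmap[0] makes safe.
 */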

const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);

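/*
 * The masks above are exported as const pointers, so outside code can
 * read but not write them; updates go through the helpers below
 * (typically called from arch setup code).
 */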
void set_cpu_possible(unsigned int cpu, bool possible)
{
	if (possible)
		cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
}

void set_cpu_present(unsigned int cpu, bool present)
{
	if (present)
		cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
}

void set_cpu_online(unsigned int cpu, bool online)
{
	if (online)
		cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
}

void set_cpu_active(unsigned int cpu, bool active)
{
	if (active)
		cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
}

void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_present_bits), src);
}

void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_possible_bits), src);
}

void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_online_bits), src);
}