cpu.c 14.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
16
#include <linux/mutex.h>
L
Linus Torvalds 已提交
17

18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
/*
 * Represents all CPUs present in the system.
 * On systems capable of hotplug, this map can grow dynamically as new
 * CPUs are detected via a platform-specific method (ACPI, for example).
 */
cpumask_t cpu_present_map __read_mostly;
EXPORT_SYMBOL(cpu_present_map);

#ifndef CONFIG_SMP

/*
 * Represents all CPUs that are currently online.
 * In this !CONFIG_SMP build the map is statically set to CPU_MASK_ALL.
 */
cpumask_t cpu_online_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_online_map);

/* Likewise statically set to CPU_MASK_ALL in the !CONFIG_SMP build. */
cpumask_t cpu_possible_map __read_mostly = CPU_MASK_ALL;
EXPORT_SYMBOL(cpu_possible_map);

#else /* CONFIG_SMP */

40
/* Serializes the updates to cpu_online_map, cpu_present_map */
41
static DEFINE_MUTEX(cpu_add_remove_lock);
L
Linus Torvalds 已提交
42

43
/*
 * Notifier chain that receives the CPU_* hotplug events raised by
 * _cpu_up(), _cpu_down() and take_cpu_down().
 */
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
L
Linus Torvalds 已提交
44

45 46 47 48 49
/*
 * If set, cpu_up() and cpu_down() return -EBUSY and do nothing.
 * Must only be manipulated while holding cpu_add_remove_lock.
 */
static int cpu_hotplug_disabled;

50 51 52 53 54 55 56 57 58
/*
 * Bookkeeping that synchronizes the hotplug writer
 * (cpu_hotplug_begin()/cpu_hotplug_done()) with readers
 * (get_online_cpus()/put_online_cpus()).
 */
static struct {
	/* Task currently running a hotplug operation, or NULL if none. */
	struct task_struct *active_writer;
	/*
	 * Synchronizes accesses to refcount.
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	struct mutex lock;
	/* Number of reader references currently outstanding. */
	int refcount;
} cpu_hotplug;
59

60 61 62 63 64 65 66
/*
 * Put the hotplug reader/writer bookkeeping into its idle state:
 * an initialized mutex, no outstanding readers and no active writer.
 */
void __init cpu_hotplug_init(void)
{
	mutex_init(&cpu_hotplug.lock);
	cpu_hotplug.refcount = 0;
	cpu_hotplug.active_writer = NULL;
}

67 68
/*
 * A CPU's bit is cleared by cpu_down() before the CPU is taken down and
 * set by cpu_up()/cpu_down() whenever the CPU is found online afterwards.
 */
cpumask_t cpu_active_map;

69
#ifdef CONFIG_HOTPLUG_CPU
70

71
void get_online_cpus(void)
72
{
73 74
	might_sleep();
	if (cpu_hotplug.active_writer == current)
75
		return;
76 77 78 79
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

80
}
81
EXPORT_SYMBOL_GPL(get_online_cpus);
82

83
void put_online_cpus(void)
84
{
85
	if (cpu_hotplug.active_writer == current)
86
		return;
87
	mutex_lock(&cpu_hotplug.lock);
88 89
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
90 91
	mutex_unlock(&cpu_hotplug.lock);

92
}
93
EXPORT_SYMBOL_GPL(put_online_cpus);
94 95

#endif	/* CONFIG_HOTPLUG_CPU */
96

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
/*
 * cpu_maps_update_begin() and cpu_maps_update_done() must bracket any
 * attempt to serialize updates to cpu_online_map and cpu_present_map.
 *
 * cpu_maps_update_begin() acquires cpu_add_remove_lock.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

/* Release cpu_add_remove_lock taken by cpu_maps_update_begin(). */
void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
118 119
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
120 121 122 123 124 125 126 127 128 129
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
130
 * get_online_cpus() not an api which is called all that often.
131 132 133 134 135
 *
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;
136 137 138 139 140 141

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
142 143 144 145 146 147 148 149 150 151
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

/*
 * End a hotplug operation started with cpu_hotplug_begin(): clear the
 * active writer and release cpu_hotplug.lock, which cpu_hotplug_begin()
 * left held.
 */
static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
L
Linus Torvalds 已提交
152
/* Need to know about CPUs going up/down? */
153
int __ref register_cpu_notifier(struct notifier_block *nb)
L
Linus Torvalds 已提交
154
{
155
	int ret;
156
	cpu_maps_update_begin();
157
	ret = raw_notifier_chain_register(&cpu_chain, nb);
158
	cpu_maps_update_done();
159
	return ret;
L
Linus Torvalds 已提交
160
}
161 162 163

#ifdef CONFIG_HOTPLUG_CPU

L
Linus Torvalds 已提交
164 165
EXPORT_SYMBOL(register_cpu_notifier);

166
/*
 * Remove @nb from the CPU notifier chain while holding
 * cpu_add_remove_lock.
 */
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);

/*
 * Warn about every process whose task_cpu() is still @cpu and which has
 * accumulated non-zero user or system time.  Called from _cpu_down()
 * after the CPU_DEAD notification.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			/*
			 * Use adjacent string literals rather than a
			 * backslash-newline inside the literal: the latter
			 * embeds the next line's indentation in the message.
			 */
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				 p->comm, task_pid_nr(p), cpu,
				 p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

A
Avi Kivity 已提交
191 192 193 194 195
/* Argument bundle passed from _cpu_down() to take_cpu_down(). */
struct take_cpu_down_param {
	unsigned long mod;	/* CPU_TASKS_FROZEN or 0; OR-ed into the event */
	void *hcpu;		/* CPU number cast to a pointer, for notifiers */
};

L
Linus Torvalds 已提交
196
/* Take this CPU down. */
197
static int __ref take_cpu_down(void *_param)
L
Linus Torvalds 已提交
198
{
A
Avi Kivity 已提交
199
	struct take_cpu_down_param *param = _param;
L
Linus Torvalds 已提交
200 201
	int err;

A
Avi Kivity 已提交
202 203
	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);
L
Linus Torvalds 已提交
204 205 206
	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
Z
Zwane Mwaikambo 已提交
207
		return err;
L
Linus Torvalds 已提交
208

Z
Zwane Mwaikambo 已提交
209 210 211 212
	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
L
Linus Torvalds 已提交
213 214
}

215
/* Requires cpu_add_remove_lock to be held */
216
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
217
{
218
	int err, nr_calls = 0;
L
Linus Torvalds 已提交
219 220
	struct task_struct *p;
	cpumask_t old_allowed, tmp;
221
	void *hcpu = (void *)(long)cpu;
222
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
A
Avi Kivity 已提交
223 224 225 226
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};
L
Linus Torvalds 已提交
227

228 229
	if (num_online_cpus() == 1)
		return -EBUSY;
L
Linus Torvalds 已提交
230

231 232
	if (!cpu_online(cpu))
		return -EINVAL;
L
Linus Torvalds 已提交
233

234
	cpu_hotplug_begin();
235
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
236
					hcpu, -1, &nr_calls);
L
Linus Torvalds 已提交
237
	if (err == NOTIFY_BAD) {
238
		nr_calls--;
239 240
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
L
Linus Torvalds 已提交
241
		printk("%s: attempt to take down CPU %u failed\n",
242
				__func__, cpu);
243 244
		err = -EINVAL;
		goto out_release;
L
Linus Torvalds 已提交
245 246 247 248
	}

	/* Ensure that we are not runnable on dying cpu */
	old_allowed = current->cpus_allowed;
249
	cpus_setall(tmp);
L
Linus Torvalds 已提交
250
	cpu_clear(cpu, tmp);
251
	set_cpus_allowed_ptr(current, &tmp);
L
Linus Torvalds 已提交
252

A
Avi Kivity 已提交
253
	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
254

255
	if (IS_ERR(p) || cpu_online(cpu)) {
L
Linus Torvalds 已提交
256
		/* CPU didn't die: tell everyone.  Can't complain. */
257
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
258
					    hcpu) == NOTIFY_BAD)
L
Linus Torvalds 已提交
259 260
			BUG();

261 262 263 264
		if (IS_ERR(p)) {
			err = PTR_ERR(p);
			goto out_allowed;
		}
L
Linus Torvalds 已提交
265
		goto out_thread;
266
	}
L
Linus Torvalds 已提交
267 268 269 270 271 272 273 274 275

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
276 277
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
L
Linus Torvalds 已提交
278 279 280 281 282 283 284
		BUG();

	check_for_tasks(cpu);

out_thread:
	err = kthread_stop(p);
out_allowed:
285
	set_cpus_allowed_ptr(current, &old_allowed);
286
out_release:
287
	cpu_hotplug_done();
288 289 290 291 292
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
293 294 295
	return err;
}

296
/*
 * Take @cpu offline.
 *
 * Returns 0 on success, -EBUSY if CPU hotplug is currently disabled, or
 * the error returned by _cpu_down().
 */
int __ref cpu_down(unsigned int cpu)
{
	int err = 0;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	cpu_clear(cpu, cpu_active_map);

	/*
	 * Make sure that all CPUs did the reschedule and are not
	 * using a stale version of the cpu_active_map.
	 * This is not strictly necessary because stop_machine()
	 * that we run down the line already provides the required
	 * synchronization. But it's really a side effect and we do not
	 * want to depend on the innards of the stop_machine here.
	 */
	synchronize_sched();

	err = _cpu_down(cpu, 0);

	/* The CPU did not go down after all: mark it active again. */
	if (cpu_online(cpu))
		cpu_set(cpu, cpu_active_map);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL(cpu_down);
L
Linus Torvalds 已提交
329 330
#endif /*CONFIG_HOTPLUG_CPU*/

331
/* Requires cpu_add_remove_lock to be held */
332
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
333
{
334
	int ret, nr_calls = 0;
L
Linus Torvalds 已提交
335
	void *hcpu = (void *)(long)cpu;
336
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
L
Linus Torvalds 已提交
337

338 339
	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;
340

341
	cpu_hotplug_begin();
342
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
343
							-1, &nr_calls);
L
Linus Torvalds 已提交
344
	if (ret == NOTIFY_BAD) {
345
		nr_calls--;
L
Linus Torvalds 已提交
346
		printk("%s: attempt to bring up CPU %u failed\n",
347
				__func__, cpu);
L
Linus Torvalds 已提交
348 349 350 351 352 353 354 355
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
356
	BUG_ON(!cpu_online(cpu));
L
Linus Torvalds 已提交
357 358

	/* Now call notifier in preparation. */
359
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
L
Linus Torvalds 已提交
360 361 362

out_notify:
	if (ret != 0)
363
		__raw_notifier_call_chain(&cpu_chain,
364
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
365
	cpu_hotplug_done();
366 367 368 369

	return ret;
}

370
/*
 * Bring @cpu online.
 *
 * Returns 0 on success, -EINVAL if @cpu is not in cpu_possible_map,
 * -EBUSY if CPU hotplug is currently disabled, or the error returned by
 * _cpu_up().
 */
int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

	if (!cpu_isset(cpu, cpu_possible_map)) {
		/* cpu is unsigned, so print it with %u. */
		printk(KERN_ERR "can't online cpu %u because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

	if (cpu_online(cpu))
		cpu_set(cpu, cpu_active_map);

out:
	cpu_maps_update_done();
	return err;
}

400
#ifdef CONFIG_PM_SLEEP_SMP
401 402 403 404
/*
 * CPUs taken down by disable_nonboot_cpus(); enable_nonboot_cpus()
 * brings them back up and then clears the mask.
 */
static cpumask_t frozen_cpus;

int disable_nonboot_cpus(void)
{
405
	int cpu, first_cpu, error = 0;
406

407
	cpu_maps_update_begin();
408
	first_cpu = first_cpu(cpu_online_map);
409 410 411 412 413 414 415 416
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpus_clear(frozen_cpus);
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
417
		error = _cpu_down(cpu, 1);
418 419 420 421 422 423 424 425 426 427 428 429 430 431
		if (!error) {
			cpu_set(cpu, frozen_cpus);
			printk("CPU%d is down\n", cpu);
		} else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
432
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
433
	}
434
	cpu_maps_update_done();
435 436 437
	return error;
}

438
/*
 * Re-enable CPU hotplug and bring every CPU recorded in frozen_cpus back
 * up.  A failure to bring up one CPU is logged and does not stop the
 * remaining CPUs from being tried; frozen_cpus is cleared afterwards.
 */
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpus_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");
	for_each_cpu_mask_nr(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}
	cpus_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
461
#endif /* CONFIG_PM_SLEEP_SMP */
462 463

#endif /* CONFIG_SMP */
464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510

/*
 * 64 bits of zeros, for initializers: two unsigned longs when
 * BITS_PER_LONG is 32, a single one otherwise.
 */
#if BITS_PER_LONG == 32
#define Z64 0, 0
#else
#define Z64 0
#endif

/*
 * Initializer macros.
 *
 * CMI0(n):     mask whose first word has bit n set.
 * CMI(n, ...): mask whose leading words are the variadic arguments,
 *              followed by a word with bit (n % BITS_PER_LONG) set.
 */
#define CMI0(n) { .bits = { 1UL << (n) } }
#define CMI(n, ...) { .bits = { __VA_ARGS__, 1UL << ((n) % BITS_PER_LONG) } }

/* Eight consecutive CMI() initializers, for bits n .. n+7. */
#define CMI8(n, ...)						\
	CMI((n), __VA_ARGS__), CMI((n)+1, __VA_ARGS__),		\
	CMI((n)+2, __VA_ARGS__), CMI((n)+3, __VA_ARGS__),	\
	CMI((n)+4, __VA_ARGS__), CMI((n)+5, __VA_ARGS__),	\
	CMI((n)+6, __VA_ARGS__), CMI((n)+7, __VA_ARGS__)

/*
 * Sixty-four consecutive initializers.  With 32-bit longs the upper 32
 * bits live one word further along, so those entries get an extra
 * leading zero word.
 */
#if BITS_PER_LONG == 32
#define CMI64(n, ...)							\
	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
	CMI8((n)+32, 0, __VA_ARGS__), CMI8((n)+40, 0, __VA_ARGS__),	\
	CMI8((n)+48, 0, __VA_ARGS__), CMI8((n)+56, 0, __VA_ARGS__)
#else
#define CMI64(n, ...)							\
	CMI8((n), __VA_ARGS__), CMI8((n)+8, __VA_ARGS__),		\
	CMI8((n)+16, __VA_ARGS__), CMI8((n)+24, __VA_ARGS__),		\
	CMI8((n)+32, __VA_ARGS__), CMI8((n)+40, __VA_ARGS__),	\
	CMI8((n)+48, __VA_ARGS__), CMI8((n)+56, __VA_ARGS__)
#endif

/*
 * 256 consecutive initializers: four CMI64 groups, each later group
 * prefixed with one more Z64 of leading zeros.
 */
#define CMI256(n, ...)							\
	CMI64((n), __VA_ARGS__), CMI64((n)+64, Z64, __VA_ARGS__),	\
	CMI64((n)+128, Z64, Z64, __VA_ARGS__),				\
	CMI64((n)+192, Z64, Z64, Z64, __VA_ARGS__)
/* 256 bits of zeros. */
#define Z256 Z64, Z64, Z64, Z64

/*
 * 1024 consecutive initializers: four CMI256 groups, each later group
 * prefixed with one more Z256 of leading zeros.
 */
#define CMI1024(n, ...)					\
	CMI256((n), __VA_ARGS__),			\
	CMI256((n)+256, Z256, __VA_ARGS__),		\
	CMI256((n)+512, Z256, Z256, __VA_ARGS__),	\
	CMI256((n)+768, Z256, Z256, Z256, __VA_ARGS__)
/* 1024 bits of zeros. */
#define Z1024 Z256, Z256, Z256, Z256

/* We want this statically initialized, just to be safe.  We try not
 * to waste too much space, either. */
511 512 513 514 515
static const cpumask_t cpumask_map[]
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
__initdata
#endif
= {
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574
	CMI0(0), CMI0(1), CMI0(2), CMI0(3),
#if NR_CPUS > 4
	CMI0(4), CMI0(5), CMI0(6), CMI0(7),
#endif
#if NR_CPUS > 8
	CMI0(8), CMI0(9), CMI0(10), CMI0(11),
	CMI0(12), CMI0(13), CMI0(14), CMI0(15),
#endif
#if NR_CPUS > 16
	CMI0(16), CMI0(17), CMI0(18), CMI0(19),
	CMI0(20), CMI0(21), CMI0(22), CMI0(23),
	CMI0(24), CMI0(25), CMI0(26), CMI0(27),
	CMI0(28), CMI0(29), CMI0(30), CMI0(31),
#endif
#if NR_CPUS > 32
#if BITS_PER_LONG == 32
	CMI(32, 0), CMI(33, 0), CMI(34, 0), CMI(35, 0),
	CMI(36, 0), CMI(37, 0), CMI(38, 0), CMI(39, 0),
	CMI(40, 0), CMI(41, 0), CMI(42, 0), CMI(43, 0),
	CMI(44, 0), CMI(45, 0), CMI(46, 0), CMI(47, 0),
	CMI(48, 0), CMI(49, 0), CMI(50, 0), CMI(51, 0),
	CMI(52, 0), CMI(53, 0), CMI(54, 0), CMI(55, 0),
	CMI(56, 0), CMI(57, 0), CMI(58, 0), CMI(59, 0),
	CMI(60, 0), CMI(61, 0), CMI(62, 0), CMI(63, 0),
#else
	CMI0(32), CMI0(33), CMI0(34), CMI0(35),
	CMI0(36), CMI0(37), CMI0(38), CMI0(39),
	CMI0(40), CMI0(41), CMI0(42), CMI0(43),
	CMI0(44), CMI0(45), CMI0(46), CMI0(47),
	CMI0(48), CMI0(49), CMI0(50), CMI0(51),
	CMI0(52), CMI0(53), CMI0(54), CMI0(55),
	CMI0(56), CMI0(57), CMI0(58), CMI0(59),
	CMI0(60), CMI0(61), CMI0(62), CMI0(63),
#endif /* BITS_PER_LONG == 64 */
#endif
#if NR_CPUS > 64
	CMI64(64, Z64),
#endif
#if NR_CPUS > 128
	CMI64(128, Z64, Z64), CMI64(192, Z64, Z64, Z64),
#endif
#if NR_CPUS > 256
	CMI256(256, Z256),
#endif
#if NR_CPUS > 512
	CMI256(512, Z256, Z256), CMI256(768, Z256, Z256, Z256),
#endif
#if NR_CPUS > 1024
	CMI1024(1024, Z1024),
#endif
#if NR_CPUS > 2048
	CMI1024(2048, Z1024, Z1024), CMI1024(3072, Z1024, Z1024, Z1024),
#endif
#if NR_CPUS > 4096
#error NR_CPUS too big.  Fix initializers or set CONFIG_HAVE_CPUMASK_OF_CPU_MAP
#endif
};

const cpumask_t *cpumask_of_cpu_map = cpumask_map;