cpu.c 13.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
16
#include <linux/mutex.h>
L
Linus Torvalds 已提交
17

18
#ifdef CONFIG_SMP
19
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
20
static DEFINE_MUTEX(cpu_add_remove_lock);
L
Linus Torvalds 已提交
21

22
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
L
Linus Torvalds 已提交
23

24 25 26 27 28
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

29 30 31 32 33 34 35 36 37
/*
 * Bookkeeping shared by the hotplug readers (get_online_cpus() /
 * put_online_cpus()) and the hotplug writer (cpu_hotplug_begin() /
 * cpu_hotplug_done()).
 */
static struct {
	/* Task currently performing a hotplug operation, or NULL. */
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	/* Reader count: bumped by get_online_cpus(), dropped by put_online_cpus(). */
	int refcount;
} cpu_hotplug;
38

39 40 41 42 43 44 45 46
/*
 * Put the cpu_hotplug bookkeeping into its idle state: mutex
 * initialised, no readers counted, no writer recorded.
 */
void __init cpu_hotplug_init(void)
{
	mutex_init(&cpu_hotplug.lock);
	cpu_hotplug.refcount = 0;
	cpu_hotplug.active_writer = NULL;
}

#ifdef CONFIG_HOTPLUG_CPU
47

48
void get_online_cpus(void)
49
{
50 51
	might_sleep();
	if (cpu_hotplug.active_writer == current)
52
		return;
53 54 55 56
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

57
}
58
EXPORT_SYMBOL_GPL(get_online_cpus);
59

60
void put_online_cpus(void)
61
{
62
	if (cpu_hotplug.active_writer == current)
63
		return;
64
	mutex_lock(&cpu_hotplug.lock);
65 66
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
67 68
	mutex_unlock(&cpu_hotplug.lock);

69
}
70
EXPORT_SYMBOL_GPL(put_online_cpus);
71 72

#endif	/* CONFIG_HOTPLUG_CPU */
73

74 75
/*
 * The following two APIs must be used when attempting
76
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
 */
/* Acquire cpu_add_remove_lock; pair with cpu_maps_update_done(). */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

/* Release cpu_add_remove_lock taken by cpu_maps_update_begin(). */
void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
95 96
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
97 98 99 100 101 102 103 104 105 106
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
107
 * get_online_cpus() is not an API which is called all that often.
108 109 110 111 112
 *
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;
113 114 115 116 117 118

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
119 120 121 122 123 124 125 126 127 128
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

/*
 * Finish a hotplug operation: clear the recorded writer, then release
 * cpu_hotplug.lock, which cpu_hotplug_begin() returned holding.
 */
static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
L
Linus Torvalds 已提交
129
/* Need to know about CPUs going up/down? */
130
/*
 * Add @nb to cpu_chain.  The chain is modified between
 * cpu_maps_update_begin() and cpu_maps_update_done().
 *
 * Returns whatever raw_notifier_chain_register() returns.
 */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
	int status;

	cpu_maps_update_begin();
	status = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();

	return status;
}
138 139 140

#ifdef CONFIG_HOTPLUG_CPU

L
Linus Torvalds 已提交
141 142
EXPORT_SYMBOL(register_cpu_notifier);

143
/*
 * Remove @nb from cpu_chain.  The chain is modified between
 * cpu_maps_update_begin() and cpu_maps_update_done(); the result of
 * raw_notifier_chain_unregister() is not reported to the caller.
 */
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();

	raw_notifier_chain_unregister(&cpu_chain, nb);

	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);

/*
 * Warn about every process that is still assigned to @cpu and has
 * accumulated non-zero user or system time.
 *
 * The process list is walked with tasklist_lock write-held and
 * interrupts disabled.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			/*
			 * Adjacent string literals rather than a
			 * backslash-newline inside one literal: the latter
			 * embeds the source indentation in the log line.
			 */
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

A
Avi Kivity 已提交
168 169 170 171 172
/* Argument bundle handed to take_cpu_down() through its void * parameter. */
struct take_cpu_down_param {
	unsigned long mod;	/* OR-ed into CPU_DYING for the notifier call */
	void *hcpu;		/* passed as the notifier data argument */
};

L
Linus Torvalds 已提交
173
/* Take this CPU down. */
174
/*
 * Take this CPU down.
 *
 * @_param: pointer to a struct take_cpu_down_param.
 *
 * Passed to __stop_machine() by _cpu_down().  Returns the negative value
 * from __cpu_disable() if that fails, otherwise 0 after sending CPU_DYING
 * to cpu_chain.
 */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *tcd = _param;
	int ret = __cpu_disable();

	/* __cpu_disable() ensures this CPU doesn't handle any more interrupts. */
	if (ret < 0)
		return ret;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | tcd->mod, tcd->hcpu);

	/*
	 * Force idle task to run as soon as we yield: it should
	 * immediately notice cpu is offline and die quickly.
	 */
	sched_idle_next();

	return 0;
}

193
/* Requires cpu_add_remove_lock to be held */
194
/*
 * Take @cpu offline.  Requires cpu_add_remove_lock to be held.
 *
 * @cpu:          CPU to bring down.
 * @tasks_frozen: non-zero to OR CPU_TASKS_FROZEN into every notifier event.
 *
 * Returns 0 on success, -EBUSY if @cpu is the only online CPU, -EINVAL if
 * it is not online or a CPU_DOWN_PREPARE notifier objected, -ENOMEM if the
 * temporary cpumask cannot be allocated, or the error from __stop_machine().
 */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	void *hcpu = (void *)(long)cpu;
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};
	cpumask_var_t saved_allowed;
	int notified = 0;
	int err;

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	if (!alloc_cpumask_var(&saved_allowed, GFP_KERNEL))
		return -ENOMEM;

	cpu_hotplug_begin();
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
					hcpu, -1, &notified);
	if (err == NOTIFY_BAD) {
		/*
		 * Presumably limits CPU_DOWN_FAILED to the notifiers that ran
		 * before the objecting one -- confirm against the notifier API.
		 */
		notified--;
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, notified, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
				__func__, cpu);
		err = -EINVAL;
		goto out_release;
	}

	/* Ensure that we are not runnable on dying cpu */
	cpumask_copy(saved_allowed, &current->cpus_allowed);
	set_cpus_allowed_ptr(current,
			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));

	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
	if (err) {
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();

		goto out_allowed;
	}
	BUG_ON(cpu_online(cpu));

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_allowed:
	/* Restore the affinity mask saved before __stop_machine(). */
	set_cpus_allowed_ptr(current, saved_allowed);
out_release:
	cpu_hotplug_done();
	/* CPU_POST_DEAD is sent only on success, after cpu_hotplug_done(). */
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
	free_cpumask_var(saved_allowed);
	return err;
}

270
/*
 * Take @cpu offline.
 *
 * Returns the error from stop_machine_create() if that fails, -EBUSY
 * while cpu_hotplug_disabled is set, otherwise the result of
 * _cpu_down(cpu, 0).
 */
int __ref cpu_down(unsigned int cpu)
{
	int err = stop_machine_create();

	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
	} else {
		cpu_clear(cpu, cpu_active_map);

		/*
		 * Make sure that all cpus did the reschedule and are not
		 * using a stale version of the cpu_active_mask.
		 * This is not strictly necessary because stop_machine()
		 * that we run down the line already provides the required
		 * synchronization. But it's really a side effect and we do
		 * not want to depend on the innards of the stop_machine here.
		 */
		synchronize_sched();

		err = _cpu_down(cpu, 0);

		/* Still online means the CPU did not go down: mark it active again. */
		if (cpu_online(cpu))
			cpu_set(cpu, cpu_active_map);
	}

	cpu_maps_update_done();
	stop_machine_destroy();
	return err;
}
306
EXPORT_SYMBOL(cpu_down);
L
Linus Torvalds 已提交
307 308
#endif /*CONFIG_HOTPLUG_CPU*/

309
/* Requires cpu_add_remove_lock to be held */
310
/*
 * Bring @cpu online.  Requires cpu_add_remove_lock to be held.
 *
 * @cpu:          CPU to bring up.
 * @tasks_frozen: non-zero to OR CPU_TASKS_FROZEN into every notifier event.
 *
 * Returns 0 on success, -EINVAL if @cpu is already online, not present,
 * or a CPU_UP_PREPARE notifier objected, or the error from __cpu_up().
 */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	void *hcpu = (void *)(long)cpu;
	int notified = 0;
	int ret;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
							-1, &notified);
	if (ret == NOTIFY_BAD) {
		notified--;
		printk("%s: attempt to bring up CPU %u failed\n",
				__func__, cpu);
		ret = -EINVAL;
		goto out_cancel;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_cancel;
	BUG_ON(!cpu_online(cpu));

	cpu_set(cpu, cpu_active_map);

	/* The CPU is up: send CPU_ONLINE to the chain. */
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
	goto out_done;

out_cancel:
	/* Reached only with ret != 0. */
	__raw_notifier_call_chain(&cpu_chain,
			CPU_UP_CANCELED | mod, hcpu, notified, NULL);
out_done:
	cpu_hotplug_done();

	return ret;
}

350
/*
 * Bring @cpu online.
 *
 * Returns -EINVAL (after logging an error) if @cpu is not in the possible
 * map, -EBUSY while cpu_hotplug_disabled is set, otherwise the result of
 * _cpu_up(cpu, 0).
 */
int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		/* cpu is unsigned int, so print it with %u. */
		printk(KERN_ERR "can't online cpu %u because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}

377
#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
378
static cpumask_var_t frozen_cpus;
379 380 381

int disable_nonboot_cpus(void)
{
382
	int cpu, first_cpu, error = 0;
383

384
	cpu_maps_update_begin();
R
Rusty Russell 已提交
385
	first_cpu = cpumask_first(cpu_online_mask);
386 387 388
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
R
Rusty Russell 已提交
389
	cpumask_clear(frozen_cpus);
390 391 392 393
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
394
		error = _cpu_down(cpu, 1);
395
		if (!error) {
R
Rusty Russell 已提交
396
			cpumask_set_cpu(cpu, frozen_cpus);
397 398 399 400 401 402 403 404 405 406 407 408
			printk("CPU%d is down\n", cpu);
		} else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
409
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
410
	}
411
	cpu_maps_update_done();
412 413 414
	return error;
}

415
/*
 * Re-enable CPU hotplug and bring every CPU recorded in frozen_cpus back
 * up.  A failure to bring up one CPU is logged and the remaining CPUs are
 * still attempted; frozen_cpus is cleared afterwards.
 */
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	cpu_maps_update_begin();

	/* Allow everyone to use the CPU hotplug again */
	cpu_hotplug_disabled = 0;

	if (!cpumask_empty(frozen_cpus)) {
		printk("Enabling non-boot CPUs ...\n");

		for_each_cpu(cpu, frozen_cpus) {
			error = _cpu_up(cpu, 1);
			if (error)
				printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
			else
				printk("CPU%d is up\n", cpu);
		}

		cpumask_clear(frozen_cpus);
	}

	cpu_maps_update_done();
}
R
Rusty Russell 已提交
438 439 440 441 442 443 444 445

/*
 * Allocate the zero-filled frozen_cpus mask.
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int alloc_frozen_cpus(void)
{
	return alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO) ?
		0 : -ENOMEM;
}
core_initcall(alloc_frozen_cpus);
446
#endif /* CONFIG_PM_SLEEP_SMP */
447

448 449 450 451 452 453 454 455
/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
A
Al Viro 已提交
456
/*
 * Send CPU_STARTING for @cpu to cpu_chain.  With CONFIG_PM_SLEEP_SMP the
 * event becomes CPU_STARTING_FROZEN when @cpu is recorded in frozen_cpus.
 */
void __cpuinit notify_cpu_starting(unsigned int cpu)
{
	unsigned long event = CPU_STARTING;
	void *hcpu = (void *)(long)cpu;

#ifdef CONFIG_PM_SLEEP_SMP
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
		event = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */

	raw_notifier_call_chain(&cpu_chain, event, hcpu);
}

467
#endif /* CONFIG_SMP */
468

469 470 471 472
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
R
Rusty Russell 已提交
473
 * It is used by cpumask_of() to get a constant address to a CPU
474 475
 * mask value that has a single bit set only.
 */
476

477 478 479 480 481
/* cpu_bit_bitmap[0] is empty - so we can back into it */
/*
 * Designated-initializer helpers: MASK_DECLARE_1(x) sets word 0 of row
 * x+1 to the single bit 1UL << x; MASK_DECLARE_2/4/8 expand to 2, 4 and 8
 * consecutive MASK_DECLARE_1 entries starting at x.
 */
#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
482

483 484 485 486 487 488 489
/*
 * Row 0 is left all-zero; row n+1 has only bit n of its first word set.
 * Rows for bits 0..31 are always present, rows for bits 32..63 only when
 * BITS_PER_LONG > 32.
 */
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
492
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
493 494 495

/* Constant NR_CPUS-bit bitmap initialised with CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516

/*
 * Backing storage for the possible/online/present/active cpu masks.
 * Each bitmap is file-local; the rest of the kernel sees it through an
 * exported const struct cpumask pointer, and it is modified through the
 * set_cpu_*() and init_cpu_*() helpers below (no init helper exists for
 * the active map).
 */
#ifdef CONFIG_INIT_ALL_POSSIBLE
/* With CONFIG_INIT_ALL_POSSIBLE the possible map starts out as CPU_BITS_ALL. */
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563

/* Set (@possible true) or clear (@possible false) @cpu in the possible map. */
void set_cpu_possible(unsigned int cpu, bool possible)
{
	struct cpumask *mask = to_cpumask(cpu_possible_bits);

	if (!possible)
		cpumask_clear_cpu(cpu, mask);
	else
		cpumask_set_cpu(cpu, mask);
}

/* Set (@present true) or clear (@present false) @cpu in the present map. */
void set_cpu_present(unsigned int cpu, bool present)
{
	struct cpumask *mask = to_cpumask(cpu_present_bits);

	if (!present)
		cpumask_clear_cpu(cpu, mask);
	else
		cpumask_set_cpu(cpu, mask);
}

/* Set (@online true) or clear (@online false) @cpu in the online map. */
void set_cpu_online(unsigned int cpu, bool online)
{
	struct cpumask *mask = to_cpumask(cpu_online_bits);

	if (!online)
		cpumask_clear_cpu(cpu, mask);
	else
		cpumask_set_cpu(cpu, mask);
}

/* Set (@active true) or clear (@active false) @cpu in the active map. */
void set_cpu_active(unsigned int cpu, bool active)
{
	struct cpumask *mask = to_cpumask(cpu_active_bits);

	if (!active)
		cpumask_clear_cpu(cpu, mask);
	else
		cpumask_set_cpu(cpu, mask);
}

/* Overwrite the whole present map with a copy of @src. */
void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_present_bits), src);
}

/* Overwrite the whole possible map with a copy of @src. */
void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_possible_bits), src);
}

/* Overwrite the whole online map with a copy of @src. */
void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_online_bits), src);
}