/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
16
#include <linux/mutex.h>
17
#include <linux/gfp.h>
L
Linus Torvalds 已提交
18

19
#ifdef CONFIG_SMP
20
/*
 * Serializes the updates to cpu_online_mask, cpu_present_mask.
 * Taken and released through cpu_maps_update_begin()/cpu_maps_update_done().
 */
static DEFINE_MUTEX(cpu_add_remove_lock);
L
Linus Torvalds 已提交
22

23 24 25 26 27 28 29 30 31 32 33 34 35 36
/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	/* Acquire cpu_add_remove_lock; released by cpu_maps_update_done(). */
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	/* Release cpu_add_remove_lock taken by cpu_maps_update_begin(). */
	mutex_unlock(&cpu_add_remove_lock);
}

37
/*
 * Notifier chain called by __cpu_notify() for CPU events.  Entries are
 * added and removed with cpu_add_remove_lock held (see
 * register_cpu_notifier() / unregister_cpu_notifier()).
 */
static RAW_NOTIFIER_HEAD(cpu_chain);
L
Linus Torvalds 已提交
38

39 40 41 42 43
/*
 * If set, cpu_up() and cpu_down() will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock.
 * Set by disable_nonboot_cpus() on success and cleared again by
 * enable_nonboot_cpus().
 */
static int cpu_hotplug_disabled;

44 45
#ifdef CONFIG_HOTPLUG_CPU

46 47 48 49 50 51 52 53
/*
 * State shared between hotplug readers (get_online_cpus() /
 * put_online_cpus()) and the single hotplug writer
 * (cpu_hotplug_begin() / cpu_hotplug_done()).
 */
static struct {
	/* Task currently performing a hotplug operation, or NULL. */
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	/* Number of outstanding get_online_cpus() references. */
	int refcount;
} cpu_hotplug = {
	.active_writer = NULL,
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
	.refcount = 0,
};
59

60
void get_online_cpus(void)
61
{
62 63
	might_sleep();
	if (cpu_hotplug.active_writer == current)
64
		return;
65 66 67 68
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

69
}
70
EXPORT_SYMBOL_GPL(get_online_cpus);
71

72
void put_online_cpus(void)
73
{
74
	if (cpu_hotplug.active_writer == current)
75
		return;
76
	mutex_lock(&cpu_hotplug.lock);
77 78
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
79 80
	mutex_unlock(&cpu_hotplug.lock);

81
}
82
EXPORT_SYMBOL_GPL(put_online_cpus);
83

84 85 86 87 88 89 90
/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
91 92
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
93 94 95 96 97 98 99 100 101 102
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
103
 * get_online_cpus() not an api which is called all that often.
104 105 106 107 108
 *
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;
109 110 111 112 113 114

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
115 116 117 118 119 120 121 122 123 124
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

/*
 * End a hotplug operation started by cpu_hotplug_begin(): clear the
 * writer marker and release cpu_hotplug.lock, which cpu_hotplug_begin()
 * left held.
 */
static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
125 126 127 128 129 130

#else /* #if CONFIG_HOTPLUG_CPU */
/* Empty stand-ins used when CONFIG_HOTPLUG_CPU is not set. */
static void cpu_hotplug_begin(void) {}
static void cpu_hotplug_done(void) {}
#endif	/* #else #if CONFIG_HOTPLUG_CPU */

L
Linus Torvalds 已提交
131
/* Need to know about CPUs going up/down? */
132
int __ref register_cpu_notifier(struct notifier_block *nb)
L
Linus Torvalds 已提交
133
{
134
	int ret;
135
	cpu_maps_update_begin();
136
	ret = raw_notifier_chain_register(&cpu_chain, nb);
137
	cpu_maps_update_done();
138
	return ret;
L
Linus Torvalds 已提交
139
}
140

141 142 143
/*
 * Run the cpu_chain notifiers for event @val with argument @v.
 *
 * @nr_to_call and @nr_calls are passed straight through to
 * __raw_notifier_call_chain(); callers in this file use -1 to call the
 * whole chain and read *@nr_calls back to replay a failure notification
 * to only part of it.
 *
 * Returns the chain result converted by notifier_to_errno().
 */
static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
			int *nr_calls)
{
	int ret;

	ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
					nr_calls);

	return notifier_to_errno(ret);
}

/*
 * Notify the whole cpu_chain of event @val (nr_to_call = -1, no call
 * count requested).  Returns what __cpu_notify() returns.
 */
static int cpu_notify(unsigned long val, void *v)
{
	return __cpu_notify(val, v, -1, NULL);
}

157 158
#ifdef CONFIG_HOTPLUG_CPU

159 160
/*
 * Notify cpu_chain of an event that callbacks are not allowed to veto:
 * a non-zero result from cpu_notify() triggers BUG_ON().
 */
static void cpu_notify_nofail(unsigned long val, void *v)
{
	BUG_ON(cpu_notify(val, v));
}

L
Linus Torvalds 已提交
164 165
EXPORT_SYMBOL(register_cpu_notifier);

166
/*
 * Remove @nb from cpu_chain while holding cpu_add_remove_lock.
 * The result of raw_notifier_chain_unregister() is not reported.
 */
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);

/*
 * Diagnostic run after a CPU has been taken down: print a warning for
 * every process that is still attributed to @cpu, is TASK_RUNNING and
 * has accumulated non-zero user or system time.
 *
 * NOTE(review): the loop only reads task state, yet the write side of
 * tasklist_lock is taken; the read side would presumably be enough -
 * confirm before changing.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

A
Avi Kivity 已提交
191 192 193 194 195
/* Argument bundle handed to take_cpu_down() through __stop_machine(). */
struct take_cpu_down_param {
	unsigned long mod;	/* OR-ed into the event (CPU_TASKS_FROZEN or 0) */
	void *hcpu;		/* CPU number cast to a pointer, as given to notifiers */
};

L
Linus Torvalds 已提交
196
/* Take this CPU down. */
197
static int __ref take_cpu_down(void *_param)
L
Linus Torvalds 已提交
198
{
A
Avi Kivity 已提交
199
	struct take_cpu_down_param *param = _param;
200
	unsigned int cpu = (unsigned long)param->hcpu;
L
Linus Torvalds 已提交
201 202 203 204 205
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
Z
Zwane Mwaikambo 已提交
206
		return err;
L
Linus Torvalds 已提交
207

208
	cpu_notify(CPU_DYING | param->mod, param->hcpu);
209

Z
Zwane Mwaikambo 已提交
210
	return 0;
L
Linus Torvalds 已提交
211 212
}

213
/* Requires cpu_add_remove_lock to be held */
214
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
215
{
216 217
	int err, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
218
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
A
Avi Kivity 已提交
219 220 221 222
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};
L
Linus Torvalds 已提交
223

224 225
	if (num_online_cpus() == 1)
		return -EBUSY;
L
Linus Torvalds 已提交
226

227 228
	if (!cpu_online(cpu))
		return -EINVAL;
L
Linus Torvalds 已提交
229

230
	cpu_hotplug_begin();
231
	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
232
	if (err) {
233
		nr_calls--;
234
		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
L
Linus Torvalds 已提交
235
		printk("%s: attempt to take down CPU %u failed\n",
236
				__func__, cpu);
237
		goto out_release;
L
Linus Torvalds 已提交
238 239
	}

R
Rusty Russell 已提交
240
	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
241
	if (err) {
L
Linus Torvalds 已提交
242
		/* CPU didn't die: tell everyone.  Can't complain. */
243
		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
L
Linus Torvalds 已提交
244

245
		goto out_release;
246
	}
247
	BUG_ON(cpu_online(cpu));
L
Linus Torvalds 已提交
248

249 250 251 252 253 254
	/*
	 * The migration_call() CPU_DYING callback will have removed all
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 */
	BUG_ON(!idle_cpu(cpu));
L
Linus Torvalds 已提交
255 256 257 258 259

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
260
	cpu_notify_nofail(CPU_DEAD | mod, hcpu);
L
Linus Torvalds 已提交
261 262 263

	check_for_tasks(cpu);

264
out_release:
265
	cpu_hotplug_done();
266 267
	if (!err)
		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
268 269 270
	return err;
}

271
/*
 * Take @cpu offline.
 *
 * Wraps _cpu_down() (tasks not frozen) in cpu_add_remove_lock.
 * Returns -EBUSY without doing anything while cpu_hotplug_disabled is
 * set, otherwise whatever _cpu_down() returns.
 */
int __ref cpu_down(unsigned int cpu)
{
	int err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_down(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL(cpu_down);
L
Linus Torvalds 已提交
289 290
#endif /*CONFIG_HOTPLUG_CPU*/

291
/* Requires cpu_add_remove_lock to be held */
292
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
293
{
294
	int ret, nr_calls = 0;
L
Linus Torvalds 已提交
295
	void *hcpu = (void *)(long)cpu;
296
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
L
Linus Torvalds 已提交
297

298 299
	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;
300

301
	cpu_hotplug_begin();
302
	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
303
	if (ret) {
304
		nr_calls--;
L
Linus Torvalds 已提交
305
		printk("%s: attempt to bring up CPU %u failed\n",
306
				__func__, cpu);
L
Linus Torvalds 已提交
307 308 309 310 311 312 313
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
314
	BUG_ON(!cpu_online(cpu));
L
Linus Torvalds 已提交
315 316

	/* Now call notifier in preparation. */
317
	cpu_notify(CPU_ONLINE | mod, hcpu);
L
Linus Torvalds 已提交
318 319 320

out_notify:
	if (ret != 0)
321
		__cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
322
	cpu_hotplug_done();
323 324 325 326

	return ret;
}

327
/*
 * Bring @cpu online.
 *
 * Rejects CPUs that are not in the possible map with -EINVAL.  With
 * CONFIG_MEMORY_HOTPLUG the CPU's memory node is brought online first
 * if necessary, and the zonelists are built if the node's first
 * zonelist entry has no zone.  The actual bring-up is done by
 * _cpu_up() (tasks not frozen) under cpu_add_remove_lock.
 *
 * Returns 0 on success, -EBUSY while cpu_hotplug_disabled is set,
 * -ENOMEM if the node has no pgdat, or the error from
 * mem_online_node() / _cpu_up().
 */
int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

#ifdef	CONFIG_MEMORY_HOTPLUG
	int nid;
	pg_data_t	*pgdat;
#endif

	if (!cpu_possible(cpu)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

#ifdef	CONFIG_MEMORY_HOTPLUG
	nid = cpu_to_node(cpu);
	if (!node_online(nid)) {
		err = mem_online_node(nid);
		if (err)
			return err;
	}

	pgdat = NODE_DATA(nid);
	if (!pgdat) {
		printk(KERN_ERR
			"Can't online cpu %d due to NULL pgdat\n", cpu);
		return -ENOMEM;
	}

	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL);
		mutex_unlock(&zonelists_mutex);
	}
#endif

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}

382
#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
383
/*
 * CPUs taken down by disable_nonboot_cpus(), to be brought back by
 * enable_nonboot_cpus().  Allocated by alloc_frozen_cpus().
 */
static cpumask_var_t frozen_cpus;
384 385 386

int disable_nonboot_cpus(void)
{
387
	int cpu, first_cpu, error = 0;
388

389
	cpu_maps_update_begin();
R
Rusty Russell 已提交
390
	first_cpu = cpumask_first(cpu_online_mask);
391 392
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
393 394
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
R
Rusty Russell 已提交
395
	cpumask_clear(frozen_cpus);
396

397 398 399 400
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
401
		error = _cpu_down(cpu, 1);
402
		if (!error)
R
Rusty Russell 已提交
403
			cpumask_set_cpu(cpu, frozen_cpus);
404
		else {
405 406 407 408 409
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
410

411 412 413 414 415
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
416
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
417
	}
418
	cpu_maps_update_done();
419 420 421
	return error;
}

422 423 424 425 426 427 428 429
/*
 * Weak, empty default; called by enable_nonboot_cpus() before it starts
 * bringing the frozen CPUs back up.
 */
void __weak arch_enable_nonboot_cpus_begin(void)
{
}

/*
 * Weak, empty default; called by enable_nonboot_cpus() after it has
 * tried to bring every frozen CPU back up.
 */
void __weak arch_enable_nonboot_cpus_end(void)
{
}

430
/*
 * Re-enable CPU hotplug and bring back every CPU recorded in
 * frozen_cpus by disable_nonboot_cpus().
 *
 * A CPU that fails to come up is reported with a warning and skipped;
 * the remaining CPUs are still attempted.  frozen_cpus is cleared
 * afterwards.
 */
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpumask_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
R
Rusty Russell 已提交
459 460 461 462 463 464 465 466

/*
 * Allocate the zero-filled frozen_cpus mask.
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int alloc_frozen_cpus(void)
{
	return alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO) ?
		0 : -ENOMEM;
}
core_initcall(alloc_frozen_cpus);
467
#endif /* CONFIG_PM_SLEEP_SMP */
468

469 470 471 472 473 474 475 476
/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
A
Al Viro 已提交
477
void __cpuinit notify_cpu_starting(unsigned int cpu)
478 479 480 481
{
	unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
482
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
483 484
		val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
485
	cpu_notify(val, (void *)(long)cpu);
486 487
}

488
#endif /* CONFIG_SMP */
489

490 491 492 493
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */
497

498 499 500 501 502
/*
 * cpu_bit_bitmap[0] is empty - so we can back into it.
 *
 * MASK_DECLARE_1(x) is a designated initializer that sets word 0 of row
 * x+1 to 1UL << x.  MASK_DECLARE_2/4/8 expand to that initializer for
 * 2, 4 or 8 consecutive values of x.
 */
#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
503

504 505 506 507 508 509 510
/*
 * Row 0 is left all-zero; row n (1 <= n <= BITS_PER_LONG) has only bit
 * n-1 set in its first word.  Rows for bits 32..63 are only emitted
 * when BITS_PER_LONG > 32.
 */
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
514 515 516

/* Constant NR_CPUS-bit bitmap initialised from CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537

/*
 * Backing storage for the possible/online/present/active CPU masks.
 * Each bitmap is private to this file and exported only through a
 * const struct cpumask pointer; it is modified via the set_cpu_*() and
 * init_cpu_*() helpers below.
 *
 * With CONFIG_INIT_ALL_POSSIBLE the possible map starts out as
 * CPU_BITS_ALL; otherwise it has no explicit initializer.
 */
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584

/* Set (@possible true) or clear (@possible false) @cpu in the possible map. */
void set_cpu_possible(unsigned int cpu, bool possible)
{
	if (!possible) {
		cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
		return;
	}

	cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
}

/* Set (@present true) or clear (@present false) @cpu in the present map. */
void set_cpu_present(unsigned int cpu, bool present)
{
	if (!present) {
		cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
		return;
	}

	cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
}

/* Set (@online true) or clear (@online false) @cpu in the online map. */
void set_cpu_online(unsigned int cpu, bool online)
{
	if (!online) {
		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
		return;
	}

	cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
}

/* Set (@active true) or clear (@active false) @cpu in the active map. */
void set_cpu_active(unsigned int cpu, bool active)
{
	if (!active) {
		cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
		return;
	}

	cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
}

/* Overwrite the whole present map with a copy of @src. */
void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_present_bits), src);
}

/* Overwrite the whole possible map with a copy of @src. */
void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_possible_bits), src);
}

/* Overwrite the whole online map with a copy of @src. */
void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_online_bits), src);
}