cpu.c 13.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
16
#include <linux/mutex.h>
17
#include <asm/tboot.h>
L
Linus Torvalds 已提交
18

19
#ifdef CONFIG_SMP
20
/*
 * Serializes the updates to cpu_online_mask, cpu_present_mask.
 * Taken by cpu_maps_update_begin() and released by cpu_maps_update_done().
 */
static DEFINE_MUTEX(cpu_add_remove_lock);
L
Linus Torvalds 已提交
22

23
/*
 * Notifier chain invoked for CPU hotplug events (CPU_UP_PREPARE, CPU_ONLINE,
 * CPU_DOWN_PREPARE, CPU_DYING, CPU_DEAD, ...).  Registration and
 * unregistration in this file are done under cpu_add_remove_lock.
 */
static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
L
Linus Torvalds 已提交
24

25 26 27 28 29
/*
 * If set, cpu_up() and cpu_down() return -EBUSY and do nothing.
 *
 * Set by disable_nonboot_cpus() once all non-boot CPUs are down and
 * cleared again by enable_nonboot_cpus().
 *
 * Must always be manipulated under cpu_add_remove_lock.
 */
static int cpu_hotplug_disabled;

30 31 32 33 34 35 36 37
/*
 * Bookkeeping shared between hotplug readers (get_online_cpus() /
 * put_online_cpus()) and the hotplug writer (cpu_hotplug_begin() /
 * cpu_hotplug_done()).
 */
static struct {
	/* Task currently performing a hotplug operation, NULL if none. */
	struct task_struct *active_writer;
	/*
	 * Synchronizes accesses to refcount.
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	struct mutex lock;
	/* Number of readers that have not yet called put_online_cpus(). */
	int refcount;
} cpu_hotplug = {
	.active_writer = NULL,
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
	.refcount = 0,
};
43 44

#ifdef CONFIG_HOTPLUG_CPU
45

46
void get_online_cpus(void)
47
{
48 49
	might_sleep();
	if (cpu_hotplug.active_writer == current)
50
		return;
51 52 53 54
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

55
}
56
EXPORT_SYMBOL_GPL(get_online_cpus);
57

58
void put_online_cpus(void)
59
{
60
	if (cpu_hotplug.active_writer == current)
61
		return;
62
	mutex_lock(&cpu_hotplug.lock);
63 64
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
65 66
	mutex_unlock(&cpu_hotplug.lock);

67
}
68
EXPORT_SYMBOL_GPL(put_online_cpus);
69 70

#endif	/* CONFIG_HOTPLUG_CPU */
71

72 73
/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
 *
 * cpu_maps_update_begin() acquires cpu_add_remove_lock; it must be
 * paired with cpu_maps_update_done().
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

/*
 * Releases cpu_add_remove_lock taken by cpu_maps_update_begin().
 */
void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
93 94
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
95 96 97 98 99 100 101 102 103 104
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
105
 * get_online_cpus() not an api which is called all that often.
106 107 108 109 110
 *
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;
111 112 113 114 115 116

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
117 118 119 120 121 122 123 124 125 126
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

/*
 * Ends a hotplug operation started by cpu_hotplug_begin(): clears the
 * active writer and releases cpu_hotplug.lock, which cpu_hotplug_begin()
 * left held on return.
 */
static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
L
Linus Torvalds 已提交
127
/* Need to know about CPUs going up/down? */
128
int __ref register_cpu_notifier(struct notifier_block *nb)
L
Linus Torvalds 已提交
129
{
130
	int ret;
131
	cpu_maps_update_begin();
132
	ret = raw_notifier_chain_register(&cpu_chain, nb);
133
	cpu_maps_update_done();
134
	return ret;
L
Linus Torvalds 已提交
135
}
136 137 138

#ifdef CONFIG_HOTPLUG_CPU

L
Linus Torvalds 已提交
139 140
EXPORT_SYMBOL(register_cpu_notifier);

141
/*
 * Removes @nb from the CPU hotplug notifier chain under
 * cpu_add_remove_lock.  The result of raw_notifier_chain_unregister()
 * is not reported to the caller.
 */
void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);

/*
 * Warn about every process that is still assigned to @cpu and has
 * accumulated user or system time.  Called from _cpu_down() after the
 * CPU_DEAD notification; purely diagnostic.
 */
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		/*
		 * The message is built from adjacent string literals rather
		 * than a backslash-newline inside one literal, which would
		 * embed the source indentation in the log line.
		 */
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

A
Avi Kivity 已提交
166 167 168 169 170
/* Arguments handed from _cpu_down() to take_cpu_down(). */
struct take_cpu_down_param {
	/* Modifier OR-ed into the notifier event: CPU_TASKS_FROZEN or 0. */
	unsigned long mod;
	/* CPU number cast to a pointer, passed through to the notifiers. */
	void *hcpu;
};

L
Linus Torvalds 已提交
171
/* Take this CPU down. */
172
static int __ref take_cpu_down(void *_param)
L
Linus Torvalds 已提交
173
{
A
Avi Kivity 已提交
174
	struct take_cpu_down_param *param = _param;
L
Linus Torvalds 已提交
175 176 177 178 179
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
Z
Zwane Mwaikambo 已提交
180
		return err;
L
Linus Torvalds 已提交
181

182 183 184
	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);

Z
Zwane Mwaikambo 已提交
185 186 187 188
	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
L
Linus Torvalds 已提交
189 190
}

191
/* Requires cpu_add_remove_lock to be held */
192
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
193
{
194
	int err, nr_calls = 0;
R
Rusty Russell 已提交
195
	cpumask_var_t old_allowed;
196
	void *hcpu = (void *)(long)cpu;
197
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
A
Avi Kivity 已提交
198 199 200 201
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};
L
Linus Torvalds 已提交
202

203 204
	if (num_online_cpus() == 1)
		return -EBUSY;
L
Linus Torvalds 已提交
205

206 207
	if (!cpu_online(cpu))
		return -EINVAL;
L
Linus Torvalds 已提交
208

R
Rusty Russell 已提交
209 210 211
	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
		return -ENOMEM;

212
	cpu_hotplug_begin();
213
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
214
					hcpu, -1, &nr_calls);
L
Linus Torvalds 已提交
215
	if (err == NOTIFY_BAD) {
216
		nr_calls--;
217 218
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
L
Linus Torvalds 已提交
219
		printk("%s: attempt to take down CPU %u failed\n",
220
				__func__, cpu);
221 222
		err = -EINVAL;
		goto out_release;
L
Linus Torvalds 已提交
223 224 225
	}

	/* Ensure that we are not runnable on dying cpu */
R
Rusty Russell 已提交
226 227 228
	cpumask_copy(old_allowed, &current->cpus_allowed);
	set_cpus_allowed_ptr(current,
			     cpumask_of(cpumask_any_but(cpu_online_mask, cpu)));
L
Linus Torvalds 已提交
229

R
Rusty Russell 已提交
230
	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
231
	if (err) {
L
Linus Torvalds 已提交
232
		/* CPU didn't die: tell everyone.  Can't complain. */
233
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
234
					    hcpu) == NOTIFY_BAD)
L
Linus Torvalds 已提交
235 236
			BUG();

R
Rusty Russell 已提交
237
		goto out_allowed;
238
	}
239
	BUG_ON(cpu_online(cpu));
L
Linus Torvalds 已提交
240 241 242 243 244 245 246 247 248

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
249 250
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
L
Linus Torvalds 已提交
251 252 253 254 255
		BUG();

	check_for_tasks(cpu);

out_allowed:
R
Rusty Russell 已提交
256
	set_cpus_allowed_ptr(current, old_allowed);
257
out_release:
258
	cpu_hotplug_done();
259 260 261 262 263
	if (!err) {
		if (raw_notifier_call_chain(&cpu_chain, CPU_POST_DEAD | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();
	}
R
Rusty Russell 已提交
264
	free_cpumask_var(old_allowed);
265 266 267
	return err;
}

268
/**
 * cpu_down - take a CPU offline
 * @cpu: CPU to take down
 *
 * Takes cpu_add_remove_lock, marks @cpu inactive and calls _cpu_down().
 * If @cpu is still online afterwards it is marked active again.
 *
 * Returns 0 on success, the error from stop_machine_create(), -EBUSY if
 * CPU hotplug is currently disabled, or the error from _cpu_down().
 */
int __ref cpu_down(unsigned int cpu)
{
	int err;

	err = stop_machine_create();
	if (err)
		return err;
	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	set_cpu_active(cpu, false);

	/*
	 * Make sure all cpus did the reschedule and are not
	 * using a stale version of the cpu_active_mask.
	 * This is not strictly necessary because stop_machine()
	 * that we run down the line already provides the required
	 * synchronization. But it's really a side effect and we do not
	 * want to depend on the innards of the stop_machine here.
	 */
	synchronize_sched();

	err = _cpu_down(cpu, 0);

	/* Taking the CPU down failed: make it schedulable again. */
	if (cpu_online(cpu))
		set_cpu_active(cpu, true);

out:
	cpu_maps_update_done();
	stop_machine_destroy();
	return err;
}
EXPORT_SYMBOL(cpu_down);
L
Linus Torvalds 已提交
305 306
#endif /*CONFIG_HOTPLUG_CPU*/

307
/* Requires cpu_add_remove_lock to be held */
308
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
L
Linus Torvalds 已提交
309
{
310
	int ret, nr_calls = 0;
L
Linus Torvalds 已提交
311
	void *hcpu = (void *)(long)cpu;
312
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
L
Linus Torvalds 已提交
313

314 315
	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;
316

317
	cpu_hotplug_begin();
318
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
319
							-1, &nr_calls);
L
Linus Torvalds 已提交
320
	if (ret == NOTIFY_BAD) {
321
		nr_calls--;
L
Linus Torvalds 已提交
322
		printk("%s: attempt to bring up CPU %u failed\n",
323
				__func__, cpu);
L
Linus Torvalds 已提交
324 325 326 327 328 329 330 331
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
332
	BUG_ON(!cpu_online(cpu));
L
Linus Torvalds 已提交
333

334
	set_cpu_active(cpu, true);
335

L
Linus Torvalds 已提交
336
	/* Now call notifier in preparation. */
337
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
L
Linus Torvalds 已提交
338 339 340

out_notify:
	if (ret != 0)
341
		__raw_notifier_call_chain(&cpu_chain,
342
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
343
	cpu_hotplug_done();
344 345 346 347

	return ret;
}

348
/**
 * cpu_up - bring a CPU online
 * @cpu: CPU to bring up
 *
 * Returns 0 on success, -EINVAL if @cpu is not a possible CPU, -EBUSY if
 * CPU hotplug is currently disabled, or the error from _cpu_up().
 */
int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;
	if (!cpu_possible(cpu)) {
		/* @cpu is unsigned, so print it with %u like _cpu_up() does. */
		printk(KERN_ERR "can't online cpu %u because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}

375
#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
376
/*
 * CPUs taken down by disable_nonboot_cpus() that enable_nonboot_cpus()
 * should bring back up.  Allocated by alloc_frozen_cpus().
 */
static cpumask_var_t frozen_cpus;
377 378 379

int disable_nonboot_cpus(void)
{
380
	int cpu, first_cpu, error, num_cpus = 0;
381

382 383 384
	error = stop_machine_create();
	if (error)
		return error;
385
	cpu_maps_update_begin();
R
Rusty Russell 已提交
386
	first_cpu = cpumask_first(cpu_online_mask);
387 388 389
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
R
Rusty Russell 已提交
390
	cpumask_clear(frozen_cpus);
391 392 393 394
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
395
		num_cpus++;
396
		error = _cpu_down(cpu, 1);
397
		if (!error) {
R
Rusty Russell 已提交
398
			cpumask_set_cpu(cpu, frozen_cpus);
399 400 401 402 403 404 405
			printk("CPU%d is down\n", cpu);
		} else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
406 407 408
	/* ensure all CPUs have gone into wait-for-SIPI */
	error |= tboot_wait_for_aps(num_cpus);

409 410 411 412 413
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
414
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
415
	}
416
	cpu_maps_update_done();
417
	stop_machine_destroy();
418 419 420
	return error;
}

421
/**
 * enable_nonboot_cpus - bring back the CPUs recorded in frozen_cpus
 *
 * Re-enables CPU hotplug, then tries to bring up every CPU that
 * disable_nonboot_cpus() took down.  A failure for one CPU is logged and
 * does not stop the others.  frozen_cpus is cleared afterwards.
 */
void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpumask_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");
	for_each_cpu(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}
	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
R
Rusty Russell 已提交
444 445 446 447 448 449 450 451

/*
 * Allocate the zero-initialized frozen_cpus mask at core_initcall time.
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int alloc_frozen_cpus(void)
{
	return alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO) ?
		0 : -ENOMEM;
}
core_initcall(alloc_frozen_cpus);
452
#endif /* CONFIG_PM_SLEEP_SMP */
453

454 455 456 457 458 459 460 461
/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
A
Al Viro 已提交
462
void __cpuinit notify_cpu_starting(unsigned int cpu)
463 464 465 466
{
	unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
R
Rusty Russell 已提交
467
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
468 469 470 471 472
		val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
	raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
}

473
#endif /* CONFIG_SMP */
474

475 476 477 478
/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents the binary values of 1<<nr for all NR_CPUS bits.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */
482

483 484 485 486 487
/*
 * cpu_bit_bitmap[0] is empty - so we can back into it.
 *
 * MASK_DECLARE_1(x) is a designated initializer that sets word 0 of row
 * x+1 to the single bit 1UL << x.  MASK_DECLARE_2/4/8 expand to 2, 4 and
 * 8 consecutive MASK_DECLARE_1 entries starting at x.
 */
#define MASK_DECLARE_1(x)	[x+1][0] = 1UL << (x)
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)
488

489 490 491 492 493 494 495
/*
 * Row 0 is all zeroes; row n (1 <= n <= BITS_PER_LONG) has only bit n-1
 * set in its first word.  The first 32 rows are always initialized; rows
 * for bits 32..63 are added when BITS_PER_LONG > 32.
 */
const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
499 500 501

/* Constant NR_CPUS-bit bitmap initialized with CPU_BITS_ALL. */
const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);
502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522

/*
 * Backing storage for cpu_possible_mask.  With CONFIG_INIT_ALL_POSSIBLE
 * the bitmap starts out as CPU_BITS_ALL; otherwise it starts out empty
 * and is filled in through set_cpu_possible() / init_cpu_possible().
 */
#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
/* Read-only view of cpu_possible_bits exported to the rest of the kernel. */
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

/*
 * Backing storage for cpu_online_mask; written through set_cpu_online()
 * and init_cpu_online(), read through the const pointer below.
 */
static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

/*
 * Backing storage for cpu_present_mask; written through set_cpu_present()
 * and init_cpu_present(), read through the const pointer below.
 */
static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

/*
 * Backing storage for cpu_active_mask; written through set_cpu_active(),
 * read through the const pointer below.
 */
static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569

/* Set (@possible true) or clear (@possible false) @cpu in the possible map. */
void set_cpu_possible(unsigned int cpu, bool possible)
{
	struct cpumask *mask = to_cpumask(cpu_possible_bits);

	if (!possible) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Set (@present true) or clear (@present false) @cpu in the present map. */
void set_cpu_present(unsigned int cpu, bool present)
{
	struct cpumask *mask = to_cpumask(cpu_present_bits);

	if (!present) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Set (@online true) or clear (@online false) @cpu in the online map. */
void set_cpu_online(unsigned int cpu, bool online)
{
	struct cpumask *mask = to_cpumask(cpu_online_bits);

	if (!online) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Set (@active true) or clear (@active false) @cpu in the active map. */
void set_cpu_active(unsigned int cpu, bool active)
{
	struct cpumask *mask = to_cpumask(cpu_active_bits);

	if (!active) {
		cpumask_clear_cpu(cpu, mask);
		return;
	}
	cpumask_set_cpu(cpu, mask);
}

/* Overwrite the whole present map with a copy of @src. */
void init_cpu_present(const struct cpumask *src)
{
	struct cpumask *dst = to_cpumask(cpu_present_bits);

	cpumask_copy(dst, src);
}

/* Overwrite the whole possible map with a copy of @src. */
void init_cpu_possible(const struct cpumask *src)
{
	struct cpumask *dst = to_cpumask(cpu_possible_bits);

	cpumask_copy(dst, src);
}

/* Overwrite the whole online map with a copy of @src. */
void init_cpu_online(const struct cpumask *src)
{
	struct cpumask *dst = to_cpumask(cpu_online_bits);

	cpumask_copy(dst, src);
}