cpu.c
/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>

/* Serializes the updates to cpu_online_map, cpu_present_map */
static DEFINE_MUTEX(cpu_add_remove_lock);

static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

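/*
 * Refcount-based "readers vs. one hotplug writer" state: readers pin the
 * online map by bumping refcount via get_online_cpus(), while the task
 * performing a hotplug operation (the writer) waits on writer_queue until
 * the refcount drains to zero.  See cpu_hotplug_begin() below.
 */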
static struct {
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	int refcount;
	wait_queue_head_t writer_queue;
} cpu_hotplug;

#define writer_exists() (cpu_hotplug.active_writer != NULL)

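/* One-time boot setup (hence __init) of the state above, before any
 * readers or hotplug writers can appear. */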
void __init cpu_hotplug_init(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_init(&cpu_hotplug.lock);
	cpu_hotplug.refcount = 0;
	init_waitqueue_head(&cpu_hotplug.writer_queue);
}

#ifdef CONFIG_HOTPLUG_CPU

void get_online_cpus(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);

}
EXPORT_SYMBOL_GPL(get_online_cpus);

void put_online_cpus(void)
{
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount--;

	if (unlikely(writer_exists()) && !cpu_hotplug.refcount)
		wake_up(&cpu_hotplug.writer_queue);

	mutex_unlock(&cpu_hotplug.lock);

}
EXPORT_SYMBOL_GPL(put_online_cpus);
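
/*
 * Illustrative reader-side usage (a sketch only, not part of this file;
 * do_per_cpu_work() is a hypothetical helper):
 *
 *	int cpu;
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_per_cpu_work(cpu);
 *	put_online_cpus();
 *
 * While the reference is held no hotplug operation can complete, so
 * cpu_online_map stays stable for the caller.
 */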

#endif	/* CONFIG_HOTPLUG_CPU */

/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_map, cpu_present_map.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock finds the refcount
 *   non zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * get_online_cpus() is not an API which is called all that often.
 *
 */
static void cpu_hotplug_begin(void)
{
	DECLARE_WAITQUEUE(wait, current);

	mutex_lock(&cpu_hotplug.lock);

	cpu_hotplug.active_writer = current;
	add_wait_queue_exclusive(&cpu_hotplug.writer_queue, &wait);
	while (cpu_hotplug.refcount) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
		mutex_lock(&cpu_hotplug.lock);
	}
	remove_wait_queue_locked(&cpu_hotplug.writer_queue, &wait);
}

static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}
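
/*
 * Writer side, as used by the hotplug paths below: an operation runs
 *
 *	cpu_maps_update_begin();
 *	cpu_hotplug_begin();
 *	... update cpu_online_map / cpu_present_map ...
 *	cpu_hotplug_done();
 *	cpu_maps_update_done();
 *
 * which is what cpu_up()/cpu_down() do via _cpu_up()/_cpu_down().
 */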

/* Need to know about CPUs going up/down? */
int __cpuinit register_cpu_notifier(struct notifier_block *nb)
{
	int ret;
	cpu_maps_update_begin();
	ret = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU

EXPORT_SYMBOL(register_cpu_notifier);

void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);
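
/*
 * Illustrative notifier usage (a sketch only; my_cpu_callback and
 * my_cpu_notifier are hypothetical names, not part of this file):
 *
 *	static int __cpuinit my_cpu_callback(struct notifier_block *nfb,
 *					     unsigned long action, void *hcpu)
 *	{
 *		unsigned int cpu = (unsigned long)hcpu;
 *
 *		switch (action) {
 *		case CPU_ONLINE:
 *		case CPU_ONLINE_FROZEN:
 *			... set up per-cpu state for "cpu" ...
 *			break;
 *		case CPU_DEAD:
 *		case CPU_DEAD_FROZEN:
 *			... tear it down again ...
 *			break;
 *		}
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_cpu_notifier __cpuinitdata = {
 *		.notifier_call = my_cpu_callback,
 *	};
 *
 *	register_cpu_notifier(&my_cpu_notifier);
 */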

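/* After a cpu has gone down, warn about any task still recorded as being
 * on it that has accumulated cpu time. */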
static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu &&
		    (!cputime_eq(p->utime, cputime_zero) ||
		     !cputime_eq(p->stime, cputime_zero)))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
170
				(state = %ld, flags = %x) \n",
171 172
				 p->comm, task_pid_nr(p), cpu,
				 p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

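/* Argument bundle for take_cpu_down(): __stop_machine_run() passes only a
 * single void *, so the notifier modifier and the hcpu cookie travel
 * together. */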
struct take_cpu_down_param {
	unsigned long mod;
	void *hcpu;
};

/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	int err;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);
	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();
	return 0;
}

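/*
 * Outline of the teardown below: send CPU_DOWN_PREPARE, move the current
 * task off the dying cpu, run take_cpu_down() on it under stop_machine,
 * wait for the idle task to take over, __cpu_die() it, then send CPU_DEAD.
 */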
/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	int err, nr_calls = 0;
	struct task_struct *p;
	cpumask_t old_allowed, tmp;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
					hcpu, -1, &nr_calls);
	if (err == NOTIFY_BAD) {
		nr_calls--;
		__raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					  hcpu, nr_calls, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
				__FUNCTION__, cpu);
		err = -EINVAL;
		goto out_release;
	}

	/* Ensure that we are not runnable on dying cpu */
	old_allowed = current->cpus_allowed;
	cpus_setall(tmp);
	cpu_clear(cpu, tmp);
	set_cpus_allowed_ptr(current, &tmp);

	p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);

	if (IS_ERR(p) || cpu_online(cpu)) {
		/* CPU didn't die: tell everyone.  Can't complain. */
		if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
					    hcpu) == NOTIFY_BAD)
			BUG();

		if (IS_ERR(p)) {
			err = PTR_ERR(p);
			goto out_allowed;
		}
		goto out_thread;
	}

	/* Wait for it to sleep (leaving idle task). */
	while (!idle_cpu(cpu))
		yield();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone.  Too late to complain. */
	if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
				    hcpu) == NOTIFY_BAD)
		BUG();

	check_for_tasks(cpu);

out_thread:
	err = kthread_stop(p);
out_allowed:
	set_cpus_allowed_ptr(current, &old_allowed);
out_release:
	cpu_hotplug_done();
	return err;
}

int __ref cpu_down(unsigned int cpu)
{
	int err = 0;

	cpu_maps_update_begin();
	if (cpu_hotplug_disabled)
		err = -EBUSY;
	else
		err = _cpu_down(cpu, 0);

	cpu_maps_update_done();
	return err;
}
#endif /*CONFIG_HOTPLUG_CPU*/

/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	int ret, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();
	ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
							-1, &nr_calls);
	if (ret == NOTIFY_BAD) {
		nr_calls--;
		printk("%s: attempt to bring up CPU %u failed\n",
				__FUNCTION__, cpu);
		ret = -EINVAL;
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu);
	if (ret != 0)
		goto out_notify;
	BUG_ON(!cpu_online(cpu));

	/* Now call notifier in preparation. */
	raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);

out_notify:
	if (ret != 0)
		__raw_notifier_call_chain(&cpu_chain,
				CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
	cpu_hotplug_done();

	return ret;
}

int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;
	if (!cpu_isset(cpu, cpu_possible_map)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) || defined(CONFIG_S390)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

	cpu_maps_update_begin();
	if (cpu_hotplug_disabled)
		err = -EBUSY;
	else
		err = _cpu_up(cpu, 0);

	cpu_maps_update_done();
	return err;
}

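/*
 * Illustrative use of the entry points above (a sketch only; with
 * CONFIG_HOTPLUG_CPU the sysfs "online" attribute does roughly this):
 *
 *	err = cpu_down(3);
 *	if (!err)
 *		printk(KERN_INFO "cpu 3 is offline\n");
 *	...
 *	err = cpu_up(3);
 *
 * Both paths take cpu_add_remove_lock and return -EBUSY while
 * cpu_hotplug_disabled is set.
 */
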
#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_t frozen_cpus;

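/*
 * Suspend/resume support: disable_nonboot_cpus() takes every cpu except
 * the boot cpu down (with tasks already frozen, hence the *_FROZEN
 * notifications), remembering them in frozen_cpus so that
 * enable_nonboot_cpus() can bring the same set back up afterwards.
 */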
int disable_nonboot_cpus(void)
{
	int cpu, first_cpu, error = 0;

	cpu_maps_update_begin();
	first_cpu = first_cpu(cpu_online_map);
	/* We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpus_clear(frozen_cpus);
	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		error = _cpu_down(cpu, 1);
		if (!error) {
			cpu_set(cpu, frozen_cpus);
			printk("CPU%d is down\n", cpu);
		} else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}
	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
	}
	cpu_maps_update_done();
	return error;
}

void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpus_empty(frozen_cpus))
		goto out;

	printk("Enabling non-boot CPUs ...\n");
	for_each_cpu_mask(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk("CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}
	cpus_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
#endif /* CONFIG_PM_SLEEP_SMP */