/*
 * Generic helpers for smp ipi calls
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 *
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/smp.h>
#include <linux/cpu.h>

static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);

static struct {
	struct list_head	queue;
	spinlock_t		lock;
} call_function __cacheline_aligned_in_smp = {
	.queue = LIST_HEAD_INIT(call_function.queue),
	.lock  = __SPIN_LOCK_UNLOCKED(call_function.lock),
};

enum {
	CSD_FLAG_LOCK		= 0x01,
};

struct call_function_data {
	struct call_single_data csd;
	spinlock_t lock;
	unsigned int refs;
	cpumask_var_t cpumask;
};

struct call_single_queue {
	struct list_head list;
	spinlock_t lock;
};

static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
	.lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
};

static int
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	long cpu = (long)hcpu;
	struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
				cpu_to_node(cpu)))
			return NOTIFY_BAD;
		break;

#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:

	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		free_cpumask_var(cfd->cpumask);
		break;
#endif
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
	.notifier_call = hotplug_cfd,
};

static int __cpuinit init_call_single_data(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int i;

	for_each_possible_cpu(i) {
		struct call_single_queue *q = &per_cpu(call_single_queue, i);

		spin_lock_init(&q->lock);
		INIT_LIST_HEAD(&q->list);
	}

	hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
	register_cpu_notifier(&hotplug_cfd_notifier);

	return 0;
}
early_initcall(init_call_single_data);

/*
 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
 *
 * For non-synchronous ipi calls the csd can still be in use by the previous
 * function call. For multi-cpu calls it's even more interesting as we'll have
 * to ensure no other cpu is observing our csd.
 */
static void csd_lock_wait(struct call_single_data *data)
{
	while (data->flags & CSD_FLAG_LOCK)
		cpu_relax();
}

static void csd_lock(struct call_single_data *data)
{
	csd_lock_wait(data);
	data->flags = CSD_FLAG_LOCK;

	/*
	 * prevent CPU from reordering the above assignment to ->flags
	 * with any subsequent assignments to other fields of the
	 * specified call_single_data structure.
	 */
	smp_mb();
}

static void csd_unlock(struct call_single_data *data)
{
	WARN_ON(!(data->flags & CSD_FLAG_LOCK));
	/*
	 * ensure we're all done before releasing data
	 */
	smp_mb();
	data->flags &= ~CSD_FLAG_LOCK;
}
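
/*
 * Illustrative sketch of the intended CSD_FLAG_LOCK hand-off for a
 * non-waiting single call (names below match the helpers above; the
 * interleaving is only approximate):
 *
 *	caller CPU:
 *		csd_lock(data);		(may spin in csd_lock_wait() until
 *					 the previous user of the csd is done)
 *		generic_exec_single(cpu, data, 0);
 *
 *	target CPU, in the IPI handler:
 *		data->func(data->info);
 *		csd_unlock(data);	(csd may now be reused)
 */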

/*
 * Insert a previously allocated call_single_data element for execution
 * on the given CPU. data must already have ->func, ->info, and ->flags set.
 */
static
void generic_exec_single(int cpu, struct call_single_data *data, int wait)
{
	struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
	unsigned long flags;
	int ipi;

	spin_lock_irqsave(&dst->lock, flags);
	ipi = list_empty(&dst->list);
	list_add_tail(&data->list, &dst->list);
	spin_unlock_irqrestore(&dst->lock, flags);

	/*
	 * The list addition should be visible before sending the IPI:
	 * the handler locks the list to pull the entry off it, so the
	 * normal cache coherency rules implied by spinlocks guarantee
	 * that the handler sees the new entry.
	 *
	 * If IPIs can go out of order with respect to the cache coherency
	 * protocol in an architecture, sufficient synchronisation should
	 * be added to arch code to make it appear to obey cache coherency
	 * WRT locking and barrier primitives. Generic code isn't really
	 * equipped to do the right thing...
	 */

	if (ipi)
		arch_send_call_function_single_ipi(cpu);

	if (wait)
		csd_lock_wait(data);
}

/*
 * Invoked by arch to handle an IPI for call function. Must be called with
 * interrupts disabled.
 */
void generic_smp_call_function_interrupt(void)
{
	struct call_function_data *data;
	int cpu = get_cpu();

	/*
	 * Ensure entry is visible on call_function_queue after we have
	 * entered the IPI. See comment in smp_call_function_many.
	 * If we don't have this, then we may miss an entry on the list
	 * and never get another IPI to process it.
	 */
	smp_mb();

	/*
	 * It's ok to use list_for_each_entry_rcu() here even though we may
	 * delete 'data', since list_del_rcu() doesn't clear ->next
	 */
	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
		int refs;

		spin_lock(&data->lock);
		if (!cpumask_test_cpu(cpu, data->cpumask)) {
			spin_unlock(&data->lock);
			continue;
		}
		cpumask_clear_cpu(cpu, data->cpumask);
		spin_unlock(&data->lock);

		data->csd.func(data->csd.info);

		spin_lock(&data->lock);
		WARN_ON(data->refs == 0);
		refs = --data->refs;
		if (!refs) {
			spin_lock(&call_function.lock);
			list_del_rcu(&data->csd.list);
			spin_unlock(&call_function.lock);
		}
		spin_unlock(&data->lock);

		if (refs)
			continue;

		csd_unlock(&data->csd);
	}

	put_cpu();
}
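
/*
 * Sketch of the arch-side hook-up this handler expects; the entry point
 * name below is made up, each architecture wires its call-function vector
 * up differently:
 *
 *	void arch_call_function_ipi_entry(void)
 *	{
 *		irq_enter();
 *		generic_smp_call_function_interrupt();
 *		irq_exit();
 *	}
 */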

/*
 * Invoked by arch to handle an IPI for call function single. Must be called
 * from the arch with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
	struct call_single_queue *q = &__get_cpu_var(call_single_queue);
	LIST_HEAD(list);
	unsigned int data_flags;

	spin_lock(&q->lock);
	list_replace_init(&q->list, &list);
	spin_unlock(&q->lock);
	while (!list_empty(&list)) {
		struct call_single_data *data;
		data = list_entry(list.next, struct call_single_data,
					list);
		list_del(&data->list);

		/*
		 * 'data' can be invalid after this call if flags == 0
		 * (when called through generic_exec_single()),
		 * so save them away before making the call.
		 */
		data_flags = data->flags;

		data->func(data->info);

		/*
		 * Unlocked CSDs are valid through generic_exec_single()
		 */
		if (data_flags & CSD_FLAG_LOCK)
			csd_unlock(data);
	}
}

static DEFINE_PER_CPU(struct call_single_data, csd_data);

/**
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed on the specified CPU.
 *
 * Returns 0 on success, else a negative status code. Note that @wait
 * will be implicitly turned on in case of allocation failures, since
 * we fall back to on-stack allocation.
 */
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
			     int wait)
{
	struct call_single_data d = {
		.flags = 0,
	};
	unsigned long flags;
	/* prevent preemption and reschedule on another processor,
	   as well as CPU removal */
	int me = get_cpu();
	int err = 0;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	if (cpu == me) {
		local_irq_save(flags);
		func(info);
		local_irq_restore(flags);
	} else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
		struct call_single_data *data = &d;

		if (!wait)
			data = &__get_cpu_var(csd_data);

		csd_lock(data);

		data->func = func;
		data->info = info;
		generic_exec_single(cpu, data, wait);
	} else {
		err = -ENXIO;	/* CPU not online */
	}

	put_cpu();
	return err;
}
EXPORT_SYMBOL(smp_call_function_single);
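
/*
 * Usage sketch (remote_add() and the counter are made up; only the calling
 * convention comes from this file):
 *
 *	static void remote_add(void *info)
 *	{
 *		atomic_inc(info);
 *	}
 *
 *	atomic_t hits = ATOMIC_INIT(0);
 *	int err;
 *
 *	err = smp_call_function_single(cpu, remote_add, &hits, 1);
 *
 * With wait == 1 this returns only after remote_add() has run on @cpu;
 * err is -ENXIO if @cpu was not online.
 */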

/**
 * __smp_call_function_single(): Run a function on another CPU
 * @cpu: The CPU to run on.
 * @data: Pre-allocated and setup data structure
 *
 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
 * data structure. Useful for embedding @data inside other structures, for
 * instance.
 *
 */
void __smp_call_function_single(int cpu, struct call_single_data *data,
				int wait)
{
	csd_lock(data);

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(wait && irqs_disabled());

	generic_exec_single(cpu, data, wait);
}
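
/*
 * Sketch of the "embedded @data" pattern described above; struct my_req
 * and my_req_done() are made up:
 *
 *	struct my_req {
 *		struct call_single_data csd;
 *		int cpu;
 *	};
 *
 *	static void my_req_done(void *info)
 *	{
 *		struct my_req *req = info;
 *		...
 *	}
 *
 *	req->csd.flags = 0;
 *	req->csd.func = my_req_done;
 *	req->csd.info = req;
 *	__smp_call_function_single(req->cpu, &req->csd, 0);
 *
 * With wait == 0 the caller must keep req (and thus the csd) alive until
 * my_req_done() has run and the csd has been unlocked on the target CPU.
 */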

/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */
#ifndef arch_send_call_function_ipi_mask
#define arch_send_call_function_ipi_mask(maskp) \
	arch_send_call_function_ipi(*(maskp))
#endif

/**
 * smp_call_function_many(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
 * If @wait is true, then returns once @func has returned. Note that @wait
 * will be implicitly turned on in case of allocation failures, since
 * we fall back to on-stack allocation.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
void smp_call_function_many(const struct cpumask *mask,
			    void (*func)(void *), void *info,
			    bool wait)
{
	struct call_function_data *data;
	unsigned long flags;
	int cpu, next_cpu, me = smp_processor_id();

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	/* So, what's a CPU they want?  Ignoring this one. */
	cpu = cpumask_first_and(mask, cpu_online_mask);
	if (cpu == me)
		cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	/* No online cpus?  We're done. */
	if (cpu >= nr_cpu_ids)
		return;

	/* Do we have another CPU which isn't us? */
	next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
	if (next_cpu == me)
		next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

	/* Fastpath: do that cpu by itself. */
	if (next_cpu >= nr_cpu_ids) {
		smp_call_function_single(cpu, func, info, wait);
		return;
	}

	data = &__get_cpu_var(cfd_data);
	csd_lock(&data->csd);

	spin_lock_irqsave(&data->lock, flags);
	data->csd.func = func;
	data->csd.info = info;
	cpumask_and(data->cpumask, mask, cpu_online_mask);
	cpumask_clear_cpu(me, data->cpumask);
	data->refs = cpumask_weight(data->cpumask);
	spin_lock(&call_function.lock);
	/*
	 * Place entry at the _HEAD_ of the list, so that any cpu still
	 * observing the entry in generic_smp_call_function_interrupt() will
	 * not miss any other list entries.
	 */
	list_add_rcu(&data->csd.list, &call_function.queue);
	spin_unlock(&call_function.lock);
	spin_unlock_irqrestore(&data->lock, flags);

	/*
	 * Make the list addition visible before sending the ipi.
	 * (IPIs must obey or appear to obey normal Linux cache coherency
	 * rules -- see comment in generic_exec_single).
	 */
	smp_mb();

	/* Send a message to all CPUs in the map */
	arch_send_call_function_ipi_mask(data->cpumask);

	/* optionally wait for the CPUs to complete */
	if (wait)
		csd_lock_wait(&data->csd);
}
EXPORT_SYMBOL(smp_call_function_many);
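
/*
 * Usage sketch (drain_queue() and target_mask are made up):
 *
 *	static void drain_queue(void *info)
 *	{
 *		...	runs in interrupt context on every cpu in the mask
 *	}
 *
 *	preempt_disable();
 *	smp_call_function_many(target_mask, drain_queue, NULL, true);
 *	preempt_enable();
 *
 * The current CPU is always skipped; call func directly if it should also
 * run locally.
 */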

/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed on other CPUs.
 *
 * Returns 0.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func. In case of allocation
 * failure, @wait will be implicitly turned on.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function(void (*func)(void *), void *info, int wait)
{
	preempt_disable();
	smp_call_function_many(cpu_online_mask, func, info, wait);
	preempt_enable();
	return 0;
}
EXPORT_SYMBOL(smp_call_function);

void ipi_call_lock(void)
{
	spin_lock(&call_function.lock);
}

void ipi_call_unlock(void)
{
	spin_unlock(&call_function.lock);
}

void ipi_call_lock_irq(void)
{
	spin_lock_irq(&call_function.lock);
}

void ipi_call_unlock_irq(void)
{
	spin_unlock_irq(&call_function.lock);
}