/*
 * Read-Copy Update mechanism for mutual exclusion
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright IBM Corporation, 2001
 *
 * Authors: Dipankar Sarma <dipankar@in.ibm.com>
 *	    Manfred Spraul <manfred@colorfullife.com>
 *
 * Based on the original work by Paul McKenney <paulmck@us.ibm.com>
 * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
 * Papers:
 * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
 * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
 *
 * For detailed explanation of Read-Copy Update mechanism see -
 *		http://lse.sourceforge.net/locking/rcupdate.html
 *
 */
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <asm/atomic.h>
#include <linux/bitops.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/kernel_stat.h>

#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
	STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key);
EXPORT_SYMBOL_GPL(rcu_lock_map);
#endif

static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
static atomic_t rcu_barrier_cpu_count;
static DEFINE_MUTEX(rcu_barrier_mutex);
static struct completion rcu_barrier_completion;
int rcu_scheduler_active __read_mostly;

static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
static struct rcu_head rcu_migrate_head[3];
static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);

/*
 * Awaken the corresponding synchronize_rcu() instance now that a
 * grace period has elapsed.
 */
void wakeme_after_rcu(struct rcu_head  *head)
{
	struct rcu_synchronize *rcu;

	rcu = container_of(head, struct rcu_synchronize, head);
	complete(&rcu->completion);
}

#ifdef CONFIG_TREE_PREEMPT_RCU

/**
 * synchronize_rcu - wait until a grace period has elapsed.
 *
 * Control will return to the caller some time after a full grace
 * period has elapsed, in other words after all currently executing RCU
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
 * and may be nested.
 */
void synchronize_rcu(void)
{
	struct rcu_synchronize rcu;

	if (!rcu_scheduler_active)
		return;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
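
/*
 * Illustration (a minimal sketch, with made-up names): the usual way
 * synchronize_rcu() is combined with rcu_read_lock()/rcu_dereference()
 * on the read side and rcu_assign_pointer() on the update side.  The
 * structure, the global pointer and the helpers below are hypothetical,
 * updaters are assumed to be serialized by the caller, and kfree()
 * additionally requires <linux/slab.h>:
 *
 *	struct foo {
 *		int a;
 *	};
 *	static struct foo *global_foo;
 *
 *	int foo_get_a(void)
 *	{
 *		struct foo *p;
 *		int ret = 0;
 *
 *		rcu_read_lock();
 *		p = rcu_dereference(global_foo);
 *		if (p)
 *			ret = p->a;
 *		rcu_read_unlock();
 *		return ret;
 *	}
 *
 *	void foo_replace(struct foo *new_fp)
 *	{
 *		struct foo *old_fp = global_foo;
 *
 *		rcu_assign_pointer(global_foo, new_fp);
 *		synchronize_rcu();
 *		kfree(old_fp);
 *	}
 */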

#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */

/**
 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu-sched
 * grace period has elapsed, in other words after all currently executing
 * rcu-sched read-side critical sections have completed.   These read-side
 * critical sections are delimited by rcu_read_lock_sched() and
 * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
 * local_irq_disable(), and so on may be used in place of
 * rcu_read_lock_sched().
 *
 * This means that all preempt_disable code sequences, including NMI and
 * hardware-interrupt handlers, in progress on entry will have completed
 * before this primitive returns.  However, this does not guarantee that
 * softirq handlers will have completed, since in some kernels, these
 * handlers can run in process context, and can block.
 *
 * This primitive provides the guarantees made by the (now removed)
 * synchronize_kernel() API.  In contrast, synchronize_rcu() only
 * guarantees that rcu_read_lock() sections will have completed.
 * In "classic RCU", these two guarantees happen to be one and
 * the same, but can differ in realtime RCU implementations.
 */
void synchronize_sched(void)
{
	struct rcu_synchronize rcu;

	if (rcu_blocking_is_gp())
		return;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu_sched(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
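
/*
 * Illustration (a minimal sketch, with made-up names): synchronize_sched()
 * paired with a preemption-disabled read side.  Anything that disables
 * preemption, such as rcu_read_lock_sched(), preempt_disable(), or a
 * hardware-interrupt handler, serves as the read-side critical section;
 * kfree() again requires <linux/slab.h>:
 *
 *	static int *shared_cfg;
 *
 *	int read_cfg(void)
 *	{
 *		int *p, val = 0;
 *
 *		rcu_read_lock_sched();
 *		p = rcu_dereference(shared_cfg);
 *		if (p)
 *			val = *p;
 *		rcu_read_unlock_sched();
 *		return val;
 *	}
 *
 *	void install_cfg(int *new_cfg)
 *	{
 *		int *old_cfg = shared_cfg;
 *
 *		rcu_assign_pointer(shared_cfg, new_cfg);
 *		synchronize_sched();
 *		kfree(old_cfg);
 *	}
 */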

/**
 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
 *
 * Control will return to the caller some time after a full rcu_bh grace
 * period has elapsed, in other words after all currently executing rcu_bh
 * read-side critical sections have completed.  RCU read-side critical
 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
 * and may be nested.
 */
void synchronize_rcu_bh(void)
{
	struct rcu_synchronize rcu;

	if (rcu_blocking_is_gp())
		return;

	init_completion(&rcu.completion);
	/* Will wake me after RCU finished. */
	call_rcu_bh(&rcu.head, wakeme_after_rcu);
	/* Wait for it. */
	wait_for_completion(&rcu.completion);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
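
/*
 * Illustration (a minimal sketch, with made-up names): an rcu_bh read-side
 * critical section, typically protecting data that is also accessed from
 * softirq context:
 *
 *	rcu_read_lock_bh();
 *	p = rcu_dereference(softirq_shared_ptr);
 *	if (p)
 *		handle(p);
 *	rcu_read_unlock_bh();
 *
 * The matching updater publishes the new structure with rcu_assign_pointer()
 * and then calls synchronize_rcu_bh() (or call_rcu_bh()) before freeing the
 * old one, exactly as in the synchronize_rcu() example above.
 */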

static void rcu_barrier_callback(struct rcu_head *notused)
{
	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
		complete(&rcu_barrier_completion);
}

/*
 * Called with preemption disabled, and from cross-cpu IRQ context.
 */
static void rcu_barrier_func(void *type)
{
	int cpu = smp_processor_id();
	struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
	void (*call_rcu_func)(struct rcu_head *head,
			      void (*func)(struct rcu_head *head));

	atomic_inc(&rcu_barrier_cpu_count);
	call_rcu_func = type;
	call_rcu_func(head, rcu_barrier_callback);
}

static inline void wait_migrated_callbacks(void)
{
	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
	smp_mb(); /* In case we didn't sleep. */
}

/*
 * Orchestrate the specified type of RCU barrier, waiting for all
 * RCU callbacks of the specified type to complete.
 */
static void _rcu_barrier(void (*call_rcu_func)(struct rcu_head *head,
					       void (*func)(struct rcu_head *head)))
{
	BUG_ON(in_interrupt());
	/* Take rcu_barrier_mutex to protect against CPU hotplug */
	mutex_lock(&rcu_barrier_mutex);
	init_completion(&rcu_barrier_completion);
	/*
	 * Initialize rcu_barrier_cpu_count to 1, then invoke
	 * rcu_barrier_func() on each CPU, so that each CPU also has
	 * incremented rcu_barrier_cpu_count.  Only then is it safe to
	 * decrement rcu_barrier_cpu_count -- otherwise the first CPU
	 * might complete its grace period before all of the other CPUs
	 * did their increment, causing this function to return too
	 * early.
	 */
	atomic_set(&rcu_barrier_cpu_count, 1);
	on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1);
	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
		complete(&rcu_barrier_completion);
	wait_for_completion(&rcu_barrier_completion);
	mutex_unlock(&rcu_barrier_mutex);
	wait_migrated_callbacks();
}

/**
 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
 */
void rcu_barrier(void)
{
	_rcu_barrier(call_rcu);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
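
/*
 * Illustration (a minimal sketch, with made-up names): the common use of
 * rcu_barrier() on a module-unload path.  The module first stops posting
 * new call_rcu() callbacks, then waits for the callbacks already queued,
 * and only then tears down the resources those callbacks touch:
 *
 *	static void __exit foo_exit(void)
 *	{
 *		foo_unregister();
 *		rcu_barrier();
 *		kmem_cache_destroy(foo_cachep);
 *	}
 */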

/**
 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
 */
void rcu_barrier_bh(void)
{
	_rcu_barrier(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);

/**
 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
 */
void rcu_barrier_sched(void)
{
	_rcu_barrier(call_rcu_sched);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);

static void rcu_migrate_callback(struct rcu_head *notused)
{
	if (atomic_dec_and_test(&rcu_migrate_type_count))
		wake_up(&rcu_migrate_wq);
}

static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
		unsigned long action, void *hcpu)
{
	rcu_cpu_notify(self, action, hcpu);
	if (action == CPU_DYING) {
		/*
		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
		 * returns, all online cpus have queued rcu_barrier_func(),
		 * and the dead cpu (if it exists) queues rcu_migrate_callback()s.
		 *
		 * These callbacks ensure _rcu_barrier() waits for all
		 * RCU callbacks of the specified type to complete.
		 */
		atomic_set(&rcu_migrate_type_count, 3);
		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
	} else if (action == CPU_DOWN_PREPARE) {
		/* Don't need to wait until next removal operation. */
		/* rcu_migrate_head is protected by cpu_add_remove_lock */
		wait_migrated_callbacks();
	}

	return NOTIFY_OK;
}

void __init rcu_init(void)
{
	int i;

	__rcu_init();
	cpu_notifier(rcu_barrier_cpu_hotplug, 0);

	/*
	 * We don't need protection against CPU-hotplug here because
	 * this is called early in boot, before either interrupts
	 * or the scheduler are operational.
	 */
	for_each_online_cpu(i)
		rcu_barrier_cpu_hotplug(NULL, CPU_UP_PREPARE, (void *)(long)i);
}

void rcu_scheduler_starting(void)
{
	WARN_ON(num_online_cpus() != 1);
	WARN_ON(nr_context_switches() > 0);
	rcu_scheduler_active = 1;
}