spinlock.c 9.6 KB
Newer Older
1 2 3 4 5 6
/*
 * Split spinlock implementation out into its own file, so it can be
 * compiled in a FTRACE-compatible way.
 */
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
J
Jeremy Fitzhardinge 已提交
7 8
#include <linux/debugfs.h>
#include <linux/log2.h>
9
#include <linux/gfp.h>
10
#include <linux/slab.h>
11 12 13 14 15 16 17

#include <asm/paravirt.h>

#include <xen/interface/xen.h>
#include <xen/events.h>

#include "xen-ops.h"
J
Jeremy Fitzhardinge 已提交
18 19
#include "debugfs.h"

20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
/*
 * Per-CPU IRQ bound to XEN_SPIN_UNLOCK_VECTOR by xen_init_lock_cpu();
 * -1 means no kicker IRQ has been set up for that CPU.
 */
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
/* Per-CPU kasprintf()'d name of the kicker IRQ; freed by xen_uninit_lock_cpu(). */
static DEFINE_PER_CPU(char *, irq_name);
/* PV spinlocks default to on; the "xen_nopvspin" early parameter clears this. */
static bool xen_pvspin = true;

#ifdef CONFIG_QUEUED_SPINLOCK

#include <asm/qspinlock.h>

/*
 * Kick @cpu by sending it the XEN_SPIN_UNLOCK_VECTOR IPI.
 * Installed as pv_lock_ops.kick by xen_init_spinlocks().
 */
static void xen_qlock_kick(int cpu)
{
	xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
}

/*
 * Halt the current CPU & release it back to the host
 *
 * @byte: location being watched
 * @val:  value of *@byte for which blocking is still wanted
 *
 * Returns without blocking when this CPU has no kicker IRQ yet, or when
 * *@byte no longer equals @val after the pending bit has been cleared.
 * Otherwise polls the kicker IRQ via xen_poll_irq().
 * Installed as pv_lock_ops.wait by xen_init_spinlocks().
 */
static void xen_qlock_wait(u8 *byte, u8 val)
{
	int irq = __this_cpu_read(lock_kicker_irq);

	/* If kicker interrupts not initialized yet, just spin */
	if (irq == -1)
		return;

	/* clear pending */
	xen_clear_irq_pending(irq);
	/* Compiler barrier: keep the read of *byte after the clear above. */
	barrier();

	/*
	 * We check the byte value after clearing pending IRQ to make sure
	 * that we won't miss a wakeup event because of the clearing.
	 *
	 * The sync_clear_bit() call in xen_clear_irq_pending() is atomic.
	 * So it is effectively a memory barrier for x86.
	 */
	if (READ_ONCE(*byte) != val)
		return;

	/*
	 * If an interrupt happens here, it will leave the wakeup irq
	 * pending, which will cause xen_poll_irq() to return
	 * immediately.
	 */

	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
	xen_poll_irq(irq);
}

#else /* CONFIG_QUEUED_SPINLOCK */

70 71 72 73 74 75 76 77
/* Indices into xen_spinlock_stats.contention_stats[]. */
enum xen_contention_stat {
	TAKEN_SLOW,		/* xen_lock_spinning() got past the kicker-IRQ check */
	TAKEN_SLOW_PICKUP,	/* lock head matched our ticket on the re-check; no blocking */
	TAKEN_SLOW_SPURIOUS,	/* xen_poll_irq() returned with the kicker IRQ not pending */
	RELEASED_SLOW,		/* xen_unlock_kick() was called */
	RELEASED_SLOW_KICKED,	/* xen_unlock_kick() found a matching waiter and sent the IPI */
	NR_CONTENTION_STATS	/* number of counters above; sizes the array */
};
J
Jeremy Fitzhardinge 已提交
78 79


80
#ifdef CONFIG_XEN_DEBUG_FS
81
/* Number of regular histogram buckets; one extra slot collects everything larger. */
#define HISTO_BUCKETS	30
82 83 84
static struct xen_spinlock_stats
{
	u32 contention_stats[NR_CONTENTION_STATS];
85 86
	u32 histo_spin_blocked[HISTO_BUCKETS+1];
	u64 time_blocked;
J
Jeremy Fitzhardinge 已提交
87 88 89 90 91 92
} spinlock_stats;

/*
 * Reset request flag, writable through the debugfs "zero_stats" file:
 * a non-zero value makes check_zero() clear spinlock_stats.
 */
static u8 zero_stats;

static inline void check_zero(void)
{
93
	u8 ret;
94
	u8 old = READ_ONCE(zero_stats);
95 96 97 98 99
	if (unlikely(old)) {
		ret = cmpxchg(&zero_stats, old, 0);
		/* This ensures only one fellow resets the stat */
		if (ret == old)
			memset(&spinlock_stats, 0, sizeof(spinlock_stats));
J
Jeremy Fitzhardinge 已提交
100 101 102
	}
}

103 104 105 106 107
/*
 * Bump the contention counter selected by @var by @val, after honouring
 * any pending request to zero the statistics.
 */
static inline void add_stats(enum xen_contention_stat var, u32 val)
{
	u32 *counter = &spinlock_stats.contention_stats[var];

	check_zero();
	*counter += val;
}
J
Jeremy Fitzhardinge 已提交
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125

/* Timestamp the start of a blocked interval using the Xen clocksource. */
static inline u64 spin_time_start(void)
{
	return xen_clocksource_read();
}

/*
 * Record one interval of length @delta in the histogram @array.
 *
 * Bucket i counts intervals with ilog2(delta) == i; anything at or beyond
 * HISTO_BUCKETS goes into the final overflow slot array[HISTO_BUCKETS].
 * @array must therefore have HISTO_BUCKETS + 1 entries.
 */
static void __spin_time_accum(u64 delta, u32 *array)
{
	/*
	 * ilog2(0) is undefined (in practice it yields -1, which would be
	 * counted in the overflow slot), so file a zero-length interval in
	 * the first bucket explicitly.
	 */
	unsigned index = delta ? ilog2(delta) : 0;

	check_zero();

	if (index < HISTO_BUCKETS)
		array[index]++;
	else
		array[HISTO_BUCKETS]++;
}

126
/*
 * Account the time elapsed since @start (a spin_time_start() value) as
 * blocked time: one histogram sample plus the running total.
 */
static inline void spin_time_accum_blocked(u64 start)
{
	/*
	 * Keep the full 64-bit difference: both the histogram helper and
	 * time_blocked are u64, so narrowing to u32 here would silently
	 * wrap long intervals.
	 */
	u64 delta = xen_clocksource_read() - start;

	__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
	spinlock_stats.time_blocked += delta;
}
#else  /* !CONFIG_XEN_DEBUG_FS */
134 135 136
/* Statistics are compiled out without CONFIG_XEN_DEBUG_FS: nothing to count. */
static inline void add_stats(enum xen_contention_stat var, u32 val)
{
}
J
Jeremy Fitzhardinge 已提交
137 138 139 140 141 142

/* No timing without CONFIG_XEN_DEBUG_FS; the returned value is never used for accounting. */
static inline u64 spin_time_start(void)
{
	return 0;
}

143
/* Blocked-time accounting is compiled out without CONFIG_XEN_DEBUG_FS. */
static inline void spin_time_accum_blocked(u64 start)
{
}
#endif  /* CONFIG_XEN_DEBUG_FS */
147

148 149 150
/*
 * What a CPU in the slow path is waiting for: ticket @want on @lock.
 * @lock is only set non-NULL once @want is valid; see the ordering
 * protocol in xen_lock_spinning().
 */
struct xen_lock_waiting {
	struct arch_spinlock *lock;
	__ticket_t want;
};

153 154
/* Per-CPU (lock, want) pair published by xen_lock_spinning() and scanned by xen_unlock_kick(). */
static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
/* CPUs currently between the set and clear calls in xen_lock_spinning(). */
static cpumask_t waiting_cpus;
155

156
/*
 * Slow path for a CPU waiting for ticket @want on @lock.
 * Installed as pv_lock_ops.lock_spinning by xen_init_spinlocks().
 *
 * Publishes (@lock, @want) in this CPU's lock_waiting slot, marks the CPU
 * in waiting_cpus, flags the lock as being in the slow path, and then -
 * unless the ticket has come up in the meantime - blocks in xen_poll_irq()
 * on this CPU's kicker IRQ until xen_unlock_kick() (or a spurious event)
 * wakes it. Returns immediately if no kicker IRQ has been set up yet.
 */
__visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
{
	int irq = __this_cpu_read(lock_kicker_irq);
	struct xen_lock_waiting *w = this_cpu_ptr(&lock_waiting);
	int cpu = smp_processor_id();
	u64 start;
	__ticket_t head;
	unsigned long flags;

	/* If kicker interrupts not initialized yet, just spin */
	if (irq == -1)
		return;

	start = spin_time_start();

	/*
	 * Make sure an interrupt handler can't upset things in a
	 * partially setup state.
	 */
	local_irq_save(flags);
	/*
	 * We don't really care if we're overwriting some other
	 * (lock,want) pair, as that would mean that we're currently
	 * in an interrupt context, and the outer context had
	 * interrupts enabled.  That has already kicked the VCPU out
	 * of xen_poll_irq(), so it will just return spuriously and
	 * retry with newly setup (lock,want).
	 *
	 * The ordering protocol on this is that the "lock" pointer
	 * may only be set non-NULL if the "want" ticket is correct.
	 * If we're updating "want", we must first clear "lock".
	 */
	w->lock = NULL;
	smp_wmb();
	w->want = want;
	smp_wmb();
	w->lock = lock;

	/* This uses set_bit, which is atomic and therefore a barrier */
	cpumask_set_cpu(cpu, &waiting_cpus);
	add_stats(TAKEN_SLOW, 1);

	/* clear pending */
	xen_clear_irq_pending(irq);

	/* Only check lock once pending cleared */
	barrier();

	/*
	 * Mark entry to slowpath before doing the pickup test to make
	 * sure we don't deadlock with an unlocker.
	 */
	__ticket_enter_slowpath(lock);

	/* make sure enter_slowpath, which is atomic does not cross the read */
	smp_mb__after_atomic();

	/*
	 * check again make sure it didn't become free while
	 * we weren't looking
	 */
	head = READ_ONCE(lock->tickets.head);
	if (__tickets_equal(head, want)) {
		add_stats(TAKEN_SLOW_PICKUP, 1);
		goto out;
	}

	/* Allow interrupts while blocked */
	local_irq_restore(flags);

	/*
	 * If an interrupt happens here, it will leave the wakeup irq
	 * pending, which will cause xen_poll_irq() to return
	 * immediately.
	 */

	/* Block until irq becomes pending (or perhaps a spurious wakeup) */
	xen_poll_irq(irq);
	/* A return with nothing pending on the kicker IRQ counts as spurious. */
	add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));

	local_irq_save(flags);

	kstat_incr_irq_this_cpu(irq);
out:
	/* Withdraw the published wait state with interrupts still disabled. */
	cpumask_clear_cpu(cpu, &waiting_cpus);
	w->lock = NULL;

	local_irq_restore(flags);

	spin_time_accum_blocked(start);
}
247
PV_CALLEE_SAVE_REGS_THUNK(xen_lock_spinning);
248

249
/*
 * Wake the CPU, if any, that is blocked waiting for ticket @next on @lock.
 * Installed as pv_lock_ops.unlock_kick by xen_init_spinlocks().
 *
 * Scans the CPUs marked in waiting_cpus and sends XEN_SPIN_UNLOCK_VECTOR
 * to the first one whose published (lock, want) pair matches; at most one
 * CPU is kicked.
 */
static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
{
	int cpu;

	add_stats(RELEASED_SLOW, 1);

	for_each_cpu(cpu, &waiting_cpus) {
		const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);

		/* Make sure we read lock before want */
		if (READ_ONCE(w->lock) == lock &&
		    READ_ONCE(w->want) == next) {
			add_stats(RELEASED_SLOW_KICKED, 1);
			xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
			break;
		}
	}
}
267
#endif /* CONFIG_QUEUED_SPINLOCK */
268 269 270 271 272 273 274

/*
 * Handler registered for the kicker IRQ. xen_init_lock_cpu() disables the
 * IRQ right after binding it, so this is never expected to run; reaching
 * it is treated as a bug.
 */
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
	BUG();
	return IRQ_HANDLED;
}

275
void xen_init_lock_cpu(int cpu)
276 277
{
	int irq;
278
	char *name;
279

280 281 282
	if (!xen_pvspin)
		return;

283
	WARN(per_cpu(lock_kicker_irq, cpu) >= 0, "spinlock on CPU%d exists on IRQ%d!\n",
284 285
	     cpu, per_cpu(lock_kicker_irq, cpu));

286 287 288 289
	name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
	irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
				     cpu,
				     dummy_handler,
290
				     IRQF_PERCPU|IRQF_NOBALANCING,
291 292 293 294 295 296
				     name,
				     NULL);

	if (irq >= 0) {
		disable_irq(irq); /* make sure it's never delivered */
		per_cpu(lock_kicker_irq, cpu) = irq;
297
		per_cpu(irq_name, cpu) = name;
298 299 300 301 302
	}

	printk("cpu %d spinlock event irq %d\n", cpu, irq);
}

A
Alex Nixon 已提交
303 304
void xen_uninit_lock_cpu(int cpu)
{
305 306 307
	if (!xen_pvspin)
		return;

A
Alex Nixon 已提交
308
	unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
309
	per_cpu(lock_kicker_irq, cpu) = -1;
310 311
	kfree(per_cpu(irq_name, cpu));
	per_cpu(irq_name, cpu) = NULL;
A
Alex Nixon 已提交
312 313
}

314

315 316 317 318 319 320 321 322
/*
 * Our init of PV spinlocks is split in two init functions due to us
 * using paravirt patching and jump labels patching and having to do
 * all of this before SMP code is invoked.
 *
 * The paravirt patching needs to be done _before_ the alternative asm code
 * is started, otherwise we would not patch the core kernel code.
 */
323 324
void __init xen_init_spinlocks(void)
{
325

326 327 328 329
	if (!xen_pvspin) {
		printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
		return;
	}
330
	printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
331 332 333 334 335 336 337
#ifdef CONFIG_QUEUED_SPINLOCK
	__pv_init_lock_hash();
	pv_lock_ops.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
	pv_lock_ops.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock);
	pv_lock_ops.wait = xen_qlock_wait;
	pv_lock_ops.kick = xen_qlock_kick;
#else
338
	pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
339
	pv_lock_ops.unlock_kick = xen_unlock_kick;
340
#endif
341
}
J
Jeremy Fitzhardinge 已提交
342

343 344 345 346 347 348 349 350 351 352 353
/*
 * While the jump_label init code needs to happend _after_ the jump labels are
 * enabled and before SMP is started. Hence we use pre-SMP initcall level
 * init. We cannot do it in xen_init_spinlocks as that is done before
 * jump labels are activated.
 */
static __init int xen_init_spinlocks_jump(void)
{
	if (!xen_pvspin)
		return 0;

354 355 356
	if (!xen_domain())
		return 0;

357 358 359 360 361
	static_key_slow_inc(&paravirt_ticketlocks_enabled);
	return 0;
}
early_initcall(xen_init_spinlocks_jump);

362 363 364 365 366 367 368
/*
 * Handler for the "xen_nopvspin" early parameter: turns PV spinlocks off.
 * @arg is ignored. Always returns 0.
 */
static __init int xen_parse_nopvspin(char *arg)
{
	xen_pvspin = false;
	return 0;
}
early_param("xen_nopvspin", xen_parse_nopvspin);

369
#if defined(CONFIG_XEN_DEBUG_FS) && !defined(CONFIG_QUEUED_SPINLOCK)
J
Jeremy Fitzhardinge 已提交
370 371 372 373 374 375 376 377 378 379

static struct dentry *d_spin_debug;

/*
 * Create the "spinlocks" debugfs directory under the Xen debugfs root and
 * expose the contents of spinlock_stats in it, plus the writable
 * "zero_stats" reset trigger.
 *
 * Returns -ENOMEM if the Xen debugfs root is unavailable, 0 otherwise
 * (including when PV spinlocks are disabled and nothing is created).
 */
static int __init xen_spinlock_debugfs(void)
{
	struct dentry *d_xen = xen_init_debugfs();

	if (d_xen == NULL)
		return -ENOMEM;

	if (!xen_pvspin)
		return 0;

	d_spin_debug = debugfs_create_dir("spinlocks", d_xen);

	debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);

	/* Lock-acquisition slow path counters. */
	debugfs_create_u32("taken_slow", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[TAKEN_SLOW]);
	debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
	debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);

	/* Lock-release slow path counters. */
	debugfs_create_u32("released_slow", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[RELEASED_SLOW]);
	debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
			   &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);

	debugfs_create_u64("time_blocked", 0444, d_spin_debug,
			   &spinlock_stats.time_blocked);

	/* HISTO_BUCKETS regular buckets plus the overflow slot. */
	debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
				spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);

	return 0;
}
fs_initcall(xen_spinlock_debugfs);

#endif	/* CONFIG_XEN_DEBUG_FS && !CONFIG_QUEUED_SPINLOCK */