/*
 * Xen event channels (FIFO-based ABI)
 *
 * Copyright (C) 2013 Citrix Systems R&D ltd.
 *
 * This source code is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * Or, when distributed separately from the Linux kernel or
 * incorporated into other software packages, subject to the following
 * license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* Prefix put in front of the format string of this file's pr_*() messages. */
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/linkage.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/smp.h>
#include <linux/percpu.h>
#include <linux/cpu.h>

#include <asm/barrier.h>
#include <asm/sync_bitops.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/xen.h>
#include <xen/xen-ops.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
#include <xen/page.h>

#include "events_internal.h"

/* Number of event words that fit in one Xen page. */
#define EVENT_WORDS_PER_PAGE (XEN_PAGE_SIZE / sizeof(event_word_t))
/* Number of such pages needed to hold EVTCHN_FIFO_NR_CHANNELS event words. */
#define MAX_EVENT_ARRAY_PAGES (EVTCHN_FIFO_NR_CHANNELS / EVENT_WORDS_PER_PAGE)

/*
 * Per-CPU copy of the HEAD of every priority queue.  A value of 0 makes
 * the consumer re-read HEAD from the control block.
 */
struct evtchn_fifo_queue {
	uint32_t head[EVTCHN_FIFO_MAX_QUEUES];
};

/* Control block page of each CPU; NULL while none is allocated. */
static DEFINE_PER_CPU(struct evtchn_fifo_control_block *, cpu_control_block);
/* Locally cached queue HEADs of each CPU. */
static DEFINE_PER_CPU(struct evtchn_fifo_queue, cpu_queue);
/* Pages holding the event words, indexed by port / EVENT_WORDS_PER_PAGE. */
static event_word_t *event_array[MAX_EVENT_ARRAY_PAGES] __read_mostly;
/* Number of event_array pages successfully added with EVTCHNOP_expand_array. */
static unsigned event_array_pages __read_mostly;

/*
 * sync_set_bit() and friends must be unsigned long aligned.
 *
 * BM(w) yields the unsigned long pointer to hand to the bit operations and
 * EVTCHN_FIFO_BIT(b, w) the matching bit index for flag EVTCHN_FIFO_<b>.
 * When BITS_PER_LONG > 32 the word address is rounded down to an 8-byte
 * boundary and, if address bit 2 was set, the bit index is moved up by 32.
 * Otherwise the word pointer and the flag index are used unchanged.
 *
 * Arguments and expansions are fully parenthesised so that the macros are
 * safe to use with arbitrary pointer expressions.
 */
#if BITS_PER_LONG > 32

#define BM(w) ((unsigned long *)((unsigned long)(w) & ~0x7UL))
#define EVTCHN_FIFO_BIT(b, w) \
    (((unsigned long)(w) & 0x4UL) ? (EVTCHN_FIFO_ ##b + 32) : EVTCHN_FIFO_ ##b)

#else

#define BM(w) ((unsigned long *)(w))
#define EVTCHN_FIFO_BIT(b, w) EVTCHN_FIFO_ ##b

#endif
/*
 * Return a pointer to the event word of @port.
 *
 * The array page is selected by port / EVENT_WORDS_PER_PAGE and the slot
 * within it by the remainder.  No bounds check is made here, so the page
 * covering @port must already be present in event_array.
 */
static inline event_word_t *event_word_from_port(evtchn_port_t port)
{
	event_word_t *page = event_array[port / EVENT_WORDS_PER_PAGE];

	return page + port % EVENT_WORDS_PER_PAGE;
}

/* Report the fixed upper bound on event channels for this ABI. */
static unsigned evtchn_fifo_max_channels(void)
{
	const unsigned limit = EVTCHN_FIFO_NR_CHANNELS;

	return limit;
}

/* Report how many event words the currently added array pages provide. */
static unsigned evtchn_fifo_nr_channels(void)
{
	unsigned pages = event_array_pages;

	return pages * EVENT_WORDS_PER_PAGE;
}

static int init_control_block(int cpu,
                              struct evtchn_fifo_control_block *control_block)
{
	struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
	struct evtchn_init_control init_control;
	unsigned int i;

	/* Reset the control block and the local HEADs. */
	clear_page(control_block);
	for (i = 0; i < EVTCHN_FIFO_MAX_QUEUES; i++)
		q->head[i] = 0;

114
	init_control.control_gfn = virt_to_gfn(control_block);
115
	init_control.offset      = 0;
116
	init_control.vcpu        = xen_vcpu_nr(cpu);
117 118 119 120

	return HYPERVISOR_event_channel_op(EVTCHNOP_init_control, &init_control);
}

/*
 * Release every array page that is allocated but not in use, i.e. the
 * run of non-NULL entries starting at index event_array_pages.  The scan
 * stops at the first empty slot.
 */
static void free_unused_array_pages(void)
{
	unsigned idx = event_array_pages;

	while (idx < MAX_EVENT_ARRAY_PAGES && event_array[idx]) {
		free_page((unsigned long)event_array[idx]);
		event_array[idx] = NULL;
		idx++;
	}
}

/* Fill @array_page with event words that have only the MASKED flag set. */
static void init_array_page(event_word_t *array_page)
{
	const event_word_t masked = 1 << EVTCHN_FIFO_MASKED;
	event_word_t *slot = array_page;
	event_word_t *end = array_page + EVENT_WORDS_PER_PAGE;

	while (slot < end)
		*slot++ = masked;
}

static int evtchn_fifo_setup(struct irq_info *info)
{
143
	evtchn_port_t port = info->evtchn;
144
	unsigned new_array_pages;
145
	int ret;
146 147 148 149 150 151 152 153 154 155 156 157 158 159

	new_array_pages = port / EVENT_WORDS_PER_PAGE + 1;

	if (new_array_pages > MAX_EVENT_ARRAY_PAGES)
		return -EINVAL;

	while (event_array_pages < new_array_pages) {
		void *array_page;
		struct evtchn_expand_array expand_array;

		/* Might already have a page if we've resumed. */
		array_page = event_array[event_array_pages];
		if (!array_page) {
			array_page = (void *)__get_free_page(GFP_KERNEL);
160 161
			if (array_page == NULL) {
				ret = -ENOMEM;
162
				goto error;
163
			}
164 165 166 167 168 169
			event_array[event_array_pages] = array_page;
		}

		/* Mask all events in this page before adding it. */
		init_array_page(array_page);

170
		expand_array.array_gfn = virt_to_gfn(array_page);
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193

		ret = HYPERVISOR_event_channel_op(EVTCHNOP_expand_array, &expand_array);
		if (ret < 0)
			goto error;

		event_array_pages++;
	}
	return 0;

  error:
	if (event_array_pages == 0)
		panic("xen: unable to expand event array with initial page (%d)\n", ret);
	else
		pr_err("unable to expand event array (%d)\n", ret);
	free_unused_array_pages();
	return ret;
}

/*
 * bind_to_cpu hook of evtchn_ops_fifo.  Nothing is done here for @info
 * or @cpu; the function exists only to fill the ops table slot.
 */
static void evtchn_fifo_bind_to_cpu(struct irq_info *info, unsigned cpu)
{
	/* no-op */
}

/* Clear the PENDING flag in the event word of @port. */
static void evtchn_fifo_clear_pending(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	sync_clear_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}

/* Set the PENDING flag in the event word of @port. */
static void evtchn_fifo_set_pending(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	sync_set_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}

/* Return true if the PENDING flag is set in the event word of @port. */
static bool evtchn_fifo_is_pending(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
}

/*
 * Set the MASKED flag in the event word of @port and return whether it
 * was already set beforehand.
 */
static bool evtchn_fifo_test_and_set_mask(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}

/* Set the MASKED flag in the event word of @port. */
static void evtchn_fifo_mask(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	sync_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}

/* Return true if the MASKED flag is set in the event word of @port. */
static bool evtchn_fifo_is_masked(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	return sync_test_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
}
/*
230 231
 * Clear MASKED if not PENDING, spinning if BUSY is set.
 * Return true if mask was cleared.
232
 */
233
static bool clear_masked_cond(volatile event_word_t *word)
234 235 236 237 238 239
{
	event_word_t new, old, w;

	w = *word;

	do {
240 241 242
		if (w & (1 << EVTCHN_FIFO_PENDING))
			return false;

243 244 245 246
		old = w & ~(1 << EVTCHN_FIFO_BUSY);
		new = old & ~(1 << EVTCHN_FIFO_MASKED);
		w = sync_cmpxchg(word, old, new);
	} while (w != old);
247 248

	return true;
249 250
}

/*
 * Unmask @port.  Must be called with interrupts disabled (enforced with
 * BUG_ON).
 *
 * The mask is cleared locally when the event is not pending.  Otherwise
 * the unmask is delegated to the hypervisor through EVTCHNOP_unmask; the
 * result of that hypercall is deliberately ignored.
 */
static void evtchn_fifo_unmask(evtchn_port_t port)
{
	event_word_t *word = event_word_from_port(port);

	BUG_ON(!irqs_disabled());

	if (!clear_masked_cond(word)) {
		struct evtchn_unmask unmask = { .port = port };
		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
	}
}

/*
 * Atomically clear the LINKED flag and the link field of @word.
 *
 * The compare-and-exchange is retried until it succeeds.  Returns the
 * link field the word held immediately before it was cleared.
 */
static uint32_t clear_linked(volatile event_word_t *word)
{
	event_word_t seen = *word;

	for (;;) {
		event_word_t expected = seen;
		event_word_t cleared;

		cleared = expected & ~((1 << EVTCHN_FIFO_LINKED)
				       | EVTCHN_FIFO_LINK_MASK);
		seen = sync_cmpxchg(word, expected, cleared);
		if (seen == expected)
			break;
	}

	return seen & EVTCHN_FIFO_LINK_MASK;
}

/*
 * Run the interrupt handler for the IRQ bound to @port.  Ports for which
 * get_evtchn_to_irq() reports -1 are silently ignored.
 */
static void handle_irq_for_port(evtchn_port_t port)
{
	int irq;

	irq = get_evtchn_to_irq(port);
	if (irq != -1)
		generic_handle_irq(irq);
}

static void consume_one_event(unsigned cpu,
			      struct evtchn_fifo_control_block *control_block,
289 290
			      unsigned priority, unsigned long *ready,
			      bool drop)
291 292 293
{
	struct evtchn_fifo_queue *q = &per_cpu(cpu_queue, cpu);
	uint32_t head;
294
	evtchn_port_t port;
295 296 297 298 299 300 301 302 303
	event_word_t *word;

	head = q->head[priority];

	/*
	 * Reached the tail last time?  Read the new HEAD from the
	 * control block.
	 */
	if (head == 0) {
304
		virt_rmb(); /* Ensure word is up-to-date before reading head. */
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
		head = control_block->head[priority];
	}

	port = head;
	word = event_word_from_port(port);
	head = clear_linked(word);

	/*
	 * If the link is non-zero, there are more events in the
	 * queue, otherwise the queue is empty.
	 *
	 * If the queue is empty, clear this priority from our local
	 * copy of the ready word.
	 */
	if (head == 0)
320
		clear_bit(priority, ready);
321

322 323 324 325 326 327
	if (evtchn_fifo_is_pending(port) && !evtchn_fifo_is_masked(port)) {
		if (unlikely(drop))
			pr_warn("Dropping pending event for port %u\n", port);
		else
			handle_irq_for_port(port);
	}
328 329 330 331

	q->head[priority] = head;
}

static void __evtchn_fifo_handle_events(unsigned cpu, bool drop)
333 334
{
	struct evtchn_fifo_control_block *control_block;
335
	unsigned long ready;
336 337 338 339 340 341 342
	unsigned q;

	control_block = per_cpu(cpu_control_block, cpu);

	ready = xchg(&control_block->ready, 0);

	while (ready) {
343
		q = find_first_bit(&ready, EVTCHN_FIFO_MAX_QUEUES);
344
		consume_one_event(cpu, control_block, q, &ready, drop);
345 346 347 348
		ready |= xchg(&control_block->ready, 0);
	}
}

/* handle_events hook: process the ready queues of @cpu, delivering events. */
static void evtchn_fifo_handle_events(unsigned cpu)
{
	const bool drop = false;

	__evtchn_fifo_handle_events(cpu, drop);
}

/*
 * resume hook: re-register the control blocks with Xen and reset the
 * event array bookkeeping.
 *
 * Control blocks of online CPUs are reinitialised (failure is a BUG);
 * those of offline CPUs are freed.  CPUs without a control block are
 * skipped.
 */
static void evtchn_fifo_resume(void)
{
	unsigned cpu;

	for_each_possible_cpu(cpu) {
		void *control_block = per_cpu(cpu_control_block, cpu);
		int ret;

		if (!control_block)
			continue;

		/*
		 * If this CPU is offline, take the opportunity to
		 * free the control block while it is not being
		 * used.
		 */
		if (!cpu_online(cpu)) {
			free_page((unsigned long)control_block);
			per_cpu(cpu_control_block, cpu) = NULL;
			continue;
		}

		ret = init_control_block(cpu, control_block);
		BUG_ON(ret < 0);
	}

	/*
	 * The event array starts out as empty again and is extended
	 * as normal when events are bound.  The existing pages will
	 * be reused.
	 */
	event_array_pages = 0;
}

/* Operations table installed as evtchn_ops when the FIFO-based ABI is used. */
static const struct evtchn_ops evtchn_ops_fifo = {
	/* Capacity. */
	.max_channels      = evtchn_fifo_max_channels,
	.nr_channels       = evtchn_fifo_nr_channels,

	/* Pending state. */
	.set_pending       = evtchn_fifo_set_pending,
	.clear_pending     = evtchn_fifo_clear_pending,
	.is_pending        = evtchn_fifo_is_pending,

	/* Masking. */
	.mask              = evtchn_fifo_mask,
	.test_and_set_mask = evtchn_fifo_test_and_set_mask,
	.unmask            = evtchn_fifo_unmask,

	/* Port setup, delivery and resume. */
	.setup             = evtchn_fifo_setup,
	.bind_to_cpu       = evtchn_fifo_bind_to_cpu,
	.handle_events     = evtchn_fifo_handle_events,
	.resume            = evtchn_fifo_resume,
};

static int evtchn_fifo_alloc_control_block(unsigned cpu)
404
{
405
	void *control_block = NULL;
406 407
	int ret = -ENOMEM;

408
	control_block = (void *)__get_free_page(GFP_KERNEL);
409 410 411
	if (control_block == NULL)
		goto error;

412
	ret = init_control_block(cpu, control_block);
413 414 415
	if (ret < 0)
		goto error;

416
	per_cpu(cpu_control_block, cpu) = control_block;
417 418 419 420

	return 0;

  error:
421
	free_page((unsigned long)control_block);
422 423 424
	return ret;
}

static int xen_evtchn_cpu_prepare(unsigned int cpu)
426
{
427 428 429
	if (!per_cpu(cpu_control_block, cpu))
		return evtchn_fifo_alloc_control_block(cpu);
	return 0;
430 431
}

/*
 * CPU hotplug teardown callback: run the event loop for @cpu in drop
 * mode so that its queues are emptied.  Always returns 0.
 */
static int xen_evtchn_cpu_dead(unsigned int cpu)
{
	const bool drop = true;

	__evtchn_fifo_handle_events(cpu, drop);

	return 0;
}

/*
 * Switch event channel handling to the FIFO-based ABI.
 *
 * A control block is set up for the current CPU first; if that fails the
 * error is returned and evtchn_ops is left untouched.  On success the
 * FIFO ops table is installed and the CPU hotplug callbacks are
 * registered, so that each CPU gets a control block when it is prepared
 * and has its queues drained when it goes away.
 *
 * Returns the non-negative result of the control block setup on success,
 * or a negative error code.
 */
int __init xen_evtchn_fifo_init(void)
{
	int cpu = smp_processor_id();
	int ret;

	ret = evtchn_fifo_alloc_control_block(cpu);
	if (ret < 0)
		return ret;

	pr_info("Using FIFO-based ABI\n");

	evtchn_ops = &evtchn_ops_fifo;

	cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
				  "xen/evtchn:prepare",
				  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);

	return ret;
}