events.c 39.7 KB
Newer Older
J
Jeremy Fitzhardinge 已提交
1 2 3 4 5 6 7
/*
 * Xen event channels
 *
 * Xen models interrupts with abstract event channels.  Because each
 * domain gets 1024 event channels, but NR_IRQ is not that large, we
 * must dynamically map irqs<->event channels.  The event channels
 * interface with the rest of the kernel by defining a xen interrupt
 * chip.  When an event is received, it is mapped to an irq and sent
 * through the normal interrupt processing path.
 *
 * There are four kinds of events which can be mapped to an event
 * channel:
 *
 * 1. Inter-domain notifications.  This includes all the virtual
 *    device events, since they're driven by front-ends in another domain
 *    (typically dom0).
 * 2. VIRQs, typically used for timers.  These are per-cpu events.
 * 3. IPIs.
 * 4. PIRQs - Hardware interrupts.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/linkage.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/string.h>
29
#include <linux/bootmem.h>
30
#include <linux/slab.h>
31
#include <linux/irqnr.h>
32
#include <linux/pci.h>
J
Jeremy Fitzhardinge 已提交
33

34
#include <asm/desc.h>
J
Jeremy Fitzhardinge 已提交
35 36
#include <asm/ptrace.h>
#include <asm/irq.h>
37
#include <asm/idle.h>
38
#include <asm/io_apic.h>
J
Jeremy Fitzhardinge 已提交
39
#include <asm/sync_bitops.h>
40
#include <asm/xen/pci.h>
J
Jeremy Fitzhardinge 已提交
41
#include <asm/xen/hypercall.h>
42
#include <asm/xen/hypervisor.h>
J
Jeremy Fitzhardinge 已提交
43

44 45
#include <xen/xen.h>
#include <xen/hvm.h>
46
#include <xen/xen-ops.h>
J
Jeremy Fitzhardinge 已提交
47 48 49
#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/event_channel.h>
50 51
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/hvm/params.h>
J
Jeremy Fitzhardinge 已提交
52 53 54 55 56

/*
 * This lock protects updates to the following mapping and reference-count
 * arrays. The lock does not need to be acquired to read the mapping tables.
 */
57
static DEFINE_MUTEX(irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
58

59 60
static LIST_HEAD(xen_irq_list_head);

J
Jeremy Fitzhardinge 已提交
61
/* IRQ <-> VIRQ mapping. */
62
static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
J
Jeremy Fitzhardinge 已提交
63

J
Jeremy Fitzhardinge 已提交
64
/* IRQ <-> IPI mapping */
65
static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
J
Jeremy Fitzhardinge 已提交
66

67 68
/*
 * Interrupt types: the kind of Xen event source an IRQ is bound to.
 * IRQT_UNBOUND is zero, so a zero-filled irq_info starts out unbound.
 */
enum xen_irq_type {
	IRQT_UNBOUND = 0,	/* not bound to any event source */
	IRQT_PIRQ,		/* physical (hardware) interrupt */
	IRQT_VIRQ,		/* per-cpu virtual interrupt */
	IRQT_IPI,		/* inter-processor interrupt */
	IRQT_EVTCHN		/* plain event channel */
};
J
Jeremy Fitzhardinge 已提交
75

76 77 78 79 80 81
/*
 * Packed IRQ information:
 * type - enum xen_irq_type
 * event channel - irq->event channel mapping
 * cpu - cpu this event channel is bound to
 * index - type-specific information:
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN -
 */
88
/* Per-IRQ Xen bookkeeping, stored as the IRQ's handler data. */
struct irq_info {
	struct list_head list;	/* entry in xen_irq_list_head */
	enum xen_irq_type type;	/* type */
	unsigned irq;		/* Linux IRQ number this record describes */
	unsigned short evtchn;	/* event channel */
	unsigned short cpu;	/* cpu bound */

	/* Type-specific data; which member is valid depends on 'type'. */
	union {
		unsigned short virq;
		enum ipi_vector ipi;
		struct {
			unsigned short pirq;
			unsigned short gsi;
			unsigned char vector;
			unsigned char flags;	/* PIRQ_* bits */
			uint16_t domid;
		} pirq;
	} u;
};
/* Bits stored in irq_info.u.pirq.flags. */
#define PIRQ_NEEDS_EOI	(1 << 0)
#define PIRQ_SHAREABLE	(1 << 1)
109

110
static int *evtchn_to_irq;
111

112 113
static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG],
		      cpu_evtchn_mask);
J
Jeremy Fitzhardinge 已提交
114 115 116 117 118

/* Xen will never allocate port zero for any purpose. */
#define VALID_EVTCHN(chn)	((chn) != 0)

static struct irq_chip xen_dynamic_chip;
119
static struct irq_chip xen_percpu_chip;
120
static struct irq_chip xen_pirq_chip;
121 122
static void enable_dynirq(struct irq_data *data);
static void disable_dynirq(struct irq_data *data);
J
Jeremy Fitzhardinge 已提交
123

124 125
/* Return the Xen irq_info stored as the handler data of @irq. */
static struct irq_info *info_for_irq(unsigned irq)
{
	struct irq_info *info = irq_get_handler_data(irq);

	return info;
}

130 131
/* Constructors for packed IRQ information. */
static void xen_irq_info_common_init(struct irq_info *info,
132
				     unsigned irq,
133 134 135
				     enum xen_irq_type type,
				     unsigned short evtchn,
				     unsigned short cpu)
136
{
137 138 139 140

	BUG_ON(info->type != IRQT_UNBOUND && info->type != type);

	info->type = type;
141
	info->irq = irq;
142 143
	info->evtchn = evtchn;
	info->cpu = cpu;
144 145

	evtchn_to_irq[evtchn] = irq;
146 147
}

148 149
static void xen_irq_info_evtchn_init(unsigned irq,
				     unsigned short evtchn)
150
{
151 152
	struct irq_info *info = info_for_irq(irq);

153
	xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
154 155
}

156 157
static void xen_irq_info_ipi_init(unsigned cpu,
				  unsigned irq,
158 159
				  unsigned short evtchn,
				  enum ipi_vector ipi)
J
Jeremy Fitzhardinge 已提交
160
{
161 162
	struct irq_info *info = info_for_irq(irq);

163
	xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
164 165

	info->u.ipi = ipi;
166 167

	per_cpu(ipi_to_irq, cpu)[ipi] = irq;
168 169
}

170 171
static void xen_irq_info_virq_init(unsigned cpu,
				   unsigned irq,
172 173
				   unsigned short evtchn,
				   unsigned short virq)
174
{
175 176
	struct irq_info *info = info_for_irq(irq);

177
	xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
178 179

	info->u.virq = virq;
180 181

	per_cpu(virq_to_irq, cpu)[virq] = irq;
182 183
}

184 185 186 187 188
static void xen_irq_info_pirq_init(unsigned irq,
				   unsigned short evtchn,
				   unsigned short pirq,
				   unsigned short gsi,
				   unsigned short vector,
189
				   uint16_t domid,
190
				   unsigned char flags)
191
{
192 193
	struct irq_info *info = info_for_irq(irq);

194
	xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
195 196 197 198

	info->u.pirq.pirq = pirq;
	info->u.pirq.gsi = gsi;
	info->u.pirq.vector = vector;
199
	info->u.pirq.domid = domid;
200
	info->u.pirq.flags = flags;
J
Jeremy Fitzhardinge 已提交
201 202 203 204 205
}

/*
 * Accessors for packed IRQ information.
 */
206
static unsigned int evtchn_from_irq(unsigned irq)
J
Jeremy Fitzhardinge 已提交
207
{
208 209 210
	if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
		return 0;

211
	return info_for_irq(irq)->evtchn;
J
Jeremy Fitzhardinge 已提交
212 213
}

I
Ian Campbell 已提交
214 215 216 217 218 219
/* Return the irq recorded for @evtchn in the evtchn_to_irq table. */
unsigned irq_from_evtchn(unsigned int evtchn)
{
	const int mapped_irq = evtchn_to_irq[evtchn];

	return mapped_irq;
}
EXPORT_SYMBOL_GPL(irq_from_evtchn);

220
static enum ipi_vector ipi_from_irq(unsigned irq)
J
Jeremy Fitzhardinge 已提交
221
{
222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
	struct irq_info *info = info_for_irq(irq);

	BUG_ON(info == NULL);
	BUG_ON(info->type != IRQT_IPI);

	return info->u.ipi;
}

static unsigned virq_from_irq(unsigned irq)
{
	struct irq_info *info = info_for_irq(irq);

	BUG_ON(info == NULL);
	BUG_ON(info->type != IRQT_VIRQ);

	return info->u.virq;
}

S
Stefano Stabellini 已提交
240 241 242 243 244 245 246 247 248 249
static unsigned pirq_from_irq(unsigned irq)
{
	struct irq_info *info = info_for_irq(irq);

	BUG_ON(info == NULL);
	BUG_ON(info->type != IRQT_PIRQ);

	return info->u.pirq.pirq;
}

250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
static enum xen_irq_type type_from_irq(unsigned irq)
{
	return info_for_irq(irq)->type;
}

static unsigned cpu_from_irq(unsigned irq)
{
	return info_for_irq(irq)->cpu;
}

/*
 * Return the cpu that @evtchn is bound to.  A channel with no irq
 * mapping is reported as cpu 0.
 */
static unsigned int cpu_from_evtchn(unsigned int evtchn)
{
	const int irq = evtchn_to_irq[evtchn];

	if (irq == -1)
		return 0;

	return cpu_from_irq(irq);
}

271 272 273 274 275 276 277 278 279
static bool pirq_needs_eoi(unsigned irq)
{
	struct irq_info *info = info_for_irq(irq);

	BUG_ON(info->type != IRQT_PIRQ);

	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
}

J
Jeremy Fitzhardinge 已提交
280 281 282 283
static inline unsigned long active_evtchns(unsigned int cpu,
					   struct shared_info *sh,
					   unsigned int idx)
{
284
	return sh->evtchn_pending[idx] &
285
		per_cpu(cpu_evtchn_mask, cpu)[idx] &
286
		~sh->evtchn_mask[idx];
J
Jeremy Fitzhardinge 已提交
287 288 289 290 291 292 293 294
}

static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
{
	int irq = evtchn_to_irq[chn];

	BUG_ON(irq == -1);
#ifdef CONFIG_SMP
295
	cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
J
Jeremy Fitzhardinge 已提交
296 297
#endif

298 299
	clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq)));
	set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
J
Jeremy Fitzhardinge 已提交
300

301
	info_for_irq(irq)->cpu = cpu;
J
Jeremy Fitzhardinge 已提交
302 303 304 305
}

static void init_evtchn_cpu_bindings(void)
{
306
	int i;
J
Jeremy Fitzhardinge 已提交
307
#ifdef CONFIG_SMP
308
	struct irq_info *info;
309

J
Jeremy Fitzhardinge 已提交
310
	/* By default all event channels notify CPU#0. */
311 312
	list_for_each_entry(info, &xen_irq_list_head, list) {
		struct irq_desc *desc = irq_to_desc(info->irq);
313
		cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
314
	}
J
Jeremy Fitzhardinge 已提交
315 316
#endif

317
	for_each_possible_cpu(i)
318 319
		memset(per_cpu(cpu_evtchn_mask, i),
		       (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i)));
J
Jeremy Fitzhardinge 已提交
320 321 322 323 324 325 326 327 328 329 330 331 332 333
}

/* Clear the pending bit of @port in the shared info page. */
static inline void clear_evtchn(int port)
{
	sync_clear_bit(port, &HYPERVISOR_shared_info->evtchn_pending[0]);
}

/* Set the pending bit of @port in the shared info page. */
static inline void set_evtchn(int port)
{
	sync_set_bit(port, &HYPERVISOR_shared_info->evtchn_pending[0]);
}

334 335 336 337 338 339
/* Return the pending bit of @port from the shared info page. */
static inline int test_evtchn(int port)
{
	return sync_test_bit(port,
			     &HYPERVISOR_shared_info->evtchn_pending[0]);
}

J
Jeremy Fitzhardinge 已提交
340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375

/**
 * notify_remote_via_irq - send event to remote end of event channel via irq
 * @irq: irq of event channel to send event to
 *
 * Unlike notify_remote_via_evtchn(), this is safe to use across
 * save/restore. Notifications on a broken connection are silently
 * dropped.
 */
void notify_remote_via_irq(int irq)
{
	const int evtchn = evtchn_from_irq(irq);

	/* No valid channel behind this irq: nothing to notify. */
	if (!VALID_EVTCHN(evtchn))
		return;

	notify_remote_via_evtchn(evtchn);
}
EXPORT_SYMBOL_GPL(notify_remote_via_irq);

/* Set the mask bit of @port in the shared info page. */
static void mask_evtchn(int port)
{
	sync_set_bit(port, &HYPERVISOR_shared_info->evtchn_mask[0]);
}

static void unmask_evtchn(int port)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	unsigned int cpu = get_cpu();

	BUG_ON(!irqs_disabled());

	/* Slow path (hypercall) if this is a non-local port. */
	if (unlikely(cpu != cpu_from_evtchn(port))) {
		struct evtchn_unmask unmask = { .port = port };
		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
	} else {
C
Christoph Lameter 已提交
376
		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
J
Jeremy Fitzhardinge 已提交
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393

		sync_clear_bit(port, &s->evtchn_mask[0]);

		/*
		 * The following is basically the equivalent of
		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
		 * the interrupt edge' if the channel is masked.
		 */
		if (sync_test_bit(port, &s->evtchn_pending[0]) &&
		    !sync_test_and_set_bit(port / BITS_PER_LONG,
					   &vcpu_info->evtchn_pending_sel))
			vcpu_info->evtchn_upcall_pending = 1;
	}

	put_cpu();
}

394 395 396
static void xen_irq_init(unsigned irq)
{
	struct irq_info *info;
397
#ifdef CONFIG_SMP
398 399 400 401
	struct irq_desc *desc = irq_to_desc(irq);

	/* By default all event channels notify CPU#0. */
	cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
402
#endif
403

404 405 406
	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (info == NULL)
		panic("Unable to allocate metadata for IRQ%d\n", irq);
407 408 409

	info->type = IRQT_UNBOUND;

T
Thomas Gleixner 已提交
410
	irq_set_handler_data(irq, info);
411

412 413 414
	list_add_tail(&info->list, &xen_irq_list_head);
}

415
/*
 * Allocate and initialise a dynamic irq.  Returns the irq number, or
 * the negative value returned by irq_alloc_desc_from() on failure.
 */
static int __must_check xen_allocate_irq_dynamic(void)
{
	int first = 0;
	int irq;

#ifdef CONFIG_X86_IO_APIC
	/*
	 * For an HVM guest or domain 0 which see "real" (emulated or
	 * actual respectively) GSIs we allocate dynamic IRQs
	 * e.g. those corresponding to event channels or MSIs
	 * etc. from the range above those "real" GSIs to avoid
	 * collisions.
	 */
	if (xen_initial_domain() || xen_hvm_domain())
		first = get_nr_irqs_gsi();
#endif

	irq = irq_alloc_desc_from(first, -1);
	if (irq < 0)
		return irq;

	xen_irq_init(irq);

	return irq;
}

440
/*
 * Allocate and initialise the irq for @gsi.  Returns the irq number, or
 * a negative value if no descriptor could be allocated.
 */
static int __must_check xen_allocate_irq_gsi(unsigned gsi)
{
	int irq;

	/*
	 * A PV guest has no concept of a GSI (since it has no ACPI
	 * nor access to/knowledge of the physical APICs). Therefore
	 * all IRQs are dynamically allocated from the entire IRQ
	 * space.
	 */
	if (xen_pv_domain() && !xen_initial_domain())
		return xen_allocate_irq_dynamic();

	/* Legacy IRQ descriptors are already allocated by the arch. */
	if (gsi < NR_IRQS_LEGACY)
		irq = gsi;
	else
		irq = irq_alloc_desc_at(gsi, -1);

	/*
	 * Only set up Xen metadata for a real irq; a failed allocation is
	 * reported to the caller, as in xen_allocate_irq_dynamic().
	 */
	if (irq >= 0)
		xen_irq_init(irq);

	return irq;
}

static void xen_free_irq(unsigned irq)
{
T
Thomas Gleixner 已提交
466
	struct irq_info *info = irq_get_handler_data(irq);
467 468

	list_del(&info->list);
469

T
Thomas Gleixner 已提交
470
	irq_set_handler_data(irq, NULL);
471 472 473

	kfree(info);

474 475 476 477
	/* Legacy IRQ descriptors are managed by the arch. */
	if (irq < NR_IRQS_LEGACY)
		return;

478 479 480
	irq_free_desc(irq);
}

481 482 483 484 485 486 487
static void pirq_query_unmask(int irq)
{
	struct physdev_irq_status_query irq_status;
	struct irq_info *info = info_for_irq(irq);

	BUG_ON(info->type != IRQT_PIRQ);

S
Stefano Stabellini 已提交
488
	irq_status.irq = pirq_from_irq(irq);
489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
		irq_status.flags = 0;

	info->u.pirq.flags &= ~PIRQ_NEEDS_EOI;
	if (irq_status.flags & XENIRQSTAT_needs_eoi)
		info->u.pirq.flags |= PIRQ_NEEDS_EOI;
}

static bool probing_irq(int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	return desc && desc->action == NULL;
}

504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
/*
 * EOI a PIRQ: clear its pending event channel bit and, if the pirq is
 * flagged as needing it, issue PHYSDEVOP_eoi to the hypervisor.
 */
static void eoi_pirq(struct irq_data *data)
{
	const unsigned int irq = data->irq;
	int evtchn = evtchn_from_irq(irq);
	struct physdev_eoi eoi = { .irq = pirq_from_irq(irq) };

	irq_move_irq(data);

	if (VALID_EVTCHN(evtchn))
		clear_evtchn(evtchn);

	if (pirq_needs_eoi(irq)) {
		int rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);

		WARN_ON(rc);
	}
}

/*
 * Mask-and-acknowledge for a PIRQ: disable the dynamic irq first, then
 * run the EOI path.
 */
static void mask_ack_pirq(struct irq_data *data)
{
	disable_dynirq(data);
	eoi_pirq(data);
}

527
static unsigned int __startup_pirq(unsigned int irq)
528 529 530 531
{
	struct evtchn_bind_pirq bind_pirq;
	struct irq_info *info = info_for_irq(irq);
	int evtchn = evtchn_from_irq(irq);
532
	int rc;
533 534 535 536 537 538

	BUG_ON(info->type != IRQT_PIRQ);

	if (VALID_EVTCHN(evtchn))
		goto out;

S
Stefano Stabellini 已提交
539
	bind_pirq.pirq = pirq_from_irq(irq);
540
	/* NB. We are happy to share unless we are probing. */
541 542 543 544
	bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ?
					BIND_PIRQ__WILL_SHARE : 0;
	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
	if (rc != 0) {
545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
		if (!probing_irq(irq))
			printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
			       irq);
		return 0;
	}
	evtchn = bind_pirq.port;

	pirq_query_unmask(irq);

	evtchn_to_irq[evtchn] = irq;
	bind_evtchn_to_cpu(evtchn, 0);
	info->evtchn = evtchn;

out:
	unmask_evtchn(evtchn);
560
	eoi_pirq(irq_get_irq_data(irq));
561 562 563 564

	return 0;
}

565 566 567 568 569 570
/* irq_data wrapper around __startup_pirq(). */
static unsigned int startup_pirq(struct irq_data *data)
{
	const unsigned int irq = data->irq;

	return __startup_pirq(irq);
}

static void shutdown_pirq(struct irq_data *data)
571 572
{
	struct evtchn_close close;
573
	unsigned int irq = data->irq;
574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
	struct irq_info *info = info_for_irq(irq);
	int evtchn = evtchn_from_irq(irq);

	BUG_ON(info->type != IRQT_PIRQ);

	if (!VALID_EVTCHN(evtchn))
		return;

	mask_evtchn(evtchn);

	close.port = evtchn;
	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
		BUG();

	bind_evtchn_to_cpu(evtchn, 0);
	evtchn_to_irq[evtchn] = -1;
	info->evtchn = 0;
}

593
/* Enabling a PIRQ runs the startup path; its return value is ignored. */
static void enable_pirq(struct irq_data *data)
{
	(void)startup_pirq(data);
}

598
/* Disabling a PIRQ is delegated to disable_dynirq(). */
static void disable_pirq(struct irq_data *data)
{
	disable_dynirq(data);
}

static int find_irq_by_gsi(unsigned gsi)
{
605
	struct irq_info *info;
606

607 608
	list_for_each_entry(info, &xen_irq_list_head, list) {
		if (info->type != IRQT_PIRQ)
609 610
			continue;

611 612
		if (info->u.pirq.gsi == gsi)
			return info->irq;
613 614 615 616 617
	}

	return -1;
}

I
Ian Campbell 已提交
618 619 620
/*
 * Do not make any assumptions regarding the relationship between the
 * IRQ number returned here and the Xen pirq argument.
S
Stefano Stabellini 已提交
621 622 623
 *
 * Note: We don't assign an event channel until the irq actually started
 * up.  Return an existing irq if we've already got one for the gsi.
624 625 626
 *
 * Shareable implies level triggered, not shareable implies edge
 * triggered here.
627
 */
628 629
int xen_bind_pirq_gsi_to_irq(unsigned gsi,
			     unsigned pirq, int shareable, char *name)
630
{
631
	int irq = -1;
632 633
	struct physdev_irq irq_op;

634
	mutex_lock(&irq_mapping_update_lock);
635 636 637

	irq = find_irq_by_gsi(gsi);
	if (irq != -1) {
S
Stefano Stabellini 已提交
638
		printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n",
639 640 641 642
		       irq, gsi);
		goto out;	/* XXX need refcount? */
	}

643
	irq = xen_allocate_irq_gsi(gsi);
644 645
	if (irq < 0)
		goto out;
646 647

	irq_op.irq = irq;
648 649 650 651 652 653 654
	irq_op.vector = 0;

	/* Only the privileged domain can do this. For non-priv, the pcifront
	 * driver provides a PCI bus that does the call to do exactly
	 * this in the priv domain. */
	if (xen_initial_domain() &&
	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
655
		xen_free_irq(irq);
656 657 658 659
		irq = -ENOSPC;
		goto out;
	}

660
	xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, DOMID_SELF,
661
			       shareable ? PIRQ_SHAREABLE : 0);
662

663 664
	pirq_query_unmask(irq);
	/* We try to use the handler with the appropriate semantic for the
665 666
	 * type of interrupt: if the interrupt is an edge triggered
	 * interrupt we use handle_edge_irq.
667
	 *
668 669
	 * On the other hand if the interrupt is level triggered we use
	 * handle_fasteoi_irq like the native code does for this kind of
670
	 * interrupts.
671
	 *
672 673 674 675 676 677 678
	 * Depending on the Xen version, pirq_needs_eoi might return true
	 * not only for level triggered interrupts but for edge triggered
	 * interrupts too. In any case Xen always honors the eoi mechanism,
	 * not injecting any more pirqs of the same kind if the first one
	 * hasn't received an eoi yet. Therefore using the fasteoi handler
	 * is the right choice either way.
	 */
679
	if (shareable)
680 681 682 683 684 685
		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
				handle_fasteoi_irq, name);
	else
		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
				handle_edge_irq, name);

686
out:
687
	mutex_unlock(&irq_mapping_update_lock);
688 689 690 691

	return irq;
}

692
#ifdef CONFIG_PCI_MSI
693
int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc)
694
{
695
	int rc;
696 697
	struct physdev_get_free_pirq op_get_free_pirq;

698
	op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI;
699 700
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);

701 702 703 704
	WARN_ONCE(rc == -ENOSYS,
		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");

	return rc ? -1 : op_get_free_pirq.pirq;
705 706
}

707
/*
 * Allocate a dynamic irq for an MSI @pirq, set it up as an edge
 * triggered PIRQ irq and attach @msidesc to it.
 *
 * Returns the irq on success, the negative allocation result if no irq
 * could be allocated, or the negative irq_set_msi_desc() error.
 */
int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc,
			     int pirq, int vector, const char *name,
			     domid_t domid)
{
	int irq, ret;

	mutex_lock(&irq_mapping_update_lock);

	irq = xen_allocate_irq_dynamic();
	if (irq < 0)
		goto out;

	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
			name);

	xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, domid, 0);
	ret = irq_set_msi_desc(irq, msidesc);
	if (ret < 0) {
		/*
		 * xen_free_irq() unlinks the entry from xen_irq_list_head,
		 * so it must run with irq_mapping_update_lock still held,
		 * as in the other callers.
		 */
		xen_free_irq(irq);
		irq = ret;
	}
out:
	mutex_unlock(&irq_mapping_update_lock);
	return irq;
}
734 735
#endif

736 737 738
int xen_destroy_irq(int irq)
{
	struct irq_desc *desc;
739 740
	struct physdev_unmap_pirq unmap_irq;
	struct irq_info *info = info_for_irq(irq);
741 742
	int rc = -ENOENT;

743
	mutex_lock(&irq_mapping_update_lock);
744 745 746 747 748

	desc = irq_to_desc(irq);
	if (!desc)
		goto out;

749
	if (xen_initial_domain()) {
750
		unmap_irq.pirq = info->u.pirq.pirq;
751
		unmap_irq.domid = info->u.pirq.domid;
752
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
753 754 755 756 757 758 759 760
		/* If another domain quits without making the pci_disable_msix
		 * call, the Xen hypervisor takes care of freeing the PIRQs
		 * (free_domain_pirqs).
		 */
		if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF))
			printk(KERN_INFO "domain %d does not have %d anymore\n",
				info->u.pirq.domid, info->u.pirq.pirq);
		else if (rc) {
761 762 763 764
			printk(KERN_WARNING "unmap irq failed %d\n", rc);
			goto out;
		}
	}
765

766
	xen_free_irq(irq);
767 768

out:
769
	mutex_unlock(&irq_mapping_update_lock);
770 771 772
	return rc;
}

773
int xen_irq_from_pirq(unsigned pirq)
774
{
775
	int irq;
776

777
	struct irq_info *info;
J
Jeremy Fitzhardinge 已提交
778

779
	mutex_lock(&irq_mapping_update_lock);
780 781

	list_for_each_entry(info, &xen_irq_list_head, list) {
782
		if (info->type != IRQT_PIRQ)
783 784 785 786 787 788 789
			continue;
		irq = info->irq;
		if (info->u.pirq.pirq == pirq)
			goto out;
	}
	irq = -1;
out:
790
	mutex_unlock(&irq_mapping_update_lock);
791 792

	return irq;
793 794
}

795 796 797 798 799 800

/* Exported wrapper around pirq_from_irq(). */
int xen_pirq_from_irq(unsigned irq)
{
	const unsigned pirq = pirq_from_irq(irq);

	return pirq;
}
EXPORT_SYMBOL_GPL(xen_pirq_from_irq);
801
int bind_evtchn_to_irq(unsigned int evtchn)
J
Jeremy Fitzhardinge 已提交
802 803 804
{
	int irq;

805
	mutex_lock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
806 807 808 809

	irq = evtchn_to_irq[evtchn];

	if (irq == -1) {
810
		irq = xen_allocate_irq_dynamic();
811 812
		if (irq == -1)
			goto out;
J
Jeremy Fitzhardinge 已提交
813

T
Thomas Gleixner 已提交
814
		irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
815
					      handle_edge_irq, "event");
J
Jeremy Fitzhardinge 已提交
816

817
		xen_irq_info_evtchn_init(irq, evtchn);
J
Jeremy Fitzhardinge 已提交
818 819
	}

820
out:
821
	mutex_unlock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
822 823 824

	return irq;
}
825
EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
J
Jeremy Fitzhardinge 已提交
826

J
Jeremy Fitzhardinge 已提交
827 828 829 830 831
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
	struct evtchn_bind_ipi bind_ipi;
	int evtchn, irq;

832
	mutex_lock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
833 834

	irq = per_cpu(ipi_to_irq, cpu)[ipi];
835

J
Jeremy Fitzhardinge 已提交
836
	if (irq == -1) {
837
		irq = xen_allocate_irq_dynamic();
J
Jeremy Fitzhardinge 已提交
838 839 840
		if (irq < 0)
			goto out;

T
Thomas Gleixner 已提交
841
		irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
842
					      handle_percpu_irq, "ipi");
J
Jeremy Fitzhardinge 已提交
843 844 845 846 847 848 849

		bind_ipi.vcpu = cpu;
		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
						&bind_ipi) != 0)
			BUG();
		evtchn = bind_ipi.port;

850
		xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
J
Jeremy Fitzhardinge 已提交
851 852 853 854 855

		bind_evtchn_to_cpu(evtchn, cpu);
	}

 out:
856
	mutex_unlock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
857 858 859
	return irq;
}

860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
/*
 * Bind a local event channel to @remote_port of @remote_domain and map
 * it to an irq.  Returns the hypercall error if the bind fails,
 * otherwise the result of bind_evtchn_to_irq().
 */
static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
					  unsigned int remote_port)
{
	struct evtchn_bind_interdomain bind_interdomain;
	int err;

	bind_interdomain.remote_dom  = remote_domain;
	bind_interdomain.remote_port = remote_port;

	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
					  &bind_interdomain);
	if (err)
		return err;

	return bind_evtchn_to_irq(bind_interdomain.local_port);
}

875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895
/*
 * Search this domain's event channels for the one already bound to
 * @virq on @cpu.  Returns its port, or -ENOENT if there is none.
 */
static int find_virq(unsigned int virq, unsigned int cpu)
{
	struct evtchn_status status;
	int port;

	memset(&status, 0, sizeof(status));
	/* Valid ports are 0 .. NR_EVENT_CHANNELS - 1. */
	for (port = 0; port < NR_EVENT_CHANNELS; port++) {
		status.dom = DOMID_SELF;
		status.port = port;
		if (HYPERVISOR_event_channel_op(EVTCHNOP_status, &status) < 0)
			continue;
		if (status.status != EVTCHNSTAT_virq)
			continue;
		if (status.u.virq == virq && status.vcpu == cpu)
			return port;
	}

	/*
	 * Do not leak the last hypercall result: a successful status
	 * query returns 0, which the caller would take for port 0.
	 */
	return -ENOENT;
}
J
Jeremy Fitzhardinge 已提交
896

897
int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
J
Jeremy Fitzhardinge 已提交
898 899
{
	struct evtchn_bind_virq bind_virq;
900
	int evtchn, irq, ret;
J
Jeremy Fitzhardinge 已提交
901

902
	mutex_lock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
903 904 905 906

	irq = per_cpu(virq_to_irq, cpu)[virq];

	if (irq == -1) {
907
		irq = xen_allocate_irq_dynamic();
908 909
		if (irq == -1)
			goto out;
910

T
Thomas Gleixner 已提交
911
		irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
912 913
					      handle_percpu_irq, "virq");

J
Jeremy Fitzhardinge 已提交
914 915
		bind_virq.virq = virq;
		bind_virq.vcpu = cpu;
916 917 918 919 920 921 922 923 924 925
		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						&bind_virq);
		if (ret == 0)
			evtchn = bind_virq.port;
		else {
			if (ret == -EEXIST)
				ret = find_virq(virq, cpu);
			BUG_ON(ret < 0);
			evtchn = ret;
		}
J
Jeremy Fitzhardinge 已提交
926

927
		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
J
Jeremy Fitzhardinge 已提交
928 929 930 931

		bind_evtchn_to_cpu(evtchn, cpu);
	}

932
out:
933
	mutex_unlock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
934 935 936 937 938 939 940 941 942

	return irq;
}

static void unbind_from_irq(unsigned int irq)
{
	struct evtchn_close close;
	int evtchn = evtchn_from_irq(irq);

943
	mutex_lock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
944

J
Jeremy Fitzhardinge 已提交
945
	if (VALID_EVTCHN(evtchn)) {
J
Jeremy Fitzhardinge 已提交
946 947 948 949 950 951 952
		close.port = evtchn;
		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
			BUG();

		switch (type_from_irq(irq)) {
		case IRQT_VIRQ:
			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
953
				[virq_from_irq(irq)] = -1;
J
Jeremy Fitzhardinge 已提交
954
			break;
A
Alex Nixon 已提交
955 956
		case IRQT_IPI:
			per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
957
				[ipi_from_irq(irq)] = -1;
A
Alex Nixon 已提交
958
			break;
J
Jeremy Fitzhardinge 已提交
959 960 961 962 963 964 965 966
		default:
			break;
		}

		/* Closed ports are implicitly re-bound to VCPU0. */
		bind_evtchn_to_cpu(evtchn, 0);

		evtchn_to_irq[evtchn] = -1;
967 968
	}

969
	BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
J
Jeremy Fitzhardinge 已提交
970

971
	xen_free_irq(irq);
J
Jeremy Fitzhardinge 已提交
972

973
	mutex_unlock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
974 975 976
}

int bind_evtchn_to_irqhandler(unsigned int evtchn,
977
			      irq_handler_t handler,
J
Jeremy Fitzhardinge 已提交
978 979 980
			      unsigned long irqflags,
			      const char *devname, void *dev_id)
{
981
	int irq, retval;
J
Jeremy Fitzhardinge 已提交
982 983

	irq = bind_evtchn_to_irq(evtchn);
984 985
	if (irq < 0)
		return irq;
J
Jeremy Fitzhardinge 已提交
986 987 988 989 990 991 992 993 994 995
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
	if (retval != 0) {
		unbind_from_irq(irq);
		return retval;
	}

	return irq;
}
EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);

996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
/*
 * Bind an inter-domain event channel to an irq and install @handler on
 * it.  Returns the irq, or a negative error; the binding is undone if
 * request_irq() fails.
 */
int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
					  unsigned int remote_port,
					  irq_handler_t handler,
					  unsigned long irqflags,
					  const char *devname,
					  void *dev_id)
{
	int retval;
	int irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);

	if (irq < 0)
		return irq;

	retval = request_irq(irq, handler, irqflags, devname, dev_id);
	if (retval == 0)
		return irq;

	unbind_from_irq(irq);
	return retval;
}
EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler);

J
Jeremy Fitzhardinge 已提交
1019
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
1020
			    irq_handler_t handler,
J
Jeremy Fitzhardinge 已提交
1021 1022
			    unsigned long irqflags, const char *devname, void *dev_id)
{
1023
	int irq, retval;
J
Jeremy Fitzhardinge 已提交
1024 1025

	irq = bind_virq_to_irq(virq, cpu);
1026 1027
	if (irq < 0)
		return irq;
J
Jeremy Fitzhardinge 已提交
1028 1029 1030 1031 1032 1033 1034 1035 1036 1037
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
	if (retval != 0) {
		unbind_from_irq(irq);
		return retval;
	}

	return irq;
}
EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);

J
Jeremy Fitzhardinge 已提交
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
/*
 * Bind IPI vector @ipi on @cpu to an irq and install @handler on it,
 * forcing the no-suspend / resume flags onto @irqflags.  Returns the
 * irq, or a negative error; the binding is undone if request_irq()
 * fails.
 */
int bind_ipi_to_irqhandler(enum ipi_vector ipi,
			   unsigned int cpu,
			   irq_handler_t handler,
			   unsigned long irqflags,
			   const char *devname,
			   void *dev_id)
{
	int retval;
	int irq = bind_ipi_to_irq(ipi, cpu);

	if (irq < 0)
		return irq;

	irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME;
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
	if (retval == 0)
		return irq;

	unbind_from_irq(irq);
	return retval;
}

J
Jeremy Fitzhardinge 已提交
1061 1062 1063 1064 1065 1066 1067
/*
 * Release the handler registered on @irq for @dev_id, then tear down
 * the irq's Xen binding with unbind_from_irq().
 */
void unbind_from_irqhandler(unsigned int irq, void *dev_id)
{
	free_irq(irq, dev_id);
	unbind_from_irq(irq);
}
EXPORT_SYMBOL_GPL(unbind_from_irqhandler);

J
Jeremy Fitzhardinge 已提交
1068 1069 1070 1071 1072 1073 1074
void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
{
	int irq = per_cpu(ipi_to_irq, cpu)[vector];
	BUG_ON(irq < 0);
	notify_remote_via_irq(irq);
}

1075 1076 1077 1078
irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
{
	struct shared_info *sh = HYPERVISOR_shared_info;
	int cpu = smp_processor_id();
1079
	unsigned long *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
1080 1081 1082
	int i;
	unsigned long flags;
	static DEFINE_SPINLOCK(debug_lock);
1083
	struct vcpu_info *v;
1084 1085 1086

	spin_lock_irqsave(&debug_lock, flags);

1087
	printk("\nvcpu %d\n  ", cpu);
1088 1089

	for_each_online_cpu(i) {
1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132
		int pending;
		v = per_cpu(xen_vcpu, i);
		pending = (get_irq_regs() && i == cpu)
			? xen_irqs_disabled(get_irq_regs())
			: v->evtchn_upcall_mask;
		printk("%d: masked=%d pending=%d event_sel %0*lx\n  ", i,
		       pending, v->evtchn_upcall_pending,
		       (int)(sizeof(v->evtchn_pending_sel)*2),
		       v->evtchn_pending_sel);
	}
	v = per_cpu(xen_vcpu, cpu);

	printk("\npending:\n   ");
	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
		printk("%0*lx%s", (int)sizeof(sh->evtchn_pending[0])*2,
		       sh->evtchn_pending[i],
		       i % 8 == 0 ? "\n   " : " ");
	printk("\nglobal mask:\n   ");
	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
		printk("%0*lx%s",
		       (int)(sizeof(sh->evtchn_mask[0])*2),
		       sh->evtchn_mask[i],
		       i % 8 == 0 ? "\n   " : " ");

	printk("\nglobally unmasked:\n   ");
	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
		printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
		       sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
		       i % 8 == 0 ? "\n   " : " ");

	printk("\nlocal cpu%d mask:\n   ", cpu);
	for (i = (NR_EVENT_CHANNELS/BITS_PER_LONG)-1; i >= 0; i--)
		printk("%0*lx%s", (int)(sizeof(cpu_evtchn[0])*2),
		       cpu_evtchn[i],
		       i % 8 == 0 ? "\n   " : " ");

	printk("\nlocally unmasked:\n   ");
	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
		unsigned long pending = sh->evtchn_pending[i]
			& ~sh->evtchn_mask[i]
			& cpu_evtchn[i];
		printk("%0*lx%s", (int)(sizeof(sh->evtchn_mask[0])*2),
		       pending, i % 8 == 0 ? "\n   " : " ");
1133 1134 1135
	}

	printk("\npending list:\n");
1136
	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
1137
		if (sync_test_bit(i, sh->evtchn_pending)) {
1138 1139
			int word_idx = i / BITS_PER_LONG;
			printk("  %d: event %d -> irq %d%s%s%s\n",
1140
			       cpu_from_evtchn(i), i,
1141 1142 1143 1144 1145 1146 1147
			       evtchn_to_irq[i],
			       sync_test_bit(word_idx, &v->evtchn_pending_sel)
					     ? "" : " l2-clear",
			       !sync_test_bit(i, sh->evtchn_mask)
					     ? "" : " globally-masked",
			       sync_test_bit(i, cpu_evtchn)
					     ? "" : " locally-masked");
1148 1149 1150 1151 1152 1153 1154 1155
		}
	}

	spin_unlock_irqrestore(&debug_lock, flags);

	return IRQ_HANDLED;
}

1156
static DEFINE_PER_CPU(unsigned, xed_nesting_count);
1157 1158
static DEFINE_PER_CPU(unsigned int, current_word_idx);
static DEFINE_PER_CPU(unsigned int, current_bit_idx);
1159

1160 1161 1162 1163
/*
 * Mask out the i least significant bits of w.
 *
 * Both arguments are parenthesised so that compound expressions such as
 * MASK_LSBS(a | b, i) or a conditional shift count expand with the
 * intended precedence.  i must be smaller than the width of unsigned long.
 */
#define MASK_LSBS(w, i) ((w) & ((~0UL) << (i)))
1164

J
Jeremy Fitzhardinge 已提交
1165 1166 1167 1168 1169 1170 1171 1172 1173
/*
 * Search the CPUs pending events bitmasks.  For each one found, map
 * the event number to an irq, and feed it into do_IRQ() for
 * handling.
 *
 * Xen uses a two-level bitmap to speed searching.  The first level is
 * a bitset of words which contain pending event bits.  The second
 * level is a bitset of pending events themselves.
 */
1174
static void __xen_evtchn_do_upcall(void)
J
Jeremy Fitzhardinge 已提交
1175
{
1176
	int start_word_idx, start_bit_idx;
1177
	int word_idx, bit_idx;
1178
	int i;
J
Jeremy Fitzhardinge 已提交
1179 1180
	int cpu = get_cpu();
	struct shared_info *s = HYPERVISOR_shared_info;
C
Christoph Lameter 已提交
1181
	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
1182
	unsigned count;
J
Jeremy Fitzhardinge 已提交
1183

1184 1185
	do {
		unsigned long pending_words;
J
Jeremy Fitzhardinge 已提交
1186

1187
		vcpu_info->evtchn_upcall_pending = 0;
J
Jeremy Fitzhardinge 已提交
1188

C
Christoph Lameter 已提交
1189
		if (__this_cpu_inc_return(xed_nesting_count) - 1)
1190
			goto out;
J
Jeremy Fitzhardinge 已提交
1191

1192 1193
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
		/* Clear master flag /before/ clearing selector flag. */
1194
		wmb();
1195
#endif
1196
		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
1197

1198 1199 1200 1201
		start_word_idx = __this_cpu_read(current_word_idx);
		start_bit_idx = __this_cpu_read(current_bit_idx);

		word_idx = start_word_idx;
1202

1203
		for (i = 0; pending_words != 0; i++) {
1204
			unsigned long pending_bits;
1205
			unsigned long words;
1206

1207 1208 1209
			words = MASK_LSBS(pending_words, word_idx);

			/*
1210
			 * If we masked out all events, wrap to beginning.
1211 1212
			 */
			if (words == 0) {
1213 1214
				word_idx = 0;
				bit_idx = 0;
1215 1216 1217
				continue;
			}
			word_idx = __ffs(words);
1218

1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230
			pending_bits = active_evtchns(cpu, s, word_idx);
			bit_idx = 0; /* usually scan entire word from start */
			if (word_idx == start_word_idx) {
				/* We scan the starting word in two parts */
				if (i == 0)
					/* 1st time: start in the middle */
					bit_idx = start_bit_idx;
				else
					/* 2nd time: mask bits done already */
					bit_idx &= (1UL << start_bit_idx) - 1;
			}

1231 1232 1233
			do {
				unsigned long bits;
				int port, irq;
1234
				struct irq_desc *desc;
1235

1236 1237 1238
				bits = MASK_LSBS(pending_bits, bit_idx);

				/* If we masked out all events, move on. */
1239
				if (bits == 0)
1240 1241 1242 1243 1244 1245 1246 1247
					break;

				bit_idx = __ffs(bits);

				/* Process port. */
				port = (word_idx * BITS_PER_LONG) + bit_idx;
				irq = evtchn_to_irq[port];

1248 1249 1250 1251 1252
				if (irq != -1) {
					desc = irq_to_desc(irq);
					if (desc)
						generic_handle_irq_desc(irq, desc);
				}
1253

1254 1255 1256 1257 1258 1259 1260 1261
				bit_idx = (bit_idx + 1) % BITS_PER_LONG;

				/* Next caller starts at last processed + 1 */
				__this_cpu_write(current_word_idx,
						 bit_idx ? word_idx :
						 (word_idx+1) % BITS_PER_LONG);
				__this_cpu_write(current_bit_idx, bit_idx);
			} while (bit_idx != 0);
1262

1263 1264
			/* Scan start_l1i twice; all others once. */
			if ((word_idx != start_word_idx) || (i != 0))
1265
				pending_words &= ~(1UL << word_idx);
1266 1267

			word_idx = (word_idx + 1) % BITS_PER_LONG;
J
Jeremy Fitzhardinge 已提交
1268 1269
		}

1270 1271
		BUG_ON(!irqs_disabled());

C
Christoph Lameter 已提交
1272 1273
		count = __this_cpu_read(xed_nesting_count);
		__this_cpu_write(xed_nesting_count, 0);
1274
	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
1275 1276

out:
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289

	put_cpu();
}

/*
 * Event upcall entry point taking the interrupted register state.
 *
 * Installs @regs as the current irq regs, runs the event scan inside an
 * irq_enter()/irq_exit() section, and restores the previous irq regs
 * afterwards.
 */
void xen_evtchn_do_upcall(struct pt_regs *regs)
{
	struct pt_regs *saved_regs;

	saved_regs = set_irq_regs(regs);

	exit_idle();
	irq_enter();

	__xen_evtchn_do_upcall();

	irq_exit();

	set_irq_regs(saved_regs);
}
1293

1294 1295 1296
/*
 * Exported upcall entry point: runs the event scan directly, without the
 * irq regs / irq_enter() bracketing done by xen_evtchn_do_upcall().
 */
void xen_hvm_evtchn_do_upcall(void)
{
	__xen_evtchn_do_upcall();
}
1298
EXPORT_SYMBOL_GPL(xen_hvm_evtchn_do_upcall);
J
Jeremy Fitzhardinge 已提交
1299

J
Jeremy Fitzhardinge 已提交
1300 1301 1302
/* Rebind a new event channel to an existing irq. */
void rebind_evtchn_irq(int evtchn, int irq)
{
J
Jeremy Fitzhardinge 已提交
1303 1304
	struct irq_info *info = info_for_irq(irq);

J
Jeremy Fitzhardinge 已提交
1305 1306 1307 1308
	/* Make sure the irq is masked, since the new event channel
	   will also be masked. */
	disable_irq(irq);

1309
	mutex_lock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
1310 1311 1312 1313

	/* After resume the irq<->evtchn mappings are all cleared out */
	BUG_ON(evtchn_to_irq[evtchn] != -1);
	/* Expect irq to have been bound before,
J
Jeremy Fitzhardinge 已提交
1314 1315
	   so there should be a proper type */
	BUG_ON(info->type == IRQT_UNBOUND);
J
Jeremy Fitzhardinge 已提交
1316

1317
	xen_irq_info_evtchn_init(irq, evtchn);
J
Jeremy Fitzhardinge 已提交
1318

1319
	mutex_unlock(&irq_mapping_update_lock);
J
Jeremy Fitzhardinge 已提交
1320 1321

	/* new event channels are always bound to cpu 0 */
1322
	irq_set_affinity(irq, cpumask_of(0));
J
Jeremy Fitzhardinge 已提交
1323 1324 1325 1326 1327

	/* Unmask the event channel. */
	enable_irq(irq);
}

J
Jeremy Fitzhardinge 已提交
1328
/* Rebind an evtchn so that it gets delivered to a specific cpu */
1329
static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
J
Jeremy Fitzhardinge 已提交
1330 1331 1332 1333
{
	struct evtchn_bind_vcpu bind_vcpu;
	int evtchn = evtchn_from_irq(irq);

1334 1335 1336 1337 1338 1339 1340 1341
	if (!VALID_EVTCHN(evtchn))
		return -1;

	/*
	 * Events delivered via platform PCI interrupts are always
	 * routed to vcpu 0 and hence cannot be rebound.
	 */
	if (xen_hvm_domain() && !xen_have_vector_callback)
1342
		return -1;
J
Jeremy Fitzhardinge 已提交
1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355

	/* Send future instances of this interrupt to other vcpu. */
	bind_vcpu.port = evtchn;
	bind_vcpu.vcpu = tcpu;

	/*
	 * If this fails, it usually just indicates that we're dealing with a
	 * virq or IPI channel, which don't actually need to be rebound. Ignore
	 * it, but don't do the xenlinux-level rebind in that case.
	 */
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
		bind_evtchn_to_cpu(evtchn, tcpu);

1356 1357
	return 0;
}
J
Jeremy Fitzhardinge 已提交
1358

1359 1360
/*
 * irq_chip .irq_set_affinity callback: rebind the irq to the first cpu
 * in @dest.  @force is not used.
 */
static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
			    bool force)
{
	return rebind_irq_to_cpu(data->irq, cpumask_first(dest));
}

1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382
/*
 * Re-raise @irq by setting its event channel's pending bit.
 *
 * The channel is masked while the pending bit is set and unmasked again
 * only if it was not masked beforehand.  Always returns 1, including
 * when @irq has no valid event channel.
 */
int resend_irq_on_evtchn(unsigned int irq)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	int evtchn = evtchn_from_irq(irq);

	if (VALID_EVTCHN(evtchn)) {
		int was_masked = sync_test_and_set_bit(evtchn, s->evtchn_mask);

		sync_set_bit(evtchn, s->evtchn_pending);
		if (!was_masked)
			unmask_evtchn(evtchn);
	}

	return 1;
}

1383
static void enable_dynirq(struct irq_data *data)
J
Jeremy Fitzhardinge 已提交
1384
{
1385
	int evtchn = evtchn_from_irq(data->irq);
J
Jeremy Fitzhardinge 已提交
1386 1387 1388 1389 1390

	if (VALID_EVTCHN(evtchn))
		unmask_evtchn(evtchn);
}

1391
static void disable_dynirq(struct irq_data *data)
J
Jeremy Fitzhardinge 已提交
1392
{
1393
	int evtchn = evtchn_from_irq(data->irq);
J
Jeremy Fitzhardinge 已提交
1394 1395 1396 1397 1398

	if (VALID_EVTCHN(evtchn))
		mask_evtchn(evtchn);
}

1399
static void ack_dynirq(struct irq_data *data)
J
Jeremy Fitzhardinge 已提交
1400
{
1401
	int evtchn = evtchn_from_irq(data->irq);
J
Jeremy Fitzhardinge 已提交
1402

1403
	irq_move_irq(data);
J
Jeremy Fitzhardinge 已提交
1404 1405

	if (VALID_EVTCHN(evtchn))
1406 1407 1408 1409 1410 1411 1412
		clear_evtchn(evtchn);
}

/* Combined mask + ack: mask the channel first, then acknowledge it. */
static void mask_ack_dynirq(struct irq_data *data)
{
	disable_dynirq(data);
	ack_dynirq(data);
}

1415
static int retrigger_dynirq(struct irq_data *data)
J
Jeremy Fitzhardinge 已提交
1416
{
1417
	int evtchn = evtchn_from_irq(data->irq);
1418
	struct shared_info *sh = HYPERVISOR_shared_info;
J
Jeremy Fitzhardinge 已提交
1419 1420 1421
	int ret = 0;

	if (VALID_EVTCHN(evtchn)) {
1422 1423 1424 1425 1426 1427
		int masked;

		masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
		sync_set_bit(evtchn, sh->evtchn_pending);
		if (!masked)
			unmask_evtchn(evtchn);
J
Jeremy Fitzhardinge 已提交
1428 1429 1430 1431 1432 1433
		ret = 1;
	}

	return ret;
}

1434
static void restore_pirqs(void)
1435 1436 1437
{
	int pirq, rc, irq, gsi;
	struct physdev_map_pirq map_irq;
1438
	struct irq_info *info;
1439

1440 1441
	list_for_each_entry(info, &xen_irq_list_head, list) {
		if (info->type != IRQT_PIRQ)
1442 1443
			continue;

1444 1445 1446 1447
		pirq = info->u.pirq.pirq;
		gsi = info->u.pirq.gsi;
		irq = info->irq;

1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461
		/* save/restore of PT devices doesn't work, so at this point the
		 * only devices present are GSI based emulated devices */
		if (!gsi)
			continue;

		map_irq.domid = DOMID_SELF;
		map_irq.type = MAP_PIRQ_TYPE_GSI;
		map_irq.index = gsi;
		map_irq.pirq = pirq;

		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
		if (rc) {
			printk(KERN_WARNING "xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
					gsi, irq, pirq, rc);
1462
			xen_free_irq(irq);
1463 1464 1465 1466 1467
			continue;
		}

		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);

1468
		__startup_pirq(irq);
1469 1470 1471
	}
}

1472 1473 1474 1475 1476 1477 1478 1479 1480
/*
 * Obtain a fresh event channel from Xen for every VIRQ that @cpu has
 * bound to an irq, and record the new irq <-> channel mapping.  A failed
 * bind hypercall is fatal (BUG).
 */
static void restore_cpu_virqs(unsigned int cpu)
{
	struct evtchn_bind_virq bind_virq;
	int virq;

	for (virq = 0; virq < NR_VIRQS; virq++) {
		int irq = per_cpu(virq_to_irq, cpu)[virq];
		int evtchn;
		int rc;

		/* This VIRQ is not bound on this cpu. */
		if (irq == -1)
			continue;

		BUG_ON(virq_from_irq(irq) != virq);

		/* Get a new binding from Xen. */
		bind_virq.virq = virq;
		bind_virq.vcpu = cpu;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						 &bind_virq);
		if (rc != 0)
			BUG();
		evtchn = bind_virq.port;

		/* Record the new mapping. */
		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
		bind_evtchn_to_cpu(evtchn, cpu);
	}
}

/*
 * Obtain a fresh event channel from Xen for every IPI vector that @cpu
 * has bound to an irq, and record the new irq <-> channel mapping.  A
 * failed bind hypercall is fatal (BUG).
 */
static void restore_cpu_ipis(unsigned int cpu)
{
	struct evtchn_bind_ipi bind_ipi;
	int ipi;

	for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
		int irq = per_cpu(ipi_to_irq, cpu)[ipi];
		int evtchn;
		int rc;

		/* This IPI vector is not bound on this cpu. */
		if (irq == -1)
			continue;

		BUG_ON(ipi_from_irq(irq) != ipi);

		/* Get a new binding from Xen. */
		bind_ipi.vcpu = cpu;
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
						 &bind_ipi);
		if (rc != 0)
			BUG();
		evtchn = bind_ipi.port;

		/* Record the new mapping. */
		xen_irq_info_ipi_init(cpu, irq, evtchn, ipi);
		bind_evtchn_to_cpu(evtchn, cpu);
	}
}

1521 1522 1523 1524 1525 1526 1527 1528
/*
 * Clear an irq's pending state, in preparation for polling on it.
 * Does nothing if @irq has no valid event channel.
 */
void xen_clear_irq_pending(int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	clear_evtchn(evtchn);
}
1529
EXPORT_SYMBOL(xen_clear_irq_pending);
1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548
/*
 * Mark the event channel behind @irq as pending.
 * Does nothing if @irq has no valid event channel.
 */
void xen_set_irq_pending(int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return;

	set_evtchn(evtchn);
}

/*
 * Report whether the event channel behind @irq is pending.
 * Returns false if @irq has no valid event channel.
 */
bool xen_test_irq_pending(int irq)
{
	int evtchn = evtchn_from_irq(irq);

	if (!VALID_EVTCHN(evtchn))
		return false;

	return test_evtchn(evtchn);
}

1549 1550 1551
/*
 * Poll waiting for an irq to become pending with timeout.  In the usual
 * case, the irq will be disabled so it won't deliver an interrupt.
 *
 * Issues a SCHEDOP_poll on the irq's single event channel; a failing
 * hypercall is fatal (BUG).  Does nothing if @irq has no valid event
 * channel.
 */
void xen_poll_irq_timeout(int irq, u64 timeout)
{
	evtchn_port_t evtchn = evtchn_from_irq(irq);
	struct sched_poll poll;

	if (!VALID_EVTCHN(evtchn))
		return;

	poll.nr_ports = 1;
	poll.timeout = timeout;
	set_xen_guest_handle(poll.ports, &evtchn);

	if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
		BUG();
}
1566 1567 1568 1569 1570 1571 1572
EXPORT_SYMBOL(xen_poll_irq_timeout);
/*
 * Poll waiting for an irq to become pending.  In the usual case, the
 * irq will be disabled so it won't deliver an interrupt.
 *
 * Thin wrapper around xen_poll_irq_timeout() that passes a timeout of 0,
 * i.e. waits without a timeout.
 */
void xen_poll_irq(int irq)
{
	xen_poll_irq_timeout(irq, 0 /* no timeout */);
}
1573

1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585
/*
 * Check whether the IRQ line is shared with other guests.
 *
 * Note the polarity of the result: returns 1 when the hypervisor does
 * NOT report the pirq as shared, and 0 when it is shared or when the
 * status query fails.
 */
int xen_test_irq_shared(int irq)
{
	struct irq_info *info = info_for_irq(irq);
	struct physdev_irq_status_query irq_status = { .irq = info->u.pirq.pirq };
	int rc;

	rc = HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status);
	if (rc)
		return 0;

	if (irq_status.flags & XENIRQSTAT_shared)
		return 0;

	return 1;
}
EXPORT_SYMBOL_GPL(xen_test_irq_shared);

1586 1587
void xen_irq_resume(void)
{
1588 1589
	unsigned int cpu, evtchn;
	struct irq_info *info;
1590 1591 1592 1593 1594 1595 1596 1597

	init_evtchn_cpu_bindings();

	/* New event-channel space is not 'live' yet. */
	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
		mask_evtchn(evtchn);

	/* No IRQ <-> event-channel mappings. */
1598 1599
	list_for_each_entry(info, &xen_irq_list_head, list)
		info->evtchn = 0; /* zap event-channel binding */
1600 1601 1602 1603 1604 1605 1606 1607

	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
		evtchn_to_irq[evtchn] = -1;

	for_each_possible_cpu(cpu) {
		restore_cpu_virqs(cpu);
		restore_cpu_ipis(cpu);
	}
1608

1609
	restore_pirqs();
1610 1611
}

J
Jeremy Fitzhardinge 已提交
1612
/* irq_chip operations registered under the name "xen-dyn". */
static struct irq_chip xen_dynamic_chip __read_mostly = {
	.name			= "xen-dyn",

	/* Disabling and masking share one implementation. */
	.irq_mask		= disable_dynirq,
	.irq_unmask		= enable_dynirq,
	.irq_disable		= disable_dynirq,

	.irq_ack		= ack_dynirq,
	.irq_mask_ack		= mask_ack_dynirq,

	.irq_retrigger		= retrigger_dynirq,
	.irq_set_affinity	= set_affinity_irq,
};

1626
/*
 * irq_chip operations registered under the name "xen-pirq".  Mask,
 * unmask, affinity and retrigger reuse the dynirq helpers; the rest use
 * pirq-specific callbacks.
 */
static struct irq_chip xen_pirq_chip __read_mostly = {
	.name			= "xen-pirq",

	.irq_startup		= startup_pirq,
	.irq_shutdown		= shutdown_pirq,

	.irq_enable		= enable_pirq,
	.irq_disable		= disable_pirq,

	.irq_mask		= disable_dynirq,
	.irq_unmask		= enable_dynirq,

	/* Both ack and eoi are handled by eoi_pirq. */
	.irq_ack		= eoi_pirq,
	.irq_eoi		= eoi_pirq,
	.irq_mask_ack		= mask_ack_pirq,

	.irq_retrigger		= retrigger_dynirq,
	.irq_set_affinity	= set_affinity_irq,
};

1646
/*
 * irq_chip operations registered under the name "xen-percpu".  Unlike
 * xen_dynamic_chip it provides no mask_ack, affinity or retrigger
 * callbacks.
 */
static struct irq_chip xen_percpu_chip __read_mostly = {
	.name			= "xen-percpu",

	/* Disabling and masking share one implementation. */
	.irq_mask		= disable_dynirq,
	.irq_unmask		= enable_dynirq,
	.irq_disable		= disable_dynirq,

	.irq_ack		= ack_dynirq,
};

1656 1657 1658 1659 1660 1661 1662 1663 1664 1665
/*
 * Set this domain's HVM_PARAM_CALLBACK_IRQ parameter to @via.
 * Returns the result of the HVMOP_set_param hypercall.
 */
int xen_set_callback_via(uint64_t via)
{
	struct xen_hvm_param param;

	param.domid = DOMID_SELF;
	param.index = HVM_PARAM_CALLBACK_IRQ;
	param.value = via;

	return HYPERVISOR_hvm_op(HVMOP_set_param, &param);
}
EXPORT_SYMBOL_GPL(xen_set_callback_via);

1666
#ifdef CONFIG_XEN_PVHVM
/* Vector callbacks are better than PCI interrupts to receive event
 * channel notifications because we can receive vector callbacks on any
 * vcpu and we don't need PCI support or APIC interactions.
 *
 * Does nothing unless xen_have_vector_callback is set.  If the hypervisor
 * rejects the request, xen_have_vector_callback is cleared and an error
 * is logged. */
void xen_callback_vector(void)
{
	uint64_t callback_via;
	int rc;

	if (!xen_have_vector_callback)
		return;

	callback_via = HVM_CALLBACK_VECTOR(XEN_HVM_EVTCHN_CALLBACK);
	rc = xen_set_callback_via(callback_via);
	if (rc) {
		printk(KERN_ERR "Request for Xen HVM callback vector"
				" failed.\n");
		xen_have_vector_callback = 0;
		return;
	}

	printk(KERN_INFO "Xen HVM callback vector for event delivery is "
			"enabled\n");

	/* in the restore case the vector has already been allocated */
	if (test_bit(XEN_HVM_EVTCHN_CALLBACK, used_vectors))
		return;

	alloc_intr_gate(XEN_HVM_EVTCHN_CALLBACK, xen_hvm_callback_vector);
}
#else
/* Without CONFIG_XEN_PVHVM there is no vector callback to set up. */
void xen_callback_vector(void) {}
#endif
1693

J
Jeremy Fitzhardinge 已提交
1694 1695
/*
 * Boot-time initialisation of the event-channel irq layer.
 *
 * Allocates the evtchn_to_irq table with every entry set to -1 (failure
 * to allocate is a BUG), resets the cpu bindings, masks all event
 * channels, and then performs the HVM- or PV-specific irq setup.
 */
void __init xen_init_IRQ(void)
{
	int port;

	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
				GFP_KERNEL);
	BUG_ON(!evtchn_to_irq);

	/* Start with no event channel mapped to any irq. */
	for (port = 0; port < NR_EVENT_CHANNELS; port++)
		evtchn_to_irq[port] = -1;

	init_evtchn_cpu_bindings();

	/* No event channels are 'live' right now. */
	for (port = 0; port < NR_EVENT_CHANNELS; port++)
		mask_evtchn(port);

	if (!xen_hvm_domain()) {
		irq_ctx_init(smp_processor_id());
		if (xen_initial_domain())
			pci_xen_initial_domain();
		return;
	}

	xen_callback_vector();
	native_init_IRQ();
	/* pci_xen_hvm_init must be called after native_init_IRQ so that
	 * __acpi_register_gsi can point at the right function */
	pci_xen_hvm_init();
}