io_apic.c 99.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/*
 *	Intel IO-APIC support for multi-Pentium hosts.
 *
I
Ingo Molnar 已提交
4
 *	Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
L
Linus Torvalds 已提交
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
 *
 *	Many thanks to Stig Venaas for trying out countless experimental
 *	patches and reporting/debugging problems patiently!
 *
 *	(c) 1999, Multiple IO-APIC support, developed by
 *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
 *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
 *	further tested and cleaned up by Zach Brown <zab@redhat.com>
 *	and Ingo Molnar <mingo@redhat.com>
 *
 *	Fixes
 *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
 *					thanks to Eric Gilmore
 *					and Rolf G. Tews
 *					for testing these extensively
 *	Paul Diefenbaugh	:	Added full ACPI support
 */

#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/sched.h>
28
#include <linux/pci.h>
L
Linus Torvalds 已提交
29 30 31
#include <linux/mc146818rtc.h>
#include <linux/compiler.h>
#include <linux/acpi.h>
32
#include <linux/module.h>
L
Linus Torvalds 已提交
33
#include <linux/sysdev.h>
34
#include <linux/msi.h>
35
#include <linux/htirq.h>
36
#include <linux/freezer.h>
37
#include <linux/kthread.h>
38
#include <linux/jiffies.h>	/* time_after() */
39
#include <linux/slab.h>
40 41 42 43 44
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
#include <linux/bootmem.h>
#include <linux/dmar.h>
45
#include <linux/hpet.h>
46

47
#include <asm/idle.h>
L
Linus Torvalds 已提交
48 49
#include <asm/io.h>
#include <asm/smp.h>
50
#include <asm/cpu.h>
L
Linus Torvalds 已提交
51
#include <asm/desc.h>
52 53 54
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/dma.h>
L
Linus Torvalds 已提交
55
#include <asm/timer.h>
56
#include <asm/i8259.h>
57
#include <asm/msidef.h>
58
#include <asm/hypertransport.h>
59
#include <asm/setup.h>
60
#include <asm/irq_remapping.h>
61
#include <asm/hpet.h>
62
#include <asm/hw_irq.h>
L
Linus Torvalds 已提交
63

I
Ingo Molnar 已提交
64
#include <asm/apic.h>
L
Linus Torvalds 已提交
65

66
#define __apicdebuginit(type) static type __init
67 68
#define for_each_irq_pin(entry, head) \
	for (entry = head; entry; entry = entry->next)
69

L
Linus Torvalds 已提交
70
/*
71 72
 *      Is the SiS APIC rmw bug present ?
 *      -1 = don't know, 0 = no, 1 = yes
L
Linus Torvalds 已提交
73 74 75
 */
int sis_apic_bug = -1;

76 77
static DEFINE_RAW_SPINLOCK(ioapic_lock);
static DEFINE_RAW_SPINLOCK(vector_lock);
Y
Yinghai Lu 已提交
78

L
Linus Torvalds 已提交
79 80 81 82 83
/*
 * # of IRQ routing registers
 */
int nr_ioapic_registers[MAX_IO_APICS];

84
/* I/O APIC entries */
85
struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86 87
int nr_ioapics;

88 89 90
/* IO APIC gsi routing info */
struct mp_ioapic_gsi  mp_gsi_routing[MAX_IO_APICS];

91 92
/* The one past the highest gsi number used */
u32 gsi_top;
93

94
/* MP IRQ source entries */
95
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
96 97 98 99

/* # of MP IRQ source entries */
int mp_irq_entries;

100 101 102
/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;

103 104 105 106 107 108
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif

DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);

Y
Yinghai Lu 已提交
109 110
int skip_ioapic_setup;

111 112 113 114 115 116 117 118 119
/*
 * Switch IO-APIC usage off: sets skip_ioapic_setup and, when PCI support
 * is configured, also sets the noioapicquirk / noioapicreroute knobs.
 */
void arch_disable_smp_support(void)
{
	skip_ioapic_setup = 1;
#ifdef CONFIG_PCI
	noioapicreroute = -1;
	noioapicquirk = 1;
#endif
}

120
/*
 * Handler for the "noapic" early parameter (registered right below).
 * @str is not examined. Always returns 0.
 */
static int __init parse_noapic(char *str)
{
	/* disable IO-APIC */
	arch_disable_smp_support();
	return 0;
}
early_param("noapic", parse_noapic);
127

128 129 130 131 132 133 134 135 136 137 138
/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
void mp_save_irq(struct mpc_intsrc *m)
{
	int i;

	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
		m->srcbusirq, m->dstapic, m->dstirq);

	for (i = 0; i < mp_irq_entries; i++) {
139
		if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
140 141 142
			return;
	}

143
	memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
144 145 146 147
	if (++mp_irq_entries == MAX_IRQ_SOURCES)
		panic("Max # of irq sources exceeded!!\n");
}

148 149 150 151 152
/*
 * One (IO-APIC index, pin) pair attached to an IRQ; entries are chained
 * through ->next into a singly linked, NULL-terminated list.
 */
struct irq_pin_list {
	int apic;
	int pin;
	struct irq_pin_list *next;
};

T
Thomas Gleixner 已提交
153
static struct irq_pin_list *alloc_irq_pin_list(int node)
154
{
155
	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
156 157
}

158

Y
Yinghai Lu 已提交
159
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
160
#ifdef CONFIG_SPARSE_IRQ
161
static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
162
#else
163
static struct irq_cfg irq_cfgx[NR_IRQS];
164
#endif
Y
Yinghai Lu 已提交
165

166
/*
 * Early setup of the static irq_cfgx[] table.
 *
 * Attaches irq_cfgx[i] as chip data of IRQ i, allocates the two cpumasks
 * of every entry on the node of CPU 0, and gives each legacy IRQ its
 * fixed vector (IRQ0_VECTOR + i) on CPU 0.
 *
 * Always returns 0.
 */
int __init arch_early_irq_init(void)
{
	struct irq_cfg *cfg;
	int count, node, i;

	/* No legacy PIC IRQs: no GSIs are reserved and every IRQ is IO-APIC */
	if (!legacy_pic->nr_legacy_irqs) {
		nr_irqs_gsi = 0;
		io_apic_irqs = ~0UL;
	}

	cfg = irq_cfgx;
	count = ARRAY_SIZE(irq_cfgx);
	node = cpu_to_node(0);

	/* Make sure the legacy interrupts are marked in the bitmap */
	irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);

	for (i = 0; i < count; i++) {
		set_irq_chip_data(i, &cfg[i]);
		/* NOTE(review): allocation results are not checked here */
		zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
		/*
		 * For legacy IRQ's, start with assigning irq0 to irq15 to
		 * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
		 */
		if (i < legacy_pic->nr_legacy_irqs) {
			cfg[i].vector = IRQ0_VECTOR + i;
			cpumask_set_cpu(0, cfg[i].domain);
		}
	}

	return 0;
}
199

200
#ifdef CONFIG_SPARSE_IRQ
201
/* SPARSE_IRQ variant: the irq_cfg is whatever is stored as @irq's chip data. */
static struct irq_cfg *irq_cfg(unsigned int irq)
{
	return get_irq_chip_data(irq);
}
T
Thomas Gleixner 已提交
205

206
static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
207
{
208
	struct irq_cfg *cfg;
209

210
	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
211 212
	if (!cfg)
		return NULL;
213
	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
214
		goto out_cfg;
215
	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
216
		goto out_domain;
217
	return cfg;
218 219 220 221 222
out_domain:
	free_cpumask_var(cfg->domain);
out_cfg:
	kfree(cfg);
	return NULL;
223 224
}

225
static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
226
{
227 228 229
	if (!cfg)
		return;
	set_irq_chip_data(at, NULL);
230 231 232 233 234
	free_cpumask_var(cfg->domain);
	free_cpumask_var(cfg->old_domain);
	kfree(cfg);
}

235
#else
236

237
struct irq_cfg *irq_cfg(unsigned int irq)
238 239
{
	return irq < nr_irqs ? irq_cfgx + irq : NULL;
240
}
L
Linus Torvalds 已提交
241

242
static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
243 244 245 246
{
	return irq_cfgx + irq;
}

247
/* Static-table variant: alloc_irq_cfg() hands out irq_cfgx[] slots, so there is nothing to free. */
static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
248

249 250
#endif

251 252 253 254 255 256 257 258 259 260 261 262 263
/*
 * Make sure IRQ @at has both a descriptor and an irq_cfg.
 *
 * If the descriptor already exists (-EEXIST) and already carries chip
 * data, that irq_cfg is returned. Otherwise a new irq_cfg is allocated on
 * @node and attached as chip data.
 *
 * Returns the irq_cfg, or NULL if the descriptor could not be allocated
 * (any error other than -EEXIST) or the irq_cfg allocation failed; in the
 * latter case the descriptor at @at is freed.
 */
static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
{
	int res = irq_alloc_desc_at(at, node);
	struct irq_cfg *cfg;

	if (res < 0) {
		if (res != -EEXIST)
			return NULL;
		cfg = get_irq_chip_data(at);
		if (cfg)
			return cfg;
	}

	cfg = alloc_irq_cfg(at, node);
	if (cfg)
		set_irq_chip_data(at, cfg);
	else
		irq_free_desc(at);
	return cfg;
}

/*
 * Thin wrapper: forwards @from and @node to irq_alloc_desc_from() and
 * returns its result unchanged.
 */
static int alloc_irq_from(unsigned int from, int node)
{
	return irq_alloc_desc_from(from, node);
}

/* Release the irq_cfg of IRQ @at first, then the IRQ descriptor itself. */
static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
{
	free_irq_cfg(at, cfg);
	irq_free_desc(at);
}

L
Linus Torvalds 已提交
283 284 285 286
/*
 * Layout of the memory-mapped IO-APIC register window as used by the
 * accessors below. With 4-byte unsigned int the padding arrays place
 * ->data at byte offset 0x10 and ->eoi at byte offset 0x40.
 */
struct io_apic {
	unsigned int index;		/* register select, offset 0x00 */
	unsigned int unused[3];
	unsigned int data;		/* window onto the selected register */
	unsigned int unused2[11];
	unsigned int eoi;		/* written by io_apic_eoi() */
};

/*
 * Return the mapped register window of IO-APIC @idx: the fixmap slot
 * FIX_IO_APIC_BASE_0 + @idx plus the bits of the recorded apicaddr that
 * are selected by ~PAGE_MASK.
 */
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
{
	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
		+ (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
}

297 298 299 300 301 302
/* Write @vector to the EOI register of IO-APIC @apic. */
static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	writel(vector, &io_apic->eoi);
}

L
Linus Torvalds 已提交
303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
/*
 * Read register @reg of IO-APIC @apic.
 *
 * The access is indirect: @reg is first written to the index register and
 * the value is then read through the data window, so the two steps must
 * stay in this order. Callers in this file wrap the pair in ioapic_lock.
 */
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	/* select the register ... */
	writel(reg, &io_apic->index);
	/* ... then fetch it through the data window */
	return readl(&io_apic->data);
}

/*
 * Write @value to register @reg of IO-APIC @apic: select @reg through the
 * index register, then store @value through the data window.
 */
static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
{
	struct io_apic __iomem *io_apic = io_apic_base(apic);
	/* select the register ... */
	writel(reg, &io_apic->index);
	/* ... then store through the data window */
	writel(value, &io_apic->data);
}

/*
 * Re-write a value: to be used for read-modify-write
 * cycles where the read already set up the index register.
 *
 * Older SiS APIC requires we rewrite the index register
 */
static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
{
325
	struct io_apic __iomem *io_apic = io_apic_base(apic);
T
Thomas Gleixner 已提交
326 327 328

	if (sis_apic_bug)
		writel(reg, &io_apic->index);
L
Linus Torvalds 已提交
329 330 331
	writel(value, &io_apic->data);
}

Y
Yinghai Lu 已提交
332
static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
333 334 335 336
{
	struct irq_pin_list *entry;
	unsigned long flags;

337
	raw_spin_lock_irqsave(&ioapic_lock, flags);
338
	for_each_irq_pin(entry, cfg->irq_2_pin) {
339 340 341 342 343 344 345
		unsigned int reg;
		int pin;

		pin = entry->pin;
		reg = io_apic_read(entry->apic, 0x10 + pin*2);
		/* Is the remote IRR bit set? */
		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
346
			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
347 348 349
			return true;
		}
	}
350
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
351 352 353 354

	return false;
}

355 356 357 358 359 360 361 362 363
/*
 * Overlays a routing entry with two 32-bit words so that it can be moved
 * to and from the IO-APIC one register at a time: the helpers below use
 * w1 for register 0x10 + 2*pin and w2 for register 0x11 + 2*pin.
 */
union entry_union {
	struct { u32 w1, w2; };
	struct IO_APIC_route_entry entry;
};

static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
{
	union entry_union eu;
	unsigned long flags;
364
	raw_spin_lock_irqsave(&ioapic_lock, flags);
365 366
	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
367
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
368 369 370
	return eu.entry;
}

371 372 373 374 375 376
/*
 * When we write a new IO APIC routing entry, we need to write the high
 * word first! If the mask bit in the low word is clear, we will enable
 * the interrupt, and we need to make sure the entry is fully populated
 * before that happens.
 */
377 378
static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
379
{
380 381
	union entry_union eu = {{0, 0}};

382
	eu.entry = e;
383 384
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
385 386
}

387
static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
388 389
{
	unsigned long flags;
390
	raw_spin_lock_irqsave(&ioapic_lock, flags);
391
	__ioapic_write_entry(apic, pin, e);
392
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
393 394 395 396 397 398 399 400 401 402 403 404
}

/*
 * When we mask an IO APIC routing entry, we need to write the low
 * word first, in order to set the mask bit before we change the
 * high bits!
 */
static void ioapic_mask_entry(int apic, int pin)
{
	unsigned long flags;
	union entry_union eu = { .entry.mask = 1 };

405
	raw_spin_lock_irqsave(&ioapic_lock, flags);
406 407
	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
408
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
409 410
}

L
Linus Torvalds 已提交
411 412 413 414 415
/*
 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
 * shared ISA-space IRQs, so we have to support them. We are super
 * fast in the common case, and fast for shared ISA-space IRQs.
 */
416
static int
T
Thomas Gleixner 已提交
417
__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
L
Linus Torvalds 已提交
418
{
419
	struct irq_pin_list **last, *entry;
420

421 422 423
	/* don't allow duplicates */
	last = &cfg->irq_2_pin;
	for_each_irq_pin(entry, cfg->irq_2_pin) {
424
		if (entry->apic == apic && entry->pin == pin)
425
			return 0;
426
		last = &entry->next;
L
Linus Torvalds 已提交
427
	}
428

T
Thomas Gleixner 已提交
429
	entry = alloc_irq_pin_list(node);
430
	if (!entry) {
431 432 433
		printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
				node, apic, pin);
		return -ENOMEM;
434
	}
L
Linus Torvalds 已提交
435 436
	entry->apic = apic;
	entry->pin = pin;
437

438
	*last = entry;
439 440 441 442 443
	return 0;
}

/* Like __add_pin_to_irq_node(), but any failure is fatal (panic). */
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
	if (__add_pin_to_irq_node(cfg, node, apic, pin))
		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}

/*
 * Reroute an IRQ to a different pin.
 */
451
static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
452 453
					   int oldapic, int oldpin,
					   int newapic, int newpin)
L
Linus Torvalds 已提交
454
{
455
	struct irq_pin_list *entry;
L
Linus Torvalds 已提交
456

457
	for_each_irq_pin(entry, cfg->irq_2_pin) {
L
Linus Torvalds 已提交
458 459 460
		if (entry->apic == oldapic && entry->pin == oldpin) {
			entry->apic = newapic;
			entry->pin = newpin;
461
			/* every one is different, right? */
462
			return;
463
		}
L
Linus Torvalds 已提交
464
	}
465

466 467
	/* old apic/pin didn't exist, so just add new ones */
	add_pin_to_irq_node(cfg, node, newapic, newpin);
L
Linus Torvalds 已提交
468 469
}

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
/*
 * Read-modify-write the low redirection word (register 0x10 + pin * 2)
 * of the pin described by @entry: the value read is ANDed with @mask_and,
 * ORed with @mask_or and written back.
 *
 * The write-back goes through io_apic_modify(), which relies on the
 * preceding io_apic_read() having selected the register, so the order of
 * the statements matters. If @final is non-NULL it is called with @entry
 * after the write. No lock is taken here.
 */
static void __io_apic_modify_irq(struct irq_pin_list *entry,
				 int mask_and, int mask_or,
				 void (*final)(struct irq_pin_list *entry))
{
	unsigned int reg, pin;

	pin = entry->pin;
	reg = io_apic_read(entry->apic, 0x10 + pin * 2);
	reg &= mask_and;
	reg |= mask_or;
	io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
	if (final)
		final(entry);
}

485 486 487
static void io_apic_modify_irq(struct irq_cfg *cfg,
			       int mask_and, int mask_or,
			       void (*final)(struct irq_pin_list *entry))
488 489
{
	struct irq_pin_list *entry;
490

491 492 493 494 495 496 497 498 499 500 501 502 503 504
	for_each_irq_pin(entry, cfg->irq_2_pin)
		__io_apic_modify_irq(entry, mask_and, mask_or, final);
}

/*
 * For the pin in @entry: clear IO_APIC_REDIR_LEVEL_TRIGGER and set
 * IO_APIC_REDIR_MASKED in the low redirection word.
 */
static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
{
	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
			     IO_APIC_REDIR_MASKED, NULL);
}

/*
 * For the pin in @entry: clear IO_APIC_REDIR_MASKED and set
 * IO_APIC_REDIR_LEVEL_TRIGGER in the low redirection word.
 */
static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
{
	__io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
			     IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
506

507
static void io_apic_sync(struct irq_pin_list *entry)
L
Linus Torvalds 已提交
508
{
509 510 511 512 513 514
	/*
	 * Synchronize the IO-APIC and the CPU by doing
	 * a dummy read from the IO-APIC
	 */
	struct io_apic __iomem *io_apic;
	io_apic = io_apic_base(entry->apic);
Y
Yinghai Lu 已提交
515
	readl(&io_apic->data);
L
Linus Torvalds 已提交
516 517
}

T
Thomas Gleixner 已提交
518
static void mask_ioapic(struct irq_cfg *cfg)
519
{
T
Thomas Gleixner 已提交
520 521 522
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
Y
Yinghai Lu 已提交
523
	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
T
Thomas Gleixner 已提交
524
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
525
}
L
Linus Torvalds 已提交
526

527
static void mask_ioapic_irq(struct irq_data *data)
L
Linus Torvalds 已提交
528
{
529
	mask_ioapic(data->chip_data);
T
Thomas Gleixner 已提交
530
}
Y
Yinghai Lu 已提交
531

T
Thomas Gleixner 已提交
532 533 534
/*
 * Clear the mask bit on every pin of @cfg. Takes no lock itself;
 * unmask_ioapic() is the variant that takes ioapic_lock.
 */
static void __unmask_ioapic(struct irq_cfg *cfg)
{
	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}

T
Thomas Gleixner 已提交
537
static void unmask_ioapic(struct irq_cfg *cfg)
L
Linus Torvalds 已提交
538 539 540
{
	unsigned long flags;

541
	raw_spin_lock_irqsave(&ioapic_lock, flags);
T
Thomas Gleixner 已提交
542
	__unmask_ioapic(cfg);
543
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
L
Linus Torvalds 已提交
544 545
}

546
static void unmask_ioapic_irq(struct irq_data *data)
Y
Yinghai Lu 已提交
547
{
548
	unmask_ioapic(data->chip_data);
Y
Yinghai Lu 已提交
549 550
}

L
Linus Torvalds 已提交
551 552 553
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
{
	struct IO_APIC_route_entry entry;
554

L
Linus Torvalds 已提交
555
	/* Check delivery_mode to be sure we're not clearing an SMI pin */
556
	entry = ioapic_read_entry(apic, pin);
L
Linus Torvalds 已提交
557 558 559 560 561
	if (entry.delivery_mode == dest_SMI)
		return;
	/*
	 * Disable it in the IO-APIC irq-routing table:
	 */
562
	ioapic_mask_entry(apic, pin);
L
Linus Torvalds 已提交
563 564
}

565
static void clear_IO_APIC (void)
L
Linus Torvalds 已提交
566 567 568 569 570 571 572 573
{
	int apic, pin;

	for (apic = 0; apic < nr_ioapics; apic++)
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
			clear_IO_APIC_pin(apic, pin);
}

574
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
575 576 577 578 579 580
/*
 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
 * specific CPU-side IRQs.
 */

#define MAX_PIRQS 8
Y
Yinghai Lu 已提交
581 582 583
/*
 * Per-PIRQ redirection table filled from the "pirq=" option. Every slot
 * starts out as -1, which pin_2_irq() treats as "no redirection given".
 */
static int pirq_entries[MAX_PIRQS] = {
	[0 ... MAX_PIRQS - 1] = -1
};
L
Linus Torvalds 已提交
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609

/*
 * Parser for the "pirq=" boot option.
 *
 * The integers parsed from @str are stored into pirq_entries[] in reverse
 * order (first value -> last slot). At most MAX_PIRQS values are used;
 * opts[0] is taken as the number of values supplied (get_options()
 * convention, presumably -- confirm against its documentation).
 *
 * Always returns 1.
 */
static int __init ioapic_pirq_setup(char *str)
{
	int opts[MAX_PIRQS+1];
	int count, n;

	get_options(str, ARRAY_SIZE(opts), opts);

	apic_printk(APIC_VERBOSE, KERN_INFO
			"PIRQ redirection, working around broken MP-BIOS.\n");

	/* never consume more values than there are PIRQ slots */
	count = (opts[0] < MAX_PIRQS) ? opts[0] : MAX_PIRQS;

	for (n = 0; n < count; n++) {
		apic_printk(APIC_VERBOSE, KERN_DEBUG
				"... PIRQ%d -> IRQ %d\n", n, opts[n + 1]);
		/* PIRQs are mapped upside down, usually. */
		pirq_entries[MAX_PIRQS - 1 - n] = opts[n + 1];
	}

	return 1;
}

__setup("pirq=", ioapic_pirq_setup);
610 611
#endif /* CONFIG_X86_32 */

612 613 614 615 616 617
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
	int apic;
	struct IO_APIC_route_entry **ioapic_entries;

	ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
618
				GFP_KERNEL);
619 620 621 622 623 624
	if (!ioapic_entries)
		return 0;

	for (apic = 0; apic < nr_ioapics; apic++) {
		ioapic_entries[apic] =
			kzalloc(sizeof(struct IO_APIC_route_entry) *
625
				nr_ioapic_registers[apic], GFP_KERNEL);
626 627 628 629 630 631 632 633 634 635 636 637 638
		if (!ioapic_entries[apic])
			goto nomem;
	}

	return ioapic_entries;

nomem:
	while (--apic >= 0)
		kfree(ioapic_entries[apic]);
	kfree(ioapic_entries);

	return 0;
}
639 640

/*
641
 * Saves all the IO-APIC RTE's
642
 */
643
int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
644 645 646
{
	int apic, pin;

647 648
	if (!ioapic_entries)
		return -ENOMEM;
649 650

	for (apic = 0; apic < nr_ioapics; apic++) {
651 652
		if (!ioapic_entries[apic])
			return -ENOMEM;
653

654
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
655
			ioapic_entries[apic][pin] =
656
				ioapic_read_entry(apic, pin);
657
	}
658

659 660 661
	return 0;
}

662 663 664 665
/*
 * Mask all IO APIC entries.
 */
void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
666 667 668
{
	int apic, pin;

669 670 671
	if (!ioapic_entries)
		return;

672
	for (apic = 0; apic < nr_ioapics; apic++) {
673
		if (!ioapic_entries[apic])
674
			break;
675

676 677 678
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			struct IO_APIC_route_entry entry;

679
			entry = ioapic_entries[apic][pin];
680 681 682 683 684 685 686 687
			if (!entry.mask) {
				entry.mask = 1;
				ioapic_write_entry(apic, pin, entry);
			}
		}
	}
}

688 689 690 691
/*
 * Restore IO APIC entries which was saved in ioapic_entries.
 */
int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
692 693 694
{
	int apic, pin;

695 696 697
	if (!ioapic_entries)
		return -ENOMEM;

698
	for (apic = 0; apic < nr_ioapics; apic++) {
699 700 701
		if (!ioapic_entries[apic])
			return -ENOMEM;

702 703
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
			ioapic_write_entry(apic, pin,
704
					ioapic_entries[apic][pin]);
705
	}
706
	return 0;
707 708
}

709 710 711 712 713 714 715 716
/*
 * Free the storage handed out by alloc_ioapic_entries(): each per-APIC
 * array, then the pointer table itself.
 *
 * A NULL @ioapic_entries is accepted and ignored, matching the NULL
 * handling of save/mask/restore_IO_APIC_setup().
 */
void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
{
	int apic;

	if (!ioapic_entries)
		return;

	for (apic = 0; apic < nr_ioapics; apic++)
		kfree(ioapic_entries[apic]);

	kfree(ioapic_entries);
}
L
Linus Torvalds 已提交
718 719 720 721 722 723 724 725 726

/*
 * Find the IRQ entry number of a certain pin.
 */
static int find_irq_entry(int apic, int pin, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++)
727 728 729 730
		if (mp_irqs[i].irqtype == type &&
		    (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
		    mp_irqs[i].dstirq == pin)
L
Linus Torvalds 已提交
731 732 733 734 735 736 737 738
			return i;

	return -1;
}

/*
 * Find the pin to which IRQ[irq] (ISA) is connected
 */
739
static int __init find_isa_irq_pin(int irq, int type)
L
Linus Torvalds 已提交
740 741 742 743
{
	int i;

	for (i = 0; i < mp_irq_entries; i++) {
744
		int lbus = mp_irqs[i].srcbus;
L
Linus Torvalds 已提交
745

A
Alexey Starikovskiy 已提交
746
		if (test_bit(lbus, mp_bus_not_pci) &&
747 748
		    (mp_irqs[i].irqtype == type) &&
		    (mp_irqs[i].srcbusirq == irq))
L
Linus Torvalds 已提交
749

750
			return mp_irqs[i].dstirq;
L
Linus Torvalds 已提交
751 752 753 754
	}
	return -1;
}

755 756 757 758 759
/*
 * Find the IO-APIC to which IRQ[irq] (ISA) is connected.
 *
 * Uses the same entry selection as find_isa_irq_pin(), then maps the
 * destination APIC id of that entry to an index into mp_ioapics[].
 * Returns that index, or -1 if no entry matches or no registered IO-APIC
 * has that id.
 */
static int __init find_isa_irq_apic(int irq, int type)
{
	int i;

	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].srcbus;

		if (test_bit(lbus, mp_bus_not_pci) &&
		    (mp_irqs[i].irqtype == type) &&
		    (mp_irqs[i].srcbusirq == irq))
			break;
	}
	if (i < mp_irq_entries) {
		int apic;

		for (apic = 0; apic < nr_ioapics; apic++) {
			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
				return apic;
		}
	}

	return -1;
}

778
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
L
Linus Torvalds 已提交
779 780 781 782 783
/*
 * EISA Edge/Level control register, ELCR
 */
static int EISA_ELCR(unsigned int irq)
{
784
	if (irq < legacy_pic->nr_legacy_irqs) {
L
Linus Torvalds 已提交
785 786 787 788 789 790 791
		unsigned int port = 0x4d0 + (irq >> 3);
		return (inb(port) >> (irq & 7)) & 1;
	}
	apic_printk(APIC_VERBOSE, KERN_INFO
			"Broken MPtable reports ISA irq %d\n", irq);
	return 0;
}
792

793
#endif
L
Linus Torvalds 已提交
794

A
Alexey Starikovskiy 已提交
795 796 797 798 799 800
/* ISA interrupts are always polarity zero edge triggered,
 * when listed as conforming in the MP table. */

#define default_ISA_trigger(idx)	(0)
#define default_ISA_polarity(idx)	(0)

L
Linus Torvalds 已提交
801 802 803 804 805
/* EISA interrupts are always polarity zero and can be edge or level
 * trigger depending on the ELCR value.  If an interrupt is listed as
 * EISA conforming in the MP table, that means its trigger type must
 * be read in from the ELCR */

806
#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].srcbusirq))
A
Alexey Starikovskiy 已提交
807
#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
L
Linus Torvalds 已提交
808 809 810 811 812 813 814 815 816 817 818

/* PCI interrupts are always polarity one level triggered,
 * when listed as conforming in the MP table. */

#define default_PCI_trigger(idx)	(1)
#define default_PCI_polarity(idx)	(1)

/* MCA interrupts are always polarity zero level triggered,
 * when listed as conforming in the MP table. */

#define default_MCA_trigger(idx)	(1)
A
Alexey Starikovskiy 已提交
819
#define default_MCA_polarity(idx)	default_ISA_polarity(idx)
L
Linus Torvalds 已提交
820

821
static int MPBIOS_polarity(int idx)
L
Linus Torvalds 已提交
822
{
823
	int bus = mp_irqs[idx].srcbus;
L
Linus Torvalds 已提交
824 825 826 827 828
	int polarity;

	/*
	 * Determine IRQ line polarity (high active or low active):
	 */
829
	switch (mp_irqs[idx].irqflag & 3)
830
	{
831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858
		case 0: /* conforms, ie. bus-type dependent polarity */
			if (test_bit(bus, mp_bus_not_pci))
				polarity = default_ISA_polarity(idx);
			else
				polarity = default_PCI_polarity(idx);
			break;
		case 1: /* high active */
		{
			polarity = 0;
			break;
		}
		case 2: /* reserved */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			polarity = 1;
			break;
		}
		case 3: /* low active */
		{
			polarity = 1;
			break;
		}
		default: /* invalid */
		{
			printk(KERN_WARNING "broken BIOS!!\n");
			polarity = 1;
			break;
		}
L
Linus Torvalds 已提交
859 860 861 862 863 864
	}
	return polarity;
}

static int MPBIOS_trigger(int idx)
{
865
	int bus = mp_irqs[idx].srcbus;
L
Linus Torvalds 已提交
866 867 868 869 870
	int trigger;

	/*
	 * Determine IRQ trigger mode (edge or level sensitive):
	 */
871
	switch ((mp_irqs[idx].irqflag>>2) & 3)
L
Linus Torvalds 已提交
872
	{
873 874 875 876 877
		case 0: /* conforms, ie. bus-type dependent */
			if (test_bit(bus, mp_bus_not_pci))
				trigger = default_ISA_trigger(idx);
			else
				trigger = default_PCI_trigger(idx);
878
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907
			switch (mp_bus_id_to_type[bus]) {
				case MP_BUS_ISA: /* ISA pin */
				{
					/* set before the switch */
					break;
				}
				case MP_BUS_EISA: /* EISA pin */
				{
					trigger = default_EISA_trigger(idx);
					break;
				}
				case MP_BUS_PCI: /* PCI pin */
				{
					/* set before the switch */
					break;
				}
				case MP_BUS_MCA: /* MCA pin */
				{
					trigger = default_MCA_trigger(idx);
					break;
				}
				default:
				{
					printk(KERN_WARNING "broken BIOS!!\n");
					trigger = 1;
					break;
				}
			}
#endif
L
Linus Torvalds 已提交
908
			break;
909
		case 1: /* edge */
L
Linus Torvalds 已提交
910
		{
911
			trigger = 0;
L
Linus Torvalds 已提交
912 913
			break;
		}
914
		case 2: /* reserved */
L
Linus Torvalds 已提交
915
		{
916 917
			printk(KERN_WARNING "broken BIOS!!\n");
			trigger = 1;
L
Linus Torvalds 已提交
918 919
			break;
		}
920
		case 3: /* level */
L
Linus Torvalds 已提交
921
		{
922
			trigger = 1;
L
Linus Torvalds 已提交
923 924
			break;
		}
925
		default: /* invalid */
L
Linus Torvalds 已提交
926 927
		{
			printk(KERN_WARNING "broken BIOS!!\n");
928
			trigger = 0;
L
Linus Torvalds 已提交
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
			break;
		}
	}
	return trigger;
}

/* Polarity of MP table entry @idx; simply forwards to MPBIOS_polarity(). */
static inline int irq_polarity(int idx)
{
	return MPBIOS_polarity(idx);
}

/* Trigger mode of MP table entry @idx; simply forwards to MPBIOS_trigger(). */
static inline int irq_trigger(int idx)
{
	return MPBIOS_trigger(idx);
}

static int pin_2_irq(int idx, int apic, int pin)
{
947
	int irq;
948
	int bus = mp_irqs[idx].srcbus;
L
Linus Torvalds 已提交
949 950 951 952

	/*
	 * Debugging check, we are in big trouble if this message pops up!
	 */
953
	if (mp_irqs[idx].dstirq != pin)
L
Linus Torvalds 已提交
954 955
		printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");

956
	if (test_bit(bus, mp_bus_not_pci)) {
957
		irq = mp_irqs[idx].srcbusirq;
958
	} else {
959
		u32 gsi = mp_gsi_routing[apic].gsi_base + pin;
960 961 962 963

		if (gsi >= NR_IRQS_LEGACY)
			irq = gsi;
		else
964
			irq = gsi_top + gsi;
L
Linus Torvalds 已提交
965 966
	}

967
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
	/*
	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
	 */
	if ((pin >= 16) && (pin <= 23)) {
		if (pirq_entries[pin-16] != -1) {
			if (!pirq_entries[pin-16]) {
				apic_printk(APIC_VERBOSE, KERN_DEBUG
						"disabling PIRQ%d\n", pin-16);
			} else {
				irq = pirq_entries[pin-16];
				apic_printk(APIC_VERBOSE, KERN_DEBUG
						"using PIRQ%d -> IRQ %d\n",
						pin-16, irq);
			}
		}
	}
984 985
#endif

L
Linus Torvalds 已提交
986 987 988
	return irq;
}

989 990 991 992 993
/*
 * Find a specific PCI IRQ entry.
 * Not an __init, possibly needed by modules
 */
int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
994
				struct io_apic_irq_attr *irq_attr)
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023
{
	int apic, i, best_guess = -1;

	apic_printk(APIC_DEBUG,
		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
		    bus, slot, pin);
	if (test_bit(bus, mp_bus_not_pci)) {
		apic_printk(APIC_VERBOSE,
			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
		return -1;
	}
	for (i = 0; i < mp_irq_entries; i++) {
		int lbus = mp_irqs[i].srcbus;

		for (apic = 0; apic < nr_ioapics; apic++)
			if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
			    mp_irqs[i].dstapic == MP_APIC_ALL)
				break;

		if (!test_bit(lbus, mp_bus_not_pci) &&
		    !mp_irqs[i].irqtype &&
		    (bus == lbus) &&
		    (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
			int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);

			if (!(apic || IO_APIC_IRQ(irq)))
				continue;

			if (pin == (mp_irqs[i].srcbusirq & 3)) {
1024 1025 1026 1027
				set_io_apic_irq_attr(irq_attr, apic,
						     mp_irqs[i].dstirq,
						     irq_trigger(i),
						     irq_polarity(i));
1028 1029 1030 1031 1032 1033 1034
				return irq;
			}
			/*
			 * Use the first all-but-pin matching entry as a
			 * best-guess fuzzy result for broken mptables.
			 */
			if (best_guess < 0) {
1035 1036 1037 1038
				set_io_apic_irq_attr(irq_attr, apic,
						     mp_irqs[i].dstirq,
						     irq_trigger(i),
						     irq_polarity(i));
1039 1040 1041 1042 1043 1044 1045 1046
				best_guess = irq;
			}
		}
	}
	return best_guess;
}
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);

1047 1048 1049 1050 1051
void lock_vector_lock(void)
{
	/* Used to the online set of cpus does not change
	 * during assign_irq_vector.
	 */
1052
	raw_spin_lock(&vector_lock);
1053
}
L
Linus Torvalds 已提交
1054

1055
void unlock_vector_lock(void)
L
Linus Torvalds 已提交
1056
{
1057
	raw_spin_unlock(&vector_lock);
1058
}
L
Linus Torvalds 已提交
1059

1060 1061
static int
__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1062
{
1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
	/*
	 * NOTE! The local APIC isn't very good at handling
	 * multiple interrupts at the same interrupt level.
	 * As the interrupt level is determined by taking the
	 * vector number and shifting that right by 4, we
	 * want to spread these out a bit so that they don't
	 * all fall in the same interrupt level.
	 *
	 * Also, we've got to be careful not to trash gate
	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
	 */
1074
	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
1075
	static int current_offset = VECTOR_OFFSET_START % 8;
1076
	unsigned int old_vector;
1077 1078
	int cpu, err;
	cpumask_var_t tmp_mask;
1079

1080
	if (cfg->move_in_progress)
1081
		return -EBUSY;
1082

1083 1084
	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
		return -ENOMEM;
1085

1086 1087
	old_vector = cfg->vector;
	if (old_vector) {
1088 1089 1090 1091
		cpumask_and(tmp_mask, mask, cpu_online_mask);
		cpumask_and(tmp_mask, cfg->domain, tmp_mask);
		if (!cpumask_empty(tmp_mask)) {
			free_cpumask_var(tmp_mask);
1092
			return 0;
1093
		}
1094
	}
1095

1096
	/* Only try and allocate irqs on cpus that are present */
1097 1098
	err = -ENOSPC;
	for_each_cpu_and(cpu, mask, cpu_online_mask) {
1099 1100
		int new_cpu;
		int vector, offset;
1101

1102
		apic->vector_allocation_domain(cpu, tmp_mask);
1103

1104 1105
		vector = current_vector;
		offset = current_offset;
1106
next:
1107 1108
		vector += 8;
		if (vector >= first_system_vector) {
1109
			/* If out of vectors on large boxen, must share them. */
1110
			offset = (offset + 1) % 8;
1111
			vector = FIRST_EXTERNAL_VECTOR + offset;
1112 1113 1114
		}
		if (unlikely(current_vector == vector))
			continue;
1115 1116

		if (test_bit(vector, used_vectors))
1117
			goto next;
1118

1119
		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1120 1121 1122 1123 1124 1125 1126
			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
				goto next;
		/* Found one! */
		current_vector = vector;
		current_offset = offset;
		if (old_vector) {
			cfg->move_in_progress = 1;
1127
			cpumask_copy(cfg->old_domain, cfg->domain);
1128
		}
1129
		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
1130 1131
			per_cpu(vector_irq, new_cpu)[vector] = irq;
		cfg->vector = vector;
1132 1133 1134
		cpumask_copy(cfg->domain, tmp_mask);
		err = 0;
		break;
1135
	}
1136 1137
	free_cpumask_var(tmp_mask);
	return err;
1138 1139
}

1140
int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1141 1142
{
	int err;
1143 1144
	unsigned long flags;

1145
	raw_spin_lock_irqsave(&vector_lock, flags);
Y
Yinghai Lu 已提交
1146
	err = __assign_irq_vector(irq, cfg, mask);
1147
	raw_spin_unlock_irqrestore(&vector_lock, flags);
1148 1149 1150
	return err;
}

Y
Yinghai Lu 已提交
1151
static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
1152 1153 1154 1155 1156 1157
{
	int cpu, vector;

	BUG_ON(!cfg->vector);

	vector = cfg->vector;
1158
	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
1159 1160 1161
		per_cpu(vector_irq, cpu)[vector] = -1;

	cfg->vector = 0;
1162
	cpumask_clear(cfg->domain);
1163 1164 1165

	if (likely(!cfg->move_in_progress))
		return;
1166
	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
1167 1168 1169 1170 1171 1172 1173 1174 1175
		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
								vector++) {
			if (per_cpu(vector_irq, cpu)[vector] != irq)
				continue;
			per_cpu(vector_irq, cpu)[vector] = -1;
			break;
		}
	}
	cfg->move_in_progress = 0;
1176 1177 1178 1179 1180 1181 1182 1183
}

void __setup_vector_irq(int cpu)
{
	/* Initialize vector_irq on a new cpu */
	int irq, vector;
	struct irq_cfg *cfg;

1184 1185 1186 1187 1188
	/*
	 * vector_lock will make sure that we don't run into irq vector
	 * assignments that might be happening on another cpu in parallel,
	 * while we setup our initial vector to irq mappings.
	 */
1189
	raw_spin_lock(&vector_lock);
1190
	/* Mark the inuse vectors */
T
Thomas Gleixner 已提交
1191 1192 1193 1194
	for_each_active_irq(irq) {
		cfg = get_irq_chip_data(irq);
		if (!cfg)
			continue;
1195 1196 1197 1198 1199 1200 1201
		/*
		 * If it is a legacy IRQ handled by the legacy PIC, this cpu
		 * will be part of the irq_cfg's domain.
		 */
		if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
			cpumask_set_cpu(cpu, cfg->domain);

1202
		if (!cpumask_test_cpu(cpu, cfg->domain))
1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
			continue;
		vector = cfg->vector;
		per_cpu(vector_irq, cpu)[vector] = irq;
	}
	/* Mark the free vectors */
	for (vector = 0; vector < NR_VECTORS; ++vector) {
		irq = per_cpu(vector_irq, cpu)[vector];
		if (irq < 0)
			continue;

		cfg = irq_cfg(irq);
1214
		if (!cpumask_test_cpu(cpu, cfg->domain))
1215
			per_cpu(vector_irq, cpu)[vector] = -1;
1216
	}
1217
	raw_spin_unlock(&vector_lock);
L
Linus Torvalds 已提交
1218
}
1219

1220
static struct irq_chip ioapic_chip;
1221
static struct irq_chip ir_ioapic_chip;
L
Linus Torvalds 已提交
1222

1223 1224 1225
#define IOAPIC_AUTO     -1
#define IOAPIC_EDGE     0
#define IOAPIC_LEVEL    1
L
Linus Torvalds 已提交
1226

1227
#ifdef CONFIG_X86_32
/*
 * Look up the trigger mode of @irq by walking every pin of every IO-APIC
 * and returning irq_trigger() of the first mp_INT entry that maps to @irq.
 * Returns 0 when no pin maps to @irq.
 */
static inline int IO_APIC_irq_trigger(int irq)
{
	int ioapic, line, entry;

	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
		for (line = 0; line < nr_ioapic_registers[ioapic]; line++) {
			entry = find_irq_entry(ioapic, line, mp_INT);
			if (entry == -1)
				continue;
			if (pin_2_irq(entry, ioapic, line) == irq)
				return irq_trigger(entry);
		}
	}

	/* nonexistent IRQs are edge default */
	return 0;
}
#else
/* Without CONFIG_X86_32 no lookup is done; every IRQ reports 1. */
static inline int IO_APIC_irq_trigger(int irq)
{
	return 1;
}
#endif
1250

1251
static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
L
Linus Torvalds 已提交
1252
{
Y
Yinghai Lu 已提交
1253

1254
	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
1255
	    trigger == IOAPIC_LEVEL)
1256
		irq_set_status_flags(irq, IRQ_LEVEL);
1257
	else
1258
		irq_clear_status_flags(irq, IRQ_LEVEL);
1259

1260
	if (irq_remapped(get_irq_chip_data(irq))) {
1261
		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
1262 1263 1264 1265 1266 1267 1268 1269 1270
		if (trigger)
			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
						      handle_fasteoi_irq,
						     "fasteoi");
		else
			set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
						      handle_edge_irq, "edge");
		return;
	}
1271

1272 1273
	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
	    trigger == IOAPIC_LEVEL)
1274
		set_irq_chip_and_handler_name(irq, &ioapic_chip,
1275 1276
					      handle_fasteoi_irq,
					      "fasteoi");
1277
	else
1278
		set_irq_chip_and_handler_name(irq, &ioapic_chip,
1279
					      handle_edge_irq, "edge");
L
Linus Torvalds 已提交
1280 1281
}

1282 1283 1284 1285
static int setup_ioapic_entry(int apic_id, int irq,
			      struct IO_APIC_route_entry *entry,
			      unsigned int destination, int trigger,
			      int polarity, int vector, int pin)
L
Linus Torvalds 已提交
1286
{
1287 1288 1289 1290 1291
	/*
	 * add it to the IO-APIC irq-routing table:
	 */
	memset(entry,0,sizeof(*entry));

1292
	if (intr_remapping_enabled) {
I
Ingo Molnar 已提交
1293
		struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1294 1295 1296 1297 1298 1299
		struct irte irte;
		struct IR_IO_APIC_route_entry *ir_entry =
			(struct IR_IO_APIC_route_entry *) entry;
		int index;

		if (!iommu)
I
Ingo Molnar 已提交
1300
			panic("No mapping iommu for ioapic %d\n", apic_id);
1301 1302 1303

		index = alloc_irte(iommu, irq, 1);
		if (index < 0)
I
Ingo Molnar 已提交
1304
			panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1305

1306
		prepare_irte(&irte, vector, destination);
1307

1308 1309 1310
		/* Set source-id of interrupt request */
		set_ioapic_sid(&irte, apic_id);

1311 1312 1313 1314 1315 1316
		modify_irte(irq, &irte);

		ir_entry->index2 = (index >> 15) & 0x1;
		ir_entry->zero = 0;
		ir_entry->format = 1;
		ir_entry->index = (index & 0x7fff);
1317 1318 1319 1320 1321
		/*
		 * IO-APIC RTE will be configured with virtual vector.
		 * irq handler will do the explicit EOI to the io-apic.
		 */
		ir_entry->vector = pin;
1322
	} else {
1323 1324
		entry->delivery_mode = apic->irq_delivery_mode;
		entry->dest_mode = apic->irq_dest_mode;
1325
		entry->dest = destination;
1326
		entry->vector = vector;
1327
	}
1328

1329
	entry->mask = 0;				/* enable IRQ */
1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340
	entry->trigger = trigger;
	entry->polarity = polarity;

	/* Mask level triggered irqs.
	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
	 */
	if (trigger)
		entry->mask = 1;
	return 0;
}

1341 1342
static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
			     struct irq_cfg *cfg, int trigger, int polarity)
1343
{
L
Linus Torvalds 已提交
1344
	struct IO_APIC_route_entry entry;
1345
	unsigned int dest;
1346 1347 1348

	if (!IO_APIC_IRQ(irq))
		return;
1349 1350 1351 1352 1353
	/*
	 * For legacy irqs, cfg->domain starts with cpu 0 for legacy
	 * controllers like 8259. Now that IO-APIC can handle this irq, update
	 * the cfg->domain.
	 */
1354
	if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
1355 1356
		apic->vector_allocation_domain(0, cfg->domain);

1357
	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1358 1359
		return;

1360
	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
1361 1362 1363 1364

	apic_printk(APIC_VERBOSE,KERN_DEBUG
		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
		    "IRQ %d Mode:%i Active:%i)\n",
I
Ingo Molnar 已提交
1365
		    apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
1366 1367 1368
		    irq, trigger, polarity);


I
Ingo Molnar 已提交
1369
	if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1370
			       dest, trigger, polarity, cfg->vector, pin)) {
1371
		printk("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
I
Ingo Molnar 已提交
1372
		       mp_ioapics[apic_id].apicid, pin);
Y
Yinghai Lu 已提交
1373
		__clear_irq_vector(irq, cfg);
1374 1375 1376
		return;
	}

1377
	ioapic_register_intr(irq, trigger);
1378
	if (irq < legacy_pic->nr_legacy_irqs)
1379
		legacy_pic->mask(irq);
1380

I
Ingo Molnar 已提交
1381
	ioapic_write_entry(apic_id, pin, entry);
1382 1383
}

1384 1385 1386 1387
/*
 * Per-IO-APIC bitmap with one bit per pin (MP_MAX_IOAPIC_PIN + 1 bits),
 * set once the pin has been programmed so it is not set up a second time.
 */
static struct {
	DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
} mp_ioapic_routing[MAX_IO_APICS];

1388 1389
/*
 * Boot-time setup of every IO-APIC pin that has an mp_INT entry.
 *
 * Pins without an entry are collected into a single verbose
 * "(apicid-pin) not connected" line. On IO-APICs other than the first only
 * irqs up to 16 are set up here.
 */
static void __init setup_IO_APIC_irqs(void)
{
	int apic_id, pin, idx, irq;
	int pending_notcon = 0;
	int node = cpu_to_node(0);
	struct irq_cfg *cfg;

	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");

	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
		for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
			idx = find_irq_entry(apic_id, pin, mp_INT);
			if (idx == -1) {
				/* Only the first pin of a run opens a new log line. */
				if (!pending_notcon) {
					pending_notcon = 1;
					apic_printk(APIC_VERBOSE,
						KERN_DEBUG " %d-%d",
						mp_ioapics[apic_id].apicid, pin);
				} else {
					apic_printk(APIC_VERBOSE, " %d-%d",
						mp_ioapics[apic_id].apicid, pin);
				}
				continue;
			}
			if (pending_notcon) {
				apic_printk(APIC_VERBOSE,
					" (apicid-pin) not connected\n");
				pending_notcon = 0;
			}

			irq = pin_2_irq(idx, apic_id, pin);

			if ((apic_id > 0) && (irq > 16))
				continue;

			/*
			 * Skip the timer IRQ if there's a quirk handler
			 * installed and if it returns 1:
			 */
			if (apic->multi_timer_check &&
					apic->multi_timer_check(apic_id, irq))
				continue;

			cfg = alloc_irq_and_cfg_at(irq, node);
			if (!cfg)
				continue;

			add_pin_to_irq_node(cfg, node, apic_id, pin);
			/*
			 * don't mark it in pin_programmed, so later acpi could
			 * set it correctly when irq < 16
			 */
			setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
					  irq_polarity(idx));
		}
	}

	/* Close a run of unconnected pins that reached the last pin. */
	if (pending_notcon)
		apic_printk(APIC_VERBOSE,
			" (apicid-pin) not connected\n");
}

Y
Yinghai Lu 已提交
1447 1448 1449 1450 1451 1452 1453
/*
 * For a GSI that is not on the first ioapic
 * but cannot use acpi_register_gsi(),
 * like some special SCI on the IBM x3330.
 */
void setup_IO_APIC_irq_extra(u32 gsi)
{
	int node = cpu_to_node(0);
	int apic_id, pin, idx, irq;
	struct irq_cfg *cfg;

	/* Convert 'gsi' to 'ioapic.pin'. */
	apic_id = mp_find_ioapic(gsi);
	if (apic_id < 0)
		return;
	pin = mp_find_ioapic_pin(apic_id, gsi);

	idx = find_irq_entry(apic_id, pin, mp_INT);
	if (idx == -1)
		return;
	irq = pin_2_irq(idx, apic_id, pin);

	/* Only handle the non legacy irqs on secondary ioapics */
	if (apic_id == 0 || irq < NR_IRQS_LEGACY)
		return;

	cfg = alloc_irq_and_cfg_at(irq, node);
	if (!cfg)
		return;

	add_pin_to_irq_node(cfg, node, apic_id, pin);

	/* A pin is programmed at most once; remember that it was done. */
	if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
		pr_debug("Pin %d-%d already programmed\n",
			 mp_ioapics[apic_id].apicid, pin);
		return;
	}
	set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);

	setup_ioapic_irq(apic_id, pin, irq, cfg,
			irq_trigger(idx), irq_polarity(idx));
}

L
Linus Torvalds 已提交
1492
/*
1493
 * Set up the timer pin, possibly with the 8259A-master behind.
L
Linus Torvalds 已提交
1494
 */
I
Ingo Molnar 已提交
1495
static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
					int vector)
{
	struct IO_APIC_route_entry rte;

	/* Nothing is programmed here when interrupt remapping is enabled. */
	if (intr_remapping_enabled)
		return;

	memset(&rte, 0, sizeof(rte));

	/*
	 * We use logical delivery to get the timer IRQ
	 * to the first CPU.
	 */
	rte.dest_mode = apic->irq_dest_mode;
	rte.delivery_mode = apic->irq_delivery_mode;
	rte.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
	rte.vector = vector;
	rte.trigger = 0;
	rte.polarity = 0;
	rte.mask = 0;			/* don't mask IRQ for edge */

	/*
	 * The timer IRQ doesn't have to know that behind the
	 * scene we may have a 8259A-master in AEOI mode ...
	 */
	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");

	/* Add it to the IO-APIC irq-routing table: */
	ioapic_write_entry(apic_id, pin, rte);
}

1529 1530

/*
 * Dump the state of every IO-APIC to the kernel log: the ID/version/
 * arbitration/boot registers, the complete redirection table, and finally
 * the irq -> (apic, pin) mappings recorded for each active irq.
 *
 * Register reads are done under ioapic_lock; printing happens afterwards.
 * Pieces that continue a partially printed line are tagged KERN_CONT.
 */
__apicdebuginit(void) print_IO_APIC(void)
{
	int apic, i;
	union IO_APIC_reg_00 reg_00;
	union IO_APIC_reg_01 reg_01;
	union IO_APIC_reg_02 reg_02;
	union IO_APIC_reg_03 reg_03;
	unsigned long flags;
	struct irq_cfg *cfg;
	unsigned int irq;

	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
	for (i = 0; i < nr_ioapics; i++)
		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
		       mp_ioapics[i].apicid, nr_ioapic_registers[i]);

	/*
	 * We are a bit conservative about what we expect.  We have to
	 * know about every hardware change ASAP.
	 */
	printk(KERN_INFO "testing the IO APIC.......................\n");

	for (apic = 0; apic < nr_ioapics; apic++) {
		/* reg_02/reg_03 are only read on versions that may have them. */
		raw_spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic, 0);
		reg_01.raw = io_apic_read(apic, 1);
		if (reg_01.bits.version >= 0x10)
			reg_02.raw = io_apic_read(apic, 2);
		if (reg_01.bits.version >= 0x20)
			reg_03.raw = io_apic_read(apic, 3);
		raw_spin_unlock_irqrestore(&ioapic_lock, flags);

		printk("\n");
		printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
		printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
		printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
		printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
		printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);

		/* Use the union's raw view, as for the other registers. */
		printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
		printk(KERN_DEBUG ".......     : max redirection entries: %04X\n", reg_01.bits.entries);

		printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
		printk(KERN_DEBUG ".......     : IO APIC version: %04X\n", reg_01.bits.version);

		/*
		 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
		 * but the value of reg_02 is read as the previous read register
		 * value, so ignore it if reg_02 == reg_01.
		 */
		if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
			printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
			printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
		}

		/*
		 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
		 * or reg_03, but the value of reg_0[23] is read as the previous read
		 * register value, so ignore it if reg_03 == reg_0[12].
		 */
		if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
		    reg_03.raw != reg_01.raw) {
			printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
			printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
		}

		printk(KERN_DEBUG ".... IRQ redirection table:\n");

		printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
				  " Stat Dmod Deli Vect:\n");

		for (i = 0; i <= reg_01.bits.entries; i++) {
			struct IO_APIC_route_entry entry;

			entry = ioapic_read_entry(apic, i);

			printk(KERN_DEBUG " %02x %03X ",
				i,
				entry.dest
			);

			/* Second half of the same table row. */
			printk(KERN_CONT "%1d    %1d    %1d   %1d   %1d    %1d    %1d    %02X\n",
				entry.mask,
				entry.trigger,
				entry.irr,
				entry.polarity,
				entry.delivery_status,
				entry.dest_mode,
				entry.delivery_mode,
				entry.vector
			);
		}
	}

	printk(KERN_DEBUG "IRQ to pin mappings:\n");
	for_each_active_irq(irq) {
		struct irq_pin_list *entry;

		cfg = get_irq_chip_data(irq);
		if (!cfg)
			continue;
		entry = cfg->irq_2_pin;
		if (!entry)
			continue;
		printk(KERN_DEBUG "IRQ%d ", irq);
		for_each_irq_pin(entry, cfg->irq_2_pin)
			printk(KERN_CONT "-> %d:%d", entry->apic, entry->pin);
		printk(KERN_CONT "\n");
	}

	printk(KERN_INFO ".................................... done.\n");

	return;
}

1645
/*
 * Print eight consecutive local APIC registers, spaced 0x10 apart and
 * starting at @base, as one line of hex words.
 */
__apicdebuginit(void) print_APIC_field(int base)
{
	int off;

	printk(KERN_DEBUG);

	for (off = 0; off < 8 * 0x10; off += 0x10)
		printk(KERN_CONT "%08x", apic_read(base + off));

	printk(KERN_CONT "\n");
}

1657
/*
 * Dump the local APIC registers of the cpu this runs on to the kernel log.
 * @dummy is unused; the signature matches what smp_call_function_single()
 * expects from its callback.
 */
__apicdebuginit(void) print_local_APIC(void *dummy)
{
	unsigned int n, val, ver, maxlvt;
	unsigned int integrated;
	u64 icr;

	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
		smp_processor_id(), hard_smp_processor_id());

	val = apic_read(APIC_ID);
	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", val, read_apic_id());

	val = apic_read(APIC_LVR);
	printk(KERN_INFO "... APIC VERSION: %08x\n", val);
	ver = GET_APIC_VERSION(val);
	maxlvt = lapic_get_maxlvt();
	integrated = APIC_INTEGRATED(ver);	/* nonzero: !82489DX */

	val = apic_read(APIC_TASKPRI);
	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", val, val & APIC_TPRI_MASK);

	if (integrated) {
		if (!APIC_XAPIC(ver)) {
			val = apic_read(APIC_ARBPRI);
			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", val,
			       val & APIC_ARBPRI_MASK);
		}
		val = apic_read(APIC_PROCPRI);
		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", val);
	}

	/*
	 * Remote read supported only in the 82489DX and local APIC for
	 * Pentium processors.
	 */
	if (!integrated || maxlvt == 3) {
		val = apic_read(APIC_RRR);
		printk(KERN_DEBUG "... APIC RRR: %08x\n", val);
	}

	val = apic_read(APIC_LDR);
	printk(KERN_DEBUG "... APIC LDR: %08x\n", val);
	if (!x2apic_enabled()) {
		val = apic_read(APIC_DFR);
		printk(KERN_DEBUG "... APIC DFR: %08x\n", val);
	}
	val = apic_read(APIC_SPIV);
	printk(KERN_DEBUG "... APIC SPIV: %08x\n", val);

	printk(KERN_DEBUG "... APIC ISR field:\n");
	print_APIC_field(APIC_ISR);
	printk(KERN_DEBUG "... APIC TMR field:\n");
	print_APIC_field(APIC_TMR);
	printk(KERN_DEBUG "... APIC IRR field:\n");
	print_APIC_field(APIC_IRR);

	if (integrated) {
		/* Due to the Pentium erratum 3AP. */
		if (maxlvt > 3)
			apic_write(APIC_ESR, 0);

		val = apic_read(APIC_ESR);
		printk(KERN_DEBUG "... APIC ESR: %08x\n", val);
	}

	/* The 64-bit ICR is reported as two 32-bit halves. */
	icr = apic_icr_read();
	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));

	val = apic_read(APIC_LVTT);
	printk(KERN_DEBUG "... APIC LVTT: %08x\n", val);

	/* PC is LVT#4. */
	if (maxlvt > 3) {
		val = apic_read(APIC_LVTPC);
		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", val);
	}
	val = apic_read(APIC_LVT0);
	printk(KERN_DEBUG "... APIC LVT0: %08x\n", val);
	val = apic_read(APIC_LVT1);
	printk(KERN_DEBUG "... APIC LVT1: %08x\n", val);

	/* ERR is LVT#3. */
	if (maxlvt > 2) {
		val = apic_read(APIC_LVTERR);
		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", val);
	}

	val = apic_read(APIC_TMICT);
	printk(KERN_DEBUG "... APIC TMICT: %08x\n", val);
	val = apic_read(APIC_TMCCT);
	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", val);
	val = apic_read(APIC_TDCR);
	printk(KERN_DEBUG "... APIC TDCR: %08x\n", val);

	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
		val = apic_read(APIC_EFEAT);
		/* From here on maxlvt counts the extended LVT entries. */
		maxlvt = (val >> 16) & 0xff;
		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", val);
		val = apic_read(APIC_ECTRL);
		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", val);
		for (n = 0; n < maxlvt; n++) {
			val = apic_read(APIC_EILVTn(n));
			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", n, val);
		}
	}
	printk("\n");
}

1759
/*
 * Run print_local_APIC() on each online cpu whose number is below @maxcpu,
 * waiting for each call to finish. A @maxcpu of 0 prints nothing.
 */
__apicdebuginit(void) print_local_APICs(int maxcpu)
{
	int target;

	if (maxcpu == 0)
		return;

	preempt_disable();
	for_each_online_cpu(target) {
		if (target >= maxcpu)
			break;
		smp_call_function_single(target, print_local_APIC, NULL, 1);
	}
	preempt_enable();
}

1775
/*
 * Dump the legacy PIC registers (printed as IMR, IRR, ISR and ELCR) to the
 * kernel log. Does nothing when there are no legacy irqs.
 */
__apicdebuginit(void) print_PIC(void)
{
	unsigned long irqflags;
	unsigned int regs;

	if (!legacy_pic->nr_legacy_irqs)
		return;

	printk(KERN_DEBUG "\nprinting PIC contents\n");

	raw_spin_lock_irqsave(&i8259A_lock, irqflags);

	regs = inb(0xa1) << 8 | inb(0x21);
	printk(KERN_DEBUG "... PIC  IMR: %04x\n", regs);

	regs = inb(0xa0) << 8 | inb(0x20);
	printk(KERN_DEBUG "... PIC  IRR: %04x\n", regs);

	/*
	 * Writing 0x0b presumably switches the next read to the ISR and
	 * 0x0a switches it back afterwards -- TODO confirm against the
	 * 8259A documentation.
	 */
	outb(0x0b,0xa0);
	outb(0x0b,0x20);
	regs = inb(0xa0) << 8 | inb(0x20);
	outb(0x0a,0xa0);
	outb(0x0a,0x20);

	raw_spin_unlock_irqrestore(&i8259A_lock, irqflags);

	/* Printed after the lock has been dropped. */
	printk(KERN_DEBUG "... PIC  ISR: %04x\n", regs);

	regs = inb(0x4d1) << 8 | inb(0x4d0);
	printk(KERN_DEBUG "... PIC ELCR: %04x\n", regs);
}

1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824
/* Upper bound on cpu numbers whose local APIC print_ICs() dumps. */
static int __initdata show_lapic = 1;

/*
 * Parse the "show_lapic=" boot parameter: "all" selects CONFIG_NR_CPUS,
 * a non-negative number selects that many; anything else keeps the default.
 */
static __init int setup_show_lapic(char *arg)
{
	int requested = -1;

	if (!strcmp(arg, "all")) {
		show_lapic = CONFIG_NR_CPUS;
		return 1;
	}

	get_option(&arg, &requested);
	if (requested >= 0)
		show_lapic = requested;

	return 1;
}
__setup("show_lapic=", setup_show_lapic);

/*
 * Dump all interrupt controllers unless apic verbosity is APIC_QUIET:
 * always the PIC, and the local APICs and IO-APICs when an APIC is there.
 * Always returns 0.
 */
__apicdebuginit(int) print_ICs(void)
{
	if (apic_verbosity == APIC_QUIET)
		return 0;

	print_PIC();

	/* don't print out if apic is not there */
	if (cpu_has_apic || apic_from_smp_config()) {
		print_local_APICs(show_lapic);
		print_IO_APIC();
	}

	return 0;
}

1841
fs_initcall(print_ICs);
1842

L
Linus Torvalds 已提交
1843

Y
Yinghai Lu 已提交
1844 1845 1846
/*
 * IO-APIC and pin through which the i8259 is connected in ExtINT mode.
 * Both fields stay at -1 while no such pin is known.
 */
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };

1847
/*
 * Locate the IO-APIC pin the i8259 is wired to (first from the hardware
 * state, then from the MP table), record it in ioapic_i8259, and clear the
 * IO-APIC. Does nothing when there are no legacy irqs.
 */
void __init enable_IO_APIC(void)
{
	int mp_apic, mp_pin;
	int apic, pin;

	if (!legacy_pic->nr_legacy_irqs)
		return;

	/* See if any of the pins is in ExtINT mode */
	for (apic = 0; apic < nr_ioapics; apic++) {
		for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
			struct IO_APIC_route_entry rte;

			rte = ioapic_read_entry(apic, pin);

			/*
			 * An enabled line in ExtINT mode is the pin
			 * where the i8259 is connected.
			 */
			if (rte.mask != 0 || rte.delivery_mode != dest_ExtINT)
				continue;

			ioapic_i8259.apic = apic;
			ioapic_i8259.pin  = pin;
			goto found_i8259;
		}
	}
 found_i8259:
	/*
	 * Look at what the MP table reports for the ExtINT. If the pin was
	 * not found by looking at the ioapic, the i8259 probably is not
	 * connected to it, but give the mptable a chance anyway.
	 */
	mp_pin  = find_isa_irq_pin(0, mp_ExtINT);
	mp_apic = find_isa_irq_apic(0, mp_ExtINT);

	/* Trust the MP table if nothing is setup in the hardware */
	if (ioapic_i8259.pin == -1 && mp_pin >= 0) {
		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
		ioapic_i8259.pin  = mp_pin;
		ioapic_i8259.apic = mp_apic;
	}

	/* Complain if the MP table and the hardware disagree */
	if (mp_pin >= 0 && ioapic_i8259.pin >= 0 &&
	    (ioapic_i8259.apic != mp_apic || ioapic_i8259.pin != mp_pin))
		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");

	/*
	 * Do not trust the IO-APIC being empty at bootup
	 */
	clear_IO_APIC();
}

/*
 * Not an __init, needed by the reboot code
 */
void disable_IO_APIC(void)
{
	/*
	 * Clear the IO-APIC before rebooting:
	 */
	clear_IO_APIC();

1909
	if (!legacy_pic->nr_legacy_irqs)
1910 1911
		return;

1912
	/*
1913
	 * If the i8259 is routed through an IOAPIC
1914
	 * Put that IOAPIC in virtual wire mode
1915
	 * so legacy interrupts can be delivered.
1916 1917 1918 1919 1920
	 *
	 * With interrupt-remapping, for now we will use virtual wire A mode,
	 * as virtual wire B is little complex (need to configure both
	 * IOAPIC RTE aswell as interrupt-remapping table entry).
	 * As this gets called during crash dump, keep this simple for now.
1921
	 */
1922
	if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
1923 1924 1925 1926 1927 1928 1929 1930 1931
		struct IO_APIC_route_entry entry;

		memset(&entry, 0, sizeof(entry));
		entry.mask            = 0; /* Enabled */
		entry.trigger         = 0; /* Edge */
		entry.irr             = 0;
		entry.polarity        = 0; /* High */
		entry.delivery_status = 0;
		entry.dest_mode       = 0; /* Physical */
1932
		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
1933
		entry.vector          = 0;
1934
		entry.dest            = read_apic_id();
1935 1936 1937 1938

		/*
		 * Add it to the IO-APIC irq-routing table:
		 */
1939
		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1940
	}
1941

1942 1943 1944
	/*
	 * Use virtual wire A mode when interrupt remapping is enabled.
	 */
1945
	if (cpu_has_apic || apic_from_smp_config())
1946 1947
		disconnect_bsp_APIC(!intr_remapping_enabled &&
				ioapic_i8259.pin != -1);
L
Linus Torvalds 已提交
1948 1949
}

1950
#ifdef CONFIG_X86_32
L
Linus Torvalds 已提交
1951 1952 1953 1954 1955 1956
/*
 * function to set the IO-APIC physical IDs based on the
 * values stored in the MPC table.
 *
 * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
 */
1957
void __init setup_ioapic_ids_from_mpc_nocheck(void)
{
	union IO_APIC_reg_00 reg_00;
	physid_mask_t phys_id_present_map;
	unsigned long flags;
	unsigned char old_id;
	int apic_id;
	int n;

	/*
	 * This is broken; anything with a real cpu count has to
	 * circumvent this idiocy regardless.
	 */
	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);

	/*
	 * Set the IOAPIC ID to the value stored in the MPC table.
	 */
	for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {

		/* Read the register 0 value */
		raw_spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic_id, 0);
		raw_spin_unlock_irqrestore(&ioapic_lock, flags);

		old_id = mp_ioapics[apic_id].apicid;

		/* An out-of-range MPC id is replaced by the hardware's id. */
		if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
				apic_id, mp_ioapics[apic_id].apicid);
			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
				reg_00.bits.ID);
			mp_ioapics[apic_id].apicid = reg_00.bits.ID;
		}

		/*
		 * Sanity check, is the ID really free? Every APIC in a
		 * system must have a unique ID or we get lots of nice
		 * 'stuck on smp_invalidate_needed IPI wait' messages.
		 */
		if (apic->check_apicid_used(&phys_id_present_map,
					mp_ioapics[apic_id].apicid)) {
			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
				apic_id, mp_ioapics[apic_id].apicid);

			/* Take the lowest id that is not present yet. */
			for (n = 0; n < get_physical_broadcast(); n++) {
				if (!physid_isset(n, phys_id_present_map))
					break;
			}
			if (n >= get_physical_broadcast())
				panic("Max APIC ID exceeded!\n");
			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
				n);
			physid_set(n, phys_id_present_map);
			mp_ioapics[apic_id].apicid = n;
		} else {
			physid_mask_t tmp;

			apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
			apic_printk(APIC_VERBOSE, "Setting %d in the "
					"phys_id_present_map\n",
					mp_ioapics[apic_id].apicid);
			physids_or(phys_id_present_map, phys_id_present_map, tmp);
		}

		/*
		 * We need to adjust the IRQ routing table
		 * if the ID changed.
		 */
		if (old_id != mp_ioapics[apic_id].apicid) {
			for (n = 0; n < mp_irq_entries; n++) {
				if (mp_irqs[n].dstapic == old_id)
					mp_irqs[n].dstapic
						= mp_ioapics[apic_id].apicid;
			}
		}

		/*
		 * Update the ID register according to the right value
		 * from the MPC table if they are different.
		 */
		if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
			continue;

		apic_printk(APIC_VERBOSE, KERN_INFO
			"...changing IO-APIC physical APIC ID to %d ...",
			mp_ioapics[apic_id].apicid);

		reg_00.bits.ID = mp_ioapics[apic_id].apicid;
		raw_spin_lock_irqsave(&ioapic_lock, flags);
		io_apic_write(apic_id, 0, reg_00.raw);
		raw_spin_unlock_irqrestore(&ioapic_lock, flags);

		/*
		 * Sanity check: read the register back and compare.
		 */
		raw_spin_lock_irqsave(&ioapic_lock, flags);
		reg_00.raw = io_apic_read(apic_id, 0);
		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
		if (reg_00.bits.ID == mp_ioapics[apic_id].apicid)
			apic_printk(APIC_VERBOSE, " ok.\n");
		else
			printk("could not set ID!\n");
	}
}
2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071

/*
 * Checked entry point: only run setup_ioapic_ids_from_mpc_nocheck() when
 * acpi_ioapic is not set and the boot cpu is an Intel part without an xAPIC.
 */
void __init setup_ioapic_ids_from_mpc(void)
{
	if (acpi_ioapic)
		return;

	/*
	 * Don't check I/O APIC IDs for xAPIC systems.  They have
	 * no meaning without the serial APIC bus.
	 */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	    APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
		return;

	setup_ioapic_ids_from_mpc_nocheck();
}
2072
#endif
L
Linus Torvalds 已提交
2073

2074
int no_timer_check __initdata;
2075 2076 2077 2078 2079 2080 2081 2082

static int __init notimercheck(char *s)
{
	no_timer_check = 1;
	return 1;
}
__setup("no_timer_check", notimercheck);

L
Linus Torvalds 已提交
2083 2084 2085 2086 2087 2088 2089 2090
/*
 * There is a nasty bug in some older SMP boards, their mptable lies
 * about the timer IRQ. We do the following to work around the situation:
 *
 *	- timer IRQ defaults to IO-APIC IRQ
 *	- if this function detects that timer IRQs are defunct, then we fall
 *	  back to ISA timer IRQs
 */
2091
static int __init timer_irq_works(void)
{
	unsigned long start = jiffies;
	unsigned long flags;

	/* The user asked us to trust the timer unconditionally. */
	if (no_timer_check)
		return 1;

	/*
	 * Open an interrupt window of roughly ten ticks, then put the
	 * interrupt flag back to whatever the caller had.
	 */
	local_save_flags(flags);
	local_irq_enable();
	mdelay((10 * 1000) / HZ);
	local_irq_restore(flags);

	/*
	 * Expect a few ticks at least, to be sure some possible
	 * glue logic does not lock up after one or two first
	 * ticks in a non-ExtINT mode.  Also the local APIC
	 * might have cached one ExtINT interrupt.  Finally, at
	 * least one tick may be lost due to delays.
	 *
	 * time_after() keeps the comparison correct across a jiffies wrap.
	 */
	return time_after(jiffies, start + 4) ? 1 : 0;
}

/*
 * In the SMP+IOAPIC case it might happen that there are an unspecified
 * number of pending IRQ events unhandled. These cases are very rare,
 * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
 * better to do it this way as thus we do not have to be aware of
 * 'pending' interrupts in the IRQ path, except at this point.
 */
/*
 * Edge triggered needs to resend any interrupt
 * that was delayed but this is now handled in the device
 * independent code.
 */

/*
 * Starting up an edge-triggered IO-APIC interrupt is
 * nasty - we need to make sure that we get the edge.
 * If it is already asserted for some reason, we need to
 * return 1 to indicate that it was pending.
 *
 * This is not complete - we should be able to fake
 * an edge even if it isn't on the 8259A...
 */
2141

2142
static unsigned int startup_ioapic_irq(struct irq_data *data)
L
Linus Torvalds 已提交
2143
{
2144
	int was_pending = 0, irq = data->irq;
L
Linus Torvalds 已提交
2145 2146
	unsigned long flags;

2147
	raw_spin_lock_irqsave(&ioapic_lock, flags);
2148
	if (irq < legacy_pic->nr_legacy_irqs) {
2149
		legacy_pic->mask(irq);
2150
		if (legacy_pic->irq_pending(irq))
L
Linus Torvalds 已提交
2151 2152
			was_pending = 1;
	}
2153
	__unmask_ioapic(data->chip_data);
2154
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
L
Linus Torvalds 已提交
2155 2156 2157 2158

	return was_pending;
}

2159
static int ioapic_retrigger_irq(struct irq_data *data)
L
Linus Torvalds 已提交
2160
{
2161
	struct irq_cfg *cfg = data->chip_data;
2162 2163
	unsigned long flags;

2164
	raw_spin_lock_irqsave(&vector_lock, flags);
2165
	apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2166
	raw_spin_unlock_irqrestore(&vector_lock, flags);
2167 2168 2169

	return 1;
}
2170

2171 2172 2173 2174 2175 2176 2177 2178
/*
 * Level and edge triggered IO-APIC interrupts need different handling,
 * so we use two separate IRQ descriptors. Edge triggered IRQs can be
 * handled with the level-triggered descriptor, but that one has slightly
 * more overhead. Level-triggered interrupts cannot be handled with the
 * edge-triggered handler, without risking IRQ storms and other ugly
 * races.
 */
2179

2180
#ifdef CONFIG_SMP
2181
void send_cleanup_vector(struct irq_cfg *cfg)
2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196
{
	cpumask_var_t cleanup_mask;

	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
		unsigned int i;
		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
	} else {
		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
		free_cpumask_var(cleanup_mask);
	}
	cfg->move_in_progress = 0;
}

2197
static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
2198 2199 2200 2201 2202
{
	int apic, pin;
	struct irq_pin_list *entry;
	u8 vector = cfg->vector;

2203
	for_each_irq_pin(entry, cfg->irq_2_pin) {
2204 2205 2206 2207 2208 2209 2210 2211
		unsigned int reg;

		apic = entry->apic;
		pin = entry->pin;
		/*
		 * With interrupt-remapping, destination information comes
		 * from interrupt-remapping table entry.
		 */
2212
		if (!irq_remapped(cfg))
2213 2214 2215 2216 2217 2218 2219 2220 2221
			io_apic_write(apic, 0x11 + pin*2, dest);
		reg = io_apic_read(apic, 0x10 + pin*2);
		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
		reg |= vector;
		io_apic_modify(apic, 0x10 + pin*2, reg);
	}
}

/*
2222
 * Either sets data->affinity to a valid value, and returns
2223
 * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
2224
 * leaves data->affinity untouched.
2225
 */
2226 2227
int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
			  unsigned int *dest_id)
2228
{
2229
	struct irq_cfg *cfg = data->chip_data;
2230 2231

	if (!cpumask_intersects(mask, cpu_online_mask))
2232
		return -1;
2233

2234
	if (assign_irq_vector(data->irq, data->chip_data, mask))
2235
		return -1;
2236

2237
	cpumask_copy(data->affinity, mask);
2238

2239
	*dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
2240
	return 0;
2241 2242
}

2243
static int
2244 2245
ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
		    bool force)
2246
{
2247
	unsigned int dest, irq = data->irq;
2248
	unsigned long flags;
2249
	int ret;
2250

2251
	raw_spin_lock_irqsave(&ioapic_lock, flags);
2252
	ret = __ioapic_set_affinity(data, mask, &dest);
2253
	if (!ret) {
2254 2255
		/* Only the high 8 bits are valid. */
		dest = SET_APIC_LOGICAL_ID(dest);
2256
		__target_IO_APIC_irq(irq, dest, data->chip_data);
2257
	}
2258
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2259
	return ret;
2260 2261
}

2262
#ifdef CONFIG_INTR_REMAP
2263

2264 2265 2266
/*
 * Migrate the IO-APIC irq in the presence of intr-remapping.
 *
2267 2268
 * For both level and edge triggered, irq migration is a simple atomic
 * update(of vector and cpu destination) of IRTE and flush the hardware cache.
2269
 *
2270 2271 2272 2273
 * For level triggered, we eliminate the io-apic RTE modification (with the
 * updated vector information), by using a virtual vector (io-apic pin number).
 * Real vector that is used for interrupting cpu will be coming from
 * the interrupt-remapping table entry.
2274
 */
2275
static int
2276 2277
ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
		       bool force)
2278
{
2279 2280
	struct irq_cfg *cfg = data->chip_data;
	unsigned int dest, irq = data->irq;
2281
	struct irte irte;
2282

2283
	if (!cpumask_intersects(mask, cpu_online_mask))
2284
		return -EINVAL;
2285

2286
	if (get_irte(irq, &irte))
2287
		return -EBUSY;
2288

Y
Yinghai Lu 已提交
2289
	if (assign_irq_vector(irq, cfg, mask))
2290
		return -EBUSY;
2291

2292
	dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2293 2294 2295 2296 2297 2298 2299 2300 2301

	irte.vector = cfg->vector;
	irte.dest_id = IRTE_DEST(dest);

	/*
	 * Modified the IRTE and flushes the Interrupt entry cache.
	 */
	modify_irte(irq, &irte);

2302 2303
	if (cfg->move_in_progress)
		send_cleanup_vector(cfg);
2304

2305
	cpumask_copy(data->affinity, mask);
2306
	return 0;
2307 2308
}

2309
#else
2310 2311 2312
/*
 * Stub used when CONFIG_INTR_REMAP is not set: does nothing and
 * reports success.
 */
static inline int
ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
		       bool force)
{
	return 0;
}
2316 2317 2318 2319 2320
#endif

asmlinkage void smp_irq_move_cleanup_interrupt(void)
{
	unsigned vector, me;
2321

2322 2323 2324 2325 2326 2327 2328
	ack_APIC_irq();
	exit_idle();
	irq_enter();

	me = smp_processor_id();
	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
		unsigned int irq;
2329
		unsigned int irr;
2330 2331
		struct irq_desc *desc;
		struct irq_cfg *cfg;
T
Tejun Heo 已提交
2332
		irq = __this_cpu_read(vector_irq[vector]);
2333

2334 2335 2336
		if (irq == -1)
			continue;

2337 2338 2339 2340 2341
		desc = irq_to_desc(irq);
		if (!desc)
			continue;

		cfg = irq_cfg(irq);
2342
		raw_spin_lock(&desc->lock);
2343

2344 2345 2346 2347 2348 2349 2350
		/*
		 * Check if the irq migration is in progress. If so, we
		 * haven't received the cleanup request yet for this irq.
		 */
		if (cfg->move_in_progress)
			goto unlock;

2351
		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2352 2353
			goto unlock;

2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365
		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
		/*
		 * Check if the vector that needs to be cleanedup is
		 * registered at the cpu's IRR. If so, then this is not
		 * the best time to clean it up. Lets clean it up in the
		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
		 * to myself.
		 */
		if (irr  & (1 << (vector % 32))) {
			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
			goto unlock;
		}
T
Tejun Heo 已提交
2366
		__this_cpu_write(vector_irq[vector], -1);
2367
unlock:
2368
		raw_spin_unlock(&desc->lock);
2369 2370 2371 2372 2373
	}

	irq_exit();
}

T
Thomas Gleixner 已提交
2374
static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
2375
{
2376
	unsigned me;
2377

2378
	if (likely(!cfg->move_in_progress))
2379 2380 2381
		return;

	me = smp_processor_id();
2382

2383
	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
2384
		send_cleanup_vector(cfg);
2385
}
2386

T
Thomas Gleixner 已提交
2387
static void irq_complete_move(struct irq_cfg *cfg)
2388
{
T
Thomas Gleixner 已提交
2389
	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
2390 2391 2392 2393
}

void irq_force_complete_move(int irq)
{
T
Thomas Gleixner 已提交
2394
	struct irq_cfg *cfg = get_irq_chip_data(irq);
2395

2396 2397 2398
	if (!cfg)
		return;

T
Thomas Gleixner 已提交
2399
	__irq_complete_move(cfg, cfg->vector);
2400
}
2401
#else
T
Thomas Gleixner 已提交
2402
/* !CONFIG_SMP: there is no irq migration to complete. */
static inline void irq_complete_move(struct irq_cfg *cfg)
{
}
2403
#endif
Y
Yinghai Lu 已提交
2404

2405
static void ack_apic_edge(struct irq_data *data)
2406
{
2407 2408
	irq_complete_move(data->chip_data);
	move_native_irq(data->irq);
2409 2410 2411
	ack_APIC_irq();
}

Y
Yinghai Lu 已提交
2412 2413
/*
 * Incremented by ack_apic_level() each time a level-triggered interrupt
 * is found not flagged in the local APIC TMR.
 */
atomic_t irq_mis_count;

2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429
/*
 * IO-APIC versions below 0x20 don't support EOI register.
 * For the record, here is the information about various versions:
 *     0Xh     82489DX
 *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
 *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
 *     30h-FFh Reserved
 *
 * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
 * version as 0x2. This is an error with documentation and these ICH chips
 * use io-apic's of version 0x20.
 *
 * For IO-APIC's with EOI register, we use that to do an explicit EOI.
 * Otherwise, we simulate the EOI message manually by changing the trigger
 * mode to edge and then back to level, with RTE being masked during this.
*/
T
Thomas Gleixner 已提交
2430
static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2431 2432
{
	struct irq_pin_list *entry;
T
Thomas Gleixner 已提交
2433
	unsigned long flags;
2434

T
Thomas Gleixner 已提交
2435
	raw_spin_lock_irqsave(&ioapic_lock, flags);
2436
	for_each_irq_pin(entry, cfg->irq_2_pin) {
2437 2438 2439 2440 2441 2442 2443
		if (mp_ioapics[entry->apic].apicver >= 0x20) {
			/*
			 * Intr-remapping uses pin number as the virtual vector
			 * in the RTE. Actual vector is programmed in
			 * intr-remapping table entry. Hence for the io-apic
			 * EOI we use the pin number.
			 */
2444
			if (irq_remapped(cfg))
2445 2446 2447 2448 2449 2450 2451
				io_apic_eoi(entry->apic, entry->pin);
			else
				io_apic_eoi(entry->apic, cfg->vector);
		} else {
			__mask_and_edge_IO_APIC_irq(entry);
			__unmask_and_level_IO_APIC_irq(entry);
		}
2452
	}
2453
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2454 2455
}

2456
static void ack_apic_level(struct irq_data *data)
2457
{
2458 2459
	struct irq_cfg *cfg = data->chip_data;
	int i, do_unmask_irq = 0, irq = data->irq;
Y
Yinghai Lu 已提交
2460
	unsigned long v;
2461

T
Thomas Gleixner 已提交
2462
	irq_complete_move(cfg);
2463
#ifdef CONFIG_GENERIC_PENDING_IRQ
2464
	/* If we are moving the irq we need to mask it */
2465
	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
2466
		do_unmask_irq = 1;
T
Thomas Gleixner 已提交
2467
		mask_ioapic(cfg);
2468
	}
2469 2470
#endif

Y
Yinghai Lu 已提交
2471
	/*
2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488
	 * It appears there is an erratum which affects at least version 0x11
	 * of I/O APIC (that's the 82093AA and cores integrated into various
	 * chipsets).  Under certain conditions a level-triggered interrupt is
	 * erroneously delivered as edge-triggered one but the respective IRR
	 * bit gets set nevertheless.  As a result the I/O unit expects an EOI
	 * message but it will never arrive and further interrupts are blocked
	 * from the source.  The exact reason is so far unknown, but the
	 * phenomenon was observed when two consecutive interrupt requests
	 * from a given source get delivered to the same CPU and the source is
	 * temporarily disabled in between.
	 *
	 * A workaround is to simulate an EOI message manually.  We achieve it
	 * by setting the trigger mode to edge and then to level when the edge
	 * trigger mode gets detected in the TMR of a local APIC for a
	 * level-triggered interrupt.  We mask the source for the time of the
	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
	 * The idea is from Manfred Spraul.  --macro
2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501
	 *
	 * Also in the case when cpu goes offline, fixup_irqs() will forward
	 * any unhandled interrupt on the offlined cpu to the new cpu
	 * destination that is handling the corresponding interrupt. This
	 * interrupt forwarding is done via IPI's. Hence, in this case also
	 * level-triggered io-apic interrupt will be seen as an edge
	 * interrupt in the IRR. And we can't rely on the cpu's EOI
	 * to be broadcasted to the IO-APIC's which will clear the remoteIRR
	 * corresponding to the level-triggered interrupt. Hence on IO-APIC's
	 * supporting EOI register, we do an explicit EOI to clear the
	 * remote IRR and on IO-APIC's which don't have an EOI register,
	 * we use the above logic (mask+edge followed by unmask+level) from
	 * Manfred Spraul to clear the remote IRR.
2502
	 */
Y
Yinghai Lu 已提交
2503
	i = cfg->vector;
Y
Yinghai Lu 已提交
2504 2505
	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));

2506 2507 2508 2509 2510 2511
	/*
	 * We must acknowledge the irq before we move it or the acknowledge will
	 * not propagate properly.
	 */
	ack_APIC_irq();

2512 2513 2514 2515 2516 2517 2518
	/*
	 * Tail end of clearing remote IRR bit (either by delivering the EOI
	 * message via io-apic EOI register write or simulating it using
	 * mask+edge followed by unnask+level logic) manually when the
	 * level triggered interrupt is seen as the edge triggered interrupt
	 * at the cpu.
	 */
2519 2520 2521
	if (!(v & (1 << (i & 0x1f)))) {
		atomic_inc(&irq_mis_count);

T
Thomas Gleixner 已提交
2522
		eoi_ioapic_irq(irq, cfg);
2523 2524
	}

2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552
	/* Now we can move and renable the irq */
	if (unlikely(do_unmask_irq)) {
		/* Only migrate the irq if the ack has been received.
		 *
		 * On rare occasions the broadcast level triggered ack gets
		 * delayed going to ioapics, and if we reprogram the
		 * vector while Remote IRR is still set the irq will never
		 * fire again.
		 *
		 * To prevent this scenario we read the Remote IRR bit
		 * of the ioapic.  This has two effects.
		 * - On any sane system the read of the ioapic will
		 *   flush writes (and acks) going to the ioapic from
		 *   this cpu.
		 * - We get to see if the ACK has actually been delivered.
		 *
		 * Based on failed experiments of reprogramming the
		 * ioapic entry from outside of irq context starting
		 * with masking the ioapic entry and then polling until
		 * Remote IRR was clear before reprogramming the
		 * ioapic I don't trust the Remote IRR bit to be
		 * completey accurate.
		 *
		 * However there appears to be no other way to plug
		 * this race, so if the Remote IRR bit is not
		 * accurate and is causing problems then it is a hardware bug
		 * and you can go talk to the chipset vendor about it.
		 */
Y
Yinghai Lu 已提交
2553
		if (!io_apic_level_ack_pending(cfg))
2554
			move_masked_irq(irq);
T
Thomas Gleixner 已提交
2555
		unmask_ioapic(cfg);
2556
	}
Y
Yinghai Lu 已提交
2557
}
2558

2559
#ifdef CONFIG_INTR_REMAP
2560
/*
 * irq_ack callback of ir_ioapic_chip: the only step is the local
 * APIC acknowledge.
 */
static void ir_ack_apic_edge(struct irq_data *data)
{
	ack_APIC_irq();
}

2565
static void ir_ack_apic_level(struct irq_data *data)
2566
{
2567
	ack_APIC_irq();
2568
	eoi_ioapic_irq(data->irq, data->chip_data);
2569 2570 2571
}
#endif /* CONFIG_INTR_REMAP */

2572
/* irq_chip callbacks for IO-APIC interrupts without interrupt remapping. */
static struct irq_chip ioapic_chip __read_mostly = {
	.name			= "IO-APIC",

	/* bring-up and masking */
	.irq_startup		= startup_ioapic_irq,
	.irq_mask		= mask_ioapic_irq,
	.irq_unmask		= unmask_ioapic_irq,

	/* acknowledge paths: edge uses irq_ack, level uses irq_eoi */
	.irq_ack		= ack_apic_edge,
	.irq_eoi		= ack_apic_level,

	.irq_retrigger		= ioapic_retrigger_irq,
#ifdef CONFIG_SMP
	.irq_set_affinity	= ioapic_set_affinity,
#endif
};

2585
/*
 * irq_chip callbacks for IO-APIC interrupts behind interrupt remapping.
 * The ack/eoi/affinity hooks are only filled in with CONFIG_INTR_REMAP.
 */
static struct irq_chip ir_ioapic_chip __read_mostly = {
	.name			= "IR-IO-APIC",

	.irq_startup		= startup_ioapic_irq,
	.irq_mask		= mask_ioapic_irq,
	.irq_unmask		= unmask_ioapic_irq,
	.irq_retrigger		= ioapic_retrigger_irq,

#ifdef CONFIG_INTR_REMAP
	.irq_ack		= ir_ack_apic_edge,
	.irq_eoi		= ir_ack_apic_level,
#ifdef CONFIG_SMP
	.irq_set_affinity	= ir_ioapic_set_affinity,
#endif
#endif
};
L
Linus Torvalds 已提交
2599 2600 2601

static inline void init_IO_APIC_traps(void)
{
2602
	struct irq_cfg *cfg;
T
Thomas Gleixner 已提交
2603
	unsigned int irq;
L
Linus Torvalds 已提交
2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615

	/*
	 * NOTE! The local APIC isn't very good at handling
	 * multiple interrupts at the same interrupt level.
	 * As the interrupt level is determined by taking the
	 * vector number and shifting that right by 4, we
	 * want to spread these out a bit so that they don't
	 * all fall in the same interrupt level.
	 *
	 * Also, we've got to be careful not to trash gate
	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
	 */
T
Thomas Gleixner 已提交
2616 2617
	for_each_active_irq(irq) {
		cfg = get_irq_chip_data(irq);
2618
		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
L
Linus Torvalds 已提交
2619 2620 2621 2622 2623
			/*
			 * Hmm.. We don't have an entry for this,
			 * so default to an old-fashioned 8259
			 * interrupt if we can..
			 */
2624 2625
			if (irq < legacy_pic->nr_legacy_irqs)
				legacy_pic->make_irq(irq);
2626
			else
L
Linus Torvalds 已提交
2627
				/* Strange. Oh, well.. */
T
Thomas Gleixner 已提交
2628
				set_irq_chip(irq, &no_irq_chip);
L
Linus Torvalds 已提交
2629 2630 2631 2632
		}
	}
}

2633 2634 2635
/*
 * The local APIC irq-chip implementation:
 */
L
Linus Torvalds 已提交
2636

2637
static void mask_lapic_irq(struct irq_data *data)
L
Linus Torvalds 已提交
2638 2639 2640 2641
{
	unsigned long v;

	v = apic_read(APIC_LVT0);
2642
	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
L
Linus Torvalds 已提交
2643 2644
}

2645
static void unmask_lapic_irq(struct irq_data *data)
L
Linus Torvalds 已提交
2646
{
2647
	unsigned long v;
L
Linus Torvalds 已提交
2648

2649
	v = apic_read(APIC_LVT0);
2650
	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
2651
}
L
Linus Torvalds 已提交
2652

2653
/* irq_ack callback of lapic_chip: acknowledge at the local APIC. */
static void ack_lapic_irq(struct irq_data *data)
{
	ack_APIC_irq();
}

2658
/* irq_chip for an interrupt wired through the local APIC's LVT0 entry. */
static struct irq_chip lapic_chip __read_mostly = {
	.name		= "local-APIC",
	.irq_ack	= ack_lapic_irq,
	.irq_mask	= mask_lapic_irq,
	.irq_unmask	= unmask_lapic_irq,
};

2665
static void lapic_register_intr(int irq)
2666
{
2667
	irq_clear_status_flags(irq, IRQ_LEVEL);
2668 2669 2670 2671
	set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
				      "edge");
}

L
Linus Torvalds 已提交
2672 2673 2674 2675 2676 2677 2678
/*
 * This looks a bit hackish but it's about the only one way of sending
 * a few INTA cycles to 8259As and any associated glue logic.  ICR does
 * not support the ExtINT mode, unfortunately.  We need to send these
 * cycles as some i82489DX-based boards have glue logic that keeps the
 * 8259A interrupt line asserted until INTA.  --macro
 */
2679
static inline void __init unlock_ExtINT_logic(void)
{
	struct IO_APIC_route_entry saved, extint;
	unsigned char save_control, save_freq_select;
	int ioapic, pin, i;

	/* Locate the IO-APIC pin ISA IRQ 8 is routed to; give up if absent. */
	pin = find_isa_irq_pin(8, mp_INT);
	if (pin == -1) {
		WARN_ON_ONCE(1);
		return;
	}
	ioapic = find_isa_irq_apic(8, mp_INT);
	if (ioapic == -1) {
		WARN_ON_ONCE(1);
		return;
	}

	/* Remember the current routing so it can be put back at the end. */
	saved = ioapic_read_entry(ioapic, pin);
	clear_IO_APIC_pin(ioapic, pin);

	/* Temporary entry: unmasked ExtINT aimed at the executing CPU. */
	memset(&extint, 0, sizeof(extint));

	extint.dest_mode = 0;			/* physical delivery */
	extint.mask = 0;			/* unmask IRQ now */
	extint.dest = hard_smp_processor_id();
	extint.delivery_mode = dest_ExtINT;
	extint.polarity = saved.polarity;
	extint.trigger = 0;
	extint.vector = 0;

	ioapic_write_entry(ioapic, pin, extint);

	/* Save the RTC setup, then select rate 0x6 and set RTC_PIE. */
	save_control = CMOS_READ(RTC_CONTROL);
	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
		   RTC_FREQ_SELECT);
	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);

	/*
	 * Poll in 10ms steps.  Every poll that sees RTC_PF set takes an
	 * extra 10 off the budget of 100, shortening the loop.
	 */
	for (i = 100; i-- > 0; ) {
		mdelay(10);
		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
			i -= 10;
	}

	/* Put the RTC and the original routing entry back. */
	CMOS_WRITE(save_control, RTC_CONTROL);
	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
	clear_IO_APIC_pin(ioapic, pin);

	ioapic_write_entry(ioapic, pin, saved);
}

Y
Yinghai Lu 已提交
2731
static int disable_timer_pin_1 __initdata;
2732
/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
2733
static int __init disable_timer_pin_setup(char *arg)
Y
Yinghai Lu 已提交
2734 2735 2736 2737
{
	disable_timer_pin_1 = 1;
	return 0;
}
2738
early_param("disable_timer_pin_1", disable_timer_pin_setup);
Y
Yinghai Lu 已提交
2739 2740 2741

/* Set to 1 by check_timer() when IRQ0 only works when routed through the 8259A. */
int timer_through_8259 __initdata;

L
Linus Torvalds 已提交
2742 2743 2744 2745 2746
/*
 * This code may look a bit paranoid, but it's supposed to cooperate with
 * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
 * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
 * fanatically on his truly buggy board.
2747 2748
 *
 * FIXME: really need to revamp this for all platforms.
L
Linus Torvalds 已提交
2749
 */
2750
static inline void __init check_timer(void)
{
	struct irq_cfg *cfg = get_irq_chip_data(0);
	int node = cpu_to_node(0);
	int apic1, pin1, apic2, pin2;
	int no_pin1 = 0;
	unsigned long flags;

	local_irq_save(flags);

	/*
	 * get/set the timer IRQ vector:
	 */
	legacy_pic->mask(0);
	assign_irq_vector(0, cfg, apic->target_cpus());

	/*
	 * As IRQ0 is to be enabled in the 8259A, the virtual
	 * wire has to be disabled in the local APIC.  Also
	 * timer interrupts need to be acknowledged manually in
	 * the 8259A for the i82489DX when using the NMI
	 * watchdog as that APIC treats NMIs as level-triggered.
	 * The AEOI mode will finish them in the 8259A
	 * automatically.
	 */
	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
	legacy_pic->init(1);

	/* Candidate 1: the pin the MP table gives for ISA IRQ0. */
	pin1  = find_isa_irq_pin(0, mp_INT);
	apic1 = find_isa_irq_apic(0, mp_INT);
	/* Candidate 2: the pin recorded for the cascaded 8259A. */
	pin2  = ioapic_i8259.pin;
	apic2 = ioapic_i8259.apic;

	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
		    cfg->vector, apic1, pin1, apic2, pin2);

	/*
	 * Some BIOS writers are clueless and report the ExtINTA
	 * I/O APIC input from the cascaded 8259A as the timer
	 * interrupt input.  So just in case, if only one pin
	 * was found above, try it both directly and through the
	 * 8259A.
	 */
	if (pin1 == -1) {
		if (intr_remapping_enabled)
			panic("BIOS bug: timer not connected to IO-APIC");
		pin1 = pin2;
		apic1 = apic2;
		no_pin1 = 1;
	} else if (pin2 == -1) {
		pin2 = pin1;
		apic2 = apic1;
	}

	if (pin1 != -1) {
		/*
		 * Attempt 1: does IRQ0 through the IOAPIC work?
		 */
		if (no_pin1) {
			add_pin_to_irq_node(cfg, node, apic1, pin1);
			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
		} else {
			/*
			 * For edge trigger, setup_ioapic_irq already
			 * left it unmasked, so we only need to unmask
			 * if it is level-triggered.  Do we really have
			 * a level-triggered timer?
			 */
			int idx = find_irq_entry(apic1, pin1, mp_INT);

			if (idx != -1 && irq_trigger(idx))
				unmask_ioapic(cfg);
		}
		if (timer_irq_works()) {
			if (disable_timer_pin_1 > 0)
				clear_IO_APIC_pin(0, pin1);
			goto out;
		}
		if (intr_remapping_enabled)
			panic("timer doesn't work through Interrupt-remapped IO-APIC");
		local_irq_disable();
		clear_IO_APIC_pin(apic1, pin1);
		if (!no_pin1)
			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
				    "8254 timer not connected to IO-APIC\n");

		/*
		 * Attempt 2: route IRQ0 through the 8259A pin instead.
		 */
		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
			    "(IRQ0) through the 8259A ...\n");
		apic_printk(APIC_QUIET, KERN_INFO
			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
		/*
		 * legacy devices should be connected to IO APIC #0
		 */
		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
		legacy_pic->unmask(0);
		if (timer_irq_works()) {
			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
			timer_through_8259 = 1;
			goto out;
		}
		/*
		 * Cleanup, just in case ...
		 */
		local_irq_disable();
		legacy_pic->mask(0);
		clear_IO_APIC_pin(apic2, pin2);
		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
	}

	/*
	 * Attempt 3: deliver the timer through local APIC LVT0 in fixed mode.
	 */
	apic_printk(APIC_QUIET, KERN_INFO
		    "...trying to set up timer as Virtual Wire IRQ...\n");

	lapic_register_intr(0);
	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
	legacy_pic->unmask(0);

	if (timer_irq_works()) {
		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
		goto out;
	}
	local_irq_disable();
	legacy_pic->mask(0);
	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");

	/*
	 * Attempt 4 (last resort): LVT0 in ExtINT mode.
	 */
	apic_printk(APIC_QUIET, KERN_INFO
		    "...trying to set up timer as ExtINT IRQ...\n");

	legacy_pic->init(0);
	legacy_pic->make_irq(0);
	apic_write(APIC_LVT0, APIC_DM_EXTINT);

	unlock_ExtINT_logic();

	if (timer_irq_works()) {
		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
		goto out;
	}
	local_irq_disable();
	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
		"report.  Then try booting with the 'noapic' option.\n");
out:
	local_irq_restore(flags);
}

/*
2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912
 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
 * to devices.  However there may be an I/O APIC pin available for
 * this interrupt regardless.  The pin may be left unconnected, but
 * typically it will be reused as an ExtINT cascade interrupt for
 * the master 8259A.  In the MPS case such a pin will normally be
 * reported as an ExtINT interrupt in the MP table.  With ACPI
 * there is no provision for ExtINT interrupts, and in the absence
 * of an override it would be treated as an ordinary ISA I/O APIC
 * interrupt, that is edge-triggered and unmasked by default.  We
 * used to do this, but it caused problems on some systems because
 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
 * the same ExtINT cascade interrupt to drive the local APIC of the
 * bootstrap processor.  Therefore we refrain from routing IRQ2 to
 * the I/O APIC in all cases now.  No actual device should request
 * it anyway.  --macro
L
Linus Torvalds 已提交
2913
 */
2914
#define PIC_IRQS	(1UL << PIC_CASCADE_IR)
L
Linus Torvalds 已提交
2915 2916 2917

void __init setup_IO_APIC(void)
{
	/*
	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
	 */

	/* With a legacy PIC present, the cascade IRQ stays out of the IO-APIC set. */
	if (legacy_pic->nr_legacy_irqs)
		io_apic_irqs = ~PIC_IRQS;
	else
		io_apic_irqs = ~0UL;

	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");

	/*
	 * Set up IO-APIC IRQ routing.
	 */
	x86_init.mpparse.setup_ioapic_ids();

	sync_Arb_IDs();
	setup_IO_APIC_irqs();
	init_IO_APIC_traps();

	/* The timer check is only run when legacy irqs exist. */
	if (legacy_pic->nr_legacy_irqs)
		check_timer();
}

/*
2938 2939
 *      Called after all the initialization is done. If we didn't find any
 *      APIC bugs then we can allow the modify fast path.
L
Linus Torvalds 已提交
2940
 */
2941

L
Linus Torvalds 已提交
2942 2943
static int __init io_apic_bug_finalize(void)
{
	/* Still at the initial -1: settle on 0. */
	if (sis_apic_bug == -1)
		sis_apic_bug = 0;

	return 0;
}

late_initcall(io_apic_bug_finalize);

struct sysfs_ioapic_data {
	struct sys_device dev;
	struct IO_APIC_route_entry entry[0];
};
2955
static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
L
Linus Torvalds 已提交
2956

2957
/*
 * sysdev suspend hook: snapshot every redirection entry of IO-APIC
 * dev->id into the buffer that trails the sysdev.  Always returns 0;
 * @state is not used.
 */
static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
{
	struct sysfs_ioapic_data *data =
		container_of(dev, struct sysfs_ioapic_data, dev);
	int pin;

	for (pin = 0; pin < nr_ioapic_registers[dev->id]; pin++)
		data->entry[pin] = ioapic_read_entry(dev->id, pin);

	return 0;
}

static int ioapic_resume(struct sys_device *dev)
{
	struct IO_APIC_route_entry *entry;
	struct sysfs_ioapic_data *data;
	unsigned long flags;
	union IO_APIC_reg_00 reg_00;
	int i;
2978

L
Linus Torvalds 已提交
2979 2980 2981
	data = container_of(dev, struct sysfs_ioapic_data, dev);
	entry = data->entry;

2982
	raw_spin_lock_irqsave(&ioapic_lock, flags);
L
Linus Torvalds 已提交
2983
	reg_00.raw = io_apic_read(dev->id, 0);
2984 2985
	if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
		reg_00.bits.ID = mp_ioapics[dev->id].apicid;
L
Linus Torvalds 已提交
2986 2987
		io_apic_write(dev->id, 0, reg_00.raw);
	}
2988
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
2989
	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
2990
		ioapic_write_entry(dev->id, i, entry[i]);
L
Linus Torvalds 已提交
2991 2992 2993 2994 2995

	return 0;
}

static struct sysdev_class ioapic_sysdev_class = {
2996
	.name = "ioapic",
L
Linus Torvalds 已提交
2997 2998 2999 3000 3001 3002
	.suspend = ioapic_suspend,
	.resume = ioapic_resume,
};

/*
 * Register the "ioapic" sysdev class and one sysdev per IO-APIC.  Each
 * sysdev is allocated together with room for that IO-APIC's redirection
 * entries.  A failure for one IO-APIC is logged and skipped; only a failure
 * to register the class itself is returned to the caller.
 */
static int __init ioapic_init_sysfs(void)
{
	int idx, err;

	err = sysdev_class_register(&ioapic_sysdev_class);
	if (err)
		return err;

	for (idx = 0; idx < nr_ioapics; idx++) {
		struct sys_device *sysdev;
		int bytes;

		bytes = sizeof(struct sys_device) + nr_ioapic_registers[idx]
			* sizeof(struct IO_APIC_route_entry);
		mp_ioapic_data[idx] = kzalloc(bytes, GFP_KERNEL);
		if (!mp_ioapic_data[idx]) {
			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", idx);
			continue;
		}

		sysdev = &mp_ioapic_data[idx]->dev;
		sysdev->id = idx;
		sysdev->cls = &ioapic_sysdev_class;
		if (!sysdev_register(sysdev))
			continue;

		/* Registration failed: drop the save area again. */
		kfree(mp_ioapic_data[idx]);
		mp_ioapic_data[idx] = NULL;
		printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", idx);
	}

	return 0;
}

device_initcall(ioapic_init_sysfs);

3035
/*
3036
 * Dynamic irq allocation and deallocation
3037
 */
3038
unsigned int create_irq_nr(unsigned int from, int node)
3039
{
3040
	struct irq_cfg *cfg;
3041
	unsigned long flags;
3042 3043
	unsigned int ret = 0;
	int irq;
3044

3045 3046
	if (from < nr_irqs_gsi)
		from = nr_irqs_gsi;
3047

3048 3049 3050 3051 3052 3053 3054
	irq = alloc_irq_from(from, node);
	if (irq < 0)
		return 0;
	cfg = alloc_irq_cfg(irq, node);
	if (!cfg) {
		free_irq_at(irq, NULL);
		return 0;
3055
	}
3056

3057 3058 3059 3060
	raw_spin_lock_irqsave(&vector_lock, flags);
	if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
		ret = irq;
	raw_spin_unlock_irqrestore(&vector_lock, flags);
3061

3062 3063 3064 3065 3066 3067 3068
	if (ret) {
		set_irq_chip_data(irq, cfg);
		irq_clear_status_flags(irq, IRQ_NOREQUEST);
	} else {
		free_irq_at(irq, cfg);
	}
	return ret;
3069 3070
}

Y
Yinghai Lu 已提交
3071 3072
int create_irq(void)
{
3073
	int node = cpu_to_node(0);
3074
	unsigned int irq_want;
3075 3076
	int irq;

3077
	irq_want = nr_irqs_gsi;
3078
	irq = create_irq_nr(irq_want, node);
3079 3080 3081 3082 3083

	if (irq == 0)
		irq = -1;

	return irq;
Y
Yinghai Lu 已提交
3084 3085
}

3086 3087
void destroy_irq(unsigned int irq)
{
3088
	struct irq_cfg *cfg = get_irq_chip_data(irq);
3089 3090
	unsigned long flags;

3091
	irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
3092

3093
	if (irq_remapped(cfg))
3094
		free_irte(irq);
3095
	raw_spin_lock_irqsave(&vector_lock, flags);
3096
	__clear_irq_vector(irq, cfg);
3097
	raw_spin_unlock_irqrestore(&vector_lock, flags);
3098
	free_irq_at(irq, cfg);
3099 3100
}

3101
/*
S
Simon Arlott 已提交
3102
 * MSI message composition
3103 3104
 */
#ifdef CONFIG_PCI_MSI
3105 3106
/*
 * Assign a vector to @irq and fill @msg with the matching MSI address/data.
 *
 * For a remapped irq the IRTE is programmed and the message carries the
 * remapping index and sub-handle; the IRTE source-id comes from @pdev when
 * it is non-NULL and from @hpet_id otherwise.  For a non-remapped irq the
 * message encodes destination, destination mode, delivery mode and vector.
 *
 * Returns 0 on success, -ENXIO when the APIC is disabled, or the error from
 * assign_irq_vector().
 */
static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
			   struct msi_msg *msg, u8 hpet_id)
{
	struct irq_cfg *cfg;
	unsigned dest;
	int err;

	if (disable_apic)
		return -ENXIO;

	cfg = irq_cfg(irq);
	err = assign_irq_vector(irq, cfg, apic->target_cpus());
	if (err)
		return err;

	dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());

	if (!irq_remapped(get_irq_chip_data(irq))) {
		/* Extended destination bits are only set in x2apic mode. */
		msg->address_hi = MSI_ADDR_BASE_HI;
		if (x2apic_enabled())
			msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);

		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_ID(dest);
		if (apic->irq_dest_mode == 0)
			msg->address_lo |= MSI_ADDR_DEST_MODE_PHYSICAL;
		else
			msg->address_lo |= MSI_ADDR_DEST_MODE_LOGICAL;
		if (apic->irq_delivery_mode != dest_LowestPrio)
			msg->address_lo |= MSI_ADDR_REDIRECTION_CPU;
		else
			msg->address_lo |= MSI_ADDR_REDIRECTION_LOWPRI;

		msg->data = MSI_DATA_TRIGGER_EDGE |
			    MSI_DATA_LEVEL_ASSERT |
			    MSI_DATA_VECTOR(cfg->vector);
		if (apic->irq_delivery_mode != dest_LowestPrio)
			msg->data |= MSI_DATA_DELIVERY_FIXED;
		else
			msg->data |= MSI_DATA_DELIVERY_LOWPRI;
	} else {
		struct irte irte;
		int ir_index;
		u16 sub_handle;

		ir_index = map_irq_to_irte_handle(irq, &sub_handle);
		BUG_ON(ir_index == -1);

		prepare_irte(&irte, cfg->vector, dest);

		/* Set source-id of interrupt request */
		if (pdev)
			set_msi_sid(&irte, pdev);
		else
			set_hpet_sid(&irte, hpet_id);

		modify_irte(irq, &irte);

		msg->address_hi = MSI_ADDR_BASE_HI;
		msg->data = sub_handle;
		msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
				  MSI_ADDR_IR_SHV |
				  MSI_ADDR_IR_INDEX1(ir_index) |
				  MSI_ADDR_IR_INDEX2(ir_index);
	}

	return 0;
}

3174
#ifdef CONFIG_SMP
3175 3176
/*
 * irq_set_affinity hook for non-remapped MSI: pick the new destination via
 * __ioapic_set_affinity(), then patch vector and destination ID into the
 * cached MSI message and write it back.  Returns 0, or -1 on failure.
 */
static int
msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
	struct irq_cfg *cfg = data->chip_data;
	unsigned int dest;
	struct msi_msg msg;

	if (__ioapic_set_affinity(data, mask, &dest))
		return -1;

	__get_cached_msi_msg(data->msi_desc, &msg);

	msg.data = (msg.data & ~MSI_DATA_VECTOR_MASK) |
		   MSI_DATA_VECTOR(cfg->vector);
	msg.address_lo = (msg.address_lo & ~MSI_ADDR_DEST_ID_MASK) |
			 MSI_ADDR_DEST_ID(dest);

	__write_msi_msg(data->msi_desc, &msg);

	return 0;
}
3196 3197 3198 3199 3200
#ifdef CONFIG_INTR_REMAP
/*
 * Migrate the MSI irq to another cpumask. This migration is
 * done in the process context using interrupt-remapping hardware.
 */
3201
static int
3202 3203
ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
		    bool force)
3204
{
3205 3206
	struct irq_cfg *cfg = data->chip_data;
	unsigned int dest, irq = data->irq;
3207 3208 3209
	struct irte irte;

	if (get_irte(irq, &irte))
3210
		return -1;
3211

3212
	if (__ioapic_set_affinity(data, mask, &dest))
3213
		return -1;
3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227

	irte.vector = cfg->vector;
	irte.dest_id = IRTE_DEST(dest);

	/*
	 * atomically update the IRTE with the new destination and vector.
	 */
	modify_irte(irq, &irte);

	/*
	 * After this point, all the interrupts will start arriving
	 * at the new destination. So, time to cleanup the previous
	 * vector allocation.
	 */
3228 3229
	if (cfg->move_in_progress)
		send_cleanup_vector(cfg);
3230 3231

	return 0;
3232
}
Y
Yinghai Lu 已提交
3233

3234
#endif
3235
#endif /* CONFIG_SMP */
3236

3237 3238 3239 3240 3241
/*
 * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI or MSI-X Capability Structure.
 */
static struct irq_chip msi_chip = {
3242 3243 3244 3245
	.name			= "PCI-MSI",
	.irq_unmask		= unmask_msi_irq,
	.irq_mask		= mask_msi_irq,
	.irq_ack		= ack_apic_edge,
3246
#ifdef CONFIG_SMP
3247
	.irq_set_affinity	= msi_set_affinity,
3248
#endif
3249
	.irq_retrigger		= ioapic_retrigger_irq,
3250 3251
};

3252
static struct irq_chip msi_ir_chip = {
3253 3254 3255
	.name			= "IR-PCI-MSI",
	.irq_unmask		= unmask_msi_irq,
	.irq_mask		= mask_msi_irq,
3256
#ifdef CONFIG_INTR_REMAP
3257
	.irq_ack		= ir_ack_apic_edge,
3258
#ifdef CONFIG_SMP
3259
	.irq_set_affinity	= ir_msi_set_affinity,
3260
#endif
3261
#endif
3262
	.irq_retrigger		= ioapic_retrigger_irq,
3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285
};

/*
 * Map the PCI dev to the corresponding remapping hardware unit
 * and allocate 'nvec' consecutive interrupt-remapping table entries
 * in it.
 */
static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
{
	struct intel_iommu *iommu;
	int index;

	iommu = map_dev_to_ir(dev);
	if (!iommu) {
		printk(KERN_ERR
		       "Unable to map PCI %s to iommu\n", pci_name(dev));
		return -ENOENT;
	}

	index = alloc_irte(iommu, irq, nvec);
	if (index < 0) {
		printk(KERN_ERR
		       "Unable to allocate %d IRTE for PCI %s\n", nvec,
T
Thomas Gleixner 已提交
3286
		       pci_name(dev));
3287 3288 3289 3290
		return -ENOSPC;
	}
	return index;
}
3291

Y
Yinghai Lu 已提交
3292
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3293 3294
{
	struct msi_msg msg;
3295
	int ret;
3296

3297
	ret = msi_compose_msg(dev, irq, &msg, -1);
3298 3299 3300
	if (ret < 0)
		return ret;

Y
Yinghai Lu 已提交
3301
	set_irq_msi(irq, msidesc);
3302 3303
	write_msi_msg(irq, &msg);

3304
	if (irq_remapped(get_irq_chip_data(irq))) {
3305
		irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3306 3307 3308
		set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
	} else
		set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
3309

Y
Yinghai Lu 已提交
3310 3311
	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);

3312 3313 3314
	return 0;
}

S
Stefano Stabellini 已提交
3315
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3316
{
3317 3318
	int node, ret, sub_handle, index = 0;
	unsigned int irq, irq_want;
3319
	struct msi_desc *msidesc;
3320
	struct intel_iommu *iommu = NULL;
3321

3322 3323 3324 3325
	/* x86 doesn't support multiple MSI yet */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

3326
	node = dev_to_node(&dev->dev);
3327
	irq_want = nr_irqs_gsi;
3328
	sub_handle = 0;
3329
	list_for_each_entry(msidesc, &dev->msi_list, list) {
3330
		irq = create_irq_nr(irq_want, node);
3331 3332
		if (irq == 0)
			return -1;
Y
Yinghai Lu 已提交
3333
		irq_want = irq + 1;
3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360
		if (!intr_remapping_enabled)
			goto no_ir;

		if (!sub_handle) {
			/*
			 * allocate the consecutive block of IRTE's
			 * for 'nvec'
			 */
			index = msi_alloc_irte(dev, irq, nvec);
			if (index < 0) {
				ret = index;
				goto error;
			}
		} else {
			iommu = map_dev_to_ir(dev);
			if (!iommu) {
				ret = -ENOENT;
				goto error;
			}
			/*
			 * setup the mapping between the irq and the IRTE
			 * base index, the sub_handle pointing to the
			 * appropriate interrupt remap table entry.
			 */
			set_irte_irq(irq, iommu, index, sub_handle);
		}
no_ir:
3361
		ret = setup_msi_irq(dev, msidesc, irq);
3362 3363 3364 3365 3366
		if (ret < 0)
			goto error;
		sub_handle++;
	}
	return 0;
3367 3368

error:
3369 3370
	destroy_irq(irq);
	return ret;
3371 3372
}

S
Stefano Stabellini 已提交
3373
/* Tear down one MSI irq; this is simply destroy_irq() on it. */
void native_teardown_msi_irq(unsigned int irq)
{
	destroy_irq(irq);
}

3378
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
3379
#ifdef CONFIG_SMP
3380 3381 3382
/*
 * irq_set_affinity hook for the DMAR MSI: choose the new destination, then
 * rewrite vector, destination ID and extended destination ID in the DMAR
 * unit's MSI message.  Returns 0, or -1 on failure.
 */
static int
dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
		      bool force)
{
	struct irq_cfg *cfg = data->chip_data;
	unsigned int irq = data->irq;
	unsigned int dest;
	struct msi_msg msg;

	if (__ioapic_set_affinity(data, mask, &dest))
		return -1;

	dmar_msi_read(irq, &msg);

	msg.data = (msg.data & ~MSI_DATA_VECTOR_MASK) |
		   MSI_DATA_VECTOR(cfg->vector);
	msg.address_lo = (msg.address_lo & ~MSI_ADDR_DEST_ID_MASK) |
			 MSI_ADDR_DEST_ID(dest);
	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);

	dmar_msi_write(irq, &msg);

	return 0;
}
Y
Yinghai Lu 已提交
3403

3404 3405
#endif /* CONFIG_SMP */

3406
static struct irq_chip dmar_msi_type = {
3407 3408 3409 3410
	.name			= "DMAR_MSI",
	.irq_unmask		= dmar_msi_unmask,
	.irq_mask		= dmar_msi_mask,
	.irq_ack		= ack_apic_edge,
3411
#ifdef CONFIG_SMP
3412
	.irq_set_affinity	= dmar_msi_set_affinity,
3413
#endif
3414
	.irq_retrigger		= ioapic_retrigger_irq,
3415 3416 3417 3418 3419 3420
};

int arch_setup_dmar_msi(unsigned int irq)
{
	int ret;
	struct msi_msg msg;
3421

3422
	ret = msi_compose_msg(NULL, irq, &msg, -1);
3423 3424 3425 3426 3427 3428 3429 3430 3431
	if (ret < 0)
		return ret;
	dmar_msi_write(irq, &msg);
	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
		"edge");
	return 0;
}
#endif

3432 3433 3434
#ifdef CONFIG_HPET_TIMER

#ifdef CONFIG_SMP
3435 3436
/*
 * irq_set_affinity hook for the non-remapped HPET MSI: choose the new
 * destination, then patch vector and destination ID into the HPET's MSI
 * message.  Returns 0, or -1 on failure.
 */
static int hpet_msi_set_affinity(struct irq_data *data,
				 const struct cpumask *mask, bool force)
{
	struct irq_cfg *cfg = data->chip_data;
	unsigned int dest;
	struct msi_msg msg;

	if (__ioapic_set_affinity(data, mask, &dest))
		return -1;

	hpet_msi_read(data->handler_data, &msg);

	msg.data = (msg.data & ~MSI_DATA_VECTOR_MASK) |
		   MSI_DATA_VECTOR(cfg->vector);
	msg.address_lo = (msg.address_lo & ~MSI_ADDR_DEST_ID_MASK) |
			 MSI_ADDR_DEST_ID(dest);

	hpet_msi_write(data->handler_data, &msg);

	return 0;
}
Y
Yinghai Lu 已提交
3456

3457 3458
#endif /* CONFIG_SMP */

3459
static struct irq_chip ir_hpet_msi_type = {
3460 3461 3462
	.name			= "IR-HPET_MSI",
	.irq_unmask		= hpet_msi_unmask,
	.irq_mask		= hpet_msi_mask,
3463
#ifdef CONFIG_INTR_REMAP
3464
	.irq_ack		= ir_ack_apic_edge,
3465
#ifdef CONFIG_SMP
3466
	.irq_set_affinity	= ir_msi_set_affinity,
3467 3468
#endif
#endif
3469
	.irq_retrigger		= ioapic_retrigger_irq,
3470 3471
};

3472
static struct irq_chip hpet_msi_type = {
3473
	.name = "HPET_MSI",
3474 3475
	.irq_unmask = hpet_msi_unmask,
	.irq_mask = hpet_msi_mask,
3476
	.irq_ack = ack_apic_edge,
3477
#ifdef CONFIG_SMP
3478
	.irq_set_affinity = hpet_msi_set_affinity,
3479
#endif
3480
	.irq_retrigger = ioapic_retrigger_irq,
3481 3482
};

3483
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
3484 3485
{
	struct msi_msg msg;
3486
	int ret;
3487

3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500
	if (intr_remapping_enabled) {
		struct intel_iommu *iommu = map_hpet_to_ir(id);
		int index;

		if (!iommu)
			return -1;

		index = alloc_irte(iommu, irq, 1);
		if (index < 0)
			return -1;
	}

	ret = msi_compose_msg(NULL, irq, &msg, id);
3501 3502 3503
	if (ret < 0)
		return ret;

3504
	hpet_msi_write(get_irq_data(irq), &msg);
3505
	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
3506
	if (irq_remapped(get_irq_chip_data(irq)))
3507 3508 3509 3510 3511
		set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
					      handle_edge_irq, "edge");
	else
		set_irq_chip_and_handler_name(irq, &hpet_msi_type,
					      handle_edge_irq, "edge");
Y
Yinghai Lu 已提交
3512

3513 3514 3515 3516
	return 0;
}
#endif

3517
#endif /* CONFIG_PCI_MSI */
3518 3519 3520 3521 3522 3523 3524
/*
 * Hypertransport interrupt support
 */
#ifdef CONFIG_HT_IRQ

#ifdef CONFIG_SMP

3525
/*
 * Retarget a HyperTransport irq: replace the vector and destination ID
 * fields of its message with @vector and @dest and write it back.
 */
static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
{
	struct ht_irq_msg msg;

	fetch_ht_irq_msg(irq, &msg);

	msg.address_lo = (msg.address_lo &
			  ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK)) |
			 HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
	msg.address_hi = (msg.address_hi & ~(HT_IRQ_HIGH_DEST_ID_MASK)) |
			 HT_IRQ_HIGH_DEST_ID(dest);

	write_ht_irq_msg(irq, &msg);
}

3539 3540
/*
 * irq_set_affinity hook for HyperTransport irqs.  Returns 0 after
 * retargeting the irq, or -1 when no destination could be chosen.
 */
static int
ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
	struct irq_cfg *cfg = data->chip_data;
	unsigned int dest;

	if (__ioapic_set_affinity(data, mask, &dest) != 0)
		return -1;

	target_ht_irq(data->irq, dest, cfg->vector);

	return 0;
}
Y
Yinghai Lu 已提交
3551

3552 3553
#endif

3554
static struct irq_chip ht_irq_chip = {
3555 3556 3557 3558
	.name			= "PCI-HT",
	.irq_mask		= mask_ht_irq,
	.irq_unmask		= unmask_ht_irq,
	.irq_ack		= ack_apic_edge,
3559
#ifdef CONFIG_SMP
3560
	.irq_set_affinity	= ht_set_affinity,
3561
#endif
3562
	.irq_retrigger		= ioapic_retrigger_irq,
3563 3564 3565 3566
};

int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
{
3567 3568
	struct irq_cfg *cfg;
	int err;
3569

J
Jan Beulich 已提交
3570 3571 3572
	if (disable_apic)
		return -ENXIO;

Y
Yinghai Lu 已提交
3573
	cfg = irq_cfg(irq);
3574
	err = assign_irq_vector(irq, cfg, apic->target_cpus());
3575
	if (!err) {
3576
		struct ht_irq_msg msg;
3577 3578
		unsigned dest;

3579 3580
		dest = apic->cpu_mask_to_apicid_and(cfg->domain,
						    apic->target_cpus());
3581

3582
		msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3583

3584 3585
		msg.address_lo =
			HT_IRQ_LOW_BASE |
3586
			HT_IRQ_LOW_DEST_ID(dest) |
3587
			HT_IRQ_LOW_VECTOR(cfg->vector) |
3588
			((apic->irq_dest_mode == 0) ?
3589 3590 3591
				HT_IRQ_LOW_DM_PHYSICAL :
				HT_IRQ_LOW_DM_LOGICAL) |
			HT_IRQ_LOW_RQEOI_EDGE |
3592
			((apic->irq_delivery_mode != dest_LowestPrio) ?
3593 3594 3595 3596
				HT_IRQ_LOW_MT_FIXED :
				HT_IRQ_LOW_MT_ARBITRATED) |
			HT_IRQ_LOW_IRQ_MASKED;

3597
		write_ht_irq_msg(irq, &msg);
3598

3599 3600
		set_irq_chip_and_handler_name(irq, &ht_irq_chip,
					      handle_edge_irq, "edge");
Y
Yinghai Lu 已提交
3601 3602

		dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
3603
	}
3604
	return err;
3605 3606 3607
}
#endif /* CONFIG_HT_IRQ */

3608 3609 3610 3611 3612
/* Return the number of redirection entries of IO-APIC @ioapic. */
int __init io_apic_get_redir_entries (int ioapic)
{
	union IO_APIC_reg_01 ver_reg;
	unsigned long flags;

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	ver_reg.raw = io_apic_read(ioapic, 1);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);

	/*
	 * The register reports the highest redirection entry index
	 * supported, which is one less than the total number of
	 * redirection entries.
	 */
	return ver_reg.bits.entries + 1;
}

3624
/*
 * Raise nr_irqs_gsi to gsi_top + NR_IRQS_LEGACY if it is currently smaller,
 * then log the resulting value.
 */
static void __init probe_nr_irqs_gsi(void)
{
	int wanted = gsi_top + NR_IRQS_LEGACY;

	if (nr_irqs_gsi < wanted)
		nr_irqs_gsi = wanted;

	printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}

3635 3636 3637 3638 3639
/* Accessor for the current value of nr_irqs_gsi. */
int get_nr_irqs_gsi(void)
{
	return nr_irqs_gsi;
}

Y
Yinghai Lu 已提交
3640 3641 3642 3643 3644
#ifdef CONFIG_SPARSE_IRQ
/*
 * Clamp nr_irqs: first to NR_VECTORS * nr_cpu_ids, then to an estimate
 * built from nr_irqs_gsi and nr_cpu_ids.  Returns NR_IRQS_LEGACY.
 */
int __init arch_probe_nr_irqs(void)
{
	int estimate;

	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
		nr_irqs = NR_VECTORS * nr_cpu_ids;

	estimate = nr_irqs_gsi + 8 * nr_cpu_ids;
#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
	/*
	 * for MSI and HT dyn irq
	 */
	estimate += nr_irqs_gsi * 16;
#endif
	/* Only ever lower nr_irqs here, never raise it. */
	if (estimate < nr_irqs)
		nr_irqs = estimate;

	return NR_IRQS_LEGACY;
}
#endif

3662 3663
static int __io_apic_set_pci_routing(struct device *dev, int irq,
				struct io_apic_irq_attr *irq_attr)
3664 3665 3666
{
	struct irq_cfg *cfg;
	int node;
3667 3668
	int ioapic, pin;
	int trigger, polarity;
3669

3670
	ioapic = irq_attr->ioapic;
3671 3672 3673 3674 3675 3676 3677 3678 3679
	if (!IO_APIC_IRQ(irq)) {
		apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
			ioapic);
		return -EINVAL;
	}

	if (dev)
		node = dev_to_node(dev);
	else
3680
		node = cpu_to_node(0);
3681

3682 3683
	cfg = alloc_irq_and_cfg_at(irq, node);
	if (!cfg)
3684 3685
		return 0;

3686 3687 3688 3689
	pin = irq_attr->ioapic_pin;
	trigger = irq_attr->trigger;
	polarity = irq_attr->polarity;

3690 3691 3692
	/*
	 * IRQs < 16 are already in the irq_2_pin[] map
	 */
3693
	if (irq >= legacy_pic->nr_legacy_irqs) {
T
Thomas Gleixner 已提交
3694
		if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
3695 3696 3697 3698
			printk(KERN_INFO "can not add pin %d for irq %d\n",
				pin, irq);
			return 0;
		}
3699 3700
	}

3701
	setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
3702 3703 3704 3705

	return 0;
}

3706 3707
int io_apic_set_pci_routing(struct device *dev, int irq,
				struct io_apic_irq_attr *irq_attr)
3708
{
3709
	int ioapic, pin;
3710 3711 3712 3713 3714
	/*
	 * Avoid pin reprogramming.  PRTs typically include entries
	 * with redundant pin->gsi mappings (but unique PCI devices);
	 * we only program the IOAPIC on the first.
	 */
3715 3716
	ioapic = irq_attr->ioapic;
	pin = irq_attr->ioapic_pin;
3717 3718 3719 3720 3721 3722 3723
	if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
		pr_debug("Pin %d-%d already programmed\n",
			 mp_ioapics[ioapic].apicid, pin);
		return 0;
	}
	set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);

3724
	return __io_apic_set_pci_routing(dev, irq, irq_attr);
3725 3726
}

3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737
/*
 * Return an APIC ID for the IO-APIC being registered.  @id is kept when it
 * is acceptable; otherwise a replacement is chosen (via
 * io_apic_get_unique_id() on 32-bit, or the first ID not used by an already
 * registered IO-APIC on 64-bit).
 */
u8 __init io_apic_unique_id(u8 id)
{
#ifdef CONFIG_X86_32
	/* Only Intel systems without an xAPIC need the uniqueness search. */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
	    APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
		return id;

	return io_apic_get_unique_id(nr_ioapics, id);
#else
	DECLARE_BITMAP(used, 256);
	int idx;

	/* Collect the IDs of all IO-APICs registered so far. */
	bitmap_zero(used, 256);
	for (idx = 0; idx < nr_ioapics; idx++)
		__set_bit(mp_ioapics[idx].apicid, used);

	if (test_bit(id, used))
		return find_first_zero_bit(used, 256);

	return id;
#endif
}
L
Linus Torvalds 已提交
3749

3750
#ifdef CONFIG_X86_32
3751
/*
 * Pick a unique APIC ID for IO-APIC @ioapic, preferring @apic_id, and
 * program it into the IO-APIC's ID register if it differs from the current
 * one.
 *
 * An out-of-range @apic_id is replaced by the ID currently held in the
 * register; an ID that is already in use is replaced by the lowest free
 * one.  Panics when no free ID is left.
 *
 * Returns the assigned APIC ID, or -1 when the ID register does not take
 * the new value.
 */
int __init io_apic_get_unique_id(int ioapic, int apic_id)
{
	union IO_APIC_reg_00 reg_00;
	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
	physid_mask_t tmp;
	unsigned long flags;
	int i;

	/*
	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
	 * supports up to 16 on one shared APIC bus.
	 *
	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
	 *      advantage of new APIC bus architecture.
	 */

	/* Seed the map of used IDs on the first call. */
	if (physids_empty(apic_id_map))
		apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);

	raw_spin_lock_irqsave(&ioapic_lock, flags);
	reg_00.raw = io_apic_read(ioapic, 0);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);

	if (apic_id >= get_physical_broadcast()) {
		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
			"%d\n", ioapic, apic_id, reg_00.bits.ID);
		apic_id = reg_00.bits.ID;
	}

	/*
	 * Every APIC in a system must have a unique ID or we get lots of nice
	 * 'stuck on smp_invalidate_needed IPI wait' messages.
	 */
	if (apic->check_apicid_used(&apic_id_map, apic_id)) {

		for (i = 0; i < get_physical_broadcast(); i++) {
			if (!apic->check_apicid_used(&apic_id_map, i))
				break;
		}

		if (i == get_physical_broadcast())
			panic("Max apic_id exceeded!\n");

		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
			"trying %d\n", ioapic, apic_id, i);

		apic_id = i;
	}

	/* Record the chosen ID as used. */
	apic->apicid_to_cpu_present(apic_id, &tmp);
	physids_or(apic_id_map, apic_id_map, tmp);

	if (reg_00.bits.ID != apic_id) {
		reg_00.bits.ID = apic_id;

		raw_spin_lock_irqsave(&ioapic_lock, flags);
		io_apic_write(ioapic, 0, reg_00.raw);
		reg_00.raw = io_apic_read(ioapic, 0);
		raw_spin_unlock_irqrestore(&ioapic_lock, flags);

		/* Sanity check: the register must read back the new ID. */
		if (reg_00.bits.ID != apic_id) {
			printk(KERN_ERR "IOAPIC[%d]: Unable to change apic_id!\n",
			       ioapic);
			return -1;
		}
	}

	apic_printk(APIC_VERBOSE, KERN_INFO
			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);

	return apic_id;
}
3824
#endif
L
Linus Torvalds 已提交
3825

3826
/* Return the version field of IO-APIC @ioapic's register 1. */
int __init io_apic_get_version(int ioapic)
{
	union IO_APIC_reg_01 ver_reg;
	unsigned long flags;

	/* Register access is serialised by ioapic_lock. */
	raw_spin_lock_irqsave(&ioapic_lock, flags);
	ver_reg.raw = io_apic_read(ioapic, 1);
	raw_spin_unlock_irqrestore(&ioapic_lock, flags);

	return ver_reg.bits.version;
}

3838
/*
 * Look up trigger mode and polarity for @gsi.
 *
 * Returns 0 and fills *@trigger and *@polarity on success.  Returns -1 when
 * IO-APIC setup is skipped or when the IO-APIC, its pin or the matching irq
 * entry cannot be found; the outputs are left untouched in that case.
 */
int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
{
	int ioapic, pin, idx;

	if (skip_ioapic_setup)
		return -1;

	/* Each lookup only runs when the previous one succeeded. */
	ioapic = mp_find_ioapic(gsi);
	pin = (ioapic < 0) ? -1 : mp_find_ioapic_pin(ioapic, gsi);
	idx = (pin < 0) ? -1 : find_irq_entry(ioapic, pin, mp_INT);
	if (idx < 0)
		return -1;

	*trigger = irq_trigger(idx);
	*polarity = irq_polarity(idx);
	return 0;
}

3862 3863 3864
/*
 * This function currently is only a helper for the i386 smp boot process where
 * we need to reprogram the ioredtbls to cater for the cpus which have come online
3865
 * so mask in all cases should simply be apic->target_cpus()
3866 3867 3868 3869
 */
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
	int ioapic, pin;

	if (skip_ioapic_setup == 1)
		return;

	for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
		for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
			const struct cpumask *mask;
			struct irq_desc *desc;
			int irq, irq_entry;

			irq_entry = find_irq_entry(ioapic, pin, mp_INT);
			if (irq_entry == -1)
				continue;
			irq = pin_2_irq(irq_entry, ioapic, pin);

			/* On IO-APICs other than the first, skip irqs above 16. */
			if ((ioapic > 0) && (irq > 16))
				continue;

			desc = irq_to_desc(irq);

			/*
			 * Honour affinities which have been set in early boot
			 */
			if (desc->status &
			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
				mask = desc->irq_data.affinity;
			else
				mask = apic->target_cpus();

			if (intr_remapping_enabled)
				ir_ioapic_set_affinity(&desc->irq_data, mask,
						       false);
			else
				ioapic_set_affinity(&desc->irq_data, mask,
						    false);
		}
	}
}
#endif

3907 3908 3909 3910
#define IOAPIC_RESOURCE_NAME_SIZE 11

static struct resource *ioapic_resources;

3911
static struct resource * __init ioapic_setup_resources(int nr_ioapics)
3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926
{
	unsigned long n;
	struct resource *res;
	char *mem;
	int i;

	if (nr_ioapics <= 0)
		return NULL;

	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
	n *= nr_ioapics;

	mem = alloc_bootmem(n);
	res = (void *)mem;

3927
	mem += sizeof(struct resource) * nr_ioapics;
3928

3929 3930 3931
	for (i = 0; i < nr_ioapics; i++) {
		res[i].name = mem;
		res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
3932
		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
3933
		mem += IOAPIC_RESOURCE_NAME_SIZE;
3934 3935 3936 3937 3938 3939 3940
	}

	ioapic_resources = res;

	return res;
}

3941
/*
 * Map every IO-APIC into its fixmap slot, record its address range in the
 * resource array and finally probe nr_irqs_gsi.  When no MP configuration
 * was found (or, on 32-bit, the table holds a zero address) a freshly
 * allocated bootmem page is mapped instead of the real IO-APIC.
 */
void __init ioapic_and_gsi_init(void)
{
	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
	struct resource *ioapic_res;
	int i;

	ioapic_res = ioapic_setup_resources(nr_ioapics);
	for (i = 0; i < nr_ioapics; i++, idx++) {
		int need_fake_page = !smp_found_config;

		if (!need_fake_page) {
			ioapic_phys = mp_ioapics[i].apicaddr;
#ifdef CONFIG_X86_32
			if (!ioapic_phys) {
				printk(KERN_ERR
				       "WARNING: bogus zero IO-APIC "
				       "address found in MPTABLE, "
				       "disabling IO/APIC support!\n");
				smp_found_config = 0;
				skip_ioapic_setup = 1;
				need_fake_page = 1;
			}
#endif
		}

		if (need_fake_page) {
			ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
			ioapic_phys = __pa(ioapic_phys);
		}

		set_fixmap_nocache(idx, ioapic_phys);
		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
			ioapic_phys);

		ioapic_res[i].start = ioapic_phys;
		ioapic_res[i].end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
	}

	probe_nr_irqs_gsi();
}

3983
/*
 * Insert every IO-APIC resource into iomem_resource.  When the resource
 * array was never set up, an error is logged if IO-APICs exist.
 */
void __init ioapic_insert_resources(void)
{
	struct resource *r = ioapic_resources;
	int i;

	if (r) {
		for (i = 0; i < nr_ioapics; i++)
			insert_resource(&iomem_resource, &r[i]);
		return;
	}

	if (nr_ioapics > 0)
		printk(KERN_ERR
			"IO APIC resources couldn't be allocated.\n");
}
4000

4001
/*
 * Find the IO-APIC whose GSI range [gsi_base, gsi_end] contains @gsi.
 * Returns its index in mp_gsi_routing[], or -1 (after logging an error)
 * when no IO-APIC manages @gsi.
 */
int mp_find_ioapic(u32 gsi)
{
	int i;

	/* Find the IOAPIC that manages this GSI. */
	for (i = 0; i < nr_ioapics; i++) {
		if ((gsi >= mp_gsi_routing[i].gsi_base)
		    && (gsi <= mp_gsi_routing[i].gsi_end))
			return i;
	}

	/* gsi is unsigned, so print it as such. */
	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %u\n", gsi);
	return -1;
}

4016
/*
 * Translate @gsi into a pin number on IO-APIC @ioapic.
 * Returns the pin, or -1 (with a warning) when @ioapic is -1 or @gsi lies
 * beyond that IO-APIC's last GSI.
 */
int mp_find_ioapic_pin(int ioapic, u32 gsi)
{
	/* The range check must not run for an invalid @ioapic index. */
	if (WARN_ON(ioapic == -1) ||
	    WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
		return -1;

	return gsi - mp_gsi_routing[ioapic].gsi_base;
}

/*
 * Decide whether another IO-APIC at @address may be registered.
 * Returns 1 (after a warning) when the IO-APIC table is full or @address is
 * zero, 0 otherwise.
 */
static int bad_ioapic(unsigned long address)
{
	if (nr_ioapics >= MAX_IO_APICS) {
		printk(KERN_WARNING "WARNING: Max # of I/O APICs (%d) exceeded "
		       "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
		return 1;
	}

	if (address)
		return 0;

	printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
	       " found in table, skipping!\n");
	return 1;
}

4041 4042 4043
/*
 * Register the IO-APIC at @address whose first GSI is @gsi_base: map it,
 * settle its APIC ID and version, record its GSI range and pin count, and
 * grow gsi_top if needed.  Silently returns when bad_ioapic() rejects it.
 */
void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
{
	struct mpc_ioapic *ia;
	int idx, entries;

	if (bad_ioapic(address))
		return;

	idx = nr_ioapics;
	ia = &mp_ioapics[idx];

	ia->type = MP_IOAPIC;
	ia->flags = MPC_APIC_USABLE;
	ia->apicaddr = address;

	/* The mapping must exist before the IO-APIC registers are read. */
	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
	ia->apicid = io_apic_unique_id(id);
	ia->apicver = io_apic_get_version(idx);

	/*
	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
	 */
	entries = io_apic_get_redir_entries(idx);
	mp_gsi_routing[idx].gsi_base = gsi_base;
	mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1;

	/*
	 * The number of IO-APIC IRQ registers (== #pins):
	 */
	nr_ioapic_registers[idx] = entries;

	if (gsi_top <= mp_gsi_routing[idx].gsi_end)
		gsi_top = mp_gsi_routing[idx].gsi_end + 1;

	printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
	       "GSI %d-%d\n", idx, ia->apicid,
	       ia->apicver, ia->apicaddr,
	       mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);

	nr_ioapics++;
}
4082 4083 4084 4085 4086 4087 4088 4089

/* Enable IOAPIC early just for system timer */
void __init pre_init_apic_IRQ0(void)
{
	struct irq_cfg *timer_cfg;

	printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
	physid_set_mask_of_physid(boot_cpu_physical_apicid,
					 &phys_cpu_present_map);
#endif
	/* Make sure the irq descriptor is set up */
	timer_cfg = alloc_irq_and_cfg_at(0, 0);

	setup_local_APIC();

	/* IRQ0 is routed through pin 0 of IO-APIC 0 with the edge handler. */
	add_pin_to_irq_node(timer_cfg, 0, 0, 0);
	set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");

	setup_ioapic_irq(0, 0, 0, timer_cfg, 0, 0);
}