msi.c 27.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/smp_lock.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>

#include <asm/errno.h>
#include <asm/io.h>
#include <asm/smp.h>

#include "pci.h"
#include "msi.h"

/*
 * Held by attach_msi_entry(), msi_lookup_irq() and msi_free_irq() while
 * they read or update msi_desc[].
 */
static DEFINE_SPINLOCK(msi_lock);
/* MSI descriptor for each irq number; NULL when none is attached. */
static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
/* Slab cache for struct msi_desc, created by msi_cache_init(). */
static kmem_cache_t* msi_cachep;

/* Global MSI switch; cleared by pci_no_msi() and when msi_init() fails. */
static int pci_msi_enable = 1;

32 33 34 35 36 37 38 39 40
/* MSI operations table installed by msi_register(); NULL until then. */
static struct msi_ops *msi_ops;

/*
 * msi_register - install the MSI operations table
 * @ops: table to use from now on; stored as-is without validation
 *
 * Always returns 0.
 */
int msi_register(struct msi_ops *ops)
{
	msi_ops = ops;

	return 0;
}

L
Linus Torvalds 已提交
41 42
static int msi_cache_init(void)
{
43 44
	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
L
Linus Torvalds 已提交
45 46 47 48 49 50
	if (!msi_cachep)
		return -ENOMEM;

	return 0;
}

51
static void msi_set_mask_bit(unsigned int irq, int flag)
L
Linus Torvalds 已提交
52 53 54
{
	struct msi_desc *entry;

55
	entry = msi_desc[irq];
L
Linus Torvalds 已提交
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
	if (!entry || !entry->dev || !entry->mask_base)
		return;
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		int		pos;
		u32		mask_bits;

		pos = (long)entry->mask_base;
		pci_read_config_dword(entry->dev, pos, &mask_bits);
		mask_bits &= ~(1);
		mask_bits |= flag;
		pci_write_config_dword(entry->dev, pos, mask_bits);
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		writel(flag, entry->mask_base + offset);
		break;
	}
	default:
		break;
	}
}

83
static void read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
L
Linus Torvalds 已提交
84
{
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119
	switch(entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
 		break;
 	}
 	default:
		BUG();
	}
}
L
Linus Torvalds 已提交
120

121 122
static void write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
L
Linus Torvalds 已提交
123 124 125
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
126 127 128 129 130 131 132 133 134 135 136 137 138 139
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
L
Linus Torvalds 已提交
140 141 142 143
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
144 145 146 147 148 149 150 151 152
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
L
Linus Torvalds 已提交
153 154 155
		break;
	}
	default:
156
		BUG();
L
Linus Torvalds 已提交
157 158
	}
}
159 160

#ifdef CONFIG_SMP
/*
 * set_msi_affinity - retarget an MSI/MSI-X irq
 * @irq: irq number, used as index into msi_desc[]
 * @cpu_mask: requested cpu mask
 *
 * Reads the message currently programmed for @irq, hands it to
 * msi_ops->target() together with @cpu_mask, writes the message back and
 * records the mask with set_native_irq_info().  Does nothing when the irq
 * has no descriptor or the descriptor has no device.
 */
static void set_msi_affinity(unsigned int irq, cpumask_t cpu_mask)
{
	struct msi_desc *desc = msi_desc[irq];
	struct msi_msg cur;

	if (!desc || !desc->dev)
		return;

	read_msi_msg(desc, &cur);
	msi_ops->target(irq, cpu_mask, &cur);
	write_msi_msg(desc, &cur);
	set_native_irq_info(irq, cpu_mask);
}
#else
/* Without CONFIG_SMP the irq types below get no affinity handler. */
#define set_msi_affinity NULL
#endif /* CONFIG_SMP */

179
/* Mask @irq by passing flag 1 to msi_set_mask_bit(). */
static void mask_MSI_irq(unsigned int irq)
{
	const int masked = 1;

	msi_set_mask_bit(irq, masked);
}

184
/* Unmask @irq by passing flag 0 to msi_set_mask_bit(). */
static void unmask_MSI_irq(unsigned int irq)
{
	const int unmasked = 0;

	msi_set_mask_bit(irq, unmasked);
}

189
/*
 * Startup hook for MSI irqs without a mask bit: nothing to do.
 * Returns 0, i.e. never anything pending.
 */
static unsigned int startup_msi_irq_wo_maskbit(unsigned int irq)
{
	(void)irq;

	return 0;
}

194
/*
 * Startup hook for irqs that have a mask bit: run the mask-less startup,
 * then unmask the irq.  Returns 0, i.e. never anything pending.
 */
static unsigned int startup_msi_irq_w_maskbit(unsigned int irq)
{
	startup_msi_irq_wo_maskbit(irq);
	unmask_MSI_irq(irq);

	return 0;
}

201
/* Shutdown hook shared by all MSI irq types; intentionally empty. */
static void shutdown_msi_irq(unsigned int irq)
{
	(void)irq;
}

205
/*
 * End-of-interrupt hook for MSI irqs without a mask bit: calls
 * move_native_irq() for @irq and then acknowledges the APIC.
 */
static void end_msi_irq_wo_maskbit(unsigned int irq)
{
	move_native_irq(irq);

	ack_APIC_irq();
}
L
Linus Torvalds 已提交
210

211
/*
 * End-of-interrupt hook for irqs with a mask bit: calls move_native_irq()
 * for @irq, unmasks the irq (the .ack hook masked it) and then
 * acknowledges the APIC.
 */
static void end_msi_irq_w_maskbit(unsigned int irq)
{
	move_native_irq(irq);

	unmask_MSI_irq(irq);

	ack_APIC_irq();
}

218
/* Placeholder hook for irq types that have no mask bit to toggle. */
static void do_nothing(unsigned int irq)
{
	(void)irq;
}

L
Linus Torvalds 已提交
222 223 224 225 226 227 228
/*
 * Interrupt Type for MSI-X PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI-X Capability Structure.
 */
static struct hw_interrupt_type msix_irq_type = {
	.typename	= "PCI-MSI-X",
	.startup	= startup_msi_irq_w_maskbit,
229 230 231 232
	.shutdown	= shutdown_msi_irq,
	.enable		= unmask_MSI_irq,
	.disable	= mask_MSI_irq,
	.ack		= mask_MSI_irq,
L
Linus Torvalds 已提交
233
	.end		= end_msi_irq_w_maskbit,
234
	.set_affinity	= set_msi_affinity
L
Linus Torvalds 已提交
235 236 237 238 239 240 241 242 243 244
};

/*
 * Interrupt Type for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI Capability Structure with
 * Mask-and-Pending Bits.
 */
static struct hw_interrupt_type msi_irq_w_maskbit_type = {
	.typename	= "PCI-MSI",
	.startup	= startup_msi_irq_w_maskbit,
245 246 247 248
	.shutdown	= shutdown_msi_irq,
	.enable		= unmask_MSI_irq,
	.disable	= mask_MSI_irq,
	.ack		= mask_MSI_irq,
L
Linus Torvalds 已提交
249
	.end		= end_msi_irq_w_maskbit,
250
	.set_affinity	= set_msi_affinity
L
Linus Torvalds 已提交
251 252 253 254 255 256 257 258 259 260
};

/*
 * Interrupt Type for MSI PCI/PCI-X/PCI-Express Devices,
 * which implement the MSI Capability Structure without
 * Mask-and-Pending Bits.
 */
static struct hw_interrupt_type msi_irq_wo_maskbit_type = {
	.typename	= "PCI-MSI",
	.startup	= startup_msi_irq_wo_maskbit,
261 262 263 264
	.shutdown	= shutdown_msi_irq,
	.enable		= do_nothing,
	.disable	= do_nothing,
	.ack		= do_nothing,
L
Linus Torvalds 已提交
265
	.end		= end_msi_irq_wo_maskbit,
266
	.set_affinity	= set_msi_affinity
L
Linus Torvalds 已提交
267 268
};

269
/* Forward declaration: defined further down, used by msix_capability_init(). */
static int msi_free_irq(struct pci_dev* dev, int irq);
L
Linus Torvalds 已提交
270 271 272 273 274 275 276 277 278 279 280 281 282 283
static int msi_init(void)
{
	static int status = -ENOMEM;

	if (!status)
		return status;

	if (pci_msi_quirk) {
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI quirk detected. MSI disabled.\n");
		status = -EINVAL;
		return status;
	}

284 285 286 287 288 289 290 291 292
	status = msi_arch_init();
	if (status < 0) {
		pci_msi_enable = 0;
		printk(KERN_WARNING
		       "PCI: MSI arch init failed.  MSI disabled.\n");
		return status;
	}

	if (! msi_ops) {
293
		pci_msi_enable = 0;
294 295 296 297 298 299
		printk(KERN_WARNING
		       "PCI: MSI ops not registered. MSI disabled.\n");
		status = -EINVAL;
		return status;
	}

300 301
	status = msi_cache_init();
	if (status < 0) {
L
Linus Torvalds 已提交
302 303 304 305
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI cache init failed\n");
		return status;
	}
306

L
Linus Torvalds 已提交
307 308 309 310 311 312 313
	return status;
}

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

314
	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
L
Linus Torvalds 已提交
315 316 317 318 319 320 321 322 323
	if (!entry)
		return NULL;

	entry->link.tail = entry->link.head = 0;	/* single message */
	entry->dev = NULL;

	return entry;
}

324
static void attach_msi_entry(struct msi_desc *entry, int irq)
L
Linus Torvalds 已提交
325 326 327 328
{
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
329
	msi_desc[irq] = entry;
L
Linus Torvalds 已提交
330 331 332
	spin_unlock_irqrestore(&msi_lock, flags);
}

333
static int create_msi_irq(struct hw_interrupt_type *handler)
L
Linus Torvalds 已提交
334
{
335 336 337 338 339 340
	struct msi_desc *entry;
	int irq;

	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;
341

342 343 344 345
	irq = create_irq();
	if (irq < 0) {
		kmem_cache_free(msi_cachep, entry);
		return -EBUSY;
L
Linus Torvalds 已提交
346
	}
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362

	set_irq_chip(irq, handler);
	set_irq_data(irq, entry);

	return irq;
}

/*
 * Undo create_msi_irq(): detach chip and data from @irq, destroy the irq
 * and free the descriptor that was stored as its data.
 */
static void destroy_msi_irq(unsigned int irq)
{
	/* Fetch the descriptor before the irq data is cleared. */
	struct msi_desc *desc = get_irq_data(irq);

	set_irq_chip(irq, NULL);
	set_irq_data(irq, NULL);
	destroy_irq(irq);

	kmem_cache_free(msi_cachep, desc);
}

static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_enable(control, 1);
		pci_write_config_word(dev, msi_control_reg(pos), control);
374
		dev->msi_enabled = 1;
L
Linus Torvalds 已提交
375 376 377
	} else {
		msix_enable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
378
		dev->msix_enabled = 1;
L
Linus Torvalds 已提交
379 380 381
	}
    	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
		/* PCI Express Endpoint device detected */
B
Brett M Russ 已提交
382
		pci_intx(dev, 0);  /* disable intx */
L
Linus Torvalds 已提交
383 384 385
	}
}

386
void disable_msi_mode(struct pci_dev *dev, int pos, int type)
L
Linus Torvalds 已提交
387 388 389 390 391 392 393 394
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
395
		dev->msi_enabled = 0;
L
Linus Torvalds 已提交
396 397 398
	} else {
		msix_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
399
		dev->msix_enabled = 0;
L
Linus Torvalds 已提交
400 401 402
	}
    	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
		/* PCI Express Endpoint device detected */
B
Brett M Russ 已提交
403
		pci_intx(dev, 1);  /* enable intx */
L
Linus Torvalds 已提交
404 405 406
	}
}

407
static int msi_lookup_irq(struct pci_dev *dev, int type)
L
Linus Torvalds 已提交
408
{
409
	int irq;
L
Linus Torvalds 已提交
410 411 412
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
413 414 415 416
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
			msi_desc[irq]->msi_attrib.type != type ||
			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
L
Linus Torvalds 已提交
417 418
			continue;
		spin_unlock_irqrestore(&msi_lock, flags);
419 420 421
		/* This pre-assigned MSI irq for this device
		   already exits. Override dev->irq with this irq */
		dev->irq = irq;
L
Linus Torvalds 已提交
422 423 424 425 426 427 428 429 430 431 432 433 434
		return 0;
	}
	spin_unlock_irqrestore(&msi_lock, flags);

	return -EACCES;
}

/*
 * pci_scan_msi_device - currently a no-op
 * @dev: device being scanned; may be NULL
 */
void pci_scan_msi_device(struct pci_dev *dev)
{
	if (dev == 0)
		return;
}

435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504
#ifdef CONFIG_PM
int pci_save_msi_state(struct pci_dev *dev)
{
	int pos, i = 0;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (pos <= 0 || dev->no_msi)
		return 0;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return 0;

	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5,
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msi_state\n");
		return -ENOMEM;
	}
	cap = &save_state->data[0];

	pci_read_config_dword(dev, pos, &cap[i++]);
	control = cap[0] >> 16;
	pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]);
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]);
	} else
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]);
	save_state->cap_nr = PCI_CAP_ID_MSI;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	int i = 0, pos;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI);
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!save_state || pos <= 0)
		return;
	cap = &save_state->data[0];

	control = cap[i++] >> 16;
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]);
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]);
	} else
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]);
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
	pci_remove_saved_cap(save_state);
	kfree(save_state);
}

int pci_save_msix_state(struct pci_dev *dev)
{
	int pos;
505
	int temp;
506
	int irq, head, tail = 0;
507 508 509 510 511 512 513
	u16 control;
	struct pci_cap_saved_state *save_state;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0 || dev->no_msi)
		return 0;

514
	/* save the capability */
515 516 517 518 519 520 521 522 523 524 525
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return 0;
	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16),
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msix_state\n");
		return -ENOMEM;
	}
	*((u16 *)&save_state->data[0]) = control;

526 527
	/* save the table */
	temp = dev->irq;
528
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
529 530 531 532
		kfree(save_state);
		return -EINVAL;
	}

533
	irq = head = dev->irq;
534 535 536
	while (head != tail) {
		struct msi_desc *entry;

537
		entry = msi_desc[irq];
538
		read_msi_msg(entry, &entry->msg_save);
539

540 541
		tail = msi_desc[irq]->link.tail;
		irq = tail;
542 543 544
	}
	dev->irq = temp;

545 546 547 548 549 550 551 552 553
	save_state->cap_nr = PCI_CAP_ID_MSIX;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msix_state(struct pci_dev *dev)
{
	u16 save;
	int pos;
554
	int irq, head, tail = 0;
555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571
	struct msi_desc *entry;
	int temp;
	struct pci_cap_saved_state *save_state;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX);
	if (!save_state)
		return;
	save = *((u16 *)&save_state->data[0]);
	pci_remove_saved_cap(save_state);
	kfree(save_state);

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0)
		return;

	/* route the table */
	temp = dev->irq;
572
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
573
		return;
574
	irq = head = dev->irq;
575
	while (head != tail) {
576
		entry = msi_desc[irq];
577
		write_msi_msg(entry, &entry->msg_save);
578

579 580
		tail = msi_desc[irq]->link.tail;
		irq = tail;
581 582 583 584 585 586 587 588
	}
	dev->irq = temp;

	pci_write_config_word(dev, msi_control_reg(pos), save);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
}
#endif

589
static int msi_register_init(struct pci_dev *dev, struct msi_desc *entry)
590
{
591
	int status;
592
	struct msi_msg msg;
593
	int pos;
594 595
	u16 control;

596
	pos = entry->msi_attrib.pos;
597
	pci_read_config_word(dev, msi_control_reg(pos), &control);
598

599
	/* Configure MSI capability structure */
600
	status = msi_ops->setup(dev, dev->irq, &msg);
601 602 603
	if (status < 0)
		return status;

604
	write_msi_msg(entry, &msg);
605 606 607 608 609 610 611 612 613 614 615 616 617
	if (entry->msi_attrib.maskbit) {
		unsigned int maskbits, temp;
		/* All MSIs are unmasked by default, Mask them all */
		pci_read_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			&maskbits);
		temp = (1 << multi_msi_capable(control));
		temp = ((temp - 1) & ~temp);
		maskbits |= temp;
		pci_write_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			maskbits);
	}
618 619

	return 0;
620 621
}

L
Linus Torvalds 已提交
622 623 624 625
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
626
 * Setup the MSI capability structure of device function with a single
627
 * MSI irq, regardless of device function is capable of handling
L
Linus Torvalds 已提交
628
 * multiple messages. A return of zero indicates the successful setup
629
 * of an entry zero with the new MSI irq or non-zero for otherwise.
L
Linus Torvalds 已提交
630 631 632
 **/
static int msi_capability_init(struct pci_dev *dev)
{
633
	int status;
L
Linus Torvalds 已提交
634
	struct msi_desc *entry;
635
	int pos, irq;
L
Linus Torvalds 已提交
636
	u16 control;
637
	struct hw_interrupt_type *handler;
L
Linus Torvalds 已提交
638 639 640 641

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
642 643 644
	handler = &msi_irq_wo_maskbit_type;
	if (is_mask_bit_support(control))
		handler = &msi_irq_w_maskbit_type;
L
Linus Torvalds 已提交
645

646 647 648 649 650 651 652
	irq = create_msi_irq(handler);
	if (irq < 0)
		return irq;

	entry = get_irq_data(irq);
	entry->link.head = irq;
	entry->link.tail = irq;
L
Linus Torvalds 已提交
653
	entry->msi_attrib.type = PCI_CAP_ID_MSI;
654
	entry->msi_attrib.is_64 = is_64bit_address(control);
L
Linus Torvalds 已提交
655 656
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
657
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
658
	entry->msi_attrib.pos = pos;
659
	dev->irq = irq;
L
Linus Torvalds 已提交
660 661 662 663 664 665
	entry->dev = dev;
	if (is_mask_bit_support(control)) {
		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
				is_64bit_address(control));
	}
	/* Configure MSI capability structure */
666 667
	status = msi_register_init(dev, entry);
	if (status != 0) {
668 669
		dev->irq = entry->msi_attrib.default_irq;
		destroy_msi_irq(irq);
670 671
		return status;
	}
672

673
	attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
674 675 676 677 678 679 680 681 682
	/* Set MSI enabled bits	 */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
683 684
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
685
 *
686
 * Setup the MSI-X capability structure of device function with a
687 688
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
689 690 691 692 693
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
694
	struct msi_msg msg;
695
	int status;
696
	int irq, pos, i, j, nr_entries, temp = 0;
697 698
	unsigned long phys_addr;
	u32 table_offset;
L
Linus Torvalds 已提交
699 700 701 702 703 704 705 706
 	u16 control;
	u8 bir;
	void __iomem *base;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
707 708

 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
L
Linus Torvalds 已提交
709
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
710 711
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start (dev, bir) + table_offset;
L
Linus Torvalds 已提交
712 713 714 715 716 717
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
718 719
		irq = create_msi_irq(&msix_irq_type);
		if (irq < 0)
L
Linus Torvalds 已提交
720 721
			break;

722
		entry = get_irq_data(irq);
L
Linus Torvalds 已提交
723
 		j = entries[i].entry;
724
 		entries[i].vector = irq;
L
Linus Torvalds 已提交
725
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
726
		entry->msi_attrib.is_64 = 1;
L
Linus Torvalds 已提交
727 728
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
729
		entry->msi_attrib.default_irq = dev->irq;
730
		entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
731 732 733
		entry->dev = dev;
		entry->mask_base = base;
		if (!head) {
734 735
			entry->link.head = irq;
			entry->link.tail = irq;
L
Linus Torvalds 已提交
736 737 738 739
			head = entry;
		} else {
			entry->link.head = temp;
			entry->link.tail = tail->link.tail;
740 741
			tail->link.tail = irq;
			head->link.head = irq;
L
Linus Torvalds 已提交
742
		}
743
		temp = irq;
L
Linus Torvalds 已提交
744 745
		tail = entry;
		/* Configure MSI-X capability structure */
746 747 748
		status = msi_ops->setup(dev, irq, &msg);
		if (status < 0) {
			destroy_msi_irq(irq);
749
			break;
750
		}
751

752
		write_msi_msg(entry, &msg);
753
		attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
754 755
	}
	if (i != nvec) {
756
		int avail = i - 1;
L
Linus Torvalds 已提交
757 758
		i--;
		for (; i >= 0; i--) {
759 760
			irq = (entries + i)->vector;
			msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
761 762
			(entries + i)->vector = 0;
		}
763 764 765 766 767 768
		/* If we had some success report the number of irqs
		 * we succeeded in setting up.
		 */
		if (avail <= 0)
			avail = -EBUSY;
		return avail;
L
Linus Torvalds 已提交
769 770 771 772 773 774 775
	}
	/* Set MSI-X enabled bits */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

	return 0;
}

776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802
/**
 * pci_msi_supported - check whether MSI may be enabled on device
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * MSI must be globally enabled and supported by the device and its root
 * bus. But, the root bus is not easy to find since some architectures
 * have virtual busses on top of the PCI hierarchy (for instance the
 * hypertransport bus), while the actual bus where MSI must be supported
 * is below. So we test the MSI flag on all parent busses and assume
 * that no quirk will ever set the NO_MSI flag on a non-root bus.
 **/
static
int pci_msi_supported(struct pci_dev * dev)
{
	struct pci_bus *b;

	/* Globally disabled, no device, or device flagged no_msi. */
	if (!pci_msi_enable)
		return -EINVAL;
	if (!dev)
		return -EINVAL;
	if (dev->no_msi)
		return -EINVAL;

	/* Walk up from the device's bus; any NO_MSI flag vetoes MSI. */
	b = dev->bus;
	while (b) {
		if (b->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;
		b = b->parent;
	}

	return 0;
}

L
Linus Torvalds 已提交
803 804 805 806 807
/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with
808
 * a single MSI irq upon its software driver call to request for
L
Linus Torvalds 已提交
809 810
 * MSI mode enabled on its hardware device function. A return of zero
 * indicates the successful setup of an entry zero with the new MSI
811
 * irq or non-zero for otherwise.
L
Linus Torvalds 已提交
812 813 814
 **/
int pci_enable_msi(struct pci_dev* dev)
{
815
	int pos, temp, status;
816
	u16 control;
L
Linus Torvalds 已提交
817

818 819
	if (pci_msi_supported(dev) < 0)
		return -EINVAL;
820

L
Linus Torvalds 已提交
821 822
	temp = dev->irq;

823 824
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
825 826
		return status;

827 828
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
829 830
		return -EINVAL;

831 832 833 834
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!is_64bit_address(control) && msi_ops->needs_64bit_address)
		return -EINVAL;

835
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));
L
Linus Torvalds 已提交
836

837
	/* Check whether driver already requested for MSI-X irqs */
838
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
839
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
L
Linus Torvalds 已提交
840
			printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
841
			       "Device already has MSI-X irq assigned\n",
L
Linus Torvalds 已提交
842 843 844 845 846 847 848 849 850 851 852
			       pci_name(dev));
			dev->irq = temp;
			return -EINVAL;
	}
	status = msi_capability_init(dev);
	return status;
}

void pci_disable_msi(struct pci_dev* dev)
{
	struct msi_desc *entry;
853
	int pos, default_irq;
L
Linus Torvalds 已提交
854 855 856
	u16 control;
	unsigned long flags;

857 858
	if (!pci_msi_enable)
		return;
859 860
	if (!dev)
		return;
861

862 863
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
864 865 866 867 868 869
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return;

870 871
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

L
Linus Torvalds 已提交
872 873 874 875 876 877
	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[dev->irq];
	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return;
	}
878
	if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
879 880
		spin_unlock_irqrestore(&msi_lock, flags);
		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
881
		       "free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
882
		       pci_name(dev), dev->irq);
883
		BUG_ON(irq_has_action(dev->irq));
L
Linus Torvalds 已提交
884
	} else {
885
		default_irq = entry->msi_attrib.default_irq;
L
Linus Torvalds 已提交
886
		spin_unlock_irqrestore(&msi_lock, flags);
887
		msi_free_irq(dev, dev->irq);
888

889 890
		/* Restore dev->irq to its default pin-assertion irq */
		dev->irq = default_irq;
L
Linus Torvalds 已提交
891 892 893
	}
}

894
static int msi_free_irq(struct pci_dev* dev, int irq)
L
Linus Torvalds 已提交
895 896 897 898 899 900
{
	struct msi_desc *entry;
	int head, entry_nr, type;
	void __iomem *base;
	unsigned long flags;

901
	msi_ops->teardown(irq);
902

L
Linus Torvalds 已提交
903
	spin_lock_irqsave(&msi_lock, flags);
904
	entry = msi_desc[irq];
L
Linus Torvalds 已提交
905 906 907 908 909 910 911 912 913 914 915
	if (!entry || entry->dev != dev) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return -EINVAL;
	}
	type = entry->msi_attrib.type;
	entry_nr = entry->msi_attrib.entry_nr;
	head = entry->link.head;
	base = entry->mask_base;
	msi_desc[entry->link.head]->link.tail = entry->link.tail;
	msi_desc[entry->link.tail]->link.head = entry->link.head;
	entry->dev = NULL;
916
	msi_desc[irq] = NULL;
L
Linus Torvalds 已提交
917 918
	spin_unlock_irqrestore(&msi_lock, flags);

919
	destroy_msi_irq(irq);
L
Linus Torvalds 已提交
920 921

	if (type == PCI_CAP_ID_MSIX) {
922 923
		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
L
Linus Torvalds 已提交
924

925
		if (head == irq)
L
Linus Torvalds 已提交
926 927 928 929 930 931 932 933 934
			iounmap(base);
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
935
 * @entries: pointer to an array of MSI-X entries
936
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
937 938
 *
 * Setup the MSI-X capability structure of device function with the number
939
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
940 941
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
942
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
943
 * Or a return of > 0 indicates that driver request is exceeding the number
944
 * of irqs available. Driver should use the returned value to re-send
L
Linus Torvalds 已提交
945 946 947 948
 * its request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
949
	int status, pos, nr_entries;
L
Linus Torvalds 已提交
950 951 952
	int i, j, temp;
	u16 control;

953
	if (!entries || pci_msi_supported(dev) < 0)
L
Linus Torvalds 已提交
954 955
 		return -EINVAL;

956 957
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
958 959
		return status;

960 961
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
 		return -EINVAL;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	temp = dev->irq;
979
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));
980

981
	/* Check whether driver already requested for MSI irq */
L
Linus Torvalds 已提交
982
   	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
983
		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
L
Linus Torvalds 已提交
984
		printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
985
		       "Device already has an MSI irq assigned\n",
L
Linus Torvalds 已提交
986 987 988 989 990 991 992 993 994 995 996 997 998
		       pci_name(dev));
		dev->irq = temp;
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}

void pci_disable_msix(struct pci_dev* dev)
{
	int pos, temp;
	u16 control;

999 1000
	if (!pci_msi_enable)
		return;
1001 1002 1003 1004 1005
	if (!dev)
		return;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
1006 1007 1008 1009 1010 1011
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return;

1012 1013
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

L
Linus Torvalds 已提交
1014
	temp = dev->irq;
1015
	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
1016
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
1017 1018
		unsigned long flags;

1019
		irq = head = dev->irq;
1020
		dev->irq = temp;			/* Restore pin IRQ */
L
Linus Torvalds 已提交
1021
		while (head != tail) {
1022
			spin_lock_irqsave(&msi_lock, flags);
1023
			tail = msi_desc[irq]->link.tail;
1024
			spin_unlock_irqrestore(&msi_lock, flags);
1025
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
1026
				warning = 1;
1027 1028 1029
			else if (irq != head)	/* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
1030
		}
1031
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
1032 1033
		if (warning) {
			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
1034
			       "free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
1035 1036 1037 1038 1039 1040 1041
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
	}
}

/**
1042
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
1043 1044
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
1045
 * Being called during hotplug remove, from which the device function
1046
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
1047 1048 1049 1050 1051
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
1052
	int pos, temp;
L
Linus Torvalds 已提交
1053 1054 1055 1056 1057 1058
	unsigned long flags;

	if (!pci_msi_enable || !dev)
 		return;

	temp = dev->irq;		/* Save IOAPIC IRQ */
1059
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
1060
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
1061
		if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
1062
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
1063
			       "called without free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
1064
			       pci_name(dev), dev->irq);
1065
			BUG_ON(irq_has_action(dev->irq));
1066 1067
		} else /* Release MSI irq assigned to this device */
			msi_free_irq(dev, dev->irq);
L
Linus Torvalds 已提交
1068 1069
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
1070
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
1071 1072
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
1073 1074
		void __iomem *base = NULL;

1075
		irq = head = dev->irq;
L
Linus Torvalds 已提交
1076 1077
		while (head != tail) {
			spin_lock_irqsave(&msi_lock, flags);
1078 1079
			tail = msi_desc[irq]->link.tail;
			base = msi_desc[irq]->mask_base;
L
Linus Torvalds 已提交
1080
			spin_unlock_irqrestore(&msi_lock, flags);
1081
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
1082
				warning = 1;
1083 1084 1085
			else if (irq != head) /* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
1086
		}
1087
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
1088 1089 1090
		if (warning) {
			iounmap(base);
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
1091
			       "called without free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
1092 1093 1094 1095 1096 1097 1098
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
}

1099 1100 1101 1102 1103
/*
 * pci_no_msi - globally switch MSI off
 *
 * Clears pci_msi_enable, which pci_msi_supported() and the disable/remove
 * paths in this file check before doing any work.
 */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

L
Linus Torvalds 已提交
1104 1105 1106 1107
/* Enable/disable entry points exported for use outside this file. */
EXPORT_SYMBOL(pci_enable_msi);
EXPORT_SYMBOL(pci_disable_msi);
EXPORT_SYMBOL(pci_enable_msix);
EXPORT_SYMBOL(pci_disable_msix);