msi.c 18.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13 14 15 16
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
17
#include <linux/msi.h>
D
Dan Williams 已提交
18
#include <linux/smp.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26 27

#include <asm/errno.h>
#include <asm/io.h>

#include "pci.h"
#include "msi.h"

/* Global MSI switch: starts at 1 and is cleared to 0 by pci_no_msi(). */
static int pci_msi_enable = 1;

28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
/* Arch hooks */

/*
 * Default (weak) arch hook consulted by pci_msi_check_device().
 * This generic version ignores its arguments and always returns 0;
 * being weak, it yields to any non-weak definition of the same name.
 */
int __attribute__((weak)) arch_msi_check_device(struct pci_dev *dev, int nvec,
						int type)
{
	const int no_objection = 0;

	return no_objection;
}

/*
 * Default (weak) per-descriptor setup hook, called once per msi_desc by the
 * generic arch_setup_msi_irqs().  This version does nothing and returns 0.
 */
int __attribute__((weak)) arch_setup_msi_irq(struct pci_dev *dev,
					     struct msi_desc *entry)
{
	const int ok = 0;

	return ok;
}

/*
 * Default (weak) bulk setup hook: call arch_setup_msi_irq() for every
 * descriptor on dev->msi_list, in list order.  Stops at the first non-zero
 * return value and passes it back; returns 0 if every call returned 0.
 * @nvec and @type are not used by this generic version.
 */
int __attribute__((weak)) arch_setup_msi_irqs(struct pci_dev *dev, int nvec,
					      int type)
{
	struct msi_desc *desc;

	list_for_each_entry(desc, &dev->msi_list, list) {
		int err = arch_setup_msi_irq(dev, desc);

		if (err != 0)
			return err;
	}

	return 0;
}

/*
 * Default (weak) per-irq teardown hook, called by the generic
 * arch_teardown_msi_irqs().  This version does nothing.
 */
void __attribute__((weak)) arch_teardown_msi_irq(unsigned int irq)
{
	/* Nothing to tear down in the generic version. */
}

/*
 * Default (weak) bulk teardown hook: call arch_teardown_msi_irq() for every
 * descriptor on dev->msi_list whose irq field is non-zero.
 */
void __attribute__((weak)) arch_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *desc;

	list_for_each_entry(desc, &dev->msi_list, list) {
		/* Descriptors with irq == 0 are skipped. */
		if (desc->irq == 0)
			continue;

		arch_teardown_msi_irq(desc->irq);
	}
}

73
static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
74 75 76 77 78 79 80 81 82 83 84 85
{
	u16 control;

	if (pos) {
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
		control &= ~PCI_MSI_FLAGS_ENABLE;
		if (enable)
			control |= PCI_MSI_FLAGS_ENABLE;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
	}
}

86 87 88 89 90
/*
 * Look up the MSI capability of @dev and set or clear its enable bit
 * through __msi_set_enable().
 */
static void msi_set_enable(struct pci_dev *dev, int enable)
{
	int cap = pci_find_capability(dev, PCI_CAP_ID_MSI);

	__msi_set_enable(dev, cap, enable);
}

91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
/*
 * Set or clear PCI_MSIX_FLAGS_ENABLE in the MSI-X flags word of @dev.
 * Nothing is done when pci_find_capability() returns 0 for the MSI-X
 * capability.  Any non-zero @enable sets the bit, zero clears it.
 */
static void msix_set_enable(struct pci_dev *dev, int enable)
{
	u16 flags;
	int cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);

	if (!cap)
		return;

	pci_read_config_word(dev, cap + PCI_MSIX_FLAGS, &flags);
	if (enable)
		flags |= PCI_MSIX_FLAGS_ENABLE;
	else
		flags &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, cap + PCI_MSIX_FLAGS, flags);
}

M
Mitch Williams 已提交
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
/*
 * msix_flush_writes - read back an MSI-X vector control word
 * @irq: irq whose msi_desc is looked up with get_irq_msi()
 *
 * For an MSI-X descriptor the vector control word of its table entry is
 * read and the value discarded; for a plain MSI descriptor nothing is done.
 * A missing descriptor or device, or any other descriptor type, is a BUG.
 */
static void msix_flush_writes(unsigned int irq)
{
	struct msi_desc *desc = get_irq_msi(irq);

	BUG_ON(!desc || !desc->dev);

	if (desc->msi_attrib.type == PCI_CAP_ID_MSI) {
		/* nothing to do */
	} else if (desc->msi_attrib.type == PCI_CAP_ID_MSIX) {
		int ctrl = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;

		readl(desc->mask_base + ctrl);
	} else {
		BUG();
	}
}

129 130 131 132 133 134 135 136 137 138
/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 *
 * For MSI, the bits selected by @mask in the mask register are replaced by
 * the corresponding bits of @flag.  For MSI-X, @flag is written to the
 * vector control word of the descriptor's table entry and read back.
 * In both cases the descriptor's "masked" attribute is set to !!@flag.
 *
 * Returns 1 if it succeeded in masking the interrupt and 0 if the device
 * doesn't support MSI masking.
 */
static int msi_set_mask_bits(unsigned int irq, u32 mask, u32 flag)
{
	struct msi_desc *desc = get_irq_msi(irq);

	BUG_ON(!desc || !desc->dev);

	switch (desc->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		int reg;
		u32 bits;

		if (!desc->msi_attrib.maskbit)
			return 0;

		/* For MSI, mask_base carries the config-space offset. */
		reg = (long)desc->mask_base;
		pci_read_config_dword(desc->dev, reg, &bits);
		bits = (bits & ~(mask)) | (flag & mask);
		pci_write_config_dword(desc->dev, reg, bits);
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		int off = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		void __iomem *ctrl = desc->mask_base + off;

		writel(flag, ctrl);
		readl(ctrl);
		break;
	}
	default:
		BUG();
		break;
	}

	desc->msi_attrib.masked = !!flag;
	return 1;
}

175
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
L
Linus Torvalds 已提交
176
{
177
	struct msi_desc *entry = get_irq_msi(irq);
178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
	switch(entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
193
			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
 		break;
 	}
 	default:
		BUG();
	}
}
L
Linus Torvalds 已提交
213

214
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
215
{
216
	struct msi_desc *entry = get_irq_msi(irq);
L
Linus Torvalds 已提交
217 218 219
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
220 221 222 223 224 225 226 227 228 229 230 231 232 233
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
L
Linus Torvalds 已提交
234 235 236 237
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
238 239 240 241 242 243 244 245 246
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
L
Linus Torvalds 已提交
247 248 249
		break;
	}
	default:
250
		BUG();
L
Linus Torvalds 已提交
251
	}
252
	entry->msg = *msg;
L
Linus Torvalds 已提交
253
}
254

255
void mask_msi_irq(unsigned int irq)
L
Linus Torvalds 已提交
256
{
257
	msi_set_mask_bits(irq, 1, 1);
M
Mitch Williams 已提交
258
	msix_flush_writes(irq);
L
Linus Torvalds 已提交
259 260
}

261
void unmask_msi_irq(unsigned int irq)
L
Linus Torvalds 已提交
262
{
263
	msi_set_mask_bits(irq, 1, 0);
M
Mitch Williams 已提交
264
	msix_flush_writes(irq);
L
Linus Torvalds 已提交
265 266
}

267
/* Forward declaration: msi_free_irqs() is used before its definition below. */
static int msi_free_irqs(struct pci_dev* dev);
S
Satoru Takeuchi 已提交
268

L
Linus Torvalds 已提交
269 270 271 272 273

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

M
Michael Ellerman 已提交
274
	entry = kzalloc(sizeof(struct msi_desc), GFP_KERNEL);
L
Linus Torvalds 已提交
275 276 277
	if (!entry)
		return NULL;

278 279
	INIT_LIST_HEAD(&entry->list);
	entry->irq = 0;
L
Linus Torvalds 已提交
280 281 282 283 284
	entry->dev = NULL;

	return entry;
}

285 286 287 288 289 290
/*
 * Forward @enable to pci_intx() unless the device carries the
 * PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG flag, in which case nothing is done.
 */
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)
		return;

	pci_intx(dev, enable);
}

291
static void __pci_restore_msi_state(struct pci_dev *dev)
292
{
293
	int pos;
294
	u16 control;
295
	struct msi_desc *entry;
296

297 298 299
	if (!dev->msi_enabled)
		return;

300 301
	entry = get_irq_msi(dev->irq);
	pos = entry->msi_attrib.pos;
302

303
	pci_intx_for_msi(dev, 0);
304
	msi_set_enable(dev, 0);
305 306
	write_msi_msg(dev->irq, &entry->msg);
	if (entry->msi_attrib.maskbit)
307 308
		msi_set_mask_bits(dev->irq, entry->msi_attrib.maskbits_mask,
				  entry->msi_attrib.masked);
309 310

	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
311 312
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= PCI_MSI_FLAGS_ENABLE;
313
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
314 315 316
}

static void __pci_restore_msix_state(struct pci_dev *dev)
317 318 319
{
	int pos;
	struct msi_desc *entry;
320
	u16 control;
321

E
Eric W. Biederman 已提交
322 323 324
	if (!dev->msix_enabled)
		return;

325
	/* route the table */
326
	pci_intx_for_msi(dev, 0);
327
	msix_set_enable(dev, 0);
328

329 330
	list_for_each_entry(entry, &dev->msi_list, list) {
		write_msi_msg(entry->irq, &entry->msg);
331
		msi_set_mask_bits(entry->irq, 1, entry->msi_attrib.masked);
332 333
	}

334 335
	BUG_ON(list_empty(&dev->msi_list));
	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
336
	pos = entry->msi_attrib.pos;
337 338 339 340
	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
	control &= ~PCI_MSIX_FLAGS_MASKALL;
	control |= PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
341
}
342 343 344 345 346 347

/*
 * pci_restore_msi_state - restore MSI and then MSI-X state of @dev
 *
 * Each helper returns immediately when its mode is not enabled on @dev.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	/* MSI first, MSI-X second. */
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
349

L
Linus Torvalds 已提交
350 351 352 353
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
354
 * Setup the MSI capability structure of device function with a single
355
 * MSI irq, regardless of device function is capable of handling
L
Linus Torvalds 已提交
356
 * multiple messages. A return of zero indicates the successful setup
357
 * of an entry zero with the new MSI irq or non-zero for otherwise.
L
Linus Torvalds 已提交
358 359 360 361
 **/
static int msi_capability_init(struct pci_dev *dev)
{
	struct msi_desc *entry;
362
	int pos, ret;
L
Linus Torvalds 已提交
363 364
	u16 control;

365 366
	msi_set_enable(dev, 0);	/* Ensure msi is disabled as I set it up */

L
Linus Torvalds 已提交
367 368 369
   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
370 371 372
	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;
373

L
Linus Torvalds 已提交
374
	entry->msi_attrib.type = PCI_CAP_ID_MSI;
375
	entry->msi_attrib.is_64 = is_64bit_address(control);
L
Linus Torvalds 已提交
376 377
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
378
	entry->msi_attrib.masked = 1;
379
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
380
	entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
381 382 383 384
	if (is_mask_bit_support(control)) {
		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
				is_64bit_address(control));
	}
385 386 387 388 389 390 391 392 393 394 395 396 397
	entry->dev = dev;
	if (entry->msi_attrib.maskbit) {
		unsigned int maskbits, temp;
		/* All MSIs are unmasked by default, Mask them all */
		pci_read_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			&maskbits);
		temp = (1 << multi_msi_capable(control));
		temp = ((temp - 1) & ~temp);
		maskbits |= temp;
		pci_write_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			maskbits);
398
		entry->msi_attrib.maskbits_mask = temp;
399
	}
400
	list_add_tail(&entry->list, &dev->msi_list);
401

L
Linus Torvalds 已提交
402
	/* Configure MSI capability structure */
403
	ret = arch_setup_msi_irqs(dev, 1, PCI_CAP_ID_MSI);
404
	if (ret) {
405
		msi_free_irqs(dev);
406
		return ret;
407
	}
408

L
Linus Torvalds 已提交
409
	/* Set MSI enabled bits	 */
410
	pci_intx_for_msi(dev, 0);
411 412
	msi_set_enable(dev, 1);
	dev->msi_enabled = 1;
L
Linus Torvalds 已提交
413

414
	dev->irq = entry->irq;
L
Linus Torvalds 已提交
415 416 417 418 419 420
	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
421 422
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
423
 *
424
 * Setup the MSI-X capability structure of device function with a
425 426
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
427 428 429 430
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
431
	struct msi_desc *entry;
432
	int pos, i, j, nr_entries, ret;
433 434
	unsigned long phys_addr;
	u32 table_offset;
L
Linus Torvalds 已提交
435 436 437 438
 	u16 control;
	u8 bir;
	void __iomem *base;

439 440
	msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */

L
Linus Torvalds 已提交
441 442 443 444
   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
445 446

 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
L
Linus Torvalds 已提交
447
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
448 449
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start (dev, bir) + table_offset;
L
Linus Torvalds 已提交
450 451 452 453 454 455
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
456 457
		entry = alloc_msi_entry();
		if (!entry)
L
Linus Torvalds 已提交
458 459 460 461
			break;

 		j = entries[i].entry;
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
462
		entry->msi_attrib.is_64 = 1;
L
Linus Torvalds 已提交
463 464
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
465
		entry->msi_attrib.masked = 1;
466
		entry->msi_attrib.default_irq = dev->irq;
467
		entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
468 469
		entry->dev = dev;
		entry->mask_base = base;
470

471
		list_add_tail(&entry->list, &dev->msi_list);
L
Linus Torvalds 已提交
472
	}
473 474 475 476 477 478 479 480

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret) {
		int avail = 0;
		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0) {
				avail++;
			}
L
Linus Torvalds 已提交
481
		}
482

483 484
		msi_free_irqs(dev);

485 486 487
		/* If we had some success report the number of irqs
		 * we succeeded in setting up.
		 */
488 489
		if (avail == 0)
			avail = ret;
490
		return avail;
L
Linus Torvalds 已提交
491
	}
492 493 494 495 496 497 498

	i = 0;
	list_for_each_entry(entry, &dev->msi_list, list) {
		entries[i].vector = entry->irq;
		set_irq_msi(entry->irq, entry);
		i++;
	}
L
Linus Torvalds 已提交
499
	/* Set MSI-X enabled bits */
500
	pci_intx_for_msi(dev, 0);
501 502
	msix_set_enable(dev, 1);
	dev->msix_enabled = 1;
L
Linus Torvalds 已提交
503 504 505 506

	return 0;
}

507
/**
508
 * pci_msi_check_device - check whether MSI may be enabled on a device
509
 * @dev: pointer to the pci_dev data structure of MSI device function
510
 * @nvec: how many MSIs have been requested ?
511
 * @type: are we checking for MSI or MSI-X ?
512
 *
513
 * Look at global flags, the device itself, and its parent busses
514 515
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
516
 **/
517
static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
518 519
{
	struct pci_bus *bus;
520
	int ret;
521

522
	/* MSI must be globally enabled and supported by the device */
523 524 525
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

526 527 528 529 530 531 532 533
	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

534 535 536 537 538 539
	/* Any bridge which does NOT route MSI transactions from it's
	 * secondary bus to it's primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
540 541 542 543
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

544 545 546 547
	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

548 549 550
	if (!pci_find_capability(dev, type))
		return -EINVAL;

551 552 553
	return 0;
}

L
Linus Torvalds 已提交
554 555 556 557 558
/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with
559
 * a single MSI irq upon its software driver call to request for
L
Linus Torvalds 已提交
560 561
 * MSI mode enabled on its hardware device function. A return of zero
 * indicates the successful setup of an entry zero with the new MSI
562
 * irq or non-zero for otherwise.
L
Linus Torvalds 已提交
563 564 565
 **/
int pci_enable_msi(struct pci_dev* dev)
{
566
	int status;
L
Linus Torvalds 已提交
567

568 569 570
	status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
	if (status)
		return status;
L
Linus Torvalds 已提交
571

E
Eric W. Biederman 已提交
572
	WARN_ON(!!dev->msi_enabled);
L
Linus Torvalds 已提交
573

574
	/* Check whether driver already requested for MSI-X irqs */
575
	if (dev->msix_enabled) {
576 577
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
578
		return -EINVAL;
L
Linus Torvalds 已提交
579 580 581 582
	}
	status = msi_capability_init(dev);
	return status;
}
583
EXPORT_SYMBOL(pci_enable_msi);
L
Linus Torvalds 已提交
584

585
void pci_msi_shutdown(struct pci_dev* dev)
L
Linus Torvalds 已提交
586 587 588
{
	struct msi_desc *entry;

589
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
E
Eric W. Biederman 已提交
590 591
		return;

592
	msi_set_enable(dev, 0);
593
	pci_intx_for_msi(dev, 1);
594
	dev->msi_enabled = 0;
595

596 597
	BUG_ON(list_empty(&dev->msi_list));
	entry = list_entry(dev->msi_list.next, struct msi_desc, list);
598 599 600 601 602
	/* Return the the pci reset with msi irqs unmasked */
	if (entry->msi_attrib.maskbit) {
		u32 mask = entry->msi_attrib.maskbits_mask;
		msi_set_mask_bits(dev->irq, mask, ~mask);
	}
603
	if (!entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI)
L
Linus Torvalds 已提交
604
		return;
605 606

	/* Restore dev->irq to its default pin-assertion irq */
607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
	dev->irq = entry->msi_attrib.default_irq;
}
/*
 * pci_disable_msi - shut MSI down on @dev and release its descriptors
 *
 * Does nothing if MSI is globally disabled, @dev is NULL or MSI is not
 * enabled on @dev.  After pci_msi_shutdown(), the descriptors are freed
 * with msi_free_irqs() provided the first one has a device and is of MSI
 * type.
 */
void pci_disable_msi(struct pci_dev *dev)
{
	struct msi_desc *desc;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);

	desc = list_entry(dev->msi_list.next, struct msi_desc, list);
	if (desc->dev && desc->msi_attrib.type == PCI_CAP_ID_MSI)
		msi_free_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msi);
L
Linus Torvalds 已提交
625

626
static int msi_free_irqs(struct pci_dev* dev)
L
Linus Torvalds 已提交
627
{
628
	struct msi_desc *entry, *tmp;
M
Michael Ellerman 已提交
629

630 631 632 633
	list_for_each_entry(entry, &dev->msi_list, list) {
		if (entry->irq)
			BUG_ON(irq_has_action(entry->irq));
	}
L
Linus Torvalds 已提交
634

635
	arch_teardown_msi_irqs(dev);
L
Linus Torvalds 已提交
636

637 638 639 640 641
	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
		if (entry->msi_attrib.type == PCI_CAP_ID_MSIX) {
			writel(1, entry->mask_base + entry->msi_attrib.entry_nr
				  * PCI_MSIX_ENTRY_SIZE
				  + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
642 643 644

			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
645 646 647
		}
		list_del(&entry->list);
		kfree(entry);
L
Linus Torvalds 已提交
648 649 650 651 652 653 654 655
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
656
 * @entries: pointer to an array of MSI-X entries
657
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
658 659
 *
 * Setup the MSI-X capability structure of device function with the number
660
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
661 662
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
663
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
664
 * Or a return of > 0 indicates that driver request is exceeding the number
665
 * of irqs available. Driver should use the returned value to re-send
L
Linus Torvalds 已提交
666 667 668 669
 * its request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
670
	int status, pos, nr_entries;
E
Eric W. Biederman 已提交
671
	int i, j;
L
Linus Torvalds 已提交
672 673
	u16 control;

674
	if (!entries)
L
Linus Torvalds 已提交
675 676
 		return -EINVAL;

677 678 679 680
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

681
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
L
Linus Torvalds 已提交
682 683 684 685 686 687 688 689 690 691 692 693 694 695
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
E
Eric W. Biederman 已提交
696
	WARN_ON(!!dev->msix_enabled);
697

698
	/* Check whether driver already requested for MSI irq */
699
   	if (dev->msi_enabled) {
700 701
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
L
Linus Torvalds 已提交
702 703 704 705 706
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
707
EXPORT_SYMBOL(pci_enable_msix);
L
Linus Torvalds 已提交
708

709
/*
 * Free all MSI-X descriptors of @dev.  Thin wrapper around
 * msi_free_irqs() that discards its return value.
 */
static void msix_free_all_irqs(struct pci_dev *dev)
{
	(void)msi_free_irqs(dev);
}

714
void pci_msix_shutdown(struct pci_dev* dev)
715
{
716
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
E
Eric W. Biederman 已提交
717 718
		return;

719
	msix_set_enable(dev, 0);
720
	pci_intx_for_msi(dev, 1);
721
	dev->msix_enabled = 0;
722 723 724 725 726 727 728
}
/*
 * pci_disable_msix - shut MSI-X down on @dev and release its descriptors
 *
 * Does nothing if MSI is globally disabled, @dev is NULL or MSI-X is not
 * enabled on @dev.  Otherwise calls pci_msix_shutdown() followed by
 * msix_free_all_irqs().
 */
void pci_disable_msix(struct pci_dev *dev)
{
	int active = pci_msi_enable && dev && dev->msix_enabled;

	if (!active)
		return;

	pci_msix_shutdown(dev);

	msix_free_all_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msix);
L
Linus Torvalds 已提交
733 734

/**
735
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
736 737
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
738
 * Being called during hotplug remove, from which the device function
739
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
740 741 742 743 744 745 746 747
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
	if (!pci_msi_enable || !dev)
 		return;

748 749
	if (dev->msi_enabled)
		msi_free_irqs(dev);
L
Linus Torvalds 已提交
750

751 752
	if (dev->msix_enabled)
		msix_free_all_irqs(dev);
L
Linus Torvalds 已提交
753 754
}

755 756 757 758
/*
 * pci_no_msi - globally turn MSI support off
 *
 * Clears pci_msi_enable; the functions in this file that test that flag
 * then refuse to act.
 */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
759

760 761 762 763
/*
 * pci_msi_init_pci_dev - initialise the MSI bookkeeping of a pci_dev
 * @dev: device whose msi_list head is initialised to an empty list
 */
void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	INIT_LIST_HEAD(&dev->msi_list);
}