msi.c 21.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13 14 15 16
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
17
#include <linux/msi.h>
D
Dan Williams 已提交
18
#include <linux/smp.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26 27

#include <asm/errno.h>
#include <asm/io.h>

#include "pci.h"
#include "msi.h"

/* Global MSI switch: cleared by pci_no_msi(), reported by pci_msi_enabled(). */
static int pci_msi_enable = 1;

28 29
/* Arch hooks */

30 31
#ifndef arch_msi_check_device
/*
 * arch_msi_check_device - default architecture veto hook
 * @dev: device asking for MSI/MSI-X
 * @nvec: number of vectors requested
 * @type: PCI_CAP_ID_MSI or PCI_CAP_ID_MSIX
 *
 * This fallback accepts every request and returns 0.  It is compiled only
 * when no arch_msi_check_device macro is defined.
 */
int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	return 0;
}
#endif
36

37 38
#ifndef arch_setup_msi_irqs
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
39 40 41 42
{
	struct msi_desc *entry;
	int ret;

43 44 45 46 47 48 49
	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

50 51
	list_for_each_entry(entry, &dev->msi_list, list) {
		ret = arch_setup_msi_irq(dev, entry);
52
		if (ret < 0)
53
			return ret;
54 55
		if (ret > 0)
			return -ENOSPC;
56 57 58 59
	}

	return 0;
}
60
#endif
61

62 63
#ifndef arch_teardown_msi_irqs
void arch_teardown_msi_irqs(struct pci_dev *dev)
64 65 66 67
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
68 69 70 71 72 73
		int i, nvec;
		if (entry->irq == 0)
			continue;
		nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
74 75
	}
}
76
#endif
77

78
static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
79 80 81
{
	u16 control;

82
	BUG_ON(!pos);
83

84 85 86 87 88
	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
89 90
}

91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
/*
 * msix_set_enable - set or clear the MSI-X enable bit of a device
 * @dev: device to update
 * @enable: non-zero sets PCI_MSIX_FLAGS_ENABLE, zero clears it
 *
 * Does nothing when @dev has no MSI-X capability.
 */
static void msix_set_enable(struct pci_dev *dev, int enable)
{
	u16 flags;
	int cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);

	if (!cap)
		return;

	pci_read_config_word(dev, cap + PCI_MSIX_FLAGS, &flags);
	if (enable)
		flags |= PCI_MSIX_FLAGS_ENABLE;
	else
		flags &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, cap + PCI_MSIX_FLAGS, flags);
}

106 107
/*
 * msi_mask - build a bitmask with one bit per vector
 * @x: log2 of the vector count
 *
 * Returns a mask with the low (1 << @x) bits set; for @x >= 5 all 32 bits
 * are set.
 */
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x >= 5)
		return 0xffffffff;
	/* Unsigned literals keep the shift arithmetic in the return type. */
	return (1U << (1U << x)) - 1;
}

114
/*
 * msi_capable_mask - mask covering every vector the device can request
 * @control: value of the MSI flags word
 *
 * Extracts the PCI_MSI_FLAGS_QMASK field (bits 3:1) and expands it with
 * msi_mask().
 */
static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
	return msi_mask((control & PCI_MSI_FLAGS_QMASK) >> 1);
}
M
Mitch Williams 已提交
118

119 120 121
/*
 * msi_enabled_mask - mask covering the vectors currently configured
 * @control: value of the MSI flags word
 *
 * Extracts the PCI_MSI_FLAGS_QSIZE field (bits 6:4) and expands it with
 * msi_mask().
 */
static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
{
	return msi_mask((control & PCI_MSI_FLAGS_QSIZE) >> 4);
}

124 125 126 127 128 129
/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
130
static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
L
Linus Torvalds 已提交
131
{
132
	u32 mask_bits = desc->masked;
L
Linus Torvalds 已提交
133

134
	if (!desc->msi_attrib.maskbit)
135
		return 0;
136 137 138 139

	mask_bits &= ~mask;
	mask_bits |= flag;
	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
140 141 142 143 144 145 146

	return mask_bits;
}

/*
 * msi_mask_irq - update MSI mask bits and remember the new state
 * @desc: MSI descriptor
 * @mask: bits to replace in the mask register
 * @flag: new values for those bits
 *
 * Same as __msi_mask_irq(), but the written value is cached in
 * desc->masked.
 */
static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	desc->masked = __msi_mask_irq(desc, mask, flag);
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file.  This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
156
static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
157 158 159
{
	u32 mask_bits = desc->masked;
	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
160
						PCI_MSIX_ENTRY_VECTOR_CTRL;
161 162 163
	mask_bits &= ~1;
	mask_bits |= flag;
	writel(mask_bits, desc->mask_base + offset);
164 165 166 167 168 169 170

	return mask_bits;
}

/*
 * msix_mask_irq - update an MSI-X entry's mask bit and remember the state
 * @desc: MSI-X descriptor
 * @flag: new value for bit 0 of the entry's vector control word
 *
 * Same as __msix_mask_irq(), but the written value is cached in
 * desc->masked.  The write is not flushed here.
 */
static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	desc->masked = __msix_mask_irq(desc, flag);
}
172

173 174 175
/*
 * msi_set_mask_bit - mask or unmask the vector behind an irq number
 * @irq: irq whose vector is updated
 * @flag: 1 to mask, 0 to unmask
 *
 * MSI-X: updates the entry's mask bit, then reads back from the table to
 * flush the write to the device.
 * MSI: the bit position in the mask register is the distance of @irq from
 * the device's base irq (desc->dev->irq).
 */
static void msi_set_mask_bit(unsigned irq, u32 flag)
{
	struct msi_desc *desc = get_irq_msi(irq);

	if (desc->msi_attrib.is_msix) {
		msix_mask_irq(desc, flag);
		readl(desc->mask_base);		/* Flush write to device */
	} else {
		unsigned offset = irq - desc->dev->irq;
		/* 1U: a signed 1 << 31 would overflow for the last vector */
		msi_mask_irq(desc, 1U << offset, flag << offset);
	}
}

void mask_msi_irq(unsigned int irq)
{
	msi_set_mask_bit(irq, 1);
}

/* Unmask the MSI or MSI-X vector that backs @irq. */
void unmask_msi_irq(unsigned int irq)
{
	msi_set_mask_bit(irq, 0);
}

Y
Yinghai Lu 已提交
196
void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
L
Linus Torvalds 已提交
197
{
Y
Yinghai Lu 已提交
198
	struct msi_desc *entry = get_irq_desc_msi(desc);
199 200 201 202
	if (entry->msi_attrib.is_msix) {
		void __iomem *base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

203 204 205
		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
206
	} else {
207 208 209 210 211 212 213 214 215 216 217 218
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
219
			pci_read_config_word(dev, msi_data_reg(pos, 0), &data);
220 221 222 223
		}
		msg->data = data;
	}
}
L
Linus Torvalds 已提交
224

Y
Yinghai Lu 已提交
225
/*
 * read_msi_msg - read the programmed MSI message for an irq number
 * @irq: irq to look up
 * @msg: filled by read_msi_msg_desc()
 */
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct irq_desc *desc = irq_to_desc(irq);

	read_msi_msg_desc(desc, msg);
}

void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
{
	struct msi_desc *entry = get_irq_desc_msi(desc);
235 236 237 238 239
	if (entry->msi_attrib.is_msix) {
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

240 241 242
		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
243
	} else {
244 245
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
246 247 248 249 250 251
		u16 msgctl;

		pci_read_config_word(dev, msi_control_reg(pos), &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
		pci_write_config_word(dev, msi_control_reg(pos), msgctl);
252 253 254 255 256 257 258 259 260 261 262 263

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
L
Linus Torvalds 已提交
264
	}
265
	entry->msg = *msg;
L
Linus Torvalds 已提交
266
}
267

Y
Yinghai Lu 已提交
268 269 270 271 272 273 274
/*
 * write_msi_msg - program an MSI message for an irq number
 * @irq: irq to look up
 * @msg: message handed to write_msi_msg_desc()
 */
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	write_msi_msg_desc(irq_to_desc(irq), msg);
}

275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298
/*
 * free_msi_irqs - tear down and free every MSI descriptor of a device
 * @dev: device whose msi_list is emptied
 *
 * BUGs if any irq of the device still has an action attached.  After the
 * arch teardown hook has run, every descriptor is unlinked and freed.
 */
static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *desc, *next;

	list_for_each_entry(desc, &dev->msi_list, list) {
		int nr_vecs, v;

		if (desc->irq == 0)
			continue;
		nr_vecs = 1 << desc->msi_attrib.multiple;
		for (v = 0; v < nr_vecs; v++)
			BUG_ON(irq_has_action(desc->irq + v));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(desc, next, &dev->msi_list, list) {
		/* The table mapping is released once, with the last entry. */
		if (desc->msi_attrib.is_msix &&
		    list_is_last(&desc->list, &dev->msi_list))
			iounmap(desc->mask_base);
		list_del(&desc->list);
		kfree(desc);
	}
}
S
Satoru Takeuchi 已提交
299

300
static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
L
Linus Torvalds 已提交
301
{
302 303
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
L
Linus Torvalds 已提交
304 305
		return NULL;

306 307
	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;
L
Linus Torvalds 已提交
308

309
	return desc;
L
Linus Torvalds 已提交
310 311
}

312 313 314 315 316 317
/*
 * pci_intx_for_msi - toggle INTx on behalf of the MSI code
 * @dev: device to update
 * @enable: passed through to pci_intx()
 *
 * Devices flagged PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG are left untouched.
 */
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)
		return;

	pci_intx(dev, enable);
}

318
static void __pci_restore_msi_state(struct pci_dev *dev)
319
{
320
	int pos;
321
	u16 control;
322
	struct msi_desc *entry;
323

324 325 326
	if (!dev->msi_enabled)
		return;

327 328
	entry = get_irq_msi(dev->irq);
	pos = entry->msi_attrib.pos;
329

330
	pci_intx_for_msi(dev, 0);
331
	msi_set_enable(dev, pos, 0);
332 333 334
	write_msi_msg(dev->irq, &entry->msg);

	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
335
	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
336
	control &= ~PCI_MSI_FLAGS_QSIZE;
337
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
338
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
339 340 341
}

static void __pci_restore_msix_state(struct pci_dev *dev)
342 343 344
{
	int pos;
	struct msi_desc *entry;
345
	u16 control;
346

E
Eric W. Biederman 已提交
347 348
	if (!dev->msix_enabled)
		return;
349
	BUG_ON(list_empty(&dev->msi_list));
H
Hidetoshi Seto 已提交
350
	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
351 352
	pos = entry->msi_attrib.pos;
	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
E
Eric W. Biederman 已提交
353

354
	/* route the table */
355
	pci_intx_for_msi(dev, 0);
356 357
	control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
358

359 360
	list_for_each_entry(entry, &dev->msi_list, list) {
		write_msi_msg(entry->irq, &entry->msg);
361
		msix_mask_irq(entry, entry->masked);
362 363
	}

364 365
	control &= ~PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
366
}
367 368 369 370 371 372

/**
 * pci_restore_msi_state - restore a device's MSI and MSI-X programming
 * @dev: device to restore
 *
 * Runs both the MSI and the MSI-X restore helpers; each one returns
 * immediately when its mode is not enabled on @dev.
 **/
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
374

L
Linus Torvalds 已提交
375 376 377
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
378
 * @nvec: number of interrupts to allocate
L
Linus Torvalds 已提交
379
 *
380 381 382 383 384 385 386
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
L
Linus Torvalds 已提交
387 388
{
	struct msi_desc *entry;
389
	int pos, ret;
L
Linus Torvalds 已提交
390
	u16 control;
391
	unsigned mask;
L
Linus Torvalds 已提交
392 393

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
394 395
	msi_set_enable(dev, pos, 0);	/* Disable MSI during set up */

L
Linus Torvalds 已提交
396 397
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
398
	entry = alloc_msi_entry(dev);
399 400
	if (!entry)
		return -ENOMEM;
401

402
	entry->msi_attrib.is_msix = 0;
403
	entry->msi_attrib.is_64 = is_64bit_address(control);
L
Linus Torvalds 已提交
404 405
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
406
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
407
	entry->msi_attrib.pos = pos;
408

409
	entry->mask_pos = msi_mask_reg(pos, entry->msi_attrib.is_64);
410 411 412 413 414 415
	/* All MSIs are unmasked by default, Mask them all */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
	mask = msi_capable_mask(control);
	msi_mask_irq(entry, mask, mask);

416
	list_add_tail(&entry->list, &dev->msi_list);
417

L
Linus Torvalds 已提交
418
	/* Configure MSI capability structure */
419
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
420
	if (ret) {
421
		msi_mask_irq(entry, mask, ~mask);
422
		free_msi_irqs(dev);
423
		return ret;
424
	}
425

L
Linus Torvalds 已提交
426
	/* Set MSI enabled bits	 */
427
	pci_intx_for_msi(dev, 0);
428
	msi_set_enable(dev, pos, 1);
429
	dev->msi_enabled = 1;
L
Linus Torvalds 已提交
430

431
	dev->irq = entry->irq;
L
Linus Torvalds 已提交
432 433 434
	return 0;
}

435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/*
 * msix_map_region - map a device's MSI-X table
 * @dev: device owning the table
 * @pos: config-space offset of the MSI-X capability
 * @nr_entries: number of table entries to map
 *
 * Reads the table offset register, splits it into the BAR index (low
 * PCI_MSIX_FLAGS_BIRMASK bits) and the offset within that BAR, and maps
 * @nr_entries * PCI_MSIX_ENTRY_SIZE bytes uncached.
 *
 * Returns the mapping, or NULL when ioremap fails.
 */
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned pos,
							unsigned nr_entries)
{
	/*
	 * resource_size_t, not unsigned long: BAR addresses may be wider
	 * than a long on 32-bit kernels with 64-bit resources.
	 */
	resource_size_t phys_addr;
	u32 table_offset;
	u8 bir;

	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
/*
 * msix_setup_entries - allocate one descriptor per requested MSI-X entry
 * @dev: device being configured
 * @pos: config-space offset of the MSI-X capability
 * @base: mapped MSI-X table, shared by all descriptors
 * @entries: caller's table of requested entry numbers
 * @nvec: number of @entries
 *
 * Appends @nvec descriptors to @dev->msi_list.  Returns 0 on success or
 * -ENOMEM when an allocation fails; in that case @base has been unmapped,
 * either directly (nothing queued yet) or through free_msi_irqs().
 */
static int msix_setup_entries(struct pci_dev *dev, unsigned pos,
				void __iomem *base, struct msix_entry *entries,
				int nvec)
{
	int idx;

	for (idx = 0; idx < nvec; idx++) {
		struct msi_desc *desc = alloc_msi_entry(dev);

		if (desc) {
			desc->msi_attrib.is_msix	= 1;
			desc->msi_attrib.is_64		= 1;
			desc->msi_attrib.entry_nr	= entries[idx].entry;
			desc->msi_attrib.default_irq	= dev->irq;
			desc->msi_attrib.pos		= pos;
			desc->mask_base			= base;

			list_add_tail(&desc->list, &dev->msi_list);
			continue;
		}

		/* Not enough memory. Don't try again */
		if (idx == 0)
			iounmap(base);
		else
			free_msi_irqs(dev);
		return -ENOMEM;
	}

	return 0;
}

481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498
/*
 * msix_program_entries - bind irqs to descriptors and mask every entry
 * @dev: device being configured
 * @entries: caller's table; ->vector is filled with the assigned irq
 *
 * Walks @dev->msi_list and @entries in step.  For each pair the irq is
 * reported back, the descriptor is attached to the irq, the current
 * vector control word is cached and the entry is masked.
 */
static void msix_program_entries(struct pci_dev *dev,
					struct msix_entry *entries)
{
	struct msi_desc *desc;
	struct msix_entry *slot = entries;

	list_for_each_entry(desc, &dev->msi_list, list) {
		int ctrl_off = slot->entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		slot->vector = desc->irq;
		set_irq_msi(desc->irq, desc);
		desc->masked = readl(desc->mask_base + ctrl_off);
		msix_mask_irq(desc, 1);
		slot++;
	}
}

L
Linus Torvalds 已提交
499 500 501
/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
502 503
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
504
 *
505
 * Setup the MSI-X capability structure of device function with a
506 507
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
508 509 510 511
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
512
	int pos, ret;
513
	u16 control;
L
Linus Torvalds 已提交
514 515 516
	void __iomem *base;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
517 518 519 520 521 522
	pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);

	/* Ensure MSI-X is disabled while it is set up */
	control &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);

L
Linus Torvalds 已提交
523
	/* Request & Map MSI-X table region */
524 525
	base = msix_map_region(dev, pos, multi_msix_capable(control));
	if (!base)
L
Linus Torvalds 已提交
526 527
		return -ENOMEM;

528 529 530
	ret = msix_setup_entries(dev, pos, base, entries, nvec);
	if (ret)
		return ret;
531 532

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
533 534
	if (ret)
		goto error;
535

536 537 538 539 540 541 542 543
	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);

544
	msix_program_entries(dev, entries);
545 546

	/* Set MSI-X enabled bits and unmask the function */
547
	pci_intx_for_msi(dev, 0);
548
	dev->msix_enabled = 1;
L
Linus Torvalds 已提交
549

550 551
	control &= ~PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
552

L
Linus Torvalds 已提交
553
	return 0;
554 555 556 557 558 559 560

error:
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
561
		struct msi_desc *entry;
562 563 564 565 566 567 568 569 570 571 572 573 574
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

	free_msi_irqs(dev);

	return ret;
L
Linus Torvalds 已提交
575 576
}

577
/**
578
 * pci_msi_check_device - check whether MSI may be enabled on a device
579
 * @dev: pointer to the pci_dev data structure of MSI device function
580
 * @nvec: how many MSIs have been requested ?
581
 * @type: are we checking for MSI or MSI-X ?
582
 *
583
 * Look at global flags, the device itself, and its parent busses
584 585
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
586
 **/
587
static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
588 589
{
	struct pci_bus *bus;
590
	int ret;
591

592
	/* MSI must be globally enabled and supported by the device */
593 594 595
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

596 597 598 599 600 601 602 603
	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

604 605 606 607 608 609
	/* Any bridge which does NOT route MSI transactions from it's
	 * secondary bus to it's primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
610 611 612 613
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

614 615 616 617
	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

618 619 620
	if (!pci_find_capability(dev, type))
		return -EINVAL;

621 622 623
	return 0;
}

L
Linus Torvalds 已提交
624
/**
625 626 627
 * pci_enable_msi_block - configure device's MSI capability structure
 * @dev: device to configure
 * @nvec: number of interrupts to configure
L
Linus Torvalds 已提交
628
 *
629 630 631 632 633 634 635 636 637
 * Allocate IRQs for a device with the MSI capability.
 * This function returns a negative errno if an error occurs.  If it
 * is unable to allocate the number of interrupts requested, it returns
 * the number of interrupts it might be able to allocate.  If it successfully
 * allocates at least the number of interrupts requested, it returns 0 and
 * updates the @dev's irq member to the lowest new interrupt number; the
 * other interrupt numbers allocated to this device are consecutive.
 */
int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
L
Linus Torvalds 已提交
638
{
639 640 641 642 643 644 645 646 647 648
	int status, pos, maxvec;
	u16 msgctl;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
		return -EINVAL;
	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
	if (nvec > maxvec)
		return maxvec;
L
Linus Torvalds 已提交
649

650
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
651 652
	if (status)
		return status;
L
Linus Torvalds 已提交
653

E
Eric W. Biederman 已提交
654
	WARN_ON(!!dev->msi_enabled);
L
Linus Torvalds 已提交
655

656
	/* Check whether driver already requested MSI-X irqs */
657
	if (dev->msix_enabled) {
658 659
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
660
		return -EINVAL;
L
Linus Torvalds 已提交
661
	}
662 663

	status = msi_capability_init(dev, nvec);
L
Linus Torvalds 已提交
664 665
	return status;
}
666
EXPORT_SYMBOL(pci_enable_msi_block);
L
Linus Torvalds 已提交
667

668
void pci_msi_shutdown(struct pci_dev *dev)
L
Linus Torvalds 已提交
669
{
670 671 672
	struct msi_desc *desc;
	u32 mask;
	u16 ctrl;
673
	unsigned pos;
L
Linus Torvalds 已提交
674

675
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
E
Eric W. Biederman 已提交
676 677
		return;

678 679 680 681 682
	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);
	pos = desc->msi_attrib.pos;

	msi_set_enable(dev, pos, 0);
683
	pci_intx_for_msi(dev, 1);
684
	dev->msi_enabled = 0;
685

686
	/* Return the device with MSI unmasked as initial states */
687
	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &ctrl);
688
	mask = msi_capable_mask(ctrl);
689 690
	/* Keep cached state to be restored */
	__msi_mask_irq(desc, mask, ~mask);
691 692

	/* Restore dev->irq to its default pin-assertion irq */
693
	dev->irq = desc->msi_attrib.default_irq;
694
}
695

696 697 698 699 700 701
/*
 * pci_disable_msi - disable MSI on a device and free its descriptors
 * @dev: device to disable MSI on
 *
 * No-op when MSI is globally disabled, @dev is NULL or MSI is not enabled
 * on @dev.  Otherwise runs pci_msi_shutdown() followed by free_msi_irqs().
 */
void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msi);
L
Linus Torvalds 已提交
705

706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722
/**
 * pci_msix_table_size - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 *
 * Returns 0 when @dev has no MSI-X capability.
 */
int pci_msix_table_size(struct pci_dev *dev)
{
	u16 flags;
	int cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);

	if (cap == 0)
		return 0;

	pci_read_config_word(dev, msi_control_reg(cap), &flags);
	return multi_msix_capable(flags);
}

L
Linus Torvalds 已提交
723 724 725
/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
726
 * @entries: pointer to an array of MSI-X entries
727
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
728 729
 *
 * Setup the MSI-X capability structure of device function with the number
730
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
731 732
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
733
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
734
 * Or a return of > 0 indicates that driver request is exceeding the number
735 736
 * of irqs or MSI-X vectors available. Driver should use the returned value to
 * re-send its request.
L
Linus Torvalds 已提交
737 738 739
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
740
	int status, nr_entries;
E
Eric W. Biederman 已提交
741
	int i, j;
L
Linus Torvalds 已提交
742

743
	if (!entries)
L
Linus Torvalds 已提交
744 745
 		return -EINVAL;

746 747 748 749
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

750
	nr_entries = pci_msix_table_size(dev);
L
Linus Torvalds 已提交
751
	if (nvec > nr_entries)
752
		return nr_entries;
L
Linus Torvalds 已提交
753 754 755 756 757 758 759 760 761 762

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
E
Eric W. Biederman 已提交
763
	WARN_ON(!!dev->msix_enabled);
764

765
	/* Check whether driver already requested for MSI irq */
766
   	if (dev->msi_enabled) {
767 768
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
L
Linus Torvalds 已提交
769 770 771 772 773
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
774
EXPORT_SYMBOL(pci_enable_msix);
L
Linus Torvalds 已提交
775

776
void pci_msix_shutdown(struct pci_dev* dev)
777
{
778 779
	struct msi_desc *entry;

780
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
E
Eric W. Biederman 已提交
781 782
		return;

783 784 785 786 787 788
	/* Return the device with MSI-X masked as initial states */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
		__msix_mask_irq(entry, 1);
	}

789
	msix_set_enable(dev, 0);
790
	pci_intx_for_msi(dev, 1);
791
	dev->msix_enabled = 0;
792
}
793

794 795 796 797 798 799
/*
 * pci_disable_msix - disable MSI-X on a device and free its descriptors
 * @dev: device to disable MSI-X on
 *
 * No-op when MSI is globally disabled, @dev is NULL or MSI-X is not
 * enabled on @dev.  Otherwise runs pci_msix_shutdown() followed by
 * free_msi_irqs().
 */
void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
}
EXPORT_SYMBOL(pci_disable_msix);
L
Linus Torvalds 已提交
803 804

/**
805
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
806 807
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
808
 * Being called during hotplug remove, from which the device function
809
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
810 811 812 813 814 815 816 817
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
	if (!pci_msi_enable || !dev)
 		return;

818 819
	if (dev->msi_enabled || dev->msix_enabled)
		free_msi_irqs(dev);
L
Linus Torvalds 已提交
820 821
}

822 823 824 825
/* Turn MSI off globally by clearing pci_msi_enable. */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
826

827 828 829 830 831 832 833
/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
834
{
835
	return pci_msi_enable;
836
}
837
EXPORT_SYMBOL(pci_msi_enabled);
838

839
void pci_msi_init_pci_dev(struct pci_dev *dev)
840
{
841
	INIT_LIST_HEAD(&dev->msi_list);
842
}