msi.c 27.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
13
#include <linux/export.h>
L
Linus Torvalds 已提交
14 15 16
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
17
#include <linux/msi.h>
D
Dan Williams 已提交
18
#include <linux/smp.h>
H
Hidetoshi Seto 已提交
19 20
#include <linux/errno.h>
#include <linux/io.h>
21
#include <linux/slab.h>
L
Linus Torvalds 已提交
22 23 24 25

#include "pci.h"

static int pci_msi_enable = 1;
26
int pci_msi_ignore_mask;
L
Linus Torvalds 已提交
27

28 29 30
#define msix_table_size(flags)	((flags & PCI_MSIX_FLAGS_QSIZE) + 1)


31 32
/* Arch hooks */

33 34
/*
 * Route one MSI vector through the bus's msi_controller, if the
 * architecture registered one; otherwise report -EINVAL.
 */
int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	struct msi_controller *chip = dev->bus->msi;
	int ret;

	if (!chip || !chip->setup_irq)
		return -EINVAL;

	ret = chip->setup_irq(chip, dev, desc);
	if (ret < 0)
		return ret;

	/* Remember the controller so teardown can find it again */
	irq_set_chip_data(desc->irq, chip);
	return 0;
}

/* Undo arch_setup_msi_irq() for a single vector. */
void __weak arch_teardown_msi_irq(unsigned int irq)
{
	struct msi_controller *chip = irq_get_chip_data(irq);

	if (chip && chip->teardown_irq)
		chip->teardown_irq(chip, irq);
}

60
/*
 * Default multi-vector setup: walk the device's MSI descriptor list and
 * set up each vector individually via arch_setup_msi_irq().
 */
int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *desc;
	int ret;

	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	list_for_each_entry(desc, &dev->msi_list, list) {
		ret = arch_setup_msi_irq(dev, desc);
		if (ret < 0)
			return ret;
		if (ret > 0)
			return -ENOSPC;
	}

	return 0;
}
82

83 84 85 86
/*
 * We have a default implementation available as a separate non-weak
 * function, as it is used by the Xen x86 PCI code
 */
87
void default_teardown_msi_irqs(struct pci_dev *dev)
88 89 90 91
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
92 93 94
		int i, nvec;
		if (entry->irq == 0)
			continue;
95 96 97 98
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
99 100
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
101 102 103
	}
}

104 105 106 107
void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
	return default_teardown_msi_irqs(dev);
}
108

109
static void default_restore_msi_irq(struct pci_dev *dev, int irq)
110 111 112 113 114 115 116 117 118 119 120 121 122 123
{
	struct msi_desc *entry;

	entry = NULL;
	if (dev->msix_enabled) {
		list_for_each_entry(entry, &dev->msi_list, list) {
			if (irq == entry->irq)
				break;
		}
	} else if (dev->msi_enabled)  {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
124
		__write_msi_msg(entry, &entry->msg);
125
}
126

127
void __weak arch_restore_msi_irqs(struct pci_dev *dev)
128
{
129
	return default_restore_msi_irqs(dev);
130
}
131

132
static void msi_set_enable(struct pci_dev *dev, int enable)
133 134 135
{
	u16 control;

136
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
137 138 139
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
140
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
141 142
}

143
static void msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set)
144
{
145
	u16 ctrl;
146

147 148 149 150
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl);
	ctrl &= ~clear;
	ctrl |= set;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl);
151 152
}

153 154
/*
 * Per-vector mask for a Multiple Message Capable field of @x, i.e. for
 * 2^x vectors.  x >= 5 would shift by the full width of u32 (undefined
 * behaviour), so return the all-ones mask directly in that case.
 */
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	return (x >= 5) ? 0xffffffff : (1 << (1 << x)) - 1;
}

161 162 163 164 165 166
/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	u32 new_bits = desc->masked;

	/* Nothing to do if masking is globally disabled or unsupported */
	if (pci_msi_ignore_mask || !desc->msi_attrib.maskbit)
		return 0;

	new_bits = (new_bits & ~mask) | flag;
	pci_write_config_dword(desc->dev, desc->mask_pos, new_bits);

	return new_bits;
}

/* Same as __msi_mask_irq() but also refreshes the cached mask state. */
static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	desc->masked = __msi_mask_irq(desc, mask, flag);
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file.  This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	unsigned ctrl_off = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;
	u32 bits = desc->masked;

	if (pci_msi_ignore_mask)
		return 0;

	if (flag)
		bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	else
		bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	writel(bits, desc->mask_base + ctrl_off);

	return bits;
}

/* Same as __msix_mask_irq() but also refreshes the cached mask state. */
static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	desc->masked = __msix_mask_irq(desc, flag);
}
214

215
/* Common mask/unmask path shared by the MSI and MSI-X irq_chip callbacks. */
static void msi_set_mask_bit(struct irq_data *data, u32 flag)
{
	struct msi_desc *desc = irq_data_get_msi(data);

	if (desc->msi_attrib.is_msix) {
		msix_mask_irq(desc, flag);
		readl(desc->mask_base);		/* Flush write to device */
	} else {
		/* Multi-MSI: each vector owns one bit of the mask register */
		unsigned shift = data->irq - desc->irq;

		msi_mask_irq(desc, 1 << shift, flag << shift);
	}
}

void mask_msi_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 1);
}

void unmask_msi_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 0);
}

238 239 240 241 242 243 244 245 246
void default_restore_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		default_restore_msi_irq(dev, entry->irq);
	}
}

247
void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
L
Linus Torvalds 已提交
248
{
249 250 251 252 253 254 255 256 257 258 259
	BUG_ON(entry->dev->current_state != PCI_D0);

	if (entry->msi_attrib.is_msix) {
		void __iomem *base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
260
		int pos = dev->msi_cap;
261 262
		u16 data;

263 264
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
265
		if (entry->msi_attrib.is_64) {
266 267
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
B
Bjorn Helgaas 已提交
268
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
269 270
		} else {
			msg->address_hi = 0;
B
Bjorn Helgaas 已提交
271
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
272 273 274 275 276 277 278
		}
		msg->data = data;
	}
}

void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
279
	struct msi_desc *entry = irq_get_msi_desc(irq);
280

281
	__read_msi_msg(entry, msg);
282 283
}

284
void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
285 286
{
	/* Assert that the cache is valid, assuming that
287 288 289
	 * valid messages are not all-zeroes. */
	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
		 entry->msg.data));
290

291
	*msg = entry->msg;
292
}
L
Linus Torvalds 已提交
293

294
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
295
{
296
	struct msi_desc *entry = irq_get_msi_desc(irq);
Y
Yinghai Lu 已提交
297

298
	__get_cached_msi_msg(entry, msg);
Y
Yinghai Lu 已提交
299
}
300
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
Y
Yinghai Lu 已提交
301

302
void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
Y
Yinghai Lu 已提交
303
{
304 305 306
	if (entry->dev->current_state != PCI_D0) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
307 308 309 310
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

311 312 313
		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
314
	} else {
315
		struct pci_dev *dev = entry->dev;
316
		int pos = dev->msi_cap;
317 318
		u16 msgctl;

319
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
320 321
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
322
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
323

324 325
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
326
		if (entry->msi_attrib.is_64) {
327 328
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
B
Bjorn Helgaas 已提交
329 330
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
331
		} else {
B
Bjorn Helgaas 已提交
332 333
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
334
		}
L
Linus Torvalds 已提交
335
	}
336
	entry->msg = *msg;
L
Linus Torvalds 已提交
337
}
338

Y
Yinghai Lu 已提交
339 340
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
341
	struct msi_desc *entry = irq_get_msi_desc(irq);
Y
Yinghai Lu 已提交
342

343
	__write_msi_msg(entry, msg);
Y
Yinghai Lu 已提交
344
}
345
EXPORT_SYMBOL_GPL(write_msi_msg);
Y
Yinghai Lu 已提交
346

347 348 349
/*
 * Tear down every MSI/MSI-X vector of @dev: arch teardown, descriptor
 * list release (including the MSI-X table mapping) and sysfs cleanup.
 */
static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *desc, *tmp;
	struct attribute **msi_attrs;
	struct device_attribute *dev_attr;
	int idx = 0;

	/* No vector may still have an action (handler) installed */
	list_for_each_entry(desc, &dev->msi_list, list) {
		int vec, count;

		if (!desc->irq)
			continue;
		if (desc->nvec_used)
			count = desc->nvec_used;
		else
			count = 1 << desc->msi_attrib.multiple;
		for (vec = 0; vec < count; vec++)
			BUG_ON(irq_has_action(desc->irq + vec));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(desc, tmp, &dev->msi_list, list) {
		/* All MSI-X entries share one mapping; unmap it once, last */
		if (desc->msi_attrib.is_msix) {
			if (list_is_last(&desc->list, &dev->msi_list))
				iounmap(desc->mask_base);
		}

		list_del(&desc->list);
		kfree(desc);
	}

	if (dev->msi_irq_groups) {
		sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
		msi_attrs = dev->msi_irq_groups[0]->attrs;
		while (msi_attrs[idx]) {
			dev_attr = container_of(msi_attrs[idx],
						struct device_attribute, attr);
			kfree(dev_attr->attr.name);
			kfree(dev_attr);
			++idx;
		}
		kfree(msi_attrs);
		kfree(dev->msi_irq_groups[0]);
		kfree(dev->msi_irq_groups);
		dev->msi_irq_groups = NULL;
	}
}
S
Satoru Takeuchi 已提交
394

395
static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
L
Linus Torvalds 已提交
396
{
397 398
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
L
Linus Torvalds 已提交
399 400
		return NULL;

401 402
	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;
L
Linus Torvalds 已提交
403

404
	return desc;
L
Linus Torvalds 已提交
405 406
}

407 408 409 410 411 412
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

413
static void __pci_restore_msi_state(struct pci_dev *dev)
414 415
{
	u16 control;
416
	struct msi_desc *entry;
417

418 419 420
	if (!dev->msi_enabled)
		return;

421
	entry = irq_get_msi_desc(dev->irq);
422

423
	pci_intx_for_msi(dev, 0);
424
	msi_set_enable(dev, 0);
425
	arch_restore_msi_irqs(dev);
426

427
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
428 429
	msi_mask_irq(entry, msi_mask(entry->msi_attrib.multi_cap),
		     entry->masked);
430
	control &= ~PCI_MSI_FLAGS_QSIZE;
431
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
432
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
433 434 435
}

static void __pci_restore_msix_state(struct pci_dev *dev)
436 437 438
{
	struct msi_desc *entry;

E
Eric W. Biederman 已提交
439 440
	if (!dev->msix_enabled)
		return;
441
	BUG_ON(list_empty(&dev->msi_list));
E
Eric W. Biederman 已提交
442

443
	/* route the table */
444
	pci_intx_for_msi(dev, 0);
445 446
	msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL);
447

448
	arch_restore_msi_irqs(dev);
449
	list_for_each_entry(entry, &dev->msi_list, list) {
450
		msix_mask_irq(entry, entry->masked);
451 452
	}

453
	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
454
}
455 456 457 458 459 460

void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
461
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
462

463
static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
464 465
			     char *buf)
{
466 467 468
	struct msi_desc *entry;
	unsigned long irq;
	int retval;
469

470 471 472
	retval = kstrtoul(attr->attr.name, 10, &irq);
	if (retval)
		return retval;
473

474 475 476 477 478
	entry = irq_get_msi_desc(irq);
	if (entry)
		return sprintf(buf, "%s\n",
				entry->msi_attrib.is_msix ? "msix" : "msi");

479
	return -ENODEV;
480 481 482 483
}

static int populate_msi_sysfs(struct pci_dev *pdev)
{
484 485 486 487 488
	struct attribute **msi_attrs;
	struct attribute *msi_attr;
	struct device_attribute *msi_dev_attr;
	struct attribute_group *msi_irq_group;
	const struct attribute_group **msi_irq_groups;
489
	struct msi_desc *entry;
490 491
	int ret = -ENOMEM;
	int num_msi = 0;
492 493
	int count = 0;

494 495 496 497 498 499
	/* Determine how many msi entries we have */
	list_for_each_entry(entry, &pdev->msi_list, list) {
		++num_msi;
	}
	if (!num_msi)
		return 0;
500

501 502 503 504
	/* Dynamically create the MSI attributes for the PCI device */
	msi_attrs = kzalloc(sizeof(void *) * (num_msi + 1), GFP_KERNEL);
	if (!msi_attrs)
		return -ENOMEM;
505
	list_for_each_entry(entry, &pdev->msi_list, list) {
506
		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
507
		if (!msi_dev_attr)
508
			goto error_attrs;
509
		msi_attrs[count] = &msi_dev_attr->attr;
510

511
		sysfs_attr_init(&msi_dev_attr->attr);
512 513 514 515
		msi_dev_attr->attr.name = kasprintf(GFP_KERNEL, "%d",
						    entry->irq);
		if (!msi_dev_attr->attr.name)
			goto error_attrs;
516 517 518
		msi_dev_attr->attr.mode = S_IRUGO;
		msi_dev_attr->show = msi_mode_show;
		++count;
519 520
	}

521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
	if (!msi_irq_group)
		goto error_attrs;
	msi_irq_group->name = "msi_irqs";
	msi_irq_group->attrs = msi_attrs;

	msi_irq_groups = kzalloc(sizeof(void *) * 2, GFP_KERNEL);
	if (!msi_irq_groups)
		goto error_irq_group;
	msi_irq_groups[0] = msi_irq_group;

	ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups);
	if (ret)
		goto error_irq_groups;
	pdev->msi_irq_groups = msi_irq_groups;

537 538
	return 0;

539 540 541 542 543 544 545 546 547 548 549 550 551
error_irq_groups:
	kfree(msi_irq_groups);
error_irq_group:
	kfree(msi_irq_group);
error_attrs:
	count = 0;
	msi_attr = msi_attrs[count];
	while (msi_attr) {
		msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
		kfree(msi_attr->name);
		kfree(msi_dev_attr);
		++count;
		msi_attr = msi_attrs[count];
552
	}
553
	kfree(msi_attrs);
554 555 556
	return ret;
}

557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587
/* Allocate and initialise the single descriptor for plain MSI. */
static struct msi_desc *msi_setup_entry(struct pci_dev *dev)
{
	struct msi_desc *entry;
	u16 control;

	/* MSI Entry Initialization */
	entry = alloc_msi_entry(dev);
	if (!entry)
		return NULL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);

	entry->msi_attrib.is_msix	= 0;
	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
	entry->msi_attrib.entry_nr	= 0;
	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.multi_cap	= (control & PCI_MSI_FLAGS_QMASK) >> 1;

	/* The mask register's offset depends on the address width */
	if (control & PCI_MSI_FLAGS_64BIT)
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;

	/* Save the initial mask status */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);

	return entry;
}

L
Linus Torvalds 已提交
588 589 590
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
591
 * @nvec: number of interrupts to allocate
L
Linus Torvalds 已提交
592
 *
593 594 595 596 597 598 599
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
L
Linus Torvalds 已提交
600 601
{
	struct msi_desc *entry;
602
	int ret;
603
	unsigned mask;
L
Linus Torvalds 已提交
604

605
	msi_set_enable(dev, 0);	/* Disable MSI during set up */
606

607
	entry = msi_setup_entry(dev);
608 609
	if (!entry)
		return -ENOMEM;
610

611
	/* All MSIs are unmasked by default, Mask them all */
612
	mask = msi_mask(entry->msi_attrib.multi_cap);
613 614
	msi_mask_irq(entry, mask, mask);

615
	list_add_tail(&entry->list, &dev->msi_list);
616

L
Linus Torvalds 已提交
617
	/* Configure MSI capability structure */
618
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
619
	if (ret) {
620
		msi_mask_irq(entry, mask, ~mask);
621
		free_msi_irqs(dev);
622
		return ret;
623
	}
624

625 626 627 628 629 630 631
	ret = populate_msi_sysfs(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

L
Linus Torvalds 已提交
632
	/* Set MSI enabled bits	 */
633
	pci_intx_for_msi(dev, 0);
634
	msi_set_enable(dev, 1);
635
	dev->msi_enabled = 1;
L
Linus Torvalds 已提交
636

637
	dev->irq = entry->irq;
L
Linus Torvalds 已提交
638 639 640
	return 0;
}

641
/* Map the device's MSI-X vector table (located via Table Offset/BIR). */
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	/* Low bits select the BAR, the rest is the offset within it */
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

656 657
/* Allocate one descriptor per requested MSI-X entry and queue them. */
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
			      struct msix_entry *entries, int nvec)
{
	struct msi_desc *desc;
	int i;

	for (i = 0; i < nvec; i++) {
		desc = alloc_msi_entry(dev);
		if (!desc) {
			if (!i)
				iounmap(base);
			else
				free_msi_irqs(dev);
			/* No enough memory. Don't try again */
			return -ENOMEM;
		}

		desc->msi_attrib.is_msix	= 1;
		desc->msi_attrib.is_64		= 1;
		desc->msi_attrib.entry_nr	= entries[i].entry;
		desc->msi_attrib.default_irq	= dev->irq;
		desc->mask_base			= base;

		list_add_tail(&desc->list, &dev->msi_list);
	}

	return 0;
}

685
static void msix_program_entries(struct pci_dev *dev,
686
				 struct msix_entry *entries)
687 688 689 690 691 692 693 694 695
{
	struct msi_desc *entry;
	int i = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		entries[i].vector = entry->irq;
696
		irq_set_msi_desc(entry->irq, entry);
697 698 699 700 701 702
		entry->masked = readl(entry->mask_base + offset);
		msix_mask_irq(entry, 1);
		i++;
	}
}

L
Linus Torvalds 已提交
703 704 705
/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
706 707
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
708
 *
709
 * Setup the MSI-X capability structure of device function with a
710 711
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
712 713 714 715
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
716
	int ret;
717
	u16 control;
L
Linus Torvalds 已提交
718 719
	void __iomem *base;

720
	/* Ensure MSI-X is disabled while it is set up */
721
	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
722

723
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
L
Linus Torvalds 已提交
724
	/* Request & Map MSI-X table region */
725
	base = msix_map_region(dev, msix_table_size(control));
726
	if (!base)
L
Linus Torvalds 已提交
727 728
		return -ENOMEM;

729
	ret = msix_setup_entries(dev, base, entries, nvec);
730 731
	if (ret)
		return ret;
732 733

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
734
	if (ret)
735
		goto out_avail;
736

737 738 739 740 741
	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
742 743
	msix_clear_and_set_ctrl(dev, 0,
				PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE);
744

745
	msix_program_entries(dev, entries);
746

747
	ret = populate_msi_sysfs(dev);
748 749
	if (ret)
		goto out_free;
750

751
	/* Set MSI-X enabled bits and unmask the function */
752
	pci_intx_for_msi(dev, 0);
753
	dev->msix_enabled = 1;
L
Linus Torvalds 已提交
754

755
	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0);
756

L
Linus Torvalds 已提交
757
	return 0;
758

759
out_avail:
760 761 762 763 764
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
765
		struct msi_desc *entry;
766 767 768 769 770 771 772 773 774 775
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

776
out_free:
777 778 779
	free_msi_irqs(dev);

	return ret;
L
Linus Torvalds 已提交
780 781
}

782
/**
783
 * pci_msi_supported - check whether MSI may be enabled on a device
784
 * @dev: pointer to the pci_dev data structure of MSI device function
785
 * @nvec: how many MSIs have been requested ?
786
 *
787
 * Look at global flags, the device itself, and its parent buses
788
 * to determine if MSI/-X are supported for the device. If MSI/-X is
789
 * supported return 1, else return 0.
790
 **/
791
static int pci_msi_supported(struct pci_dev *dev, int nvec)
792 793 794
{
	struct pci_bus *bus;

795
	/* MSI must be globally enabled and supported by the device */
796
	if (!pci_msi_enable)
797
		return 0;
798 799

	if (!dev || dev->no_msi || dev->current_state != PCI_D0)
800
		return 0;
801

802 803 804 805 806 807
	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
808
		return 0;
809

H
Hidetoshi Seto 已提交
810 811 812
	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
813 814 815 816
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
817 818
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
819
			return 0;
820

821
	return 1;
822 823
}

824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * This function returns the number of MSI vectors a device requested via
 * Multiple Message Capable register. It returns a negative errno if the
 * device is not capable sending MSI interrupts. Otherwise, the call succeeds
 * and returns a power of two, up to a maximum of 2^5 (32), according to the
 * MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	u16 msgctl;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	return 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
}
EXPORT_SYMBOL(pci_msi_vec_count);

849
void pci_msi_shutdown(struct pci_dev *dev)
L
Linus Torvalds 已提交
850
{
851 852
	struct msi_desc *desc;
	u32 mask;
L
Linus Torvalds 已提交
853

854
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
E
Eric W. Biederman 已提交
855 856
		return;

857 858 859
	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);

860
	msi_set_enable(dev, 0);
861
	pci_intx_for_msi(dev, 1);
862
	dev->msi_enabled = 0;
863

864
	/* Return the device with MSI unmasked as initial states */
865
	mask = msi_mask(desc->msi_attrib.multi_cap);
866
	/* Keep cached state to be restored */
867
	__msi_mask_irq(desc, mask, ~mask);
868 869

	/* Restore dev->irq to its default pin-assertion irq */
870
	dev->irq = desc->msi_attrib.default_irq;
871
}
872

H
Hidetoshi Seto 已提交
873
void pci_disable_msi(struct pci_dev *dev)
874 875 876 877 878
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
879
	free_msi_irqs(dev);
L
Linus Torvalds 已提交
880
}
881
EXPORT_SYMBOL(pci_disable_msi);
L
Linus Torvalds 已提交
882

883
/**
884
 * pci_msix_vec_count - return the number of device's MSI-X table entries
885
 * @dev: pointer to the pci_dev data structure of MSI-X device function
886 887 888 889 890 891
 * This function returns the number of device's MSI-X table entries and
 * therefore the number of MSI-X vectors device is capable of sending.
 * It returns a negative errno if the device is not capable of sending MSI-X
 * interrupts.
 **/
int pci_msix_vec_count(struct pci_dev *dev)
892 893 894
{
	u16 control;

895
	if (!dev->msix_cap)
896
		return -EINVAL;
897

898
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
899
	return msix_table_size(control);
900
}
901
EXPORT_SYMBOL(pci_msix_vec_count);
902

L
Linus Torvalds 已提交
903 904 905
/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
906
 * @entries: pointer to an array of MSI-X entries
907
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
908 909
 *
 * Setup the MSI-X capability structure of device function with the number
910
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
911 912
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
913
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
914
 * Or a return of > 0 indicates that driver request is exceeding the number
915 916
 * of irqs or MSI-X vectors available. Driver should use the returned value to
 * re-send its request.
L
Linus Torvalds 已提交
917
 **/
H
Hidetoshi Seto 已提交
918
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
L
Linus Torvalds 已提交
919
{
920
	int nr_entries;
E
Eric W. Biederman 已提交
921
	int i, j;
L
Linus Torvalds 已提交
922

923 924
	if (!pci_msi_supported(dev, nvec))
		return -EINVAL;
925

926 927 928
	if (!entries)
		return -EINVAL;

929 930 931
	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
L
Linus Torvalds 已提交
932
	if (nvec > nr_entries)
933
		return nr_entries;
L
Linus Torvalds 已提交
934 935 936 937 938 939 940 941 942 943

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
E
Eric W. Biederman 已提交
944
	WARN_ON(!!dev->msix_enabled);
945

946
	/* Check whether driver already requested for MSI irq */
H
Hidetoshi Seto 已提交
947
	if (dev->msi_enabled) {
948
		dev_info(&dev->dev, "can't enable MSI-X (MSI IRQ already assigned)\n");
L
Linus Torvalds 已提交
949 950
		return -EINVAL;
	}
951
	return msix_capability_init(dev, entries, nvec);
L
Linus Torvalds 已提交
952
}
953
EXPORT_SYMBOL(pci_enable_msix);
L
Linus Torvalds 已提交
954

H
Hidetoshi Seto 已提交
955
void pci_msix_shutdown(struct pci_dev *dev)
956
{
957 958
	struct msi_desc *entry;

959
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
E
Eric W. Biederman 已提交
960 961
		return;

962 963 964
	/* Return the device with MSI-X masked as initial states */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
965
		__msix_mask_irq(entry, 1);
966 967
	}

968
	msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
969
	pci_intx_for_msi(dev, 1);
970
	dev->msix_enabled = 0;
971
}
972

H
Hidetoshi Seto 已提交
973
void pci_disable_msix(struct pci_dev *dev)
974 975 976 977 978
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
979
	free_msi_irqs(dev);
L
Linus Torvalds 已提交
980
}
981
EXPORT_SYMBOL(pci_disable_msix);
L
Linus Torvalds 已提交
982

983 984 985 986
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
987

988 989 990 991 992 993 994
/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
995
{
996
	return pci_msi_enable;
997
}
998
EXPORT_SYMBOL(pci_msi_enabled);
999

1000
void pci_msi_init_pci_dev(struct pci_dev *dev)
1001
{
1002
	INIT_LIST_HEAD(&dev->msi_list);
1003 1004 1005 1006 1007

	/* Disable the msi hardware to avoid screaming interrupts
	 * during boot.  This is the power on reset default so
	 * usually this should be a noop.
	 */
1008 1009 1010 1011 1012 1013
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (dev->msi_cap)
		msi_set_enable(dev, 0);

	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (dev->msix_cap)
1014
		msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
1015
}
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030

/**
 * pci_enable_msi_range - configure device's MSI capability structure
 * @dev: device to configure
 * @minvec: minimal number of interrupts to configure
 * @maxvec: maximum number of interrupts to configure
 *
 * This function tries to allocate a maximum possible number of interrupts in a
 * range between @minvec and @maxvec. It returns a negative errno if an error
 * occurs. If it succeeds, it returns the actual number of interrupts allocated
 * and updates the @dev's irq member to the lowest new interrupt number;
 * the other interrupt numbers allocated to this device are consecutive.
 **/
int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
{
1031
	int nvec;
1032 1033
	int rc;

1034 1035
	if (!pci_msi_supported(dev, minvec))
		return -EINVAL;
1036 1037 1038 1039 1040 1041 1042 1043 1044 1045

	WARN_ON(!!dev->msi_enabled);

	/* Check whether driver already requested MSI-X irqs */
	if (dev->msix_enabled) {
		dev_info(&dev->dev,
			 "can't enable MSI (MSI-X already enabled)\n");
		return -EINVAL;
	}

1046 1047 1048
	if (maxvec < minvec)
		return -ERANGE;

1049 1050 1051 1052 1053 1054 1055 1056
	nvec = pci_msi_vec_count(dev);
	if (nvec < 0)
		return nvec;
	else if (nvec < minvec)
		return -EINVAL;
	else if (nvec > maxvec)
		nvec = maxvec;

1057
	do {
1058
		rc = msi_capability_init(dev, nvec);
1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
		if (rc < 0) {
			return rc;
		} else if (rc > 0) {
			if (rc < minvec)
				return -ENOSPC;
			nvec = rc;
		}
	} while (rc);

	return nvec;
}
EXPORT_SYMBOL(pci_enable_msi_range);

/**
 * pci_enable_msix_range - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @minvec: minimum number of MSI-X irqs requested
 * @maxvec: maximum number of MSI-X irqs requested
 *
 * Setup the MSI-X capability structure of device function with a maximum
 * possible number of interrupts in the range between @minvec and @maxvec
 * upon its software driver call to request for MSI-X mode enabled on its
 * hardware device function. It returns a negative errno if an error occurs.
 * If it succeeds, it returns the actual number of interrupts allocated and
 * indicates the successful configuration of MSI-X capability structure
 * with new allocated MSI-X interrupts.
 **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
			       int minvec, int maxvec)
{
	int nvec = maxvec;
	int rc;

	if (maxvec < minvec)
		return -ERANGE;

	/* Retry with the positive "available" count until it sticks */
	do {
		rc = pci_enable_msix(dev, entries, nvec);
		if (rc < 0) {
			return rc;
		} else if (rc > 0) {
			if (rc < minvec)
				return -ENOSPC;
			nvec = rc;
		}
	} while (rc);

	return nvec;
}
EXPORT_SYMBOL(pci_enable_msix_range);