msi.c 30.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
14
#include <linux/export.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
18
#include <linux/msi.h>
D
Dan Williams 已提交
19
#include <linux/smp.h>
H
Hidetoshi Seto 已提交
20 21
#include <linux/errno.h>
#include <linux/io.h>
22
#include <linux/slab.h>
L
Linus Torvalds 已提交
23 24 25 26 27

#include "pci.h"

/*
 * File-wide MSI switch.  Starts out set, is cleared by pci_no_msi() and is
 * reported to callers by pci_msi_enabled().
 */
static int pci_msi_enable = 1;

28 29 30
/*
 * Value of the QSIZE field of an MSI-X control word, plus one.  The
 * argument is parenthesized so that compound expressions such as
 * msix_table_size(a | b) are masked as a whole.
 */
#define msix_table_size(flags)	(((flags) & PCI_MSIX_FLAGS_QSIZE) + 1)


31 32
/* Arch hooks */

33 34
/*
 * Weak default for setting up one MSI descriptor: forward the request to
 * the msi_chip attached to the device's bus and, when that succeeds,
 * store the chip as the chip data of the descriptor's irq.
 *
 * Returns 0 on success, -EINVAL when the bus has no chip or the chip has
 * no setup_irq() callback, otherwise the negative value from setup_irq().
 */
int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	struct msi_chip *msi = dev->bus->msi;
	int rc;

	if (msi && msi->setup_irq) {
		rc = msi->setup_irq(msi, dev, desc);
		if (rc >= 0) {
			irq_set_chip_data(desc->irq, msi);
			rc = 0;
		}
		return rc;
	}

	return -EINVAL;
}

/*
 * Weak default for tearing down one irq: look up the msi_chip stored as
 * the irq's chip data and call its teardown_irq() callback.  Does nothing
 * when either the chip or the callback is missing.
 */
void __weak arch_teardown_msi_irq(unsigned int irq)
{
	struct msi_chip *msi = irq_get_chip_data(irq);

	if (msi && msi->teardown_irq)
		msi->teardown_irq(msi, irq);
}

60 61
/*
 * Weak default device check: defer to the check_device() callback of the
 * bus's msi_chip.  Returns 0 when there is no chip or no callback.
 */
int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	struct msi_chip *msi = dev->bus->msi;

	if (msi && msi->check_device)
		return msi->check_device(msi, dev, nvec, type);

	return 0;
}
69

70
/*
 * Weak default for setting up every descriptor on dev->msi_list by calling
 * arch_setup_msi_irq() on each in turn.
 *
 * Returns 0 on success, 1 when more than one plain MSI vector is requested,
 * the first negative value from arch_setup_msi_irq(), or -ENOSPC when that
 * hook returns a positive value.
 */
int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *desc;

	/*
	 * Multiple MSI is only available to architectures that override
	 * arch_setup_msi_irqs().
	 */
	if (nvec > 1 && type == PCI_CAP_ID_MSI)
		return 1;

	list_for_each_entry(desc, &dev->msi_list, list) {
		int rc = arch_setup_msi_irq(dev, desc);

		if (rc)
			return rc < 0 ? rc : -ENOSPC;
	}

	return 0;
}
92

93 94 95 96
/*
 * We have a default implementation available as a separate non-weak
 * function, as it is used by the Xen x86 PCI code
 */
97
void default_teardown_msi_irqs(struct pci_dev *dev)
98 99 100 101
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
102 103 104
		int i, nvec;
		if (entry->irq == 0)
			continue;
105 106 107 108
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
109 110
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
111 112 113
	}
}

114 115 116 117
/*
 * Weak hook for tearing down all MSI irqs of a device; the default simply
 * runs default_teardown_msi_irqs().
 */
void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
	/* Plain call: a void function must not "return" an expression. */
	default_teardown_msi_irqs(dev);
}
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135

/*
 * Rewrite the cached message of @irq.
 *
 * With MSI-X enabled the descriptor is searched for on dev->msi_list; with
 * plain MSI enabled it is fetched with irq_get_msi_desc().  Nothing is
 * written when no descriptor is found.
 */
void default_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	struct msi_desc *entry = NULL;

	if (dev->msix_enabled) {
		struct msi_desc *pos;

		/*
		 * Use a separate cursor: after a full pass without a match
		 * the cursor points at the list head's container, which is
		 * not a valid descriptor and must not be used.
		 */
		list_for_each_entry(pos, &dev->msi_list, list) {
			if (irq == pos->irq) {
				entry = pos;
				break;
			}
		}
	} else if (dev->msi_enabled) {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
		write_msi_msg(irq, &entry->msg);
}
136 137 138 139 140

/*
 * Weak hook for restoring the message of one irq; the default simply runs
 * default_restore_msi_irqs().
 */
void __weak arch_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	/* Plain call: a void function must not "return" an expression. */
	default_restore_msi_irqs(dev, irq);
}
141

142
static void msi_set_enable(struct pci_dev *dev, int enable)
143 144 145
{
	u16 control;

146
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
147 148 149
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
150
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
151 152
}

153 154 155 156
/*
 * Set (enable != 0) or clear (enable == 0) the enable bit in the MSI-X
 * flags word of @dev with a read-modify-write of config space.
 */
static void msix_set_enable(struct pci_dev *dev, int enable)
{
	const int flags_pos = dev->msix_cap + PCI_MSIX_FLAGS;
	u16 msgctl;

	pci_read_config_word(dev, flags_pos, &msgctl);
	if (enable)
		msgctl |= PCI_MSIX_FLAGS_ENABLE;
	else
		msgctl &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, flags_pos, msgctl);
}

164 165
/*
 * Build a mask with the low (1 << x) bits set.  From x == 5 on the shift
 * would reach the width of the type, so the result saturates at all ones.
 */
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	return x < 5 ? (1 << (1 << x)) - 1 : 0xffffffff;
}

172
/*
 * Vector mask derived from bits 3:1 of the MSI flags word, passed through
 * msi_mask().
 */
static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
	unsigned log2_vecs = (control >> 1) & 7;

	return msi_mask(log2_vecs);
}
M
Mitch Williams 已提交
176

177 178 179
/*
 * Vector mask derived from bits 6:4 of the MSI flags word, passed through
 * msi_mask().
 */
static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
{
	unsigned log2_vecs = (control >> 4) & 7;

	return msi_mask(log2_vecs);
}

182 183 184 185 186 187
/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
188
u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
L
Linus Torvalds 已提交
189
{
190
	u32 mask_bits = desc->masked;
L
Linus Torvalds 已提交
191

192
	if (!desc->msi_attrib.maskbit)
193
		return 0;
194 195 196 197

	mask_bits &= ~mask;
	mask_bits |= flag;
	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
198 199 200 201

	return mask_bits;
}

202 203 204 205 206
/*
 * Weak hook for updating MSI mask bits; the default forwards to
 * default_msi_mask_irq() and returns its result.
 */
__weak u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	u32 new_mask = default_msi_mask_irq(desc, mask, flag);

	return new_mask;
}

207 208
/*
 * Update the MSI mask bits through the arch hook and remember the
 * resulting value in desc->masked.
 */
static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	u32 new_mask = arch_msi_mask_irq(desc, mask, flag);

	desc->masked = new_mask;
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file.  This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
219
u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
220 221 222
{
	u32 mask_bits = desc->masked;
	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
223
						PCI_MSIX_ENTRY_VECTOR_CTRL;
224 225 226
	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (flag)
		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
227
	writel(mask_bits, desc->mask_base + offset);
228 229 230 231

	return mask_bits;
}

232 233 234 235 236
/*
 * Weak hook for masking or unmasking one MSI-X entry; the default forwards
 * to default_msix_mask_irq() and returns its result.
 */
__weak u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	u32 new_ctrl = default_msix_mask_irq(desc, flag);

	return new_ctrl;
}

237 238
/*
 * Mask or unmask one MSI-X entry through the arch hook and remember the
 * resulting control value in desc->masked.
 */
static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	u32 new_ctrl = arch_msix_mask_irq(desc, flag);

	desc->masked = new_ctrl;
}
241

242
/*
 * Mask (flag == 1) or unmask (flag == 0) the interrupt described by @data.
 *
 * MSI-X entries are updated and the write is then flushed with a read.
 * For plain MSI the bit position is the irq's distance from the device's
 * first irq.
 */
static void msi_set_mask_bit(struct irq_data *data, u32 flag)
{
	struct msi_desc *desc = irq_data_get_msi(data);
	unsigned shift;

	if (!desc->msi_attrib.is_msix) {
		shift = data->irq - desc->dev->irq;
		msi_mask_irq(desc, 1 << shift, flag << shift);
		return;
	}

	msix_mask_irq(desc, flag);
	readl(desc->mask_base);		/* Flush write to device */
}

255
void mask_msi_irq(struct irq_data *data)
256
{
257
	msi_set_mask_bit(data, 1);
258 259
}

260
void unmask_msi_irq(struct irq_data *data)
261
{
262
	msi_set_mask_bit(data, 0);
L
Linus Torvalds 已提交
263 264
}

265
void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
L
Linus Torvalds 已提交
266
{
267 268 269 270 271 272 273 274 275 276 277
	BUG_ON(entry->dev->current_state != PCI_D0);

	if (entry->msi_attrib.is_msix) {
		void __iomem *base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
278
		int pos = dev->msi_cap;
279 280
		u16 data;

281 282
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
283
		if (entry->msi_attrib.is_64) {
284 285
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
B
Bjorn Helgaas 已提交
286
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
287 288
		} else {
			msg->address_hi = 0;
B
Bjorn Helgaas 已提交
289
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
290 291 292 293 294 295 296
		}
		msg->data = data;
	}
}

/* Read the hardware message of @irq into @msg via the irq's descriptor. */
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__read_msi_msg(irq_get_msi_desc(irq), msg);
}

302
void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
303 304
{
	/* Assert that the cache is valid, assuming that
305 306 307
	 * valid messages are not all-zeroes. */
	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
		 entry->msg.data));
308

309
	*msg = entry->msg;
310
}
L
Linus Torvalds 已提交
311

312
/* Copy the cached message of @irq into @msg via the irq's descriptor. */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}

319
void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
Y
Yinghai Lu 已提交
320
{
321 322 323
	if (entry->dev->current_state != PCI_D0) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
324 325 326 327
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

328 329 330
		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
331
	} else {
332
		struct pci_dev *dev = entry->dev;
333
		int pos = dev->msi_cap;
334 335
		u16 msgctl;

336
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
337 338
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
339
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
340

341 342
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
343
		if (entry->msi_attrib.is_64) {
344 345
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
B
Bjorn Helgaas 已提交
346 347
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
348
		} else {
B
Bjorn Helgaas 已提交
349 350
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
351
		}
L
Linus Torvalds 已提交
352
	}
353
	entry->msg = *msg;
L
Linus Torvalds 已提交
354
}
355

Y
Yinghai Lu 已提交
356 357
/* Program and cache @msg for @irq via the irq's descriptor. */
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__write_msi_msg(irq_get_msi_desc(irq), msg);
}

363 364 365
/*
 * Release everything hanging off dev->msi_list and the sysfs attribute
 * groups created by populate_msi_sysfs().
 *
 * All irqs must already be free of handlers (enforced with BUG_ON).
 */
static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry, *tmp;
	struct attribute **msi_attrs;
	struct device_attribute *dev_attr;
	int count = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;

		if (!entry->irq)
			continue;
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			BUG_ON(irq_has_action(entry->irq + i));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
		/* The MSI-X table mapping is dropped with the last entry. */
		if (entry->msi_attrib.is_msix) {
			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
		}

		/*
		 * It is possible to get here after populate_msi_sysfs()
		 * failed, in which case the entries were never registered
		 * with sysfs and must not be unregistered.
		 */
		if (entry->kobj.parent) {
			kobject_del(&entry->kobj);
			kobject_put(&entry->kobj);
		}

		list_del(&entry->list);
		kfree(entry);
	}

	if (dev->msi_irq_groups) {
		sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups);
		msi_attrs = dev->msi_irq_groups[0]->attrs;
		/*
		 * dev->msi_list is empty by now, so it cannot be used to
		 * count the attributes.  Walk the NULL-terminated array
		 * built by populate_msi_sysfs() instead; otherwise every
		 * attribute and its name would be leaked.
		 */
		while (msi_attrs[count]) {
			dev_attr = container_of(msi_attrs[count],
						struct device_attribute, attr);
			kfree(dev_attr->attr.name);
			kfree(dev_attr);
			++count;
		}
		kfree(msi_attrs);
		kfree(dev->msi_irq_groups[0]);
		kfree(dev->msi_irq_groups);
		dev->msi_irq_groups = NULL;
	}
}
S
Satoru Takeuchi 已提交
421

422
static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
L
Linus Torvalds 已提交
423
{
424 425
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
L
Linus Torvalds 已提交
426 427
		return NULL;

428 429
	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;
L
Linus Torvalds 已提交
430

431
	return desc;
L
Linus Torvalds 已提交
432 433
}

434 435 436 437 438 439
/*
 * Call pci_intx() on behalf of the MSI code, except on devices flagged
 * with PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG, which are left untouched.
 */
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)
		return;

	pci_intx(dev, enable);
}

440
static void __pci_restore_msi_state(struct pci_dev *dev)
441 442
{
	u16 control;
443
	struct msi_desc *entry;
444

445 446 447
	if (!dev->msi_enabled)
		return;

448
	entry = irq_get_msi_desc(dev->irq);
449

450
	pci_intx_for_msi(dev, 0);
451
	msi_set_enable(dev, 0);
452
	arch_restore_msi_irqs(dev, dev->irq);
453

454
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
455
	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
456
	control &= ~PCI_MSI_FLAGS_QSIZE;
457
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
458
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
459 460 461
}

static void __pci_restore_msix_state(struct pci_dev *dev)
462 463
{
	struct msi_desc *entry;
464
	u16 control;
465

E
Eric W. Biederman 已提交
466 467
	if (!dev->msix_enabled)
		return;
468
	BUG_ON(list_empty(&dev->msi_list));
H
Hidetoshi Seto 已提交
469
	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
470
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
E
Eric W. Biederman 已提交
471

472
	/* route the table */
473
	pci_intx_for_msi(dev, 0);
474
	control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
475
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
476

477
	list_for_each_entry(entry, &dev->msi_list, list) {
478
		arch_restore_msi_irqs(dev, entry->irq);
479
		msix_mask_irq(entry, entry->masked);
480 481
	}

482
	control &= ~PCI_MSIX_FLAGS_MASKALL;
483
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
484
}
485 486 487 488 489 490

/*
 * Restore whichever of MSI and MSI-X is enabled on @dev; each helper
 * returns immediately when its mode is not enabled.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	/* MSI first, then MSI-X. */
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
492

493
static ssize_t msi_mode_show(struct device *dev, struct device_attribute *attr,
494 495
			     char *buf)
{
496 497 498 499
	struct pci_dev *pdev = to_pci_dev(dev);
	struct msi_desc *entry;
	unsigned long irq;
	int retval;
500

501 502 503
	retval = kstrtoul(attr->attr.name, 10, &irq);
	if (retval)
		return retval;
504

505 506 507 508 509 510 511
	list_for_each_entry(entry, &pdev->msi_list, list) {
		if (entry->irq == irq) {
			return sprintf(buf, "%s\n",
				       entry->msi_attrib.is_msix ? "msix" : "msi");
		}
	}
	return -ENODEV;
512 513 514 515
}

static int populate_msi_sysfs(struct pci_dev *pdev)
{
516 517 518 519 520
	struct attribute **msi_attrs;
	struct attribute *msi_attr;
	struct device_attribute *msi_dev_attr;
	struct attribute_group *msi_irq_group;
	const struct attribute_group **msi_irq_groups;
521
	struct msi_desc *entry;
522 523
	int ret = -ENOMEM;
	int num_msi = 0;
524 525
	int count = 0;

526 527 528 529 530 531
	/* Determine how many msi entries we have */
	list_for_each_entry(entry, &pdev->msi_list, list) {
		++num_msi;
	}
	if (!num_msi)
		return 0;
532

533 534 535 536
	/* Dynamically create the MSI attributes for the PCI device */
	msi_attrs = kzalloc(sizeof(void *) * (num_msi + 1), GFP_KERNEL);
	if (!msi_attrs)
		return -ENOMEM;
537
	list_for_each_entry(entry, &pdev->msi_list, list) {
538 539 540 541 542 543 544 545 546 547 548
		char *name = kmalloc(20, GFP_KERNEL);
		msi_dev_attr = kzalloc(sizeof(*msi_dev_attr), GFP_KERNEL);
		if (!msi_dev_attr)
			goto error_attrs;
		sprintf(name, "%d", entry->irq);
		sysfs_attr_init(&msi_dev_attr->attr);
		msi_dev_attr->attr.name = name;
		msi_dev_attr->attr.mode = S_IRUGO;
		msi_dev_attr->show = msi_mode_show;
		msi_attrs[count] = &msi_dev_attr->attr;
		++count;
549 550
	}

551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
	msi_irq_group = kzalloc(sizeof(*msi_irq_group), GFP_KERNEL);
	if (!msi_irq_group)
		goto error_attrs;
	msi_irq_group->name = "msi_irqs";
	msi_irq_group->attrs = msi_attrs;

	msi_irq_groups = kzalloc(sizeof(void *) * 2, GFP_KERNEL);
	if (!msi_irq_groups)
		goto error_irq_group;
	msi_irq_groups[0] = msi_irq_group;

	ret = sysfs_create_groups(&pdev->dev.kobj, msi_irq_groups);
	if (ret)
		goto error_irq_groups;
	pdev->msi_irq_groups = msi_irq_groups;

567 568
	return 0;

569 570 571 572 573 574 575 576 577 578 579 580 581
error_irq_groups:
	kfree(msi_irq_groups);
error_irq_group:
	kfree(msi_irq_group);
error_attrs:
	count = 0;
	msi_attr = msi_attrs[count];
	while (msi_attr) {
		msi_dev_attr = container_of(msi_attr, struct device_attribute, attr);
		kfree(msi_attr->name);
		kfree(msi_dev_attr);
		++count;
		msi_attr = msi_attrs[count];
582 583 584 585
	}
	return ret;
}

L
Linus Torvalds 已提交
586 587 588
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
589
 * @nvec: number of interrupts to allocate
L
Linus Torvalds 已提交
590
 *
591 592 593 594 595 596 597
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
L
Linus Torvalds 已提交
598 599
{
	struct msi_desc *entry;
600
	int ret;
L
Linus Torvalds 已提交
601
	u16 control;
602
	unsigned mask;
L
Linus Torvalds 已提交
603

604
	msi_set_enable(dev, 0);	/* Disable MSI during set up */
605

606
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
L
Linus Torvalds 已提交
607
	/* MSI Entry Initialization */
608
	entry = alloc_msi_entry(dev);
609 610
	if (!entry)
		return -ENOMEM;
611

H
Hidetoshi Seto 已提交
612
	entry->msi_attrib.is_msix	= 0;
613
	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
H
Hidetoshi Seto 已提交
614
	entry->msi_attrib.entry_nr	= 0;
615
	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
H
Hidetoshi Seto 已提交
616
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
617
	entry->msi_attrib.pos		= dev->msi_cap;
618

D
Dan Carpenter 已提交
619 620 621 622
	if (control & PCI_MSI_FLAGS_64BIT)
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
623 624 625 626 627 628
	/* All MSIs are unmasked by default, Mask them all */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
	mask = msi_capable_mask(control);
	msi_mask_irq(entry, mask, mask);

629
	list_add_tail(&entry->list, &dev->msi_list);
630

L
Linus Torvalds 已提交
631
	/* Configure MSI capability structure */
632
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
633
	if (ret) {
634
		msi_mask_irq(entry, mask, ~mask);
635
		free_msi_irqs(dev);
636
		return ret;
637
	}
638

639 640 641 642 643 644 645
	ret = populate_msi_sysfs(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

L
Linus Torvalds 已提交
646
	/* Set MSI enabled bits	 */
647
	pci_intx_for_msi(dev, 0);
648
	msi_set_enable(dev, 1);
649
	dev->msi_enabled = 1;
L
Linus Torvalds 已提交
650

651
	dev->irq = entry->irq;
L
Linus Torvalds 已提交
652 653 654
	return 0;
}

655
/*
 * Map @nr_entries entries of the device's MSI-X table.  The table register
 * in config space supplies both the BAR index and the offset within that
 * BAR.  Returns the result of ioremap_nocache().
 */
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
{
	resource_size_t table_phys;
	u32 table_reg;
	u8 bar;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_reg);
	bar = (u8)(table_reg & PCI_MSIX_TABLE_BIR);
	table_phys = pci_resource_start(dev, bar) +
		     (table_reg & PCI_MSIX_TABLE_OFFSET);

	return ioremap_nocache(table_phys, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

670 671
/*
 * Allocate one descriptor per requested MSI-X entry and queue it on
 * dev->msi_list.
 *
 * Returns 0 on success or -ENOMEM when an allocation fails.  On failure
 * the table mapping is released: directly when nothing has been queued
 * yet, otherwise through free_msi_irqs().
 */
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
			      struct msix_entry *entries, int nvec)
{
	struct msi_desc *desc;
	int idx = 0;

	while (idx < nvec) {
		desc = alloc_msi_entry(dev);
		if (!desc) {
			if (idx)
				free_msi_irqs(dev);
			else
				iounmap(base);
			/* No enough memory. Don't try again */
			return -ENOMEM;
		}

		desc->msi_attrib.is_msix	= 1;
		desc->msi_attrib.is_64		= 1;
		desc->msi_attrib.entry_nr	= entries[idx].entry;
		desc->msi_attrib.default_irq	= dev->irq;
		desc->msi_attrib.pos		= dev->msix_cap;
		desc->mask_base			= base;

		list_add_tail(&desc->list, &dev->msi_list);
		idx++;
	}

	return 0;
}

700
static void msix_program_entries(struct pci_dev *dev,
701
				 struct msix_entry *entries)
702 703 704 705 706 707 708 709 710
{
	struct msi_desc *entry;
	int i = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		entries[i].vector = entry->irq;
711
		irq_set_msi_desc(entry->irq, entry);
712 713 714 715 716 717
		entry->masked = readl(entry->mask_base + offset);
		msix_mask_irq(entry, 1);
		i++;
	}
}

L
Linus Torvalds 已提交
718 719 720
/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
721 722
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
723
 *
724
 * Setup the MSI-X capability structure of device function with a
725 726
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
727 728 729 730
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
731
	int ret;
732
	u16 control;
L
Linus Torvalds 已提交
733 734
	void __iomem *base;

735
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
736 737 738

	/* Ensure MSI-X is disabled while it is set up */
	control &= ~PCI_MSIX_FLAGS_ENABLE;
739
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
740

L
Linus Torvalds 已提交
741
	/* Request & Map MSI-X table region */
742
	base = msix_map_region(dev, msix_table_size(control));
743
	if (!base)
L
Linus Torvalds 已提交
744 745
		return -ENOMEM;

746
	ret = msix_setup_entries(dev, base, entries, nvec);
747 748
	if (ret)
		return ret;
749 750

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
751
	if (ret)
752
		goto out_avail;
753

754 755 756 757 758 759
	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
760
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
761

762
	msix_program_entries(dev, entries);
763

764
	ret = populate_msi_sysfs(dev);
765 766
	if (ret)
		goto out_free;
767

768
	/* Set MSI-X enabled bits and unmask the function */
769
	pci_intx_for_msi(dev, 0);
770
	dev->msix_enabled = 1;
L
Linus Torvalds 已提交
771

772
	control &= ~PCI_MSIX_FLAGS_MASKALL;
773
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
774

L
Linus Torvalds 已提交
775
	return 0;
776

777
out_avail:
778 779 780 781 782
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
783
		struct msi_desc *entry;
784 785 786 787 788 789 790 791 792 793
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

794
out_free:
795 796 797
	free_msi_irqs(dev);

	return ret;
L
Linus Torvalds 已提交
798 799
}

800
/**
801
 * pci_msi_check_device - check whether MSI may be enabled on a device
802
 * @dev: pointer to the pci_dev data structure of MSI device function
803
 * @nvec: how many MSIs have been requested ?
804
 * @type: are we checking for MSI or MSI-X ?
805
 *
806
 * Look at global flags, the device itself, and its parent buses
807 808
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
809
 **/
H
Hidetoshi Seto 已提交
810
static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
811 812
{
	struct pci_bus *bus;
813
	int ret;
814

815
	/* MSI must be globally enabled and supported by the device */
816 817 818
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

819 820 821 822 823 824 825 826
	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

H
Hidetoshi Seto 已提交
827 828 829
	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
830 831 832 833
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
834 835 836 837
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

838 839 840 841
	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

842 843 844
	return 0;
}

845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
/**
 * pci_msi_vec_count - Return the number of MSI vectors a device can send
 * @dev: device to report about
 *
 * Returns the number of MSI vectors the device requested via the Multiple
 * Message Capable register, or -EINVAL when the device has no MSI
 * capability.  On success the value is a power of two, up to a maximum of
 * 2^5 (32) according to the MSI specification.
 **/
int pci_msi_vec_count(struct pci_dev *dev)
{
	u16 flags;

	if (!dev->msi_cap)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &flags);

	return 1 << ((flags & PCI_MSI_FLAGS_QMASK) >> 1);
}
EXPORT_SYMBOL(pci_msi_vec_count);

L
Linus Torvalds 已提交
870
/**
871 872 873
 * pci_enable_msi_block - configure device's MSI capability structure
 * @dev: device to configure
 * @nvec: number of interrupts to configure
L
Linus Torvalds 已提交
874
 *
875 876 877 878 879 880 881 882
 * Allocate IRQs for a device with the MSI capability.
 * This function returns a negative errno if an error occurs.  If it
 * is unable to allocate the number of interrupts requested, it returns
 * the number of interrupts it might be able to allocate.  If it successfully
 * allocates at least the number of interrupts requested, it returns 0 and
 * updates the @dev's irq member to the lowest new interrupt number; the
 * other interrupt numbers allocated to this device are consecutive.
 */
883
int pci_enable_msi_block(struct pci_dev *dev, int nvec)
L
Linus Torvalds 已提交
884
{
885
	int status, maxvec;
886

887
	if (dev->current_state != PCI_D0)
888
		return -EINVAL;
889

890 891 892
	maxvec = pci_msi_vec_count(dev);
	if (maxvec < 0)
		return maxvec;
893 894
	if (nvec > maxvec)
		return maxvec;
L
Linus Torvalds 已提交
895

896
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
897 898
	if (status)
		return status;
L
Linus Torvalds 已提交
899

E
Eric W. Biederman 已提交
900
	WARN_ON(!!dev->msi_enabled);
L
Linus Torvalds 已提交
901

902
	/* Check whether driver already requested MSI-X irqs */
903
	if (dev->msix_enabled) {
904 905
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
906
		return -EINVAL;
L
Linus Torvalds 已提交
907
	}
908 909

	status = msi_capability_init(dev, nvec);
L
Linus Torvalds 已提交
910 911
	return status;
}
912
EXPORT_SYMBOL(pci_enable_msi_block);
L
Linus Torvalds 已提交
913

914
void pci_msi_shutdown(struct pci_dev *dev)
L
Linus Torvalds 已提交
915
{
916 917 918
	struct msi_desc *desc;
	u32 mask;
	u16 ctrl;
L
Linus Torvalds 已提交
919

920
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
E
Eric W. Biederman 已提交
921 922
		return;

923 924 925
	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);

926
	msi_set_enable(dev, 0);
927
	pci_intx_for_msi(dev, 1);
928
	dev->msi_enabled = 0;
929

930
	/* Return the device with MSI unmasked as initial states */
931
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
932
	mask = msi_capable_mask(ctrl);
933
	/* Keep cached state to be restored */
934
	arch_msi_mask_irq(desc, mask, ~mask);
935 936

	/* Restore dev->irq to its default pin-assertion irq */
937
	dev->irq = desc->msi_attrib.default_irq;
938
}
939

H
Hidetoshi Seto 已提交
940
void pci_disable_msi(struct pci_dev *dev)
941 942 943 944 945
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
946
	free_msi_irqs(dev);
L
Linus Torvalds 已提交
947
}
948
EXPORT_SYMBOL(pci_disable_msi);
L
Linus Torvalds 已提交
949

950
/**
951
 * pci_msix_vec_count - return the number of device's MSI-X table entries
952
 * @dev: pointer to the pci_dev data structure of MSI-X device function
953 954 955 956 957 958 959

 * This function returns the number of device's MSI-X table entries and
 * therefore the number of MSI-X vectors device is capable of sending.
 * It returns a negative errno if the device is not capable of sending MSI-X
 * interrupts.
 **/
int pci_msix_vec_count(struct pci_dev *dev)
960 961 962
{
	u16 control;

963
	if (!dev->msix_cap)
964
		return -EINVAL;
965

966
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
967
	return msix_table_size(control);
968
}
969
EXPORT_SYMBOL(pci_msix_vec_count);
970

L
Linus Torvalds 已提交
971 972 973
/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
974
 * @entries: pointer to an array of MSI-X entries
975
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
976 977
 *
 * Setup the MSI-X capability structure of device function with the number
978
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
979 980
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
981
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
982
 * Or a return of > 0 indicates that driver request is exceeding the number
983 984
 * of irqs or MSI-X vectors available. Driver should use the returned value to
 * re-send its request.
L
Linus Torvalds 已提交
985
 **/
H
Hidetoshi Seto 已提交
986
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
L
Linus Torvalds 已提交
987
{
988
	int status, nr_entries;
E
Eric W. Biederman 已提交
989
	int i, j;
L
Linus Torvalds 已提交
990

991
	if (!entries || !dev->msix_cap || dev->current_state != PCI_D0)
H
Hidetoshi Seto 已提交
992
		return -EINVAL;
L
Linus Torvalds 已提交
993

994 995 996 997
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

998 999 1000
	nr_entries = pci_msix_vec_count(dev);
	if (nr_entries < 0)
		return nr_entries;
L
Linus Torvalds 已提交
1001
	if (nvec > nr_entries)
1002
		return nr_entries;
L
Linus Torvalds 已提交
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
E
Eric W. Biederman 已提交
1013
	WARN_ON(!!dev->msix_enabled);
1014

1015
	/* Check whether driver already requested for MSI irq */
H
Hidetoshi Seto 已提交
1016
	if (dev->msi_enabled) {
1017 1018
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
L
Linus Torvalds 已提交
1019 1020 1021 1022 1023
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
1024
EXPORT_SYMBOL(pci_enable_msix);
L
Linus Torvalds 已提交
1025

H
Hidetoshi Seto 已提交
1026
void pci_msix_shutdown(struct pci_dev *dev)
1027
{
1028 1029
	struct msi_desc *entry;

1030
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
E
Eric W. Biederman 已提交
1031 1032
		return;

1033 1034 1035
	/* Return the device with MSI-X masked as initial states */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
1036
		arch_msix_mask_irq(entry, 1);
1037 1038
	}

1039
	msix_set_enable(dev, 0);
1040
	pci_intx_for_msi(dev, 1);
1041
	dev->msix_enabled = 0;
1042
}
1043

H
Hidetoshi Seto 已提交
1044
void pci_disable_msix(struct pci_dev *dev)
1045 1046 1047 1048 1049
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
1050
	free_msi_irqs(dev);
L
Linus Torvalds 已提交
1051
}
1052
EXPORT_SYMBOL(pci_disable_msix);
L
Linus Torvalds 已提交
1053 1054

/**
1055
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
1056 1057
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
1058
 * Being called during hotplug remove, from which the device function
1059
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
1060 1061 1062
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
H
Hidetoshi Seto 已提交
1063
void msi_remove_pci_irq_vectors(struct pci_dev *dev)
L
Linus Torvalds 已提交
1064 1065
{
	if (!pci_msi_enable || !dev)
H
Hidetoshi Seto 已提交
1066
		return;
L
Linus Torvalds 已提交
1067

1068 1069
	if (dev->msi_enabled || dev->msix_enabled)
		free_msi_irqs(dev);
L
Linus Torvalds 已提交
1070 1071
}

1072 1073 1074 1075
/**
 * pci_no_msi - turn off MSI support globally
 *
 * Clears pci_msi_enable, the flag that pci_msi_enabled() returns and that
 * the MSI-X shutdown/disable paths in this file test before doing any work.
 **/
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
1076

1077 1078 1079 1080 1081 1082 1083
/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
1084
{
1085
	return pci_msi_enable;
1086
}
1087
EXPORT_SYMBOL(pci_msi_enabled);
1088

1089
void pci_msi_init_pci_dev(struct pci_dev *dev)
1090
{
1091
	INIT_LIST_HEAD(&dev->msi_list);
1092 1093 1094 1095 1096

	/* Disable the msi hardware to avoid screaming interrupts
	 * during boot.  This is the power on reset default so
	 * usually this should be a noop.
	 */
1097 1098 1099 1100 1101 1102 1103
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (dev->msi_cap)
		msi_set_enable(dev, 0);

	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (dev->msix_cap)
		msix_set_enable(dev, 0);
1104
}
1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178

/**
 * pci_enable_msi_range - configure device's MSI capability structure
 * @dev: device to configure
 * @minvec: minimal number of interrupts to configure
 * @maxvec: maximum number of interrupts to configure
 *
 * Tries to allocate as many interrupts as possible within the range
 * @minvec..@maxvec, starting from @maxvec and retrying with the smaller
 * count suggested by pci_enable_msi_block() each time it returns a
 * positive value.
 *
 * Return: the number of interrupts actually allocated on success, in which
 * case @dev's irq member is updated to the lowest new interrupt number and
 * the other interrupt numbers allocated to this device are consecutive;
 * -ERANGE if @maxvec < @minvec; -ENOSPC if fewer than @minvec interrupts
 * can be had; or any negative errno from pci_enable_msi_block().
 **/
int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
{
	int count = maxvec;
	int ret;

	if (maxvec < minvec)
		return -ERANGE;

	for (;;) {
		ret = pci_enable_msi_block(dev, count);
		if (ret == 0)
			return count;		/* got exactly @count */
		if (ret < 0)
			return ret;		/* hard failure */
		if (ret < minvec)
			return -ENOSPC;		/* suggestion is below the floor */
		count = ret;			/* retry with the suggested count */
	}
}
EXPORT_SYMBOL(pci_enable_msi_range);

/**
 * pci_enable_msix_range - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @minvec: minimum number of MSI-X irqs requested
 * @maxvec: maximum number of MSI-X irqs requested
 *
 * Sets up the MSI-X capability structure of the device function with the
 * largest possible number of interrupts in the range @minvec..@maxvec.
 * Starts from @maxvec and retries with the smaller count suggested by
 * pci_enable_msix() each time it returns a positive value.
 *
 * Return: the number of interrupts actually allocated once the MSI-X
 * capability structure has been configured with the newly allocated
 * interrupts; -ERANGE if @maxvec < @minvec; -ENOSPC if fewer than @minvec
 * interrupts can be had; or any negative errno from pci_enable_msix().
 **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
			       int minvec, int maxvec)
{
	int count = maxvec;
	int ret;

	if (maxvec < minvec)
		return -ERANGE;

	for (;;) {
		ret = pci_enable_msix(dev, entries, count);
		if (ret == 0)
			return count;		/* got exactly @count */
		if (ret < 0)
			return ret;		/* hard failure */
		if (ret < minvec)
			return -ENOSPC;		/* suggestion is below the floor */
		count = ret;			/* retry with the suggested count */
	}
}
EXPORT_SYMBOL(pci_enable_msix_range);