/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

#include <linux/err.h>
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/msi.h>
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/slab.h>

#include "pci.h"

static int pci_msi_enable = 1;

#define msix_table_size(flags)	((flags & PCI_MSIX_FLAGS_QSIZE) + 1)


/* Arch hooks */

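/*
 * These __weak defaults route MSI setup/teardown through an optional
 * per-bus msi_chip (dev->bus->msi); architectures or platforms with
 * their own MSI machinery override them.
 */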
int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	struct msi_chip *chip = dev->bus->msi;
	int err;

	if (!chip || !chip->setup_irq)
		return -EINVAL;

	err = chip->setup_irq(chip, dev, desc);
	if (err < 0)
		return err;

	irq_set_chip_data(desc->irq, chip);

	return 0;
}

void __weak arch_teardown_msi_irq(unsigned int irq)
{
	struct msi_chip *chip = irq_get_chip_data(irq);

	if (!chip || !chip->teardown_irq)
		return;

	chip->teardown_irq(chip, irq);
}

int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	struct msi_chip *chip = dev->bus->msi;

	if (!chip || !chip->check_device)
		return 0;

	return chip->check_device(chip, dev, nvec, type);
}

int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *entry;
	int ret;

	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (type == PCI_CAP_ID_MSI && nvec > 1)
		return 1;

	list_for_each_entry(entry, &dev->msi_list, list) {
		ret = arch_setup_msi_irq(dev, entry);
		if (ret < 0)
			return ret;
		if (ret > 0)
			return -ENOSPC;
	}

	return 0;
}

/*
 * We have a default implementation available as a separate non-weak
 * function, as it is used by the Xen x86 PCI code
 */
void default_teardown_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;
		if (entry->irq == 0)
			continue;
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
	}
}

void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
	return default_teardown_msi_irqs(dev);
}

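/*
 * Write the cached MSI message for one vector back to the device.
 * Used by arch_restore_msi_irqs() below when MSI/MSI-X state has to be
 * re-programmed, e.g. after the device lost its configuration.
 */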
void default_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	struct msi_desc *entry;

	entry = NULL;
	if (dev->msix_enabled) {
		list_for_each_entry(entry, &dev->msi_list, list) {
			if (irq == entry->irq)
				break;
		}
	} else if (dev->msi_enabled) {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
		write_msi_msg(irq, &entry->msg);
}

void __weak arch_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	return default_restore_msi_irqs(dev, irq);
}

static void msi_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void msix_set_enable(struct pci_dev *dev, int enable)
{
	u16 control;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	control &= ~PCI_MSIX_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
}

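/*
 * The Multiple Message Capable/Enable fields of the MSI Message Control
 * register encode the vector count as a power of two (0 => 1 vector,
 * 1 => 2, ..., 5 => 32).  msi_mask(x) returns a bitmask covering 2^x
 * vectors, e.g. msi_mask(3) == 0xff for an eight-vector function.
 */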
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x >= 5)
		return 0xffffffff;
	return (1 << (1 << x)) - 1;
}

static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
	return msi_mask((control >> 1) & 7);
}

static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
{
	return msi_mask((control >> 4) & 7);
}

/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
u32 default_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	u32 mask_bits = desc->masked;

	if (!desc->msi_attrib.maskbit)
		return 0;

	mask_bits &= ~mask;
	mask_bits |= flag;
	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);

	return mask_bits;
}

__weak u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	return default_msi_mask_irq(desc, mask, flag);
}

static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	desc->masked = arch_msi_mask_irq(desc, mask, flag);
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file.  This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
u32 default_msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	u32 mask_bits = desc->masked;
	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;
	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (flag)
		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
	writel(mask_bits, desc->mask_base + offset);

	return mask_bits;
}

__weak u32 arch_msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	return default_msix_mask_irq(desc, flag);
}

static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	desc->masked = arch_msix_mask_irq(desc, flag);
}

static void msi_set_mask_bit(struct irq_data *data, u32 flag)
{
	struct msi_desc *desc = irq_data_get_msi(data);

	if (desc->msi_attrib.is_msix) {
		msix_mask_irq(desc, flag);
		readl(desc->mask_base);		/* Flush write to device */
	} else {
		unsigned offset = data->irq - desc->dev->irq;
		msi_mask_irq(desc, 1 << offset, flag << offset);
	}
}

void mask_msi_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 1);
}

void unmask_msi_irq(struct irq_data *data)
{
	msi_set_mask_bit(data, 0);
}

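/*
 * The MSI message (address/data pair) lives in the function's config
 * space; the MSI-X message lives in the memory-mapped vector table at
 * mask_base.  The helpers below read and write it in either location.
 */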
void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	BUG_ON(entry->dev->current_state != PCI_D0);

	if (entry->msi_attrib.is_msix) {
		void __iomem *base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
		int pos = dev->msi_cap;
		u16 data;

		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
		}
		msg->data = data;
	}
}

void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__read_msi_msg(entry, msg);
}

void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	/* Assert that the cache is valid, assuming that
	 * valid messages are not all-zeroes. */
	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
		 entry->msg.data));

	*msg = entry->msg;
}

void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__get_cached_msi_msg(entry, msg);
}

void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
	if (entry->dev->current_state != PCI_D0) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
		int pos = dev->msi_cap;
		u16 msgctl;

		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);

		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
		} else {
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
		}
	}
	entry->msg = *msg;
}

void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	struct msi_desc *entry = irq_get_msi_desc(irq);

	__write_msi_msg(entry, msg);
}

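/*
 * Tear down every MSI/MSI-X vector of a device: check that no handler
 * is still attached, let the arch free the irqs, then release the
 * msi_desc entries (unmapping the MSI-X table with the last one) and
 * their sysfs kobjects.
 */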
static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *entry, *tmp;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int i, nvec;
		if (!entry->irq)
			continue;
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
		for (i = 0; i < nvec; i++)
			BUG_ON(irq_has_action(entry->irq + i));
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(entry, tmp, &dev->msi_list, list) {
		if (entry->msi_attrib.is_msix) {
			if (list_is_last(&entry->list, &dev->msi_list))
				iounmap(entry->mask_base);
		}

		/*
		 * It's possible to get into this path when
		 * populate_msi_sysfs() fails, which means the entries
		 * were not registered with sysfs.  In that case don't
		 * unregister them.
		 */
		if (entry->kobj.parent) {
			kobject_del(&entry->kobj);
			kobject_put(&entry->kobj);
		}

		list_del(&entry->list);
		kfree(entry);
	}
}

static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
{
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
		return NULL;

	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;

	return desc;
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG))
		pci_intx(dev, enable);
}

static void __pci_restore_msi_state(struct pci_dev *dev)
{
	u16 control;
	struct msi_desc *entry;

	if (!dev->msi_enabled)
		return;

	entry = irq_get_msi_desc(dev->irq);

	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, 0);
	arch_restore_msi_irqs(dev, dev->irq);

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
	control &= ~PCI_MSI_FLAGS_QSIZE;
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
{
	struct msi_desc *entry;
	u16 control;

	if (!dev->msix_enabled)
		return;
	BUG_ON(list_empty(&dev->msi_list));
	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);

	/* route the table */
	pci_intx_for_msi(dev, 0);
	control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);

	list_for_each_entry(entry, &dev->msi_list, list) {
		arch_restore_msi_irqs(dev, entry->irq);
		msix_mask_irq(entry, entry->masked);
	}

	control &= ~PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
}

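/*
 * Re-program the device's MSI/MSI-X enable bits, masks and cached
 * messages from the kernel's saved state, e.g. when restoring a device
 * that lost its configuration across suspend/resume or a reset.
 */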
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
EXPORT_SYMBOL_GPL(pci_restore_msi_state);


#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)

struct msi_attribute {
	struct attribute        attr;
	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
			char *buf);
	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
			 const char *buf, size_t count);
};

static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
			     char *buf)
{
	return sprintf(buf, "%s\n", entry->msi_attrib.is_msix ? "msix" : "msi");
}

static ssize_t msi_irq_attr_show(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	struct msi_attribute *attribute = to_msi_attr(attr);
	struct msi_desc *entry = to_msi_desc(kobj);

	if (!attribute->show)
		return -EIO;

	return attribute->show(entry, attribute, buf);
}

static const struct sysfs_ops msi_irq_sysfs_ops = {
	.show = msi_irq_attr_show,
};

static struct msi_attribute mode_attribute =
	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);


static struct attribute *msi_irq_default_attrs[] = {
	&mode_attribute.attr,
	NULL
};

static void msi_kobj_release(struct kobject *kobj)
{
	struct msi_desc *entry = to_msi_desc(kobj);

	pci_dev_put(entry->dev);
}

static struct kobj_type msi_irq_ktype = {
	.release = msi_kobj_release,
	.sysfs_ops = &msi_irq_sysfs_ops,
	.default_attrs = msi_irq_default_attrs,
};

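/*
 * Create the per-device "msi_irqs" kset and one kobject per allocated
 * vector; each exposes a "mode" attribute reporting "msi" or "msix".
 * On failure, any kobjects added so far are removed again.
 */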
static int populate_msi_sysfs(struct pci_dev *pdev)
{
	struct msi_desc *entry;
	struct kobject *kobj;
	int ret;
	int count = 0;

	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
	if (!pdev->msi_kset)
		return -ENOMEM;

	list_for_each_entry(entry, &pdev->msi_list, list) {
		kobj = &entry->kobj;
		kobj->kset = pdev->msi_kset;
		pci_dev_get(pdev);
		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
				     "%u", entry->irq);
		if (ret)
			goto out_unroll;

		count++;
	}

	return 0;

out_unroll:
	list_for_each_entry(entry, &pdev->msi_list, list) {
		if (!count)
			break;
		kobject_del(&entry->kobj);
		kobject_put(&entry->kobj);
		count--;
	}
	return ret;
}

/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: number of interrupts to allocate
 *
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
{
	struct msi_desc *entry;
	int ret;
	u16 control;
	unsigned mask;

	msi_set_enable(dev, 0);	/* Disable MSI during set up */

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
	/* MSI Entry Initialization */
	entry = alloc_msi_entry(dev);
	if (!entry)
		return -ENOMEM;

	entry->msi_attrib.is_msix	= 0;
	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
	entry->msi_attrib.entry_nr	= 0;
	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
	entry->msi_attrib.pos		= dev->msi_cap;

	if (control & PCI_MSI_FLAGS_64BIT)
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
	/* All MSIs are unmasked by default, Mask them all */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
	mask = msi_capable_mask(control);
	msi_mask_irq(entry, mask, mask);

	list_add_tail(&entry->list, &dev->msi_list);

	/* Configure MSI capability structure */
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	ret = populate_msi_sysfs(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

	/* Set MSI enabled bits	 */
	pci_intx_for_msi(dev, 0);
	msi_set_enable(dev, 1);
	dev->msi_enabled = 1;

	dev->irq = entry->irq;
	return 0;
}

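/*
 * The MSI-X Table Offset/BIR register says which BAR holds the vector
 * table and at what offset; map just enough of that BAR to cover
 * nr_entries table slots.
 */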
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
{
	resource_size_t phys_addr;
	u32 table_offset;
	u8 bir;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE,
			      &table_offset);
	bir = (u8)(table_offset & PCI_MSIX_TABLE_BIR);
	table_offset &= PCI_MSIX_TABLE_OFFSET;
	phys_addr = pci_resource_start(dev, bir) + table_offset;

	return ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

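/*
 * Allocate one msi_desc per requested MSI-X entry and queue it on
 * dev->msi_list; every entry shares the mapped table (mask_base).
 */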
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
			      struct msix_entry *entries, int nvec)
{
	struct msi_desc *entry;
	int i;

	for (i = 0; i < nvec; i++) {
		entry = alloc_msi_entry(dev);
		if (!entry) {
			if (!i)
				iounmap(base);
			else
				free_msi_irqs(dev);
			/* Not enough memory. Don't try again. */
			return -ENOMEM;
		}

		entry->msi_attrib.is_msix	= 1;
		entry->msi_attrib.is_64		= 1;
		entry->msi_attrib.entry_nr	= entries[i].entry;
		entry->msi_attrib.default_irq	= dev->irq;
		entry->msi_attrib.pos		= dev->msix_cap;
		entry->mask_base		= base;

		list_add_tail(&entry->list, &dev->msi_list);
	}

	return 0;
}

static void msix_program_entries(struct pci_dev *dev,
				 struct msix_entry *entries)
{
	struct msi_desc *entry;
	int i = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		entries[i].vector = entry->irq;
		irq_set_msi_desc(entry->irq, entry);
		entry->masked = readl(entry->mask_base + offset);
		msix_mask_irq(entry, 1);
		i++;
	}
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
 *
 * Setup the MSI-X capability structure of the device function with the
 * requested number of MSI-X irqs.  A return of zero indicates the
 * successful setup of the requested MSI-X entries with allocated irqs;
 * a non-zero return indicates a failure.
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	int ret;
	u16 control;
	void __iomem *base;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);

	/* Ensure MSI-X is disabled while it is set up */
	control &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);

	/* Request & Map MSI-X table region */
	base = msix_map_region(dev, msix_table_size(control));
	if (!base)
		return -ENOMEM;

	ret = msix_setup_entries(dev, base, entries, nvec);
	if (ret)
		return ret;

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
	if (ret)
		goto error;

	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);

	msix_program_entries(dev, entries);

	ret = populate_msi_sysfs(dev);
	if (ret) {
		ret = 0;
		goto error;
	}

	/* Set MSI-X enabled bits and unmask the function */
	pci_intx_for_msi(dev, 0);
	dev->msix_enabled = 1;

	control &= ~PCI_MSIX_FLAGS_MASKALL;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);

	return 0;

error:
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
		struct msi_desc *entry;
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

	free_msi_irqs(dev);

	return ret;
}

/**
 * pci_msi_check_device - check whether MSI may be enabled on a device
 * @dev: pointer to the pci_dev data structure of MSI device function
 * @nvec: how many MSIs have been requested?
 * @type: are we checking for MSI or MSI-X?
 *
 * Look at global flags, the device itself, and its parent buses
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
 **/
static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	struct pci_bus *bus;
	int ret;

	/* MSI must be globally enabled and supported by the device */
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

	/*
	 * You can't ask to have 0 or fewer MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

	return 0;
}

/**
 * pci_enable_msi_block - configure device's MSI capability structure
 * @dev: device to configure
 * @nvec: number of interrupts to configure
 *
 * Allocate IRQs for a device with the MSI capability.
 * This function returns a negative errno if an error occurs.  If it
 * is unable to allocate the number of interrupts requested, it returns
 * the number of interrupts it might be able to allocate.  If it successfully
 * allocates at least the number of interrupts requested, it returns 0 and
 * updates the @dev's irq member to the lowest new interrupt number; the
 * other interrupt numbers allocated to this device are consecutive.
 */
int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
{
	int status, maxvec;
	u16 msgctl;

	if (!dev->msi_cap || dev->current_state != PCI_D0)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
	if (nvec > maxvec)
		return maxvec;

	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
	if (status)
		return status;

	WARN_ON(!!dev->msi_enabled);

	/* Check whether driver already requested MSI-X irqs */
	if (dev->msix_enabled) {
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
		return -EINVAL;
	}

	status = msi_capability_init(dev, nvec);
	return status;
}
EXPORT_SYMBOL(pci_enable_msi_block);

int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
{
	int ret, nvec;
	u16 msgctl;

	if (!dev->msi_cap || dev->current_state != PCI_D0)
		return -EINVAL;

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);

	if (maxvec)
		*maxvec = ret;

	do {
		nvec = ret;
		ret = pci_enable_msi_block(dev, nvec);
	} while (ret > 0);

	if (ret < 0)
		return ret;
	return nvec;
}
EXPORT_SYMBOL(pci_enable_msi_block_auto);

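/*
 * Disable MSI on the device and restore the default pin-assertion irq,
 * but leave the allocated vectors in place; pci_disable_msi() below does
 * the full teardown by also freeing the irqs and sysfs entries.
 */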
void pci_msi_shutdown(struct pci_dev *dev)
{
	struct msi_desc *desc;
	u32 mask;
	u16 ctrl;

	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);

	msi_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msi_enabled = 0;

	/* Return the device with MSI unmasked as initial states */
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
	mask = msi_capable_mask(ctrl);
	/* Keep cached state to be restored */
	arch_msi_mask_irq(desc, mask, ~mask);

	/* Restore dev->irq to its default pin-assertion irq */
	dev->irq = desc->msi_attrib.default_irq;
}

void pci_disable_msi(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
	free_msi_irqs(dev);
	kset_unregister(dev->msi_kset);
	dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msi);

/**
 * pci_msix_table_size - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 */
int pci_msix_table_size(struct pci_dev *dev)
{
	u16 control;

	if (!dev->msix_cap)
		return 0;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
	return msix_table_size(control);
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 * @entries: pointer to an array of MSI-X entries
 * @nvec: number of MSI-X irqs requested for allocation by device driver
 *
 * Setup the MSI-X capability structure of the device function with the
 * number of irqs requested by its device driver when enabling MSI-X mode.
 * A return of zero indicates the successful configuration of the MSI-X
 * capability structure with newly allocated MSI-X irqs. A return of < 0
 * indicates a failure. A return of > 0 indicates that the request exceeds
 * the number of irqs or MSI-X vectors available; the driver should retry
 * with the returned value.
 **/
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
{
	int status, nr_entries;
	int i, j;

	if (!entries || !dev->msix_cap || dev->current_state != PCI_D0)
		return -EINVAL;

	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

	nr_entries = pci_msix_table_size(dev);
	if (nvec > nr_entries)
		return nr_entries;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	WARN_ON(!!dev->msix_enabled);

	/* Check whether driver already requested for MSI irq */
	if (dev->msi_enabled) {
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
EXPORT_SYMBOL(pci_enable_msix);
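
/*
 * Illustrative caller sketch (not part of this file; the "foo_*" names
 * are hypothetical).  A driver that can run with fewer vectors typically
 * retries with the value returned when the full request cannot be met:
 *
 *	nvec = FOO_WANTED_VECTORS;
 *	for (;;) {
 *		rc = pci_enable_msix(pdev, foo_msix_entries, nvec);
 *		if (rc == 0)
 *			break;
 *		if (rc < 0)
 *			return rc;
 *		nvec = rc;
 *	}
 *
 * rc == 0 means success, rc < 0 is an error (fall back to MSI or INTx),
 * and rc > 0 is the number of vectors actually available.
 */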

void pci_msix_shutdown(struct pci_dev *dev)
{
	struct msi_desc *entry;

	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	/* Return the device with MSI-X masked as initial states */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
		arch_msix_mask_irq(entry, 1);
	}

	msix_set_enable(dev, 0);
	pci_intx_for_msi(dev, 1);
	dev->msix_enabled = 0;
}

void pci_disable_msix(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
	free_msi_irqs(dev);
	kset_unregister(dev->msi_kset);
	dev->msi_kset = NULL;
}
EXPORT_SYMBOL(pci_disable_msix);

/**
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
 * Called during hotplug removal of the device function. All MSI/MSI-X
 * irqs previously allocated for this device function are reclaimed to
 * the unused state, so they may be used again later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev *dev)
{
	if (!pci_msi_enable || !dev)
		return;

	if (dev->msi_enabled || dev->msix_enabled)
		free_msi_irqs(dev);
}

void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
{
	return pci_msi_enable;
}
EXPORT_SYMBOL(pci_msi_enabled);

void pci_msi_init_pci_dev(struct pci_dev *dev)
{
	INIT_LIST_HEAD(&dev->msi_list);

	/* Disable the msi hardware to avoid screaming interrupts
	 * during boot.  This is the power on reset default so
	 * usually this should be a noop.
	 */
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (dev->msi_cap)
		msi_set_enable(dev, 0);

	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (dev->msix_cap)
		msix_set_enable(dev, 0);
}