msi.c 26.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
14
#include <linux/export.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/ioport.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
18
#include <linux/msi.h>
D
Dan Williams 已提交
19
#include <linux/smp.h>
H
Hidetoshi Seto 已提交
20 21
#include <linux/errno.h>
#include <linux/io.h>
22
#include <linux/slab.h>
L
Linus Torvalds 已提交
23 24 25 26 27

#include "pci.h"

static int pci_msi_enable = 1;

28 29 30
/*
 * Number of MSI-X table entries encoded in a Message Control word: the
 * QSIZE field plus one.  The argument is parenthesized so that compound
 * expressions passed by callers expand correctly.
 */
#define msix_table_size(flags)	(((flags) & PCI_MSIX_FLAGS_QSIZE) + 1)


31 32
/* Arch hooks */

33 34 35 36 37 38
/*
 * Weak default for setting up one MSI irq: always fails with -EINVAL.
 * Being __weak, it can be replaced by a strong definition elsewhere.
 */
int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
	return -EINVAL;
}

/* Weak default for tearing down one MSI irq: does nothing. */
void __weak arch_teardown_msi_irq(unsigned int irq)
{
}

42 43 44 45
/* Weak default device check: accepts every request by returning 0. */
int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
	return 0;
}
46

47
/*
 * Weak default for setting up all irqs of a device: calls
 * arch_setup_msi_irq() once per descriptor on dev->msi_list.
 *
 * Returns 0 on success, 1 when more than one plain MSI vector is
 * requested, the negative value returned by arch_setup_msi_irq(), or
 * -ENOSPC when that hook returns a positive value.
 */
int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
	struct msi_desc *desc;

	/*
	 * If an architecture wants to support multiple MSI, it needs to
	 * override arch_setup_msi_irqs()
	 */
	if (nvec > 1 && type == PCI_CAP_ID_MSI)
		return 1;

	list_for_each_entry(desc, &dev->msi_list, list) {
		int rc = arch_setup_msi_irq(dev, desc);

		if (rc > 0)
			return -ENOSPC;
		if (rc < 0)
			return rc;
	}

	return 0;
}
69

70 71 72 73
/*
 * We have a default implementation available as a separate non-weak
 * function, as it is used by the Xen x86 PCI code
 */
74
void default_teardown_msi_irqs(struct pci_dev *dev)
75 76 77 78
{
	struct msi_desc *entry;

	list_for_each_entry(entry, &dev->msi_list, list) {
79 80 81
		int i, nvec;
		if (entry->irq == 0)
			continue;
82 83 84 85
		if (entry->nvec_used)
			nvec = entry->nvec_used;
		else
			nvec = 1 << entry->msi_attrib.multiple;
86 87
		for (i = 0; i < nvec; i++)
			arch_teardown_msi_irq(entry->irq + i);
88 89 90
	}
}

91 92 93 94
/*
 * Weak default for tearing down all irqs of a device: delegates to
 * default_teardown_msi_irqs().
 */
void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
	/* Plain call: a void function must not return an expression */
	default_teardown_msi_irqs(dev);
}
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112

/*
 * Rewrite the cached MSI message of @irq to the device.
 *
 * With MSI-X enabled the descriptor is looked up on dev->msi_list; with
 * plain MSI enabled it comes from irq_get_msi_desc().  If no descriptor is
 * found, nothing is written.
 */
void default_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	struct msi_desc *entry = NULL;

	if (dev->msix_enabled) {
		struct msi_desc *pos;

		/*
		 * Use a separate cursor: when the loop finishes without a
		 * match the cursor points at the list head container, not
		 * at NULL, so it must not be used as the result.
		 */
		list_for_each_entry(pos, &dev->msi_list, list) {
			if (irq == pos->irq) {
				entry = pos;
				break;
			}
		}
	} else if (dev->msi_enabled) {
		entry = irq_get_msi_desc(irq);
	}

	if (entry)
		write_msi_msg(irq, &entry->msg);
}
113 114 115 116 117

/*
 * Weak default for restoring the MSI message of @irq: delegates to
 * default_restore_msi_irqs().
 */
void __weak arch_restore_msi_irqs(struct pci_dev *dev, int irq)
{
	/* Plain call: a void function must not return an expression */
	default_restore_msi_irqs(dev, irq);
}
118

119
static void msi_set_enable(struct pci_dev *dev, int enable)
120 121 122
{
	u16 control;

123
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
124 125 126
	control &= ~PCI_MSI_FLAGS_ENABLE;
	if (enable)
		control |= PCI_MSI_FLAGS_ENABLE;
127
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
128 129
}

130 131 132 133
/*
 * Set (@enable != 0) or clear (@enable == 0) the MSI-X enable bit in the
 * device's MSI-X flags word, leaving the other bits as read.
 */
static void msix_set_enable(struct pci_dev *dev, int enable)
{
	u16 flags;

	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &flags);
	if (enable)
		flags |= PCI_MSIX_FLAGS_ENABLE;
	else
		flags &= ~PCI_MSIX_FLAGS_ENABLE;
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, flags);
}

141 142
/*
 * Build a mask with the low (1 << x) bits set; for x >= 5 every bit of
 * the 32-bit result is set.
 */
static inline __attribute_const__ u32 msi_mask(unsigned x)
{
	/* Don't shift by >= width of type */
	if (x < 5)
		return (1 << (1 << x)) - 1;

	return 0xffffffff;
}

149
/* Mask built by msi_mask() from bits 3:1 of the MSI control word. */
static inline __attribute_const__ u32 msi_capable_mask(u16 control)
{
	unsigned field = (control >> 1) & 7;

	return msi_mask(field);
}
M
Mitch Williams 已提交
153

154 155 156
/* Mask built by msi_mask() from bits 6:4 of the MSI control word. */
static inline __attribute_const__ u32 msi_enabled_mask(u16 control)
{
	unsigned field = (control >> 4) & 7;

	return msi_mask(field);
}

159 160 161 162 163 164
/*
 * PCI 2.3 does not specify mask bits for each MSI interrupt.  Attempting to
 * mask all MSI interrupts by clearing the MSI enable bit does not work
 * reliably as devices without an INTx disable bit will then generate a
 * level IRQ which will never be cleared.
 */
165
static u32 __msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
L
Linus Torvalds 已提交
166
{
167
	u32 mask_bits = desc->masked;
L
Linus Torvalds 已提交
168

169
	if (!desc->msi_attrib.maskbit)
170
		return 0;
171 172 173 174

	mask_bits &= ~mask;
	mask_bits |= flag;
	pci_write_config_dword(desc->dev, desc->mask_pos, mask_bits);
175 176 177 178 179 180 181

	return mask_bits;
}

/* Apply __msi_mask_irq() and remember the result in desc->masked. */
static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
{
	u32 updated = __msi_mask_irq(desc, mask, flag);

	desc->masked = updated;
}

/*
 * This internal function does not flush PCI writes to the device.
 * All users must ensure that they read from the device before either
 * assuming that the device state is up to date, or returning out of this
 * file.  This saves a few milliseconds when initialising devices with lots
 * of MSI-X interrupts.
 */
191
static u32 __msix_mask_irq(struct msi_desc *desc, u32 flag)
192 193 194
{
	u32 mask_bits = desc->masked;
	unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
195
						PCI_MSIX_ENTRY_VECTOR_CTRL;
196 197 198
	mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
	if (flag)
		mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
199
	writel(mask_bits, desc->mask_base + offset);
200 201 202 203 204 205 206

	return mask_bits;
}

/* Apply __msix_mask_irq() and remember the result in desc->masked. */
static void msix_mask_irq(struct msi_desc *desc, u32 flag)
{
	u32 updated = __msix_mask_irq(desc, flag);

	desc->masked = updated;
}
208

209 210
#ifdef CONFIG_GENERIC_HARDIRQS

211
/*
 * Mask (@flag == 1) or unmask (@flag == 0) the interrupt behind @data.
 * MSI-X entries are updated through their table entry and the write is
 * flushed with a read; plain MSI uses the per-vector bit selected by the
 * irq's offset from the device's base irq.
 */
static void msi_set_mask_bit(struct irq_data *data, u32 flag)
{
	struct msi_desc *desc = irq_data_get_msi(data);
	unsigned offset;

	if (desc->msi_attrib.is_msix) {
		msix_mask_irq(desc, flag);
		readl(desc->mask_base);		/* Flush write to device */
		return;
	}

	offset = data->irq - desc->dev->irq;
	msi_mask_irq(desc, 1 << offset, flag << offset);
}

224
void mask_msi_irq(struct irq_data *data)
225
{
226
	msi_set_mask_bit(data, 1);
227 228
}

229
void unmask_msi_irq(struct irq_data *data)
230
{
231
	msi_set_mask_bit(data, 0);
L
Linus Torvalds 已提交
232 233
}

234 235
#endif /* CONFIG_GENERIC_HARDIRQS */

236
void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
L
Linus Torvalds 已提交
237
{
238 239 240 241 242 243 244 245 246 247 248
	BUG_ON(entry->dev->current_state != PCI_D0);

	if (entry->msi_attrib.is_msix) {
		void __iomem *base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
	} else {
		struct pci_dev *dev = entry->dev;
249
		int pos = dev->msi_cap;
250 251
		u16 data;

252 253
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				      &msg->address_lo);
254
		if (entry->msi_attrib.is_64) {
255 256
			pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					      &msg->address_hi);
B
Bjorn Helgaas 已提交
257
			pci_read_config_word(dev, pos + PCI_MSI_DATA_64, &data);
258 259
		} else {
			msg->address_hi = 0;
B
Bjorn Helgaas 已提交
260
			pci_read_config_word(dev, pos + PCI_MSI_DATA_32, &data);
261 262 263 264 265 266 267
		}
		msg->data = data;
	}
}

/* Look up the descriptor of @irq and read its message from the device. */
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__read_msi_msg(irq_get_msi_desc(irq), msg);
}

273
void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
274 275
{
	/* Assert that the cache is valid, assuming that
276 277 278
	 * valid messages are not all-zeroes. */
	BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
		 entry->msg.data));
279

280
	*msg = entry->msg;
281
}
L
Linus Torvalds 已提交
282

283
/* Look up the descriptor of @irq and return its cached message in @msg. */
void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__get_cached_msi_msg(irq_get_msi_desc(irq), msg);
}

290
void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
Y
Yinghai Lu 已提交
291
{
292 293 294
	if (entry->dev->current_state != PCI_D0) {
		/* Don't touch the hardware now */
	} else if (entry->msi_attrib.is_msix) {
295 296 297 298
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

299 300 301
		writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
		writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
302
	} else {
303
		struct pci_dev *dev = entry->dev;
304
		int pos = dev->msi_cap;
305 306
		u16 msgctl;

307
		pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
308 309
		msgctl &= ~PCI_MSI_FLAGS_QSIZE;
		msgctl |= entry->msi_attrib.multiple << 4;
310
		pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl);
311

312 313
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO,
				       msg->address_lo);
314
		if (entry->msi_attrib.is_64) {
315 316
			pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI,
					       msg->address_hi);
B
Bjorn Helgaas 已提交
317 318
			pci_write_config_word(dev, pos + PCI_MSI_DATA_64,
					      msg->data);
319
		} else {
B
Bjorn Helgaas 已提交
320 321
			pci_write_config_word(dev, pos + PCI_MSI_DATA_32,
					      msg->data);
322
		}
L
Linus Torvalds 已提交
323
	}
324
	entry->msg = *msg;
L
Linus Torvalds 已提交
325
}
326

Y
Yinghai Lu 已提交
327 328
/* Look up the descriptor of @irq and program/cache @msg for it. */
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
{
	__write_msi_msg(irq_get_msi_desc(irq), msg);
}

334 335 336 337 338 339 340 341
/*
 * Release every MSI/MSI-X descriptor of @dev.
 *
 * First asserts (under CONFIG_GENERIC_HARDIRQS) that no irq still has an
 * action, then calls arch_teardown_msi_irqs(), and finally walks the list
 * removing sysfs objects, unmapping the MSI-X table when the last entry is
 * reached, and freeing each descriptor.
 */
static void free_msi_irqs(struct pci_dev *dev)
{
	struct msi_desc *desc, *next;

	list_for_each_entry(desc, &dev->msi_list, list) {
		int k, count;

		if (!desc->irq)
			continue;

		if (!desc->nvec_used)
			count = 1 << desc->msi_attrib.multiple;
		else
			count = desc->nvec_used;
#ifdef CONFIG_GENERIC_HARDIRQS
		for (k = 0; k < count; k++)
			BUG_ON(irq_has_action(desc->irq + k));
#endif
	}

	arch_teardown_msi_irqs(dev);

	list_for_each_entry_safe(desc, next, &dev->msi_list, list) {
		/* All MSI-X entries share one mapping; drop it with the last */
		if (desc->msi_attrib.is_msix &&
		    list_is_last(&desc->list, &dev->msi_list))
			iounmap(desc->mask_base);

		/*
		 * It is possible to get here after populate_msi_sysfs()
		 * failed, in which case the entries were not registered
		 * with sysfs.  Don't unregister them in that case.
		 */
		if (desc->kobj.parent) {
			kobject_del(&desc->kobj);
			kobject_put(&desc->kobj);
		}

		list_del(&desc->list);
		kfree(desc);
	}
}
S
Satoru Takeuchi 已提交
375

376
static struct msi_desc *alloc_msi_entry(struct pci_dev *dev)
L
Linus Torvalds 已提交
377
{
378 379
	struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL);
	if (!desc)
L
Linus Torvalds 已提交
380 381
		return NULL;

382 383
	INIT_LIST_HEAD(&desc->list);
	desc->dev = dev;
L
Linus Torvalds 已提交
384

385
	return desc;
L
Linus Torvalds 已提交
386 387
}

388 389 390 391 392 393
/*
 * Forward to pci_intx() unless the device is flagged with
 * PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG, in which case nothing is done.
 */
static void pci_intx_for_msi(struct pci_dev *dev, int enable)
{
	if (dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)
		return;

	pci_intx(dev, enable);
}

394
static void __pci_restore_msi_state(struct pci_dev *dev)
395 396
{
	u16 control;
397
	struct msi_desc *entry;
398

399 400 401
	if (!dev->msi_enabled)
		return;

402
	entry = irq_get_msi_desc(dev->irq);
403

404
	pci_intx_for_msi(dev, 0);
405
	msi_set_enable(dev, 0);
406
	arch_restore_msi_irqs(dev, dev->irq);
407

408
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
409
	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
410
	control &= ~PCI_MSI_FLAGS_QSIZE;
411
	control |= (entry->msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE;
412
	pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control);
413 414 415
}

static void __pci_restore_msix_state(struct pci_dev *dev)
416 417
{
	struct msi_desc *entry;
418
	u16 control;
419

E
Eric W. Biederman 已提交
420 421
	if (!dev->msix_enabled)
		return;
422
	BUG_ON(list_empty(&dev->msi_list));
H
Hidetoshi Seto 已提交
423
	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
424
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
E
Eric W. Biederman 已提交
425

426
	/* route the table */
427
	pci_intx_for_msi(dev, 0);
428
	control |= PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL;
429
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
430

431
	list_for_each_entry(entry, &dev->msi_list, list) {
432
		arch_restore_msi_irqs(dev, entry->irq);
433
		msix_mask_irq(entry, entry->masked);
434 435
	}

436
	control &= ~PCI_MSIX_FLAGS_MASKALL;
437
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
438
}
439 440 441 442 443 444

/*
 * Restore MSI and then MSI-X state of @dev; each helper returns early
 * when its mode is not enabled.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
445
EXPORT_SYMBOL_GPL(pci_restore_msi_state);
446

447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484

/* Map an embedded struct attribute back to its struct msi_attribute. */
#define to_msi_attr(obj) container_of(obj, struct msi_attribute, attr)
/* Map an embedded kobject back to its struct msi_desc. */
#define to_msi_desc(obj) container_of(obj, struct msi_desc, kobj)

/*
 * A sysfs attribute of an MSI descriptor: the generic attribute plus
 * typed show/store callbacks that receive the owning msi_desc.
 */
struct msi_attribute {
	struct attribute        attr;
	/* Format the attribute value into @buf */
	ssize_t (*show)(struct msi_desc *entry, struct msi_attribute *attr,
			char *buf);
	/* Consume @count bytes from @buf */
	ssize_t (*store)(struct msi_desc *entry, struct msi_attribute *attr,
			 const char *buf, size_t count);
};

/* Write "msix" or "msi" plus a newline to @buf, depending on the entry. */
static ssize_t show_msi_mode(struct msi_desc *entry, struct msi_attribute *atr,
			     char *buf)
{
	const char *mode = entry->msi_attrib.is_msix ? "msix" : "msi";

	return sprintf(buf, "%s\n", mode);
}

/*
 * sysfs show dispatcher: recover the msi_attribute and msi_desc from the
 * generic pointers and call the attribute's show callback.  Returns -EIO
 * when the attribute has no show callback.
 */
static ssize_t msi_irq_attr_show(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	struct msi_attribute *mattr = to_msi_attr(attr);

	if (mattr->show)
		return mattr->show(to_msi_desc(kobj), mattr, buf);

	return -EIO;
}

/* sysfs operations for MSI irq kobjects; only .show is provided. */
static const struct sysfs_ops msi_irq_sysfs_ops = {
	.show = msi_irq_attr_show,
};

/* World-readable "mode" attribute backed by show_msi_mode(); no store. */
static struct msi_attribute mode_attribute =
	__ATTR(mode, S_IRUGO, show_msi_mode, NULL);


485
static struct attribute *msi_irq_default_attrs[] = {
486 487 488 489
	&mode_attribute.attr,
	NULL
};

490
static void msi_kobj_release(struct kobject *kobj)
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
{
	struct msi_desc *entry = to_msi_desc(kobj);

	pci_dev_put(entry->dev);
}

/* kobject type of MSI irq entries: release hook, sysfs ops and attributes. */
static struct kobj_type msi_irq_ktype = {
	.release = msi_kobj_release,
	.sysfs_ops = &msi_irq_sysfs_ops,
	.default_attrs = msi_irq_default_attrs,
};

/*
 * Create the "msi_irqs" kset under the device and add one kobject, named
 * after its irq number, for every descriptor on pdev->msi_list.  A pci_dev
 * reference is taken per kobject and dropped by msi_kobj_release().
 *
 * Returns 0 on success, -ENOMEM if the kset cannot be created, or the
 * error from kobject_init_and_add().  On failure everything created here
 * is undone: the kobjects that were added are removed, the failed kobject
 * is released, and the kset is unregistered and pdev->msi_kset reset to
 * NULL so that a later attempt starts from a clean state.
 */
static int populate_msi_sysfs(struct pci_dev *pdev)
{
	struct msi_desc *entry;
	struct kobject *kobj;
	int ret;
	int count = 0;

	pdev->msi_kset = kset_create_and_add("msi_irqs", NULL, &pdev->dev.kobj);
	if (!pdev->msi_kset)
		return -ENOMEM;

	list_for_each_entry(entry, &pdev->msi_list, list) {
		kobj = &entry->kobj;
		kobj->kset = pdev->msi_kset;
		pci_dev_get(pdev);
		ret = kobject_init_and_add(kobj, &msi_irq_ktype, NULL,
				     "%u", entry->irq);
		if (ret) {
			/*
			 * A failed kobject_init_and_add() must be followed
			 * by kobject_put(); the release callback then drops
			 * the pci_dev reference taken just above.
			 */
			kobject_put(kobj);
			goto out_unroll;
		}

		count++;
	}

	return 0;

out_unroll:
	/* Only the first @count entries were successfully added */
	list_for_each_entry(entry, &pdev->msi_list, list) {
		if (!count)
			break;
		kobject_del(&entry->kobj);
		kobject_put(&entry->kobj);
		count--;
	}
	/* Callers do not unregister the kset on failure, so do it here */
	kset_unregister(pdev->msi_kset);
	pdev->msi_kset = NULL;
	return ret;
}

L
Linus Torvalds 已提交
539 540 541
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
542
 * @nvec: number of interrupts to allocate
L
Linus Torvalds 已提交
543
 *
544 545 546 547 548 549 550
 * Setup the MSI capability structure of the device with the requested
 * number of interrupts.  A return value of zero indicates the successful
 * setup of an entry with the new MSI irq.  A negative return value indicates
 * an error, and a positive return value indicates the number of interrupts
 * which could have been allocated.
 */
static int msi_capability_init(struct pci_dev *dev, int nvec)
L
Linus Torvalds 已提交
551 552
{
	struct msi_desc *entry;
553
	int ret;
L
Linus Torvalds 已提交
554
	u16 control;
555
	unsigned mask;
L
Linus Torvalds 已提交
556

557
	msi_set_enable(dev, 0);	/* Disable MSI during set up */
558

559
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control);
L
Linus Torvalds 已提交
560
	/* MSI Entry Initialization */
561
	entry = alloc_msi_entry(dev);
562 563
	if (!entry)
		return -ENOMEM;
564

H
Hidetoshi Seto 已提交
565
	entry->msi_attrib.is_msix	= 0;
566
	entry->msi_attrib.is_64		= !!(control & PCI_MSI_FLAGS_64BIT);
H
Hidetoshi Seto 已提交
567
	entry->msi_attrib.entry_nr	= 0;
568
	entry->msi_attrib.maskbit	= !!(control & PCI_MSI_FLAGS_MASKBIT);
H
Hidetoshi Seto 已提交
569
	entry->msi_attrib.default_irq	= dev->irq;	/* Save IOAPIC IRQ */
570
	entry->msi_attrib.pos		= dev->msi_cap;
571

D
Dan Carpenter 已提交
572 573 574 575
	if (control & PCI_MSI_FLAGS_64BIT)
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
	else
		entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_32;
576 577 578 579 580 581
	/* All MSIs are unmasked by default, Mask them all */
	if (entry->msi_attrib.maskbit)
		pci_read_config_dword(dev, entry->mask_pos, &entry->masked);
	mask = msi_capable_mask(control);
	msi_mask_irq(entry, mask, mask);

582
	list_add_tail(&entry->list, &dev->msi_list);
583

L
Linus Torvalds 已提交
584
	/* Configure MSI capability structure */
585
	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSI);
586
	if (ret) {
587
		msi_mask_irq(entry, mask, ~mask);
588
		free_msi_irqs(dev);
589
		return ret;
590
	}
591

592 593 594 595 596 597 598
	ret = populate_msi_sysfs(dev);
	if (ret) {
		msi_mask_irq(entry, mask, ~mask);
		free_msi_irqs(dev);
		return ret;
	}

L
Linus Torvalds 已提交
599
	/* Set MSI enabled bits	 */
600
	pci_intx_for_msi(dev, 0);
601
	msi_set_enable(dev, 1);
602
	dev->msi_enabled = 1;
L
Linus Torvalds 已提交
603

604
	dev->irq = entry->irq;
L
Linus Torvalds 已提交
605 606 607
	return 0;
}

608
/*
 * Map the device's MSI-X table: read the table offset/BIR register,
 * resolve the BAR it names and ioremap room for @nr_entries entries.
 * Returns the mapping as given by ioremap_nocache().
 */
static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
{
	resource_size_t table_phys;
	u32 reg;
	u8 bar;

	pci_read_config_dword(dev, dev->msix_cap + PCI_MSIX_TABLE, &reg);
	bar = (u8)(reg & PCI_MSIX_TABLE_BIR);
	reg &= PCI_MSIX_TABLE_OFFSET;
	table_phys = pci_resource_start(dev, bar) + reg;

	return ioremap_nocache(table_phys, nr_entries * PCI_MSIX_ENTRY_SIZE);
}

623 624
/*
 * Allocate one descriptor per requested MSI-X entry and queue it on
 * dev->msi_list.  Returns 0 on success or -ENOMEM; on failure the table
 * mapping @base is released, either directly (nothing queued yet) or
 * through free_msi_irqs().
 */
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
			      struct msix_entry *entries, int nvec)
{
	struct msi_desc *desc;
	int idx;

	for (idx = 0; idx < nvec; idx++) {
		desc = alloc_msi_entry(dev);
		if (!desc) {
			if (idx)
				free_msi_irqs(dev);
			else
				iounmap(base);
			/* Not enough memory. Don't try again */
			return -ENOMEM;
		}

		desc->msi_attrib.is_msix	= 1;
		desc->msi_attrib.is_64		= 1;
		desc->msi_attrib.entry_nr	= entries[idx].entry;
		desc->msi_attrib.default_irq	= dev->irq;
		desc->msi_attrib.pos		= dev->msix_cap;
		desc->mask_base			= base;

		list_add_tail(&desc->list, &dev->msi_list);
	}

	return 0;
}

653
static void msix_program_entries(struct pci_dev *dev,
654
				 struct msix_entry *entries)
655 656 657 658 659 660 661 662 663
{
	struct msi_desc *entry;
	int i = 0;

	list_for_each_entry(entry, &dev->msi_list, list) {
		int offset = entries[i].entry * PCI_MSIX_ENTRY_SIZE +
						PCI_MSIX_ENTRY_VECTOR_CTRL;

		entries[i].vector = entry->irq;
664
		irq_set_msi_desc(entry->irq, entry);
665 666 667 668 669 670
		entry->masked = readl(entry->mask_base + offset);
		msix_mask_irq(entry, 1);
		i++;
	}
}

L
Linus Torvalds 已提交
671 672 673
/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
674 675
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
676
 *
677
 * Setup the MSI-X capability structure of device function with a
678 679
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
680 681 682 683
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
684
	int ret;
685
	u16 control;
L
Linus Torvalds 已提交
686 687
	void __iomem *base;

688
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
689 690 691

	/* Ensure MSI-X is disabled while it is set up */
	control &= ~PCI_MSIX_FLAGS_ENABLE;
692
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
693

L
Linus Torvalds 已提交
694
	/* Request & Map MSI-X table region */
695
	base = msix_map_region(dev, msix_table_size(control));
696
	if (!base)
L
Linus Torvalds 已提交
697 698
		return -ENOMEM;

699
	ret = msix_setup_entries(dev, base, entries, nvec);
700 701
	if (ret)
		return ret;
702 703

	ret = arch_setup_msi_irqs(dev, nvec, PCI_CAP_ID_MSIX);
704 705
	if (ret)
		goto error;
706

707 708 709 710 711 712
	/*
	 * Some devices require MSI-X to be enabled before we can touch the
	 * MSI-X registers.  We need to mask all the vectors to prevent
	 * interrupts coming in before they're fully set up.
	 */
	control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
713
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
714

715
	msix_program_entries(dev, entries);
716

717 718 719 720 721 722
	ret = populate_msi_sysfs(dev);
	if (ret) {
		ret = 0;
		goto error;
	}

723
	/* Set MSI-X enabled bits and unmask the function */
724
	pci_intx_for_msi(dev, 0);
725
	dev->msix_enabled = 1;
L
Linus Torvalds 已提交
726

727
	control &= ~PCI_MSIX_FLAGS_MASKALL;
728
	pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, control);
729

L
Linus Torvalds 已提交
730
	return 0;
731 732 733 734 735 736 737

error:
	if (ret < 0) {
		/*
		 * If we had some success, report the number of irqs
		 * we succeeded in setting up.
		 */
738
		struct msi_desc *entry;
739 740 741 742 743 744 745 746 747 748 749 750 751
		int avail = 0;

		list_for_each_entry(entry, &dev->msi_list, list) {
			if (entry->irq != 0)
				avail++;
		}
		if (avail != 0)
			ret = avail;
	}

	free_msi_irqs(dev);

	return ret;
L
Linus Torvalds 已提交
752 753
}

754
/**
755
 * pci_msi_check_device - check whether MSI may be enabled on a device
756
 * @dev: pointer to the pci_dev data structure of MSI device function
757
 * @nvec: how many MSIs have been requested ?
758
 * @type: are we checking for MSI or MSI-X ?
759
 *
760
 * Look at global flags, the device itself, and its parent busses
761 762
 * to determine if MSI/-X are supported for the device. If MSI/-X is
 * supported return 0, else return an error code.
763
 **/
H
Hidetoshi Seto 已提交
764
static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
765 766
{
	struct pci_bus *bus;
767
	int ret;
768

769
	/* MSI must be globally enabled and supported by the device */
770 771 772
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

773 774 775 776 777 778 779 780
	/*
	 * You can't ask to have 0 or less MSIs configured.
	 *  a) it's stupid ..
	 *  b) the list manipulation code assumes nvec >= 1.
	 */
	if (nvec < 1)
		return -ERANGE;

H
Hidetoshi Seto 已提交
781 782 783
	/*
	 * Any bridge which does NOT route MSI transactions from its
	 * secondary bus to its primary bus must set NO_MSI flag on
784 785 786 787
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
788 789 790 791
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

792 793 794 795
	ret = arch_msi_check_device(dev, nvec, type);
	if (ret)
		return ret;

796 797 798
	return 0;
}

L
Linus Torvalds 已提交
799
/**
800 801 802
 * pci_enable_msi_block - configure device's MSI capability structure
 * @dev: device to configure
 * @nvec: number of interrupts to configure
L
Linus Torvalds 已提交
803
 *
804 805 806 807 808 809 810 811 812
 * Allocate IRQs for a device with the MSI capability.
 * This function returns a negative errno if an error occurs.  If it
 * is unable to allocate the number of interrupts requested, it returns
 * the number of interrupts it might be able to allocate.  If it successfully
 * allocates at least the number of interrupts requested, it returns 0 and
 * updates the @dev's irq member to the lowest new interrupt number; the
 * other interrupt numbers allocated to this device are consecutive.
 */
int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
L
Linus Torvalds 已提交
813
{
814
	int status, maxvec;
815 816
	u16 msgctl;

817
	if (!dev->msi_cap)
818
		return -EINVAL;
819 820

	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
821 822 823
	maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
	if (nvec > maxvec)
		return maxvec;
L
Linus Torvalds 已提交
824

825
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
826 827
	if (status)
		return status;
L
Linus Torvalds 已提交
828

E
Eric W. Biederman 已提交
829
	WARN_ON(!!dev->msi_enabled);
L
Linus Torvalds 已提交
830

831
	/* Check whether driver already requested MSI-X irqs */
832
	if (dev->msix_enabled) {
833 834
		dev_info(&dev->dev, "can't enable MSI "
			 "(MSI-X already enabled)\n");
835
		return -EINVAL;
L
Linus Torvalds 已提交
836
	}
837 838

	status = msi_capability_init(dev, nvec);
L
Linus Torvalds 已提交
839 840
	return status;
}
841
EXPORT_SYMBOL(pci_enable_msi_block);
L
Linus Torvalds 已提交
842

843 844
int pci_enable_msi_block_auto(struct pci_dev *dev, unsigned int *maxvec)
{
845
	int ret, nvec;
846 847
	u16 msgctl;

848
	if (!dev->msi_cap)
849 850
		return -EINVAL;

851
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl);
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867
	ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);

	if (maxvec)
		*maxvec = ret;

	do {
		nvec = ret;
		ret = pci_enable_msi_block(dev, nvec);
	} while (ret > 0);

	if (ret < 0)
		return ret;
	return nvec;
}
EXPORT_SYMBOL(pci_enable_msi_block_auto);

868
void pci_msi_shutdown(struct pci_dev *dev)
L
Linus Torvalds 已提交
869
{
870 871 872
	struct msi_desc *desc;
	u32 mask;
	u16 ctrl;
L
Linus Torvalds 已提交
873

874
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
E
Eric W. Biederman 已提交
875 876
		return;

877 878 879
	BUG_ON(list_empty(&dev->msi_list));
	desc = list_first_entry(&dev->msi_list, struct msi_desc, list);

880
	msi_set_enable(dev, 0);
881
	pci_intx_for_msi(dev, 1);
882
	dev->msi_enabled = 0;
883

884
	/* Return the device with MSI unmasked as initial states */
885
	pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &ctrl);
886
	mask = msi_capable_mask(ctrl);
887 888
	/* Keep cached state to be restored */
	__msi_mask_irq(desc, mask, ~mask);
889 890

	/* Restore dev->irq to its default pin-assertion irq */
891
	dev->irq = desc->msi_attrib.default_irq;
892
}
893

H
Hidetoshi Seto 已提交
894
void pci_disable_msi(struct pci_dev *dev)
895 896 897 898 899
{
	if (!pci_msi_enable || !dev || !dev->msi_enabled)
		return;

	pci_msi_shutdown(dev);
900
	free_msi_irqs(dev);
901 902
	kset_unregister(dev->msi_kset);
	dev->msi_kset = NULL;
L
Linus Torvalds 已提交
903
}
904
EXPORT_SYMBOL(pci_disable_msi);
L
Linus Torvalds 已提交
905

906 907 908 909 910 911 912 913
/**
 * pci_msix_table_size - return the number of device's MSI-X table entries
 * @dev: pointer to the pci_dev data structure of MSI-X device function
 */
int pci_msix_table_size(struct pci_dev *dev)
{
	u16 control;

914
	if (!dev->msix_cap)
915 916
		return 0;

917
	pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control);
918
	return msix_table_size(control);
919 920
}

L
Linus Torvalds 已提交
921 922 923
/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
924
 * @entries: pointer to an array of MSI-X entries
925
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
926 927
 *
 * Setup the MSI-X capability structure of device function with the number
928
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
929 930
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
931
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
932
 * Or a return of > 0 indicates that driver request is exceeding the number
933 934
 * of irqs or MSI-X vectors available. Driver should use the returned value to
 * re-send its request.
L
Linus Torvalds 已提交
935
 **/
H
Hidetoshi Seto 已提交
936
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
L
Linus Torvalds 已提交
937
{
938
	int status, nr_entries;
E
Eric W. Biederman 已提交
939
	int i, j;
L
Linus Torvalds 已提交
940

941
	if (!entries || !dev->msix_cap)
H
Hidetoshi Seto 已提交
942
		return -EINVAL;
L
Linus Torvalds 已提交
943

944 945 946 947
	status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
	if (status)
		return status;

948
	nr_entries = pci_msix_table_size(dev);
L
Linus Torvalds 已提交
949
	if (nvec > nr_entries)
950
		return nr_entries;
L
Linus Torvalds 已提交
951 952 953 954 955 956 957 958 959 960

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
E
Eric W. Biederman 已提交
961
	WARN_ON(!!dev->msix_enabled);
962

963
	/* Check whether driver already requested for MSI irq */
H
Hidetoshi Seto 已提交
964
	if (dev->msi_enabled) {
965 966
		dev_info(&dev->dev, "can't enable MSI-X "
		       "(MSI IRQ already assigned)\n");
L
Linus Torvalds 已提交
967 968 969 970 971
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}
972
EXPORT_SYMBOL(pci_enable_msix);
L
Linus Torvalds 已提交
973

H
Hidetoshi Seto 已提交
974
void pci_msix_shutdown(struct pci_dev *dev)
975
{
976 977
	struct msi_desc *entry;

978
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
E
Eric W. Biederman 已提交
979 980
		return;

981 982 983 984 985 986
	/* Return the device with MSI-X masked as initial states */
	list_for_each_entry(entry, &dev->msi_list, list) {
		/* Keep cached states to be restored */
		__msix_mask_irq(entry, 1);
	}

987
	msix_set_enable(dev, 0);
988
	pci_intx_for_msi(dev, 1);
989
	dev->msix_enabled = 0;
990
}
991

H
Hidetoshi Seto 已提交
992
void pci_disable_msix(struct pci_dev *dev)
993 994 995 996 997
{
	if (!pci_msi_enable || !dev || !dev->msix_enabled)
		return;

	pci_msix_shutdown(dev);
998
	free_msi_irqs(dev);
999 1000
	kset_unregister(dev->msi_kset);
	dev->msi_kset = NULL;
L
Linus Torvalds 已提交
1001
}
1002
EXPORT_SYMBOL(pci_disable_msix);
L
Linus Torvalds 已提交
1003 1004

/**
1005
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
1006 1007
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
1008
 * Being called during hotplug remove, from which the device function
1009
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
1010 1011 1012
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
H
Hidetoshi Seto 已提交
1013
void msi_remove_pci_irq_vectors(struct pci_dev *dev)
L
Linus Torvalds 已提交
1014 1015
{
	if (!pci_msi_enable || !dev)
H
Hidetoshi Seto 已提交
1016
		return;
L
Linus Torvalds 已提交
1017

1018 1019
	if (dev->msi_enabled || dev->msix_enabled)
		free_msi_irqs(dev);
L
Linus Torvalds 已提交
1020 1021
}

1022 1023 1024 1025
/* Globally disable MSI by clearing the file-local pci_msi_enable flag. */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}
1026

1027 1028 1029 1030 1031 1032 1033
/**
 * pci_msi_enabled - is MSI enabled?
 *
 * Returns true if MSI has not been disabled by the command-line option
 * pci=nomsi.
 **/
int pci_msi_enabled(void)
1034
{
1035
	return pci_msi_enable;
1036
}
1037
EXPORT_SYMBOL(pci_msi_enabled);
1038

1039
void pci_msi_init_pci_dev(struct pci_dev *dev)
1040
{
1041
	INIT_LIST_HEAD(&dev->msi_list);
1042 1043 1044 1045 1046

	/* Disable the msi hardware to avoid screaming interrupts
	 * during boot.  This is the power on reset default so
	 * usually this should be a noop.
	 */
1047 1048 1049 1050 1051 1052 1053
	dev->msi_cap = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (dev->msi_cap)
		msi_set_enable(dev, 0);

	dev->msix_cap = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (dev->msix_cap)
		msix_set_enable(dev, 0);
1054
}