msi.c 23.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13 14 15 16 17
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/smp_lock.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
18
#include <linux/msi.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26 27 28

#include <asm/errno.h>
#include <asm/io.h>
#include <asm/smp.h>

#include "pci.h"
#include "msi.h"

/* Lock taken whenever msi_desc[] is modified or scanned. */
static DEFINE_SPINLOCK(msi_lock);
/* Descriptor attached to each irq; static storage starts out all-NULL. */
static struct msi_desc *msi_desc[NR_IRQS];
/* Slab cache that struct msi_desc objects are allocated from. */
static struct kmem_cache *msi_cachep;

/* Global on/off switch; cleared by pci_no_msi() and on cache-init failure. */
static int pci_msi_enable = 1;

static int msi_cache_init(void)
{
35 36
	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
L
Linus Torvalds 已提交
37 38 39 40 41 42
	if (!msi_cachep)
		return -ENOMEM;

	return 0;
}

43
static void msi_set_mask_bit(unsigned int irq, int flag)
L
Linus Torvalds 已提交
44 45 46
{
	struct msi_desc *entry;

47
	entry = msi_desc[irq];
48
	BUG_ON(!entry || !entry->dev);
L
Linus Torvalds 已提交
49 50
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
51
		if (entry->msi_attrib.maskbit) {
S
Satoru Takeuchi 已提交
52 53
			int pos;
			u32 mask_bits;
54 55 56 57 58 59 60

			pos = (long)entry->mask_base;
			pci_read_config_dword(entry->dev, pos, &mask_bits);
			mask_bits &= ~(1);
			mask_bits |= flag;
			pci_write_config_dword(entry->dev, pos, mask_bits);
		}
L
Linus Torvalds 已提交
61 62 63 64 65 66 67 68 69
		break;
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		writel(flag, entry->mask_base + offset);
		break;
	}
	default:
70
		BUG();
L
Linus Torvalds 已提交
71 72 73 74
		break;
	}
}

75
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
L
Linus Torvalds 已提交
76
{
77
	struct msi_desc *entry = get_irq_data(irq);
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
	switch(entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
 		break;
 	}
 	default:
		BUG();
	}
}
L
Linus Torvalds 已提交
113

114
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
115
{
116
	struct msi_desc *entry = get_irq_data(irq);
L
Linus Torvalds 已提交
117 118 119
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
120 121 122 123 124 125 126 127 128 129 130 131 132 133
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
L
Linus Torvalds 已提交
134 135 136 137
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
138 139 140 141 142 143 144 145 146
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
L
Linus Torvalds 已提交
147 148 149
		break;
	}
	default:
150
		BUG();
L
Linus Torvalds 已提交
151 152
	}
}
153

154
/* Mask the MSI/MSI-X vector behind @irq (sets its mask bit to 1). */
void mask_msi_irq(unsigned int irq)
{
	const int masked = 1;

	msi_set_mask_bit(irq, masked);
}

159
/* Unmask the MSI/MSI-X vector behind @irq (clears its mask bit to 0). */
void unmask_msi_irq(unsigned int irq)
{
	const int masked = 0;

	msi_set_mask_bit(irq, masked);
}

164
/*
 * Forward declaration: msi_free_irq() is defined further down in this file
 * but is called by code that appears before the definition.
 */
static int msi_free_irq(struct pci_dev* dev, int irq);
S
Satoru Takeuchi 已提交
165

L
Linus Torvalds 已提交
166 167 168 169 170 171 172
static int msi_init(void)
{
	static int status = -ENOMEM;

	if (!status)
		return status;

173 174
	status = msi_cache_init();
	if (status < 0) {
L
Linus Torvalds 已提交
175 176 177 178
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI cache init failed\n");
		return status;
	}
179

L
Linus Torvalds 已提交
180 181 182 183 184 185 186
	return status;
}

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

187
	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
L
Linus Torvalds 已提交
188 189 190 191 192 193 194 195 196
	if (!entry)
		return NULL;

	entry->link.tail = entry->link.head = 0;	/* single message */
	entry->dev = NULL;

	return entry;
}

197
static void attach_msi_entry(struct msi_desc *entry, int irq)
L
Linus Torvalds 已提交
198 199 200 201
{
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
202
	msi_desc[irq] = entry;
L
Linus Torvalds 已提交
203 204 205
	spin_unlock_irqrestore(&msi_lock, flags);
}

206
static int create_msi_irq(void)
L
Linus Torvalds 已提交
207
{
208 209 210 211 212 213
	struct msi_desc *entry;
	int irq;

	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;
214

215 216 217 218
	irq = create_irq();
	if (irq < 0) {
		kmem_cache_free(msi_cachep, entry);
		return -EBUSY;
L
Linus Torvalds 已提交
219
	}
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234

	set_irq_data(irq, entry);

	return irq;
}

/*
 * Undo create_msi_irq(): detach chip and data from @irq, destroy the irq
 * and return its descriptor to the slab cache.
 */
static void destroy_msi_irq(unsigned int irq)
{
	/* Fetch the descriptor before the irq data pointer is cleared. */
	struct msi_desc *desc = get_irq_data(irq);

	set_irq_chip(irq, NULL);
	set_irq_data(irq, NULL);
	destroy_irq(irq);
	kmem_cache_free(msi_cachep, desc);
}

static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_enable(control, 1);
		pci_write_config_word(dev, msi_control_reg(pos), control);
246
		dev->msi_enabled = 1;
L
Linus Torvalds 已提交
247 248 249
	} else {
		msix_enable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
250
		dev->msix_enabled = 1;
L
Linus Torvalds 已提交
251
	}
252 253

	pci_intx(dev, 0);  /* disable intx */
L
Linus Torvalds 已提交
254 255
}

256
void disable_msi_mode(struct pci_dev *dev, int pos, int type)
L
Linus Torvalds 已提交
257 258 259 260 261 262 263 264
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
265
		dev->msi_enabled = 0;
L
Linus Torvalds 已提交
266 267 268
	} else {
		msix_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
269
		dev->msix_enabled = 0;
L
Linus Torvalds 已提交
270
	}
271 272

	pci_intx(dev, 1);  /* enable intx */
L
Linus Torvalds 已提交
273 274
}

275
static int msi_lookup_irq(struct pci_dev *dev, int type)
L
Linus Torvalds 已提交
276
{
277
	int irq;
L
Linus Torvalds 已提交
278 279 280
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
281 282 283 284
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
			msi_desc[irq]->msi_attrib.type != type ||
			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
L
Linus Torvalds 已提交
285 286
			continue;
		spin_unlock_irqrestore(&msi_lock, flags);
287
		/* This pre-assigned MSI irq for this device
S
Satoru Takeuchi 已提交
288
		   already exists. Override dev->irq with this irq */
289
		dev->irq = irq;
L
Linus Torvalds 已提交
290 291 292 293 294 295 296
		return 0;
	}
	spin_unlock_irqrestore(&msi_lock, flags);

	return -EACCES;
}

297
#ifdef CONFIG_PM
298
static int __pci_save_msi_state(struct pci_dev *dev)
299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335
{
	int pos, i = 0;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (pos <= 0 || dev->no_msi)
		return 0;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return 0;

	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5,
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msi_state\n");
		return -ENOMEM;
	}
	cap = &save_state->data[0];

	pci_read_config_dword(dev, pos, &cap[i++]);
	control = cap[0] >> 16;
	pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]);
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]);
	} else
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]);
	save_state->cap_nr = PCI_CAP_ID_MSI;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

336
static void __pci_restore_msi_state(struct pci_dev *dev)
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363
{
	int i = 0, pos;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI);
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!save_state || pos <= 0)
		return;
	cap = &save_state->data[0];

	control = cap[i++] >> 16;
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]);
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]);
	} else
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]);
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
	pci_remove_saved_cap(save_state);
	kfree(save_state);
}

364
static int __pci_save_msix_state(struct pci_dev *dev)
365 366
{
	int pos;
367
	int temp;
368
	int irq, head, tail = 0;
369 370 371 372 373 374 375
	u16 control;
	struct pci_cap_saved_state *save_state;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0 || dev->no_msi)
		return 0;

376
	/* save the capability */
377 378 379 380 381 382 383 384 385 386 387
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return 0;
	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16),
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msix_state\n");
		return -ENOMEM;
	}
	*((u16 *)&save_state->data[0]) = control;

388 389
	/* save the table */
	temp = dev->irq;
390
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
391 392 393 394
		kfree(save_state);
		return -EINVAL;
	}

395
	irq = head = dev->irq;
396 397 398
	while (head != tail) {
		struct msi_desc *entry;

399
		entry = msi_desc[irq];
400
		read_msi_msg(irq, &entry->msg_save);
401

402 403
		tail = msi_desc[irq]->link.tail;
		irq = tail;
404 405 406
	}
	dev->irq = temp;

407 408 409 410 411
	save_state->cap_nr = PCI_CAP_ID_MSIX;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

412 413 414 415 416 417 418 419 420 421 422 423 424 425
/*
 * Save MSI state, then MSI-X state, for @dev.  The MSI-X step is skipped
 * when the MSI step reports an error.  Returns 0 or the first error.
 */
int pci_save_msi_state(struct pci_dev *dev)
{
	int err = __pci_save_msi_state(dev);

	if (err)
		return err;

	return __pci_save_msix_state(dev);
}

static void __pci_restore_msix_state(struct pci_dev *dev)
426 427 428
{
	u16 save;
	int pos;
429
	int irq, head, tail = 0;
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446
	struct msi_desc *entry;
	int temp;
	struct pci_cap_saved_state *save_state;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX);
	if (!save_state)
		return;
	save = *((u16 *)&save_state->data[0]);
	pci_remove_saved_cap(save_state);
	kfree(save_state);

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0)
		return;

	/* route the table */
	temp = dev->irq;
447
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
448
		return;
449
	irq = head = dev->irq;
450
	while (head != tail) {
451
		entry = msi_desc[irq];
452
		write_msi_msg(irq, &entry->msg_save);
453

454 455
		tail = msi_desc[irq]->link.tail;
		irq = tail;
456 457 458 459 460 461
	}
	dev->irq = temp;

	pci_write_config_word(dev, msi_control_reg(pos), save);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
}
462 463 464 465 466 467

/*
 * Restore previously saved interrupt state for @dev: MSI first, then MSI-X.
 * Each helper returns without touching the device when it finds no saved
 * record of its kind.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	__pci_restore_msi_state(dev);
	__pci_restore_msix_state(dev);
}
S
Satoru Takeuchi 已提交
468
#endif	/* CONFIG_PM */
469

L
Linus Torvalds 已提交
470 471 472 473
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
474
 * Setup the MSI capability structure of device function with a single
475
 * MSI irq, regardless of device function is capable of handling
L
Linus Torvalds 已提交
476
 * multiple messages. A return of zero indicates the successful setup
477
 * of an entry zero with the new MSI irq or non-zero for otherwise.
L
Linus Torvalds 已提交
478 479 480
 **/
static int msi_capability_init(struct pci_dev *dev)
{
481
	int status;
L
Linus Torvalds 已提交
482
	struct msi_desc *entry;
483
	int pos, irq;
L
Linus Torvalds 已提交
484 485 486 487 488
	u16 control;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
489
	irq = create_msi_irq();
490 491 492 493 494 495
	if (irq < 0)
		return irq;

	entry = get_irq_data(irq);
	entry->link.head = irq;
	entry->link.tail = irq;
L
Linus Torvalds 已提交
496
	entry->msi_attrib.type = PCI_CAP_ID_MSI;
497
	entry->msi_attrib.is_64 = is_64bit_address(control);
L
Linus Torvalds 已提交
498 499
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
500
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
501
	entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
502 503 504 505
	if (is_mask_bit_support(control)) {
		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
				is_64bit_address(control));
	}
506 507 508 509 510 511 512 513 514 515 516 517 518 519
	entry->dev = dev;
	if (entry->msi_attrib.maskbit) {
		unsigned int maskbits, temp;
		/* All MSIs are unmasked by default, Mask them all */
		pci_read_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			&maskbits);
		temp = (1 << multi_msi_capable(control));
		temp = ((temp - 1) & ~temp);
		maskbits |= temp;
		pci_write_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			maskbits);
	}
L
Linus Torvalds 已提交
520
	/* Configure MSI capability structure */
521 522
	status = arch_setup_msi_irq(irq, dev);
	if (status < 0) {
523
		destroy_msi_irq(irq);
524 525
		return status;
	}
526

527
	attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
528 529 530
	/* Set MSI enabled bits	 */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

531
	dev->irq = irq;
L
Linus Torvalds 已提交
532 533 534 535 536 537
	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
538 539
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
540
 *
541
 * Setup the MSI-X capability structure of device function with a
542 543
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
544 545 546 547 548
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
549
	int status;
550
	int irq, pos, i, j, nr_entries, temp = 0;
551 552
	unsigned long phys_addr;
	u32 table_offset;
L
Linus Torvalds 已提交
553 554 555 556 557 558 559 560
 	u16 control;
	u8 bir;
	void __iomem *base;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
561 562

 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
L
Linus Torvalds 已提交
563
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
564 565
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start (dev, bir) + table_offset;
L
Linus Torvalds 已提交
566 567 568 569 570 571
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
572
		irq = create_msi_irq();
573
		if (irq < 0)
L
Linus Torvalds 已提交
574 575
			break;

576
		entry = get_irq_data(irq);
L
Linus Torvalds 已提交
577
 		j = entries[i].entry;
578
 		entries[i].vector = irq;
L
Linus Torvalds 已提交
579
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
580
		entry->msi_attrib.is_64 = 1;
L
Linus Torvalds 已提交
581 582
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
583
		entry->msi_attrib.default_irq = dev->irq;
584
		entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
585 586 587
		entry->dev = dev;
		entry->mask_base = base;
		if (!head) {
588 589
			entry->link.head = irq;
			entry->link.tail = irq;
L
Linus Torvalds 已提交
590 591 592 593
			head = entry;
		} else {
			entry->link.head = temp;
			entry->link.tail = tail->link.tail;
594 595
			tail->link.tail = irq;
			head->link.head = irq;
L
Linus Torvalds 已提交
596
		}
597
		temp = irq;
L
Linus Torvalds 已提交
598 599
		tail = entry;
		/* Configure MSI-X capability structure */
600
		status = arch_setup_msi_irq(irq, dev);
601 602
		if (status < 0) {
			destroy_msi_irq(irq);
603
			break;
604
		}
605

606
		attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
607 608
	}
	if (i != nvec) {
609
		int avail = i - 1;
L
Linus Torvalds 已提交
610 611
		i--;
		for (; i >= 0; i--) {
612 613
			irq = (entries + i)->vector;
			msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
614 615
			(entries + i)->vector = 0;
		}
616 617 618 619 620 621
		/* If we had some success report the number of irqs
		 * we succeeded in setting up.
		 */
		if (avail <= 0)
			avail = -EBUSY;
		return avail;
L
Linus Torvalds 已提交
622 623 624 625 626 627 628
	}
	/* Set MSI-X enabled bits */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

	return 0;
}

629 630 631 632
/**
 * pci_msi_supported - check whether MSI may be enabled on device
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
633 634
 * Look at global flags, the device itself, and its parent busses
 * to return 0 if MSI are supported for the device.
635 636 637 638 639 640
 **/
static
int pci_msi_supported(struct pci_dev * dev)
{
	struct pci_bus *bus;

641
	/* MSI must be globally enabled and supported by the device */
642 643 644
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

645 646 647 648 649 650
	/* Any bridge which does NOT route MSI transactions from it's
	 * secondary bus to it's primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
651 652 653 654 655 656 657
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

	return 0;
}

L
Linus Torvalds 已提交
658 659 660 661 662
/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with
663
 * a single MSI irq upon its software driver call to request for
L
Linus Torvalds 已提交
664 665
 * MSI mode enabled on its hardware device function. A return of zero
 * indicates the successful setup of an entry zero with the new MSI
666
 * irq or non-zero for otherwise.
L
Linus Torvalds 已提交
667 668 669
 **/
int pci_enable_msi(struct pci_dev* dev)
{
670
	int pos, temp, status;
L
Linus Torvalds 已提交
671

672 673
	if (pci_msi_supported(dev) < 0)
		return -EINVAL;
674

L
Linus Torvalds 已提交
675 676
	temp = dev->irq;

677 678
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
679 680
		return status;

681 682
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
683 684
		return -EINVAL;

685
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));
L
Linus Torvalds 已提交
686

687
	/* Check whether driver already requested for MSI-X irqs */
688
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
689
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
L
Linus Torvalds 已提交
690
			printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
691
			       "Device already has MSI-X irq assigned\n",
L
Linus Torvalds 已提交
692 693 694 695 696 697 698 699 700 701 702
			       pci_name(dev));
			dev->irq = temp;
			return -EINVAL;
	}
	status = msi_capability_init(dev);
	return status;
}

void pci_disable_msi(struct pci_dev* dev)
{
	struct msi_desc *entry;
703
	int pos, default_irq;
L
Linus Torvalds 已提交
704 705 706
	u16 control;
	unsigned long flags;

707 708
	if (!pci_msi_enable)
		return;
709 710
	if (!dev)
		return;
711

712 713
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
714 715 716 717 718 719
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return;

720 721
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

L
Linus Torvalds 已提交
722 723 724 725 726 727
	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[dev->irq];
	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return;
	}
728
	if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
729 730
		spin_unlock_irqrestore(&msi_lock, flags);
		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
731
		       "free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
732
		       pci_name(dev), dev->irq);
733
		BUG_ON(irq_has_action(dev->irq));
L
Linus Torvalds 已提交
734
	} else {
735
		default_irq = entry->msi_attrib.default_irq;
L
Linus Torvalds 已提交
736
		spin_unlock_irqrestore(&msi_lock, flags);
737
		msi_free_irq(dev, dev->irq);
738

739 740
		/* Restore dev->irq to its default pin-assertion irq */
		dev->irq = default_irq;
L
Linus Torvalds 已提交
741 742 743
	}
}

744
static int msi_free_irq(struct pci_dev* dev, int irq)
L
Linus Torvalds 已提交
745 746 747 748 749 750
{
	struct msi_desc *entry;
	int head, entry_nr, type;
	void __iomem *base;
	unsigned long flags;

751
	arch_teardown_msi_irq(irq);
752

L
Linus Torvalds 已提交
753
	spin_lock_irqsave(&msi_lock, flags);
754
	entry = msi_desc[irq];
L
Linus Torvalds 已提交
755 756 757 758 759 760 761 762 763 764 765
	if (!entry || entry->dev != dev) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return -EINVAL;
	}
	type = entry->msi_attrib.type;
	entry_nr = entry->msi_attrib.entry_nr;
	head = entry->link.head;
	base = entry->mask_base;
	msi_desc[entry->link.head]->link.tail = entry->link.tail;
	msi_desc[entry->link.tail]->link.head = entry->link.head;
	entry->dev = NULL;
766
	msi_desc[irq] = NULL;
L
Linus Torvalds 已提交
767 768
	spin_unlock_irqrestore(&msi_lock, flags);

769
	destroy_msi_irq(irq);
L
Linus Torvalds 已提交
770 771

	if (type == PCI_CAP_ID_MSIX) {
772 773
		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
L
Linus Torvalds 已提交
774

775
		if (head == irq)
L
Linus Torvalds 已提交
776 777 778 779 780 781 782 783 784
			iounmap(base);
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
785
 * @entries: pointer to an array of MSI-X entries
786
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
787 788
 *
 * Setup the MSI-X capability structure of device function with the number
789
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
790 791
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
792
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
793
 * Or a return of > 0 indicates that driver request is exceeding the number
794
 * of irqs available. Driver should use the returned value to re-send
L
Linus Torvalds 已提交
795 796 797 798
 * its request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
799
	int status, pos, nr_entries;
L
Linus Torvalds 已提交
800 801 802
	int i, j, temp;
	u16 control;

803
	if (!entries || pci_msi_supported(dev) < 0)
L
Linus Torvalds 已提交
804 805
 		return -EINVAL;

806 807
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
808 809
		return status;

810 811
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828
 		return -EINVAL;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	temp = dev->irq;
829
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));
830

831
	/* Check whether driver already requested for MSI irq */
L
Linus Torvalds 已提交
832
   	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
833
		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
L
Linus Torvalds 已提交
834
		printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
835
		       "Device already has an MSI irq assigned\n",
L
Linus Torvalds 已提交
836 837 838 839 840 841 842 843 844 845 846 847 848
		       pci_name(dev));
		dev->irq = temp;
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}

void pci_disable_msix(struct pci_dev* dev)
{
	int pos, temp;
	u16 control;

849 850
	if (!pci_msi_enable)
		return;
851 852 853 854 855
	if (!dev)
		return;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
856 857 858 859 860 861
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return;

862 863
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

L
Linus Torvalds 已提交
864
	temp = dev->irq;
865
	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
866
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
867 868
		unsigned long flags;

869
		irq = head = dev->irq;
870
		dev->irq = temp;			/* Restore pin IRQ */
L
Linus Torvalds 已提交
871
		while (head != tail) {
872
			spin_lock_irqsave(&msi_lock, flags);
873
			tail = msi_desc[irq]->link.tail;
874
			spin_unlock_irqrestore(&msi_lock, flags);
875
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
876
				warning = 1;
877 878 879
			else if (irq != head)	/* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
880
		}
881
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
882 883
		if (warning) {
			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
884
			       "free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
885 886 887 888 889 890 891
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
	}
}

/**
892
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
893 894
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
895
 * Being called during hotplug remove, from which the device function
896
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
897 898 899 900 901
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
902
	int pos, temp;
L
Linus Torvalds 已提交
903 904 905 906 907 908
	unsigned long flags;

	if (!pci_msi_enable || !dev)
 		return;

	temp = dev->irq;		/* Save IOAPIC IRQ */
909
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
910
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
911
		if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
912
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
913
			       "called without free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
914
			       pci_name(dev), dev->irq);
915
			BUG_ON(irq_has_action(dev->irq));
916 917
		} else /* Release MSI irq assigned to this device */
			msi_free_irq(dev, dev->irq);
L
Linus Torvalds 已提交
918 919
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
920
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
921 922
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
923 924
		void __iomem *base = NULL;

925
		irq = head = dev->irq;
L
Linus Torvalds 已提交
926 927
		while (head != tail) {
			spin_lock_irqsave(&msi_lock, flags);
928 929
			tail = msi_desc[irq]->link.tail;
			base = msi_desc[irq]->mask_base;
L
Linus Torvalds 已提交
930
			spin_unlock_irqrestore(&msi_lock, flags);
931
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
932
				warning = 1;
933 934 935
			else if (irq != head) /* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
936
		}
937
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
938 939 940
		if (warning) {
			iounmap(base);
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
941
			       "called without free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
942 943 944 945 946 947 948
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
}

949 950 951 952 953
/*
 * Globally switch MSI off by clearing pci_msi_enable.  From then on
 * pci_msi_supported() returns -EINVAL for every device and the disable and
 * remove paths in this file return early.
 */
void pci_no_msi(void)
{
	pci_msi_enable = 0;
}

L
Linus Torvalds 已提交
954 955 956 957
/* Enable/disable entry points exported for use outside this file. */
EXPORT_SYMBOL(pci_enable_msi);
EXPORT_SYMBOL(pci_disable_msi);
EXPORT_SYMBOL(pci_enable_msix);
EXPORT_SYMBOL(pci_disable_msix);