msi.c 23.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13 14 15 16 17
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/smp_lock.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
18
#include <linux/msi.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34

#include <asm/errno.h>
#include <asm/io.h>
#include <asm/smp.h>

#include "pci.h"
#include "msi.h"

/* Lock taken in this file when msi_desc[] is updated or walked. */
static DEFINE_SPINLOCK(msi_lock);
/* Descriptor table indexed by irq number; a NULL slot has no MSI entry attached. */
static struct msi_desc* msi_desc[NR_IRQS] = { [0 ... NR_IRQS-1] = NULL };
/* Slab cache from which struct msi_desc objects are allocated and freed. */
static kmem_cache_t* msi_cachep;

/* Global MSI switch; cleared by pci_no_msi() and when msi_init() fails. */
static int pci_msi_enable = 1;

static int msi_cache_init(void)
{
35 36
	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
L
Linus Torvalds 已提交
37 38 39 40 41 42
	if (!msi_cachep)
		return -ENOMEM;

	return 0;
}

43
static void msi_set_mask_bit(unsigned int irq, int flag)
L
Linus Torvalds 已提交
44 45 46
{
	struct msi_desc *entry;

47
	entry = msi_desc[irq];
48
	BUG_ON(!entry || !entry->dev);
L
Linus Torvalds 已提交
49 50
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
51 52 53 54 55 56 57 58 59 60
		if (entry->msi_attrib.maskbit) {
			int		pos;
			u32		mask_bits;

			pos = (long)entry->mask_base;
			pci_read_config_dword(entry->dev, pos, &mask_bits);
			mask_bits &= ~(1);
			mask_bits |= flag;
			pci_write_config_dword(entry->dev, pos, mask_bits);
		}
L
Linus Torvalds 已提交
61 62 63 64 65 66 67 68 69
		break;
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		writel(flag, entry->mask_base + offset);
		break;
	}
	default:
70
		BUG();
L
Linus Torvalds 已提交
71 72 73 74
		break;
	}
}

75
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
L
Linus Torvalds 已提交
76
{
77
	struct msi_desc *entry = get_irq_data(irq);
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
	switch(entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
 		break;
 	}
 	default:
		BUG();
	}
}
L
Linus Torvalds 已提交
113

114
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
115
{
116
	struct msi_desc *entry = get_irq_data(irq);
L
Linus Torvalds 已提交
117 118 119
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
120 121 122 123 124 125 126 127 128 129 130 131 132 133
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
L
Linus Torvalds 已提交
134 135 136 137
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
138 139 140 141 142 143 144 145 146
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
L
Linus Torvalds 已提交
147 148 149
		break;
	}
	default:
150
		BUG();
L
Linus Torvalds 已提交
151 152
	}
}
153

154
/* Mask the MSI/MSI-X entry attached to @irq (sets its mask bit to 1). */
void mask_msi_irq(unsigned int irq)
{
	const int masked = 1;

	msi_set_mask_bit(irq, masked);
}

159
/* Unmask the MSI/MSI-X entry attached to @irq (clears its mask bit). */
void unmask_msi_irq(unsigned int irq)
{
	const int unmasked = 0;

	msi_set_mask_bit(irq, unmasked);
}

164
/* Forward declaration: msix_capability_init() calls this before its definition. */
static int msi_free_irq(struct pci_dev* dev, int irq);
L
Linus Torvalds 已提交
165 166 167 168 169 170 171 172 173 174 175 176 177 178
/*
 * One-time MSI subsystem initialisation.  The result is remembered in a
 * function-local static: once it has succeeded, later calls return 0
 * immediately; after a failure every call retries.  On failure MSI is
 * globally disabled via pci_msi_enable.
 */
static int msi_init(void)
{
	static int init_result = -ENOMEM;

	if (init_result == 0)
		return 0;

	if (pci_msi_quirk) {
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI quirk detected. MSI disabled.\n");
		init_result = -EINVAL;
	} else {
		init_result = msi_cache_init();
		if (init_result < 0) {
			pci_msi_enable = 0;
			printk(KERN_WARNING "PCI: MSI cache init failed\n");
		}
	}

	return init_result;
}

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

193
	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
L
Linus Torvalds 已提交
194 195 196 197 198 199 200 201 202
	if (!entry)
		return NULL;

	entry->link.tail = entry->link.head = 0;	/* single message */
	entry->dev = NULL;

	return entry;
}

203
static void attach_msi_entry(struct msi_desc *entry, int irq)
L
Linus Torvalds 已提交
204 205 206 207
{
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
208
	msi_desc[irq] = entry;
L
Linus Torvalds 已提交
209 210 211
	spin_unlock_irqrestore(&msi_lock, flags);
}

212
static int create_msi_irq(void)
L
Linus Torvalds 已提交
213
{
214 215 216 217 218 219
	struct msi_desc *entry;
	int irq;

	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;
220

221 222 223 224
	irq = create_irq();
	if (irq < 0) {
		kmem_cache_free(msi_cachep, entry);
		return -EBUSY;
L
Linus Torvalds 已提交
225
	}
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

	set_irq_data(irq, entry);

	return irq;
}

/*
 * Undo create_msi_irq(): clear the irq's chip and data, destroy the irq
 * and free the descriptor that was stored as its irq data.
 */
static void destroy_msi_irq(unsigned int irq)
{
	struct msi_desc *desc = get_irq_data(irq);

	set_irq_chip(irq, NULL);
	set_irq_data(irq, NULL);
	destroy_irq(irq);
	kmem_cache_free(msi_cachep, desc);
}

static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_enable(control, 1);
		pci_write_config_word(dev, msi_control_reg(pos), control);
252
		dev->msi_enabled = 1;
L
Linus Torvalds 已提交
253 254 255
	} else {
		msix_enable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
256
		dev->msix_enabled = 1;
L
Linus Torvalds 已提交
257 258 259
	}
    	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
		/* PCI Express Endpoint device detected */
B
Brett M Russ 已提交
260
		pci_intx(dev, 0);  /* disable intx */
L
Linus Torvalds 已提交
261 262 263
	}
}

264
void disable_msi_mode(struct pci_dev *dev, int pos, int type)
L
Linus Torvalds 已提交
265 266 267 268 269 270 271 272
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
273
		dev->msi_enabled = 0;
L
Linus Torvalds 已提交
274 275 276
	} else {
		msix_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
277
		dev->msix_enabled = 0;
L
Linus Torvalds 已提交
278 279 280
	}
    	if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
		/* PCI Express Endpoint device detected */
B
Brett M Russ 已提交
281
		pci_intx(dev, 1);  /* enable intx */
L
Linus Torvalds 已提交
282 283 284
	}
}

285
static int msi_lookup_irq(struct pci_dev *dev, int type)
L
Linus Torvalds 已提交
286
{
287
	int irq;
L
Linus Torvalds 已提交
288 289 290
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
291 292 293 294
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
			msi_desc[irq]->msi_attrib.type != type ||
			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
L
Linus Torvalds 已提交
295 296
			continue;
		spin_unlock_irqrestore(&msi_lock, flags);
297 298 299
		/* This pre-assigned MSI irq for this device
		   already exits. Override dev->irq with this irq */
		dev->irq = irq;
L
Linus Torvalds 已提交
300 301 302 303 304 305 306 307 308 309 310 311 312
		return 0;
	}
	spin_unlock_irqrestore(&msi_lock, flags);

	return -EACCES;
}

void pci_scan_msi_device(struct pci_dev *dev)
{
	if (!dev)
		return;
}

313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
#ifdef CONFIG_PM
/*
 * Save the MSI capability registers of @dev so pci_restore_msi_state()
 * can write them back.  Nothing is saved (and 0 is returned) when the
 * device has no MSI capability, has no_msi set, or MSI is not enabled.
 * Returns -ENOMEM if the save buffer cannot be allocated.
 */
int pci_save_msi_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *state;
	u32 *regs;
	u16 control;
	int n = 0;
	int pos = pci_find_capability(dev, PCI_CAP_ID_MSI);

	if (pos <= 0 || dev->no_msi)
		return 0;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return 0;

	/* Room for up to five dwords: header, addr lo, addr hi, data, mask */
	state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5,
		GFP_KERNEL);
	if (!state) {
		printk(KERN_ERR "Out of memory in pci_save_msi_state\n");
		return -ENOMEM;
	}
	regs = &state->data[0];

	/* First dword holds the control word in its upper 16 bits */
	pci_read_config_dword(dev, pos, &regs[n++]);
	control = regs[0] >> 16;
	pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &regs[n++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &regs[n++]);
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &regs[n++]);
	} else {
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &regs[n++]);
	}
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &regs[n++]);

	state->cap_nr = PCI_CAP_ID_MSI;
	pci_add_saved_cap(dev, state);
	return 0;
}

/*
 * Write back the MSI registers saved by pci_save_msi_state(), re-enable
 * MSI mode, then drop and free the saved state.  Does nothing when there
 * is no saved state or the device has no MSI capability.
 */
void pci_restore_msi_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *state;
	u32 *regs;
	u16 control;
	int n = 0;
	int pos;

	state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI);
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!state || pos <= 0)
		return;
	regs = &state->data[0];

	/* Registers are replayed in the order they were saved */
	control = regs[n++] >> 16;
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, regs[n++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, regs[n++]);
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, regs[n++]);
	} else {
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, regs[n++]);
	}
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, regs[n++]);
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);

	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
	pci_remove_saved_cap(state);
	kfree(state);
}

int pci_save_msix_state(struct pci_dev *dev)
{
	int pos;
383
	int temp;
384
	int irq, head, tail = 0;
385 386 387 388 389 390 391
	u16 control;
	struct pci_cap_saved_state *save_state;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0 || dev->no_msi)
		return 0;

392
	/* save the capability */
393 394 395 396 397 398 399 400 401 402 403
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return 0;
	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16),
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msix_state\n");
		return -ENOMEM;
	}
	*((u16 *)&save_state->data[0]) = control;

404 405
	/* save the table */
	temp = dev->irq;
406
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
407 408 409 410
		kfree(save_state);
		return -EINVAL;
	}

411
	irq = head = dev->irq;
412 413 414
	while (head != tail) {
		struct msi_desc *entry;

415
		entry = msi_desc[irq];
416
		read_msi_msg(irq, &entry->msg_save);
417

418 419
		tail = msi_desc[irq]->link.tail;
		irq = tail;
420 421 422
	}
	dev->irq = temp;

423 424 425 426 427 428 429 430 431
	save_state->cap_nr = PCI_CAP_ID_MSIX;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msix_state(struct pci_dev *dev)
{
	u16 save;
	int pos;
432
	int irq, head, tail = 0;
433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
	struct msi_desc *entry;
	int temp;
	struct pci_cap_saved_state *save_state;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX);
	if (!save_state)
		return;
	save = *((u16 *)&save_state->data[0]);
	pci_remove_saved_cap(save_state);
	kfree(save_state);

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0)
		return;

	/* route the table */
	temp = dev->irq;
450
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
451
		return;
452
	irq = head = dev->irq;
453
	while (head != tail) {
454
		entry = msi_desc[irq];
455
		write_msi_msg(irq, &entry->msg_save);
456

457 458
		tail = msi_desc[irq]->link.tail;
		irq = tail;
459 460 461 462 463 464 465 466
	}
	dev->irq = temp;

	pci_write_config_word(dev, msi_control_reg(pos), save);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
}
#endif

L
Linus Torvalds 已提交
467 468 469 470
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
471
 * Setup the MSI capability structure of device function with a single
472
 * MSI irq, regardless of device function is capable of handling
L
Linus Torvalds 已提交
473
 * multiple messages. A return of zero indicates the successful setup
474
 * of an entry zero with the new MSI irq or non-zero for otherwise.
L
Linus Torvalds 已提交
475 476 477
 **/
static int msi_capability_init(struct pci_dev *dev)
{
478
	int status;
L
Linus Torvalds 已提交
479
	struct msi_desc *entry;
480
	int pos, irq;
L
Linus Torvalds 已提交
481 482 483 484 485
	u16 control;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
486
	irq = create_msi_irq();
487 488 489 490 491 492
	if (irq < 0)
		return irq;

	entry = get_irq_data(irq);
	entry->link.head = irq;
	entry->link.tail = irq;
L
Linus Torvalds 已提交
493
	entry->msi_attrib.type = PCI_CAP_ID_MSI;
494
	entry->msi_attrib.is_64 = is_64bit_address(control);
L
Linus Torvalds 已提交
495 496
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
497
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
498
	entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
499 500 501 502
	if (is_mask_bit_support(control)) {
		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
				is_64bit_address(control));
	}
503 504 505 506 507 508 509 510 511 512 513 514 515 516
	entry->dev = dev;
	if (entry->msi_attrib.maskbit) {
		unsigned int maskbits, temp;
		/* All MSIs are unmasked by default, Mask them all */
		pci_read_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			&maskbits);
		temp = (1 << multi_msi_capable(control));
		temp = ((temp - 1) & ~temp);
		maskbits |= temp;
		pci_write_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			maskbits);
	}
L
Linus Torvalds 已提交
517
	/* Configure MSI capability structure */
518 519
	status = arch_setup_msi_irq(irq, dev);
	if (status < 0) {
520
		destroy_msi_irq(irq);
521 522
		return status;
	}
523

524
	attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
525 526 527
	/* Set MSI enabled bits	 */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

528
	dev->irq = irq;
L
Linus Torvalds 已提交
529 530 531 532 533 534
	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
535 536
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
537
 *
538
 * Setup the MSI-X capability structure of device function with a
539 540
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
541 542 543 544 545
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
546
	int status;
547
	int irq, pos, i, j, nr_entries, temp = 0;
548 549
	unsigned long phys_addr;
	u32 table_offset;
L
Linus Torvalds 已提交
550 551 552 553 554 555 556 557
 	u16 control;
	u8 bir;
	void __iomem *base;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
558 559

 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
L
Linus Torvalds 已提交
560
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
561 562
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start (dev, bir) + table_offset;
L
Linus Torvalds 已提交
563 564 565 566 567 568
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
569
		irq = create_msi_irq();
570
		if (irq < 0)
L
Linus Torvalds 已提交
571 572
			break;

573
		entry = get_irq_data(irq);
L
Linus Torvalds 已提交
574
 		j = entries[i].entry;
575
 		entries[i].vector = irq;
L
Linus Torvalds 已提交
576
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
577
		entry->msi_attrib.is_64 = 1;
L
Linus Torvalds 已提交
578 579
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
580
		entry->msi_attrib.default_irq = dev->irq;
581
		entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
582 583 584
		entry->dev = dev;
		entry->mask_base = base;
		if (!head) {
585 586
			entry->link.head = irq;
			entry->link.tail = irq;
L
Linus Torvalds 已提交
587 588 589 590
			head = entry;
		} else {
			entry->link.head = temp;
			entry->link.tail = tail->link.tail;
591 592
			tail->link.tail = irq;
			head->link.head = irq;
L
Linus Torvalds 已提交
593
		}
594
		temp = irq;
L
Linus Torvalds 已提交
595 596
		tail = entry;
		/* Configure MSI-X capability structure */
597
		status = arch_setup_msi_irq(irq, dev);
598 599
		if (status < 0) {
			destroy_msi_irq(irq);
600
			break;
601
		}
602

603
		attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
604 605
	}
	if (i != nvec) {
606
		int avail = i - 1;
L
Linus Torvalds 已提交
607 608
		i--;
		for (; i >= 0; i--) {
609 610
			irq = (entries + i)->vector;
			msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
611 612
			(entries + i)->vector = 0;
		}
613 614 615 616 617 618
		/* If we had some success report the number of irqs
		 * we succeeded in setting up.
		 */
		if (avail <= 0)
			avail = -EBUSY;
		return avail;
L
Linus Torvalds 已提交
619 620 621 622 623 624 625
	}
	/* Set MSI-X enabled bits */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

	return 0;
}

626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
/**
 * pci_msi_supported - check whether MSI may be enabled on device
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * MSI must be globally enabled and supported by the device and its root
 * bus. The root bus is hard to identify because some architectures put
 * virtual busses on top of the PCI hierarchy (the hypertransport bus,
 * for instance) while the bus that really has to support MSI sits below.
 * So the NO_MSI flag is tested on every parent bus, on the assumption
 * that no quirk ever sets it on a non-root bus.
 *
 * Returns 0 if MSI may be used, -EINVAL otherwise.
 **/
static
int pci_msi_supported(struct pci_dev * dev)
{
	struct pci_bus *cur;

	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

	/* check MSI flags of all parent busses */
	cur = dev->bus;
	while (cur) {
		if (cur->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;
		cur = cur->parent;
	}

	return 0;
}

L
Linus Torvalds 已提交
653 654 655 656 657
/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with
658
 * a single MSI irq upon its software driver call to request for
L
Linus Torvalds 已提交
659 660
 * MSI mode enabled on its hardware device function. A return of zero
 * indicates the successful setup of an entry zero with the new MSI
661
 * irq or non-zero for otherwise.
L
Linus Torvalds 已提交
662 663 664
 **/
int pci_enable_msi(struct pci_dev* dev)
{
665
	int pos, temp, status;
L
Linus Torvalds 已提交
666

667 668
	if (pci_msi_supported(dev) < 0)
		return -EINVAL;
669

L
Linus Torvalds 已提交
670 671
	temp = dev->irq;

672 673
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
674 675
		return status;

676 677
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
678 679
		return -EINVAL;

680
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));
L
Linus Torvalds 已提交
681

682
	/* Check whether driver already requested for MSI-X irqs */
683
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
684
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
L
Linus Torvalds 已提交
685
			printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
686
			       "Device already has MSI-X irq assigned\n",
L
Linus Torvalds 已提交
687 688 689 690 691 692 693 694 695 696 697
			       pci_name(dev));
			dev->irq = temp;
			return -EINVAL;
	}
	status = msi_capability_init(dev);
	return status;
}

void pci_disable_msi(struct pci_dev* dev)
{
	struct msi_desc *entry;
698
	int pos, default_irq;
L
Linus Torvalds 已提交
699 700 701
	u16 control;
	unsigned long flags;

702 703
	if (!pci_msi_enable)
		return;
704 705
	if (!dev)
		return;
706

707 708
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
709 710 711 712 713 714
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return;

715 716
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

L
Linus Torvalds 已提交
717 718 719 720 721 722
	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[dev->irq];
	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return;
	}
723
	if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
724 725
		spin_unlock_irqrestore(&msi_lock, flags);
		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
726
		       "free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
727
		       pci_name(dev), dev->irq);
728
		BUG_ON(irq_has_action(dev->irq));
L
Linus Torvalds 已提交
729
	} else {
730
		default_irq = entry->msi_attrib.default_irq;
L
Linus Torvalds 已提交
731
		spin_unlock_irqrestore(&msi_lock, flags);
732
		msi_free_irq(dev, dev->irq);
733

734 735
		/* Restore dev->irq to its default pin-assertion irq */
		dev->irq = default_irq;
L
Linus Torvalds 已提交
736 737 738
	}
}

739
static int msi_free_irq(struct pci_dev* dev, int irq)
L
Linus Torvalds 已提交
740 741 742 743 744 745
{
	struct msi_desc *entry;
	int head, entry_nr, type;
	void __iomem *base;
	unsigned long flags;

746
	arch_teardown_msi_irq(irq);
747

L
Linus Torvalds 已提交
748
	spin_lock_irqsave(&msi_lock, flags);
749
	entry = msi_desc[irq];
L
Linus Torvalds 已提交
750 751 752 753 754 755 756 757 758 759 760
	if (!entry || entry->dev != dev) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return -EINVAL;
	}
	type = entry->msi_attrib.type;
	entry_nr = entry->msi_attrib.entry_nr;
	head = entry->link.head;
	base = entry->mask_base;
	msi_desc[entry->link.head]->link.tail = entry->link.tail;
	msi_desc[entry->link.tail]->link.head = entry->link.head;
	entry->dev = NULL;
761
	msi_desc[irq] = NULL;
L
Linus Torvalds 已提交
762 763
	spin_unlock_irqrestore(&msi_lock, flags);

764
	destroy_msi_irq(irq);
L
Linus Torvalds 已提交
765 766

	if (type == PCI_CAP_ID_MSIX) {
767 768
		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
L
Linus Torvalds 已提交
769

770
		if (head == irq)
L
Linus Torvalds 已提交
771 772 773 774 775 776 777 778 779
			iounmap(base);
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
780
 * @entries: pointer to an array of MSI-X entries
781
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
782 783
 *
 * Setup the MSI-X capability structure of device function with the number
784
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
785 786
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
787
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
788
 * Or a return of > 0 indicates that driver request is exceeding the number
789
 * of irqs available. Driver should use the returned value to re-send
L
Linus Torvalds 已提交
790 791 792 793
 * its request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
794
	int status, pos, nr_entries;
L
Linus Torvalds 已提交
795 796 797
	int i, j, temp;
	u16 control;

798
	if (!entries || pci_msi_supported(dev) < 0)
L
Linus Torvalds 已提交
799 800
 		return -EINVAL;

801 802
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
803 804
		return status;

805 806
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
 		return -EINVAL;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	temp = dev->irq;
824
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));
825

826
	/* Check whether driver already requested for MSI irq */
L
Linus Torvalds 已提交
827
   	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
828
		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
L
Linus Torvalds 已提交
829
		printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
830
		       "Device already has an MSI irq assigned\n",
L
Linus Torvalds 已提交
831 832 833 834 835 836 837 838 839 840 841 842 843
		       pci_name(dev));
		dev->irq = temp;
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}

void pci_disable_msix(struct pci_dev* dev)
{
	int pos, temp;
	u16 control;

844 845
	if (!pci_msi_enable)
		return;
846 847 848 849 850
	if (!dev)
		return;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
851 852 853 854 855 856
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return;

857 858
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

L
Linus Torvalds 已提交
859
	temp = dev->irq;
860
	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
861
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
862 863
		unsigned long flags;

864
		irq = head = dev->irq;
865
		dev->irq = temp;			/* Restore pin IRQ */
L
Linus Torvalds 已提交
866
		while (head != tail) {
867
			spin_lock_irqsave(&msi_lock, flags);
868
			tail = msi_desc[irq]->link.tail;
869
			spin_unlock_irqrestore(&msi_lock, flags);
870
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
871
				warning = 1;
872 873 874
			else if (irq != head)	/* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
875
		}
876
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
877 878
		if (warning) {
			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
879
			       "free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
880 881 882 883 884 885 886
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
	}
}

/**
887
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
888 889
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
890
 * Being called during hotplug remove, from which the device function
891
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
892 893 894 895 896
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
897
	int pos, temp;
L
Linus Torvalds 已提交
898 899 900 901 902 903
	unsigned long flags;

	if (!pci_msi_enable || !dev)
 		return;

	temp = dev->irq;		/* Save IOAPIC IRQ */
904
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
905
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
906
		if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
907
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
908
			       "called without free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
909
			       pci_name(dev), dev->irq);
910
			BUG_ON(irq_has_action(dev->irq));
911 912
		} else /* Release MSI irq assigned to this device */
			msi_free_irq(dev, dev->irq);
L
Linus Torvalds 已提交
913 914
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
915
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
916 917
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
918 919
		void __iomem *base = NULL;

920
		irq = head = dev->irq;
L
Linus Torvalds 已提交
921 922
		while (head != tail) {
			spin_lock_irqsave(&msi_lock, flags);
923 924
			tail = msi_desc[irq]->link.tail;
			base = msi_desc[irq]->mask_base;
L
Linus Torvalds 已提交
925
			spin_unlock_irqrestore(&msi_lock, flags);
926
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
927
				warning = 1;
928 929 930
			else if (irq != head) /* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
931
		}
932
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
933 934 935
		if (warning) {
			iounmap(base);
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
936
			       "called without free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
937 938 939 940 941 942 943
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
}

944 945 946 947 948
/* Globally turn MSI off by clearing the pci_msi_enable switch. */
void pci_no_msi(void)
{
	const int disabled = 0;

	pci_msi_enable = disabled;
}

L
Linus Torvalds 已提交
949 950 951 952
/* Enable/disable entry points exported for use outside this file. */
EXPORT_SYMBOL(pci_enable_msi);
EXPORT_SYMBOL(pci_disable_msi);
EXPORT_SYMBOL(pci_enable_msix);
EXPORT_SYMBOL(pci_disable_msix);