msi.c 23.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8
/*
 * File:	msi.c
 * Purpose:	PCI Message Signaled Interrupt (MSI)
 *
 * Copyright (C) 2003-2004 Intel
 * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
 */

9
#include <linux/err.h>
L
Linus Torvalds 已提交
10 11 12 13 14 15 16 17
#include <linux/mm.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/ioport.h>
#include <linux/smp_lock.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
18
#include <linux/msi.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26 27 28

#include <asm/errno.h>
#include <asm/io.h>
#include <asm/smp.h>

#include "pci.h"
#include "msi.h"

/* Taken around lookups and updates of msi_desc[] in this file. */
static DEFINE_SPINLOCK(msi_lock);

/* Per-irq table of attached MSI descriptors; NULL means none attached. */
static struct msi_desc *msi_desc[NR_IRQS];

/* Slab cache backing struct msi_desc, created by msi_cache_init(). */
static struct kmem_cache *msi_cachep;

/* Non-zero while MSI may be used; cleared to turn MSI off globally. */
static int pci_msi_enable = 1;

static int msi_cache_init(void)
{
35 36
	msi_cachep = kmem_cache_create("msi_cache", sizeof(struct msi_desc),
					0, SLAB_HWCACHE_ALIGN, NULL, NULL);
L
Linus Torvalds 已提交
37 38 39 40 41 42
	if (!msi_cachep)
		return -ENOMEM;

	return 0;
}

43
static void msi_set_mask_bit(unsigned int irq, int flag)
L
Linus Torvalds 已提交
44 45 46
{
	struct msi_desc *entry;

47
	entry = msi_desc[irq];
48
	BUG_ON(!entry || !entry->dev);
L
Linus Torvalds 已提交
49 50
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
51
		if (entry->msi_attrib.maskbit) {
S
Satoru Takeuchi 已提交
52 53
			int pos;
			u32 mask_bits;
54 55 56 57 58 59 60

			pos = (long)entry->mask_base;
			pci_read_config_dword(entry->dev, pos, &mask_bits);
			mask_bits &= ~(1);
			mask_bits |= flag;
			pci_write_config_dword(entry->dev, pos, mask_bits);
		}
L
Linus Torvalds 已提交
61 62 63 64 65 66 67 68 69
		break;
	case PCI_CAP_ID_MSIX:
	{
		int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
		writel(flag, entry->mask_base + offset);
		break;
	}
	default:
70
		BUG();
L
Linus Torvalds 已提交
71 72 73 74
		break;
	}
}

75
void read_msi_msg(unsigned int irq, struct msi_msg *msg)
L
Linus Torvalds 已提交
76
{
77
	struct msi_desc *entry = get_irq_data(irq);
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112
	switch(entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;
		u16 data;

		pci_read_config_dword(dev, msi_lower_address_reg(pos),
					&msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_read_config_dword(dev, msi_upper_address_reg(pos),
						&msg->address_hi);
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		} else {
			msg->address_hi = 0;
			pci_read_config_word(dev, msi_data_reg(pos, 1), &data);
		}
		msg->data = data;
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		msg->data = readl(base + PCI_MSIX_ENTRY_DATA_OFFSET);
 		break;
 	}
 	default:
		BUG();
	}
}
L
Linus Torvalds 已提交
113

114
void write_msi_msg(unsigned int irq, struct msi_msg *msg)
115
{
116
	struct msi_desc *entry = get_irq_data(irq);
L
Linus Torvalds 已提交
117 118 119
	switch (entry->msi_attrib.type) {
	case PCI_CAP_ID_MSI:
	{
120 121 122 123 124 125 126 127 128 129 130 131 132 133
		struct pci_dev *dev = entry->dev;
		int pos = entry->msi_attrib.pos;

		pci_write_config_dword(dev, msi_lower_address_reg(pos),
					msg->address_lo);
		if (entry->msi_attrib.is_64) {
			pci_write_config_dword(dev, msi_upper_address_reg(pos),
						msg->address_hi);
			pci_write_config_word(dev, msi_data_reg(pos, 1),
						msg->data);
		} else {
			pci_write_config_word(dev, msi_data_reg(pos, 0),
						msg->data);
		}
L
Linus Torvalds 已提交
134 135 136 137
		break;
	}
	case PCI_CAP_ID_MSIX:
	{
138 139 140 141 142 143 144 145 146
		void __iomem *base;
		base = entry->mask_base +
			entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;

		writel(msg->address_lo,
			base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
		writel(msg->address_hi,
			base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
		writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
L
Linus Torvalds 已提交
147 148 149
		break;
	}
	default:
150
		BUG();
L
Linus Torvalds 已提交
151 152
	}
}
153

154
/* Mask the MSI/MSI-X entry attached to @irq (sets its mask bit to 1). */
void mask_msi_irq(unsigned int irq)
{
	const int masked = 1;

	msi_set_mask_bit(irq, masked);
}

159
/* Unmask the MSI/MSI-X entry attached to @irq (clears its mask bit). */
void unmask_msi_irq(unsigned int irq)
{
	const int unmasked = 0;

	msi_set_mask_bit(irq, unmasked);
}

164
static int msi_free_irq(struct pci_dev* dev, int irq);
S
Satoru Takeuchi 已提交
165

L
Linus Torvalds 已提交
166 167 168 169 170 171 172
static int msi_init(void)
{
	static int status = -ENOMEM;

	if (!status)
		return status;

173 174
	status = msi_cache_init();
	if (status < 0) {
L
Linus Torvalds 已提交
175 176 177 178
		pci_msi_enable = 0;
		printk(KERN_WARNING "PCI: MSI cache init failed\n");
		return status;
	}
179

L
Linus Torvalds 已提交
180 181 182 183 184 185 186
	return status;
}

static struct msi_desc* alloc_msi_entry(void)
{
	struct msi_desc *entry;

187
	entry = kmem_cache_zalloc(msi_cachep, GFP_KERNEL);
L
Linus Torvalds 已提交
188 189 190 191 192 193 194 195 196
	if (!entry)
		return NULL;

	entry->link.tail = entry->link.head = 0;	/* single message */
	entry->dev = NULL;

	return entry;
}

197
static void attach_msi_entry(struct msi_desc *entry, int irq)
L
Linus Torvalds 已提交
198 199 200 201
{
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
202
	msi_desc[irq] = entry;
L
Linus Torvalds 已提交
203 204 205
	spin_unlock_irqrestore(&msi_lock, flags);
}

206
static int create_msi_irq(void)
L
Linus Torvalds 已提交
207
{
208 209 210 211 212 213
	struct msi_desc *entry;
	int irq;

	entry = alloc_msi_entry();
	if (!entry)
		return -ENOMEM;
214

215 216 217 218
	irq = create_irq();
	if (irq < 0) {
		kmem_cache_free(msi_cachep, entry);
		return -EBUSY;
L
Linus Torvalds 已提交
219
	}
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234

	set_irq_data(irq, entry);

	return irq;
}

/*
 * Undo create_msi_irq(): clear the irq's chip and data, destroy the irq
 * and free the descriptor that was stored as its data.
 */
static void destroy_msi_irq(unsigned int irq)
{
	struct msi_desc *desc = get_irq_data(irq);

	set_irq_chip(irq, NULL);
	set_irq_data(irq, NULL);
	destroy_irq(irq);
	kmem_cache_free(msi_cachep, desc);
}

static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_enable(control, 1);
		pci_write_config_word(dev, msi_control_reg(pos), control);
246
		dev->msi_enabled = 1;
L
Linus Torvalds 已提交
247 248 249
	} else {
		msix_enable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
250
		dev->msix_enabled = 1;
L
Linus Torvalds 已提交
251
	}
252 253

	pci_intx(dev, 0);  /* disable intx */
L
Linus Torvalds 已提交
254 255
}

256
void disable_msi_mode(struct pci_dev *dev, int pos, int type)
L
Linus Torvalds 已提交
257 258 259 260 261 262 263 264
{
	u16 control;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (type == PCI_CAP_ID_MSI) {
		/* Set enabled bits to single MSI & enable MSI_enable bit */
		msi_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
265
		dev->msi_enabled = 0;
L
Linus Torvalds 已提交
266 267 268
	} else {
		msix_disable(control);
		pci_write_config_word(dev, msi_control_reg(pos), control);
269
		dev->msix_enabled = 0;
L
Linus Torvalds 已提交
270
	}
271 272

	pci_intx(dev, 1);  /* enable intx */
L
Linus Torvalds 已提交
273 274
}

275
static int msi_lookup_irq(struct pci_dev *dev, int type)
L
Linus Torvalds 已提交
276
{
277
	int irq;
L
Linus Torvalds 已提交
278 279 280
	unsigned long flags;

	spin_lock_irqsave(&msi_lock, flags);
281 282 283 284
	for (irq = 0; irq < NR_IRQS; irq++) {
		if (!msi_desc[irq] || msi_desc[irq]->dev != dev ||
			msi_desc[irq]->msi_attrib.type != type ||
			msi_desc[irq]->msi_attrib.default_irq != dev->irq)
L
Linus Torvalds 已提交
285 286
			continue;
		spin_unlock_irqrestore(&msi_lock, flags);
287
		/* This pre-assigned MSI irq for this device
S
Satoru Takeuchi 已提交
288
		   already exists. Override dev->irq with this irq */
289
		dev->irq = irq;
L
Linus Torvalds 已提交
290 291 292 293 294 295 296 297 298 299 300 301 302
		return 0;
	}
	spin_unlock_irqrestore(&msi_lock, flags);

	return -EACCES;
}

void pci_scan_msi_device(struct pci_dev *dev)
{
	if (!dev)
		return;
}

303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
#ifdef CONFIG_PM
int pci_save_msi_state(struct pci_dev *dev)
{
	int pos, i = 0;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (pos <= 0 || dev->no_msi)
		return 0;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return 0;

	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u32) * 5,
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msi_state\n");
		return -ENOMEM;
	}
	cap = &save_state->data[0];

	pci_read_config_dword(dev, pos, &cap[i++]);
	control = cap[0] >> 16;
	pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, &cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_read_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, &cap[i++]);
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_64, &cap[i++]);
	} else
		pci_read_config_dword(dev, pos + PCI_MSI_DATA_32, &cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_read_config_dword(dev, pos + PCI_MSI_MASK_BIT, &cap[i++]);
	save_state->cap_nr = PCI_CAP_ID_MSI;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msi_state(struct pci_dev *dev)
{
	int i = 0, pos;
	u16 control;
	struct pci_cap_saved_state *save_state;
	u32 *cap;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSI);
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!save_state || pos <= 0)
		return;
	cap = &save_state->data[0];

	control = cap[i++] >> 16;
	pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, cap[i++]);
	if (control & PCI_MSI_FLAGS_64BIT) {
		pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, cap[i++]);
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_64, cap[i++]);
	} else
		pci_write_config_dword(dev, pos + PCI_MSI_DATA_32, cap[i++]);
	if (control & PCI_MSI_FLAGS_MASKBIT)
		pci_write_config_dword(dev, pos + PCI_MSI_MASK_BIT, cap[i++]);
	pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
	pci_remove_saved_cap(save_state);
	kfree(save_state);
}

int pci_save_msix_state(struct pci_dev *dev)
{
	int pos;
373
	int temp;
374
	int irq, head, tail = 0;
375 376 377 378 379 380 381
	u16 control;
	struct pci_cap_saved_state *save_state;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0 || dev->no_msi)
		return 0;

382
	/* save the capability */
383 384 385 386 387 388 389 390 391 392 393
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return 0;
	save_state = kzalloc(sizeof(struct pci_cap_saved_state) + sizeof(u16),
		GFP_KERNEL);
	if (!save_state) {
		printk(KERN_ERR "Out of memory in pci_save_msix_state\n");
		return -ENOMEM;
	}
	*((u16 *)&save_state->data[0]) = control;

394 395
	/* save the table */
	temp = dev->irq;
396
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
397 398 399 400
		kfree(save_state);
		return -EINVAL;
	}

401
	irq = head = dev->irq;
402 403 404
	while (head != tail) {
		struct msi_desc *entry;

405
		entry = msi_desc[irq];
406
		read_msi_msg(irq, &entry->msg_save);
407

408 409
		tail = msi_desc[irq]->link.tail;
		irq = tail;
410 411 412
	}
	dev->irq = temp;

413 414 415 416 417 418 419 420 421
	save_state->cap_nr = PCI_CAP_ID_MSIX;
	pci_add_saved_cap(dev, save_state);
	return 0;
}

void pci_restore_msix_state(struct pci_dev *dev)
{
	u16 save;
	int pos;
422
	int irq, head, tail = 0;
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
	struct msi_desc *entry;
	int temp;
	struct pci_cap_saved_state *save_state;

	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_MSIX);
	if (!save_state)
		return;
	save = *((u16 *)&save_state->data[0]);
	pci_remove_saved_cap(save_state);
	kfree(save_state);

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (pos <= 0)
		return;

	/* route the table */
	temp = dev->irq;
440
	if (msi_lookup_irq(dev, PCI_CAP_ID_MSIX))
441
		return;
442
	irq = head = dev->irq;
443
	while (head != tail) {
444
		entry = msi_desc[irq];
445
		write_msi_msg(irq, &entry->msg_save);
446

447 448
		tail = msi_desc[irq]->link.tail;
		irq = tail;
449 450 451 452 453 454
	}
	dev->irq = temp;

	pci_write_config_word(dev, msi_control_reg(pos), save);
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
}
S
Satoru Takeuchi 已提交
455
#endif	/* CONFIG_PM */
456

L
Linus Torvalds 已提交
457 458 459 460
/**
 * msi_capability_init - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
461
 * Setup the MSI capability structure of device function with a single
462
 * MSI irq, regardless of device function is capable of handling
L
Linus Torvalds 已提交
463
 * multiple messages. A return of zero indicates the successful setup
464
 * of an entry zero with the new MSI irq or non-zero for otherwise.
L
Linus Torvalds 已提交
465 466 467
 **/
static int msi_capability_init(struct pci_dev *dev)
{
468
	int status;
L
Linus Torvalds 已提交
469
	struct msi_desc *entry;
470
	int pos, irq;
L
Linus Torvalds 已提交
471 472 473 474 475
	u16 control;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	pci_read_config_word(dev, msi_control_reg(pos), &control);
	/* MSI Entry Initialization */
476
	irq = create_msi_irq();
477 478 479 480 481 482
	if (irq < 0)
		return irq;

	entry = get_irq_data(irq);
	entry->link.head = irq;
	entry->link.tail = irq;
L
Linus Torvalds 已提交
483
	entry->msi_attrib.type = PCI_CAP_ID_MSI;
484
	entry->msi_attrib.is_64 = is_64bit_address(control);
L
Linus Torvalds 已提交
485 486
	entry->msi_attrib.entry_nr = 0;
	entry->msi_attrib.maskbit = is_mask_bit_support(control);
487
	entry->msi_attrib.default_irq = dev->irq;	/* Save IOAPIC IRQ */
488
	entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
489 490 491 492
	if (is_mask_bit_support(control)) {
		entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
				is_64bit_address(control));
	}
493 494 495 496 497 498 499 500 501 502 503 504 505 506
	entry->dev = dev;
	if (entry->msi_attrib.maskbit) {
		unsigned int maskbits, temp;
		/* All MSIs are unmasked by default, Mask them all */
		pci_read_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			&maskbits);
		temp = (1 << multi_msi_capable(control));
		temp = ((temp - 1) & ~temp);
		maskbits |= temp;
		pci_write_config_dword(dev,
			msi_mask_bits_reg(pos, is_64bit_address(control)),
			maskbits);
	}
L
Linus Torvalds 已提交
507
	/* Configure MSI capability structure */
508 509
	status = arch_setup_msi_irq(irq, dev);
	if (status < 0) {
510
		destroy_msi_irq(irq);
511 512
		return status;
	}
513

514
	attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
515 516 517
	/* Set MSI enabled bits	 */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

518
	dev->irq = irq;
L
Linus Torvalds 已提交
519 520 521 522 523 524
	return 0;
}

/**
 * msix_capability_init - configure device's MSI-X capability
 * @dev: pointer to the pci_dev data structure of MSI-X device function
R
Randy Dunlap 已提交
525 526
 * @entries: pointer to an array of struct msix_entry entries
 * @nvec: number of @entries
L
Linus Torvalds 已提交
527
 *
528
 * Setup the MSI-X capability structure of device function with a
529 530
 * single MSI-X irq. A return of zero indicates the successful setup of
 * requested MSI-X entries with allocated irqs or non-zero for otherwise.
L
Linus Torvalds 已提交
531 532 533 534 535
 **/
static int msix_capability_init(struct pci_dev *dev,
				struct msix_entry *entries, int nvec)
{
	struct msi_desc *head = NULL, *tail = NULL, *entry = NULL;
536
	int status;
537
	int irq, pos, i, j, nr_entries, temp = 0;
538 539
	unsigned long phys_addr;
	u32 table_offset;
L
Linus Torvalds 已提交
540 541 542 543 544 545 546 547
 	u16 control;
	u8 bir;
	void __iomem *base;

   	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	/* Request & Map MSI-X table region */
 	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
548 549

 	pci_read_config_dword(dev, msix_table_offset_reg(pos), &table_offset);
L
Linus Torvalds 已提交
550
	bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
551 552
	table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
	phys_addr = pci_resource_start (dev, bir) + table_offset;
L
Linus Torvalds 已提交
553 554 555 556 557 558
	base = ioremap_nocache(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE);
	if (base == NULL)
		return -ENOMEM;

	/* MSI-X Table Initialization */
	for (i = 0; i < nvec; i++) {
559
		irq = create_msi_irq();
560
		if (irq < 0)
L
Linus Torvalds 已提交
561 562
			break;

563
		entry = get_irq_data(irq);
L
Linus Torvalds 已提交
564
 		j = entries[i].entry;
565
 		entries[i].vector = irq;
L
Linus Torvalds 已提交
566
		entry->msi_attrib.type = PCI_CAP_ID_MSIX;
567
		entry->msi_attrib.is_64 = 1;
L
Linus Torvalds 已提交
568 569
		entry->msi_attrib.entry_nr = j;
		entry->msi_attrib.maskbit = 1;
570
		entry->msi_attrib.default_irq = dev->irq;
571
		entry->msi_attrib.pos = pos;
L
Linus Torvalds 已提交
572 573 574
		entry->dev = dev;
		entry->mask_base = base;
		if (!head) {
575 576
			entry->link.head = irq;
			entry->link.tail = irq;
L
Linus Torvalds 已提交
577 578 579 580
			head = entry;
		} else {
			entry->link.head = temp;
			entry->link.tail = tail->link.tail;
581 582
			tail->link.tail = irq;
			head->link.head = irq;
L
Linus Torvalds 已提交
583
		}
584
		temp = irq;
L
Linus Torvalds 已提交
585 586
		tail = entry;
		/* Configure MSI-X capability structure */
587
		status = arch_setup_msi_irq(irq, dev);
588 589
		if (status < 0) {
			destroy_msi_irq(irq);
590
			break;
591
		}
592

593
		attach_msi_entry(entry, irq);
L
Linus Torvalds 已提交
594 595
	}
	if (i != nvec) {
596
		int avail = i - 1;
L
Linus Torvalds 已提交
597 598
		i--;
		for (; i >= 0; i--) {
599 600
			irq = (entries + i)->vector;
			msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
601 602
			(entries + i)->vector = 0;
		}
603 604 605 606 607 608
		/* If we had some success report the number of irqs
		 * we succeeded in setting up.
		 */
		if (avail <= 0)
			avail = -EBUSY;
		return avail;
L
Linus Torvalds 已提交
609 610 611 612 613 614 615
	}
	/* Set MSI-X enabled bits */
	enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

	return 0;
}

616 617 618 619
/**
 * pci_msi_supported - check whether MSI may be enabled on device
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
620 621
 * Look at global flags, the device itself, and its parent busses
 * to return 0 if MSI are supported for the device.
622 623 624 625 626 627
 **/
static
int pci_msi_supported(struct pci_dev * dev)
{
	struct pci_bus *bus;

628
	/* MSI must be globally enabled and supported by the device */
629 630 631
	if (!pci_msi_enable || !dev || dev->no_msi)
		return -EINVAL;

632 633 634 635 636 637
	/* Any bridge which does NOT route MSI transactions from it's
	 * secondary bus to it's primary bus must set NO_MSI flag on
	 * the secondary pci_bus.
	 * We expect only arch-specific PCI host bus controller driver
	 * or quirks for specific PCI bridges to be setting NO_MSI.
	 */
638 639 640 641 642 643 644
	for (bus = dev->bus; bus; bus = bus->parent)
		if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
			return -EINVAL;

	return 0;
}

L
Linus Torvalds 已提交
645 646 647 648 649
/**
 * pci_enable_msi - configure device's MSI capability structure
 * @dev: pointer to the pci_dev data structure of MSI device function
 *
 * Setup the MSI capability structure of device function with
650
 * a single MSI irq upon its software driver call to request for
L
Linus Torvalds 已提交
651 652
 * MSI mode enabled on its hardware device function. A return of zero
 * indicates the successful setup of an entry zero with the new MSI
653
 * irq or non-zero for otherwise.
L
Linus Torvalds 已提交
654 655 656
 **/
int pci_enable_msi(struct pci_dev* dev)
{
657
	int pos, temp, status;
L
Linus Torvalds 已提交
658

659 660
	if (pci_msi_supported(dev) < 0)
		return -EINVAL;
661

L
Linus Torvalds 已提交
662 663
	temp = dev->irq;

664 665
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
666 667
		return status;

668 669
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
670 671
		return -EINVAL;

672
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSI));
L
Linus Torvalds 已提交
673

674
	/* Check whether driver already requested for MSI-X irqs */
675
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
676
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
L
Linus Torvalds 已提交
677
			printk(KERN_INFO "PCI: %s: Can't enable MSI.  "
678
			       "Device already has MSI-X irq assigned\n",
L
Linus Torvalds 已提交
679 680 681 682 683 684 685 686 687 688 689
			       pci_name(dev));
			dev->irq = temp;
			return -EINVAL;
	}
	status = msi_capability_init(dev);
	return status;
}

void pci_disable_msi(struct pci_dev* dev)
{
	struct msi_desc *entry;
690
	int pos, default_irq;
L
Linus Torvalds 已提交
691 692 693
	u16 control;
	unsigned long flags;

694 695
	if (!pci_msi_enable)
		return;
696 697
	if (!dev)
		return;
698

699 700
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
	if (!pos)
L
Linus Torvalds 已提交
701 702 703 704 705 706
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSI_FLAGS_ENABLE))
		return;

707 708
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSI);

L
Linus Torvalds 已提交
709 710 711 712 713 714
	spin_lock_irqsave(&msi_lock, flags);
	entry = msi_desc[dev->irq];
	if (!entry || !entry->dev || entry->msi_attrib.type != PCI_CAP_ID_MSI) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return;
	}
715
	if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
716 717
		spin_unlock_irqrestore(&msi_lock, flags);
		printk(KERN_WARNING "PCI: %s: pci_disable_msi() called without "
718
		       "free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
719
		       pci_name(dev), dev->irq);
720
		BUG_ON(irq_has_action(dev->irq));
L
Linus Torvalds 已提交
721
	} else {
722
		default_irq = entry->msi_attrib.default_irq;
L
Linus Torvalds 已提交
723
		spin_unlock_irqrestore(&msi_lock, flags);
724
		msi_free_irq(dev, dev->irq);
725

726 727
		/* Restore dev->irq to its default pin-assertion irq */
		dev->irq = default_irq;
L
Linus Torvalds 已提交
728 729 730
	}
}

731
static int msi_free_irq(struct pci_dev* dev, int irq)
L
Linus Torvalds 已提交
732 733 734 735 736 737
{
	struct msi_desc *entry;
	int head, entry_nr, type;
	void __iomem *base;
	unsigned long flags;

738
	arch_teardown_msi_irq(irq);
739

L
Linus Torvalds 已提交
740
	spin_lock_irqsave(&msi_lock, flags);
741
	entry = msi_desc[irq];
L
Linus Torvalds 已提交
742 743 744 745 746 747 748 749 750 751 752
	if (!entry || entry->dev != dev) {
		spin_unlock_irqrestore(&msi_lock, flags);
		return -EINVAL;
	}
	type = entry->msi_attrib.type;
	entry_nr = entry->msi_attrib.entry_nr;
	head = entry->link.head;
	base = entry->mask_base;
	msi_desc[entry->link.head]->link.tail = entry->link.tail;
	msi_desc[entry->link.tail]->link.head = entry->link.head;
	entry->dev = NULL;
753
	msi_desc[irq] = NULL;
L
Linus Torvalds 已提交
754 755
	spin_unlock_irqrestore(&msi_lock, flags);

756
	destroy_msi_irq(irq);
L
Linus Torvalds 已提交
757 758

	if (type == PCI_CAP_ID_MSIX) {
759 760
		writel(1, base + entry_nr * PCI_MSIX_ENTRY_SIZE +
			PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
L
Linus Torvalds 已提交
761

762
		if (head == irq)
L
Linus Torvalds 已提交
763 764 765 766 767 768 769 770 771
			iounmap(base);
	}

	return 0;
}

/**
 * pci_enable_msix - configure device's MSI-X capability structure
 * @dev: pointer to the pci_dev data structure of MSI-X device function
772
 * @entries: pointer to an array of MSI-X entries
773
 * @nvec: number of MSI-X irqs requested for allocation by device driver
L
Linus Torvalds 已提交
774 775
 *
 * Setup the MSI-X capability structure of device function with the number
776
 * of requested irqs upon its software driver call to request for
L
Linus Torvalds 已提交
777 778
 * MSI-X mode enabled on its hardware device function. A return of zero
 * indicates the successful configuration of MSI-X capability structure
779
 * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
L
Linus Torvalds 已提交
780
 * Or a return of > 0 indicates that driver request is exceeding the number
781
 * of irqs available. Driver should use the returned value to re-send
L
Linus Torvalds 已提交
782 783 784 785
 * its request.
 **/
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
{
786
	int status, pos, nr_entries;
L
Linus Torvalds 已提交
787 788 789
	int i, j, temp;
	u16 control;

790
	if (!entries || pci_msi_supported(dev) < 0)
L
Linus Torvalds 已提交
791 792
 		return -EINVAL;

793 794
	status = msi_init();
	if (status < 0)
L
Linus Torvalds 已提交
795 796
		return status;

797 798
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815
 		return -EINVAL;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	nr_entries = multi_msix_capable(control);
	if (nvec > nr_entries)
		return -EINVAL;

	/* Check for any invalid entries */
	for (i = 0; i < nvec; i++) {
		if (entries[i].entry >= nr_entries)
			return -EINVAL;		/* invalid entry */
		for (j = i + 1; j < nvec; j++) {
			if (entries[i].entry == entries[j].entry)
				return -EINVAL;	/* duplicate entry */
		}
	}
	temp = dev->irq;
816
	WARN_ON(!msi_lookup_irq(dev, PCI_CAP_ID_MSIX));
817

818
	/* Check whether driver already requested for MSI irq */
L
Linus Torvalds 已提交
819
   	if (pci_find_capability(dev, PCI_CAP_ID_MSI) > 0 &&
820
		!msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
L
Linus Torvalds 已提交
821
		printk(KERN_INFO "PCI: %s: Can't enable MSI-X.  "
822
		       "Device already has an MSI irq assigned\n",
L
Linus Torvalds 已提交
823 824 825 826 827 828 829 830 831 832 833 834 835
		       pci_name(dev));
		dev->irq = temp;
		return -EINVAL;
	}
	status = msix_capability_init(dev, entries, nvec);
	return status;
}

void pci_disable_msix(struct pci_dev* dev)
{
	int pos, temp;
	u16 control;

836 837
	if (!pci_msi_enable)
		return;
838 839 840 841 842
	if (!dev)
		return;

	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
	if (!pos)
L
Linus Torvalds 已提交
843 844 845 846 847 848
		return;

	pci_read_config_word(dev, msi_control_reg(pos), &control);
	if (!(control & PCI_MSIX_FLAGS_ENABLE))
		return;

849 850
	disable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);

L
Linus Torvalds 已提交
851
	temp = dev->irq;
852
	if (!msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
853
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
854 855
		unsigned long flags;

856
		irq = head = dev->irq;
857
		dev->irq = temp;			/* Restore pin IRQ */
L
Linus Torvalds 已提交
858
		while (head != tail) {
859
			spin_lock_irqsave(&msi_lock, flags);
860
			tail = msi_desc[irq]->link.tail;
861
			spin_unlock_irqrestore(&msi_lock, flags);
862
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
863
				warning = 1;
864 865 866
			else if (irq != head)	/* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
867
		}
868
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
869 870
		if (warning) {
			printk(KERN_WARNING "PCI: %s: pci_disable_msix() called without "
871
			       "free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
872 873 874 875 876 877 878
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
	}
}

/**
879
 * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
L
Linus Torvalds 已提交
880 881
 * @dev: pointer to the pci_dev data structure of MSI(X) device function
 *
882
 * Being called during hotplug remove, from which the device function
883
 * is hot-removed. All previous assigned MSI/MSI-X irqs, if
L
Linus Torvalds 已提交
884 885 886 887 888
 * allocated for this device function, are reclaimed to unused state,
 * which may be used later on.
 **/
void msi_remove_pci_irq_vectors(struct pci_dev* dev)
{
889
	int pos, temp;
L
Linus Torvalds 已提交
890 891 892 893 894 895
	unsigned long flags;

	if (!pci_msi_enable || !dev)
 		return;

	temp = dev->irq;		/* Save IOAPIC IRQ */
896
	pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
897
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSI)) {
898
		if (irq_has_action(dev->irq)) {
L
Linus Torvalds 已提交
899
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
900
			       "called without free_irq() on MSI irq %d\n",
L
Linus Torvalds 已提交
901
			       pci_name(dev), dev->irq);
902
			BUG_ON(irq_has_action(dev->irq));
903 904
		} else /* Release MSI irq assigned to this device */
			msi_free_irq(dev, dev->irq);
L
Linus Torvalds 已提交
905 906
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
907
	pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
908 909
	if (pos > 0 && !msi_lookup_irq(dev, PCI_CAP_ID_MSIX)) {
		int irq, head, tail = 0, warning = 0;
L
Linus Torvalds 已提交
910 911
		void __iomem *base = NULL;

912
		irq = head = dev->irq;
L
Linus Torvalds 已提交
913 914
		while (head != tail) {
			spin_lock_irqsave(&msi_lock, flags);
915 916
			tail = msi_desc[irq]->link.tail;
			base = msi_desc[irq]->mask_base;
L
Linus Torvalds 已提交
917
			spin_unlock_irqrestore(&msi_lock, flags);
918
			if (irq_has_action(irq))
L
Linus Torvalds 已提交
919
				warning = 1;
920 921 922
			else if (irq != head) /* Release MSI-X irq */
				msi_free_irq(dev, irq);
			irq = tail;
L
Linus Torvalds 已提交
923
		}
924
		msi_free_irq(dev, irq);
L
Linus Torvalds 已提交
925 926 927
		if (warning) {
			iounmap(base);
			printk(KERN_WARNING "PCI: %s: msi_remove_pci_irq_vectors() "
928
			       "called without free_irq() on all MSI-X irqs\n",
L
Linus Torvalds 已提交
929 930 931 932 933 934 935
			       pci_name(dev));
			BUG_ON(warning > 0);
		}
		dev->irq = temp;		/* Restore IOAPIC IRQ */
	}
}

936 937 938 939 940
/* Globally turn MSI off by clearing pci_msi_enable. */
void pci_no_msi(void)
{
	const int disabled = 0;

	pci_msi_enable = disabled;
}

L
Linus Torvalds 已提交
941 942 943 944
EXPORT_SYMBOL(pci_enable_msi);
EXPORT_SYMBOL(pci_disable_msi);
EXPORT_SYMBOL(pci_enable_msix);
EXPORT_SYMBOL(pci_disable_msix);