/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/log2.h>
#include <linux/of_device.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

/* Name used when registering the driver and as the log-message prefix. */
#define DRIVER_NAME	"pci_sun4v"
#define PFX		DRIVER_NAME ": "

/* PCI hypervisor API version handed to sun4v_hvapi_register() at probe time. */
static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;

/* Number of u64 page-list entries that fit in one page. */
#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

36
struct iommu_batch {
37
	struct device	*dev;		/* Device mapping is for.	*/
38 39 40 41
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
42 43
};

44
static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
45
static int iommu_batch_initialized;
46 47

/* Interrupts must be disabled.  */
48
static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
49
{
50
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);
51

52
	p->dev		= dev;
53 54 55 56 57 58
	p->prot		= prot;
	p->entry	= entry;
	p->npages	= 0;
}

/* Interrupts must be disabled.  */
59
static long iommu_batch_flush(struct iommu_batch *p)
60
{
61
	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
62
	unsigned long devhandle = pbm->devhandle;
63 64 65 66 67
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

68
	while (npages != 0) {
69 70 71 72 73 74
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
75
				printk("iommu_batch_flush: IOMMU map of "
76
				       "[%08lx:%08llx:%lx:%lx:%lx] failed with "
77 78 79 80 81 82 83 84 85
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
86
	}
87 88 89 90 91 92 93

	p->entry = entry;
	p->npages = 0;

	return 0;
}

94 95 96 97 98 99 100 101 102 103 104
static inline void iommu_batch_new_entry(unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	if (p->entry + p->npages == entry)
		return;
	if (p->entry != ~0UL)
		iommu_batch_flush(p);
	p->entry = entry;
}

105
/* Interrupts must be disabled.  */
106
static inline long iommu_batch_add(u64 phys_page)
107
{
108
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);
109 110 111 112 113

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
114
		return iommu_batch_flush(p);
115 116 117 118 119

	return 0;
}

/* Interrupts must be disabled.  */
120
static inline long iommu_batch_end(void)
121
{
122
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);
123 124 125

	BUG_ON(p->npages >= PGLIST_NENTS);

126
	return iommu_batch_flush(p);
127
}
128

129 130
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
				   dma_addr_t *dma_addrp, gfp_t gfp)
131
{
132
	unsigned long flags, order, first_page, npages, n;
133 134
	struct iommu *iommu;
	struct page *page;
135 136
	void *ret;
	long entry;
137
	int nid;
138 139 140

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
141
	if (unlikely(order >= MAX_ORDER))
142 143 144 145
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

146 147 148
	nid = dev->archdata.numa_node;
	page = alloc_pages_node(nid, gfp, order);
	if (unlikely(!page))
149
		return NULL;
150

151
	first_page = (unsigned long) page_address(page);
152 153
	memset((char *)first_page, 0, PAGE_SIZE << order);

154
	iommu = dev->archdata.iommu;
155 156

	spin_lock_irqsave(&iommu->lock, flags);
157
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
158 159
	spin_unlock_irqrestore(&iommu->lock, flags);

160 161
	if (unlikely(entry == DMA_ERROR_CODE))
		goto range_alloc_fail;
162 163 164 165 166 167

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

168
	local_irq_save(flags);
169

170 171 172 173
	iommu_batch_start(dev,
			  (HV_PCI_MAP_ATTR_READ |
			   HV_PCI_MAP_ATTR_WRITE),
			  entry);
174

175
	for (n = 0; n < npages; n++) {
176
		long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
177 178 179
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
180

181
	if (unlikely(iommu_batch_end() < 0L))
182
		goto iommu_map_fail;
183

184
	local_irq_restore(flags);
185 186

	return ret;
187 188 189 190

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
191
	iommu_range_free(iommu, *dma_addrp, npages);
192 193
	spin_unlock_irqrestore(&iommu->lock, flags);

194
range_alloc_fail:
195 196
	free_pages(first_page, order);
	return NULL;
197 198
}

199 200
static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
				 dma_addr_t dvma)
201
{
202
	struct pci_pbm_info *pbm;
203
	struct iommu *iommu;
204 205
	unsigned long flags, order, npages, entry;
	u32 devhandle;
206 207

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
208 209
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
210
	devhandle = pbm->devhandle;
211 212 213 214
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

215
	iommu_range_free(iommu, dvma, npages);
216 217 218 219 220 221 222 223 224 225 226 227 228 229 230

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
231 232
}

233 234
static dma_addr_t dma_4v_map_single(struct device *dev, void *ptr, size_t sz,
				    enum dma_data_direction direction)
235
{
236
	struct iommu *iommu;
237
	unsigned long flags, npages, oaddr;
238
	unsigned long i, base_paddr;
239
	u32 bus_addr, ret;
240 241 242
	unsigned long prot;
	long entry;

243
	iommu = dev->archdata.iommu;
244

245
	if (unlikely(direction == DMA_NONE))
246 247 248 249 250 251 252
		goto bad;

	oaddr = (unsigned long)ptr;
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
253
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
254 255
	spin_unlock_irqrestore(&iommu->lock, flags);

256
	if (unlikely(entry == DMA_ERROR_CODE))
257 258 259 260 261 262 263
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
264
	if (direction != DMA_TO_DEVICE)
265 266
		prot |= HV_PCI_MAP_ATTR_WRITE;

267
	local_irq_save(flags);
268

269
	iommu_batch_start(dev, prot, entry);
270

271
	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
272
		long err = iommu_batch_add(base_paddr);
273 274 275
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
276
	if (unlikely(iommu_batch_end() < 0L))
277
		goto iommu_map_fail;
278

279
	local_irq_restore(flags);
280 281 282 283 284 285

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
286
	return DMA_ERROR_CODE;
287 288 289 290

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
291
	iommu_range_free(iommu, bus_addr, npages);
292 293
	spin_unlock_irqrestore(&iommu->lock, flags);

294
	return DMA_ERROR_CODE;
295 296
}

297 298
static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr,
				size_t sz, enum dma_data_direction direction)
299
{
300
	struct pci_pbm_info *pbm;
301
	struct iommu *iommu;
302
	unsigned long flags, npages;
303
	long entry;
304
	u32 devhandle;
305

306
	if (unlikely(direction == DMA_NONE)) {
307 308 309 310 311
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

312 313
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
314
	devhandle = pbm->devhandle;
315 316 317 318 319 320 321

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

322
	iommu_range_free(iommu, bus_addr, npages);
323

324
	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
325 326 327 328 329 330 331 332 333 334 335 336
	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

337 338
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
			 int nelems, enum dma_data_direction direction)
339
{
340 341 342 343
	struct scatterlist *s, *outs, *segstart;
	unsigned long flags, handle, prot;
	dma_addr_t dma_next = 0, dma_addr;
	unsigned int max_seg_size;
344
	unsigned long seg_boundary_size;
345
	int outcount, incount, i;
346
	struct iommu *iommu;
347
	unsigned long base_shift;
348 349 350
	long err;

	BUG_ON(direction == DMA_NONE);
351

352
	iommu = dev->archdata.iommu;
353 354
	if (nelems == 0 || !iommu)
		return 0;
355
	
356 357 358
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;
359

360 361 362 363
	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;
364

365 366
	/* Init first segment length for backout at failure */
	outs->dma_length = 0;
367

368
	spin_lock_irqsave(&iommu->lock, flags);
369

370
	iommu_batch_start(dev, prot, ~0UL);
371

372
	max_seg_size = dma_get_max_seg_size(dev);
373 374 375
	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
376
	for_each_sg(sglist, s, nelems, i) {
377
		unsigned long paddr, npages, entry, out_entry = 0, slen;
378

379 380 381 382 383 384 385 386
		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
387
		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
388
		entry = iommu_range_alloc(dev, iommu, npages, &handle);
389

390 391 392 393 394 395 396
		/* Handle failure */
		if (unlikely(entry == DMA_ERROR_CODE)) {
			if (printk_ratelimit())
				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
				       " npages %lx\n", iommu, paddr, npages);
			goto iommu_map_failed;
		}
397

398
		iommu_batch_new_entry(entry);
399

400 401 402 403
		/* Convert entry to a dma_addr_t */
		dma_addr = iommu->page_table_map_base +
			(entry << IO_PAGE_SHIFT);
		dma_addr |= (s->offset & ~IO_PAGE_MASK);
404

405
		/* Insert into HW table */
406
		paddr &= IO_PAGE_MASK;
407
		while (npages--) {
408
			err = iommu_batch_add(paddr);
409
			if (unlikely(err < 0L))
410
				goto iommu_map_failed;
411 412 413 414 415 416 417 418 419
			paddr += IO_PAGE_SIZE;
		}

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if ((dma_addr != dma_next) ||
420 421 422
			    (outs->dma_length + s->length > max_seg_size) ||
			    (is_span_boundary(out_entry, base_shift,
					      seg_boundary_size, outs, s))) {
423 424 425 426 427 428
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
			} else {
				outs->dma_length += s->length;
429
			}
430
		}
431

432 433 434 435
		if (segstart == s) {
			/* This is a new segment, fill entries */
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
436
			out_entry = entry;
437
		}
438 439 440

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;
441 442 443 444
	}

	err = iommu_batch_end();

445 446
	if (unlikely(err < 0L))
		goto iommu_map_failed;
447

448
	spin_unlock_irqrestore(&iommu->lock, flags);
449

450 451 452 453 454 455 456
	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	return outcount;
457 458

iommu_map_failed:
459 460 461 462 463
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IO_PAGE_MASK;
464 465
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IO_PAGE_SIZE);
466 467 468 469 470 471 472 473
			iommu_range_free(iommu, vaddr, npages);
			/* XXX demap? XXX */
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
474 475 476
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
477 478
}

479 480
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
			    int nelems, enum dma_data_direction direction)
481
{
482
	struct pci_pbm_info *pbm;
483
	struct scatterlist *sg;
484
	struct iommu *iommu;
485 486
	unsigned long flags;
	u32 devhandle;
487

488
	BUG_ON(direction == DMA_NONE);
489

490 491
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
492
	devhandle = pbm->devhandle;
493 494 495
	
	spin_lock_irqsave(&iommu->lock, flags);

496 497 498 499 500 501 502 503
	sg = sglist;
	while (nelems--) {
		dma_addr_t dma_handle = sg->dma_address;
		unsigned int len = sg->dma_length;
		unsigned long npages, entry;

		if (!len)
			break;
504
		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
505 506 507 508 509 510 511 512 513 514 515
		iommu_range_free(iommu, dma_handle, npages);

		entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
		while (npages) {
			unsigned long num;

			num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
						    npages);
			entry += num;
			npages -= num;
		}
516

517 518
		sg = sg_next(sg);
	}
519 520

	spin_unlock_irqrestore(&iommu->lock, flags);
521 522
}

523 524 525
static void dma_4v_sync_single_for_cpu(struct device *dev,
				       dma_addr_t bus_addr, size_t sz,
				       enum dma_data_direction direction)
526
{
527
	/* Nothing to do... */
528 529
}

530 531 532
static void dma_4v_sync_sg_for_cpu(struct device *dev,
				   struct scatterlist *sglist, int nelems,
				   enum dma_data_direction direction)
533
{
534
	/* Nothing to do... */
535 536
}

A
Adrian Bunk 已提交
537
static const struct dma_ops sun4v_dma_ops = {
538 539 540 541 542 543 544 545
	.alloc_coherent			= dma_4v_alloc_coherent,
	.free_coherent			= dma_4v_free_coherent,
	.map_single			= dma_4v_map_single,
	.unmap_single			= dma_4v_unmap_single,
	.map_sg				= dma_4v_map_sg,
	.unmap_sg			= dma_4v_unmap_sg,
	.sync_single_for_cpu		= dma_4v_sync_single_for_cpu,
	.sync_sg_for_cpu		= dma_4v_sync_sg_for_cpu,
546 547
};

548 549
static void __devinit pci_sun4v_scan_bus(struct pci_pbm_info *pbm,
					 struct device *parent)
550
{
551 552 553
	struct property *prop;
	struct device_node *dp;

554
	dp = pbm->op->node;
555 556
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
557
	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);
558 559

	/* XXX register error interrupt handlers XXX */
560 561
}

562 563
static unsigned long __devinit probe_existing_entries(struct pci_pbm_info *pbm,
						      struct iommu *iommu)
564
{
565
	struct iommu_arena *arena = &iommu->arena;
566
	unsigned long i, cnt = 0;
567
	u32 devhandle;
568 569 570 571 572 573 574 575

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
576
		if (ret == HV_EOK) {
577 578 579 580 581 582 583
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
584
		}
585
	}
586 587

	return cnt;
588 589
}

590
static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
591
{
592
	static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
593
	struct iommu *iommu = pbm->iommu;
594
	unsigned long num_tsb_entries, sz, tsbsize;
595 596 597
	u32 dma_mask, dma_offset;
	const u32 *vdma;

598
	vdma = of_get_property(pbm->op->node, "virtual-dma", NULL);
599 600
	if (!vdma)
		vdma = vdma_default;
601

602
	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
603 604 605
		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
		       vdma[0], vdma[1]);
		return -EINVAL;
606 607
	};

608 609 610
	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;
	tsbsize = num_tsb_entries * sizeof(iopte_t);
611 612 613 614 615 616 617 618 619 620

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map.  */
621
	sz = (num_tsb_entries + 7) / 8;
622
	sz = (sz + 7UL) & ~7UL;
623
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
624
	if (!iommu->arena.map) {
625 626
		printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
		return -ENOMEM;
627 628 629
	}
	iommu->arena.limit = num_tsb_entries;

630
	sz = probe_existing_entries(pbm, iommu);
631 632 633
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);
634 635

	return 0;
636 637
}

#ifdef CONFIG_PCI_MSI
/* Layout of one MSI queue entry as read by pci_sun4v_dequeue_msi(). */
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;
	u64		reserved1;
	u64		stick;
	u64		req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 * 	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};

683 684
static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long *head)
685
{
686
	unsigned long err, limit;
687

688
	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
689
	if (unlikely(err))
690
		return -ENXIO;
691

692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713
	limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	if (unlikely(*head >= limit))
		return -EFBIG;

	return 0;
}

static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
				 unsigned long msiqid, unsigned long *head,
				 unsigned long *msi)
{
	struct pci_sun4v_msiq_entry *ep;
	unsigned long err, type;

	/* Note: void pointer arithmetic, 'head' is a byte offset  */
	ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				 (pbm->msiq_ent_count *
				  sizeof(struct pci_sun4v_msiq_entry))) +
	      *head);

	if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
		return 0;
714

715 716 717 718
	type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
	if (unlikely(type != MSIQ_TYPE_MSI32 &&
		     type != MSIQ_TYPE_MSI64))
		return -EINVAL;
719

720 721 722 723 724 725 726
	*msi = ep->msi_data;

	err = pci_sun4v_msi_setstate(pbm->devhandle,
				     ep->msi_data /* msi_num */,
				     HV_MSISTATE_IDLE);
	if (unlikely(err))
		return -ENXIO;
727

728 729
	/* Clear the entry.  */
	ep->version_type &= ~MSIQ_TYPE_MASK;
730

731 732 733 734
	(*head) += sizeof(struct pci_sun4v_msiq_entry);
	if (*head >=
	    (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
		*head = 0;
735

736
	return 1;
737 738
}

739 740
static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long head)
741
{
742
	unsigned long err;
743

744 745 746
	err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -EINVAL;
747

748 749
	return 0;
}
750

751 752 753 754 755 756 757 758 759 760 761
static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
			       unsigned long msi, int is_msi64)
{
	if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
				  (is_msi64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		return -ENXIO;
	if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
		return -ENXIO;
	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
		return -ENXIO;
762 763 764
	return 0;
}

765
static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
766
{
767 768 769 770 771 772 773 774 775
	unsigned long err, msiqid;

	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
	if (err)
		return -ENXIO;

	pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);

	return 0;
776 777
}

778
static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}

832
static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
833
{
834
	unsigned long q_size, alloc_size, pages, order;
835 836
	int i;

837 838
	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long msiqid = pbm->msiq_first + i;
839

840
		(void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
841
	}
842

843 844 845
	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
846

847
	pages = (unsigned long) pbm->msi_queues;
848

849
	free_pages(pages, order);
850

851
	pbm->msi_queues = NULL;
852 853
}

854 855 856
static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
				    unsigned long msiqid,
				    unsigned long devino)
857
{
858
	unsigned int virt_irq = sun4v_build_irq(pbm->devhandle, devino);
859

860 861
	if (!virt_irq)
		return -ENOMEM;
862

863 864 865 866
	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		return -EINVAL;
	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		return -EINVAL;
867

868
	return virt_irq;
869
}
870

871 872 873 874 875 876 877 878 879 880 881
static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
	.get_head	=	pci_sun4v_get_head,
	.dequeue_msi	=	pci_sun4v_dequeue_msi,
	.set_head	=	pci_sun4v_set_head,
	.msi_setup	=	pci_sun4v_msi_setup,
	.msi_teardown	=	pci_sun4v_msi_teardown,
	.msiq_alloc	=	pci_sun4v_msiq_alloc,
	.msiq_free	=	pci_sun4v_msiq_free,
	.msiq_build_irq	=	pci_sun4v_msiq_build_irq,
};

882 883
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
884
	sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
885
}
#else /* CONFIG_PCI_MSI */
/* MSI support is compiled out; there is nothing to initialise. */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */

892 893
static int __devinit pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
					struct of_device *op, u32 devhandle)
894
{
895
	struct device_node *dp = op->node;
896
	int err;
897

898 899
	pbm->numa_node = of_node_to_nid(dp);

900 901
	pbm->pci_ops = &sun4v_pci_ops;
	pbm->config_space_reg_bits = 12;
902

903 904
	pbm->index = pci_num_pbms++;

905
	pbm->op = op;
906

D
David S. Miller 已提交
907
	pbm->devhandle = devhandle;
908

909
	pbm->name = dp->full_name;
910

911
	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
912
	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);
913

914
	pci_determine_mem_io_space(pbm);
915

916
	pci_get_pbm_props(pbm);
917 918 919 920 921

	err = pci_sun4v_iommu_init(pbm);
	if (err)
		return err;

922
	pci_sun4v_msi_init(pbm);
923

924
	pci_sun4v_scan_bus(pbm, &op->dev);
925

926 927 928
	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

929
	return 0;
930 931
}

932
static int __devinit pci_sun4v_probe(struct of_device *op,
933
				     const struct of_device_id *match)
934
{
935
	const struct linux_prom64_registers *regs;
936
	static int hvapi_negotiated = 0;
937
	struct pci_pbm_info *pbm;
938
	struct device_node *dp;
939
	struct iommu *iommu;
940
	u32 devhandle;
941
	int i, err;
D
David S. Miller 已提交
942

943 944
	dp = op->node;

945
	if (!hvapi_negotiated++) {
946 947 948
		err = sun4v_hvapi_register(HV_GRP_PCI,
					   vpci_major,
					   &vpci_minor);
949 950

		if (err) {
951 952 953
			printk(KERN_ERR PFX "Could not register hvapi, "
			       "err=%d\n", err);
			return err;
954
		}
955
		printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n",
956
		       vpci_major, vpci_minor);
957 958

		dma_ops = &sun4v_dma_ops;
959 960
	}

961
	regs = of_get_property(dp, "reg", NULL);
962
	err = -ENODEV;
963 964
	if (!regs) {
		printk(KERN_ERR PFX "Could not find config registers\n");
965
		goto out_err;
966
	}
967
	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;
D
David S. Miller 已提交
968

969
	err = -ENOMEM;
970 971 972
	if (!iommu_batch_initialized) {
		for_each_possible_cpu(i) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);
973

974 975
			if (!page)
				goto out_err;
976

977 978 979
			per_cpu(iommu_batch, i).pglist = (u64 *) page;
		}
		iommu_batch_initialized = 1;
980
	}
981

982 983 984
	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
	if (!pbm) {
		printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
985
		goto out_err;
986
	}
987

988
	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
989
	if (!iommu) {
990
		printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
991
		goto out_free_controller;
992
	}
993

994
	pbm->iommu = iommu;
995

996 997 998
	err = pci_sun4v_pbm_init(pbm, op, devhandle);
	if (err)
		goto out_free_iommu;
999

1000
	dev_set_drvdata(&op->dev, pbm);
1001

1002
	return 0;
1003

1004 1005
out_free_iommu:
	kfree(pbm->iommu);
1006 1007

out_free_controller:
1008
	kfree(pbm);
1009 1010 1011

out_err:
	return err;
1012
}
1013

1014
static struct of_device_id __initdata pci_sun4v_match[] = {
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033
	{
		.name = "pci",
		.compatible = "SUNW,sun4v-pci",
	},
	{},
};

/* OF platform driver descriptor, registered by pci_sun4v_init(). */
static struct of_platform_driver pci_sun4v_driver = {
	.name		= DRIVER_NAME,
	.match_table	= pci_sun4v_match,	/* nodes this driver binds to */
	.probe		= pci_sun4v_probe,	/* called for each matching node */
};

/* Register pci_sun4v_driver on the OF bus and return the result of
 * of_register_driver().  Hooked up below as a subsys initcall.
 */
static int __init pci_sun4v_init(void)
{
	int rc;

	rc = of_register_driver(&pci_sun4v_driver, &of_bus_type);
	return rc;
}

subsys_initcall(pci_sun4v_init);