/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/export.h>
#include <linux/log2.h>
#include <linux/of_device.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

#define DRIVER_NAME	"pci_sun4v"
#define PFX		DRIVER_NAME ": "

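/* Version of the PCI virtualization hypervisor API group negotiated
 * via sun4v_hvapi_register() at probe time.
 */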
static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;

#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

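/* Per-cpu batch of pending IOMMU mappings.  Physical page addresses
 * accumulate in pglist and are handed to the hypervisor through
 * pci_sun4v_iommu_map() when the batch is flushed.
 */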
struct iommu_batch {
	struct device	*dev;		/* Device mapping is for.	*/
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
};

static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
static int iommu_batch_initialized;

/* Interrupts must be disabled.  */
static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	p->dev		= dev;
	p->prot		= prot;
	p->entry	= entry;
	p->npages	= 0;
}

/* Interrupts must be disabled.  */
static long iommu_batch_flush(struct iommu_batch *p)
{
	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

	while (npages != 0) {
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
				printk("iommu_batch_flush: IOMMU map of "
				       "[%08lx:%08llx:%lx:%lx:%lx] failed with "
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}

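/* If @entry contiguously extends the pending batch there is nothing to
 * do; otherwise flush whatever is queued and restart the batch at
 * @entry.
 */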
static inline void iommu_batch_new_entry(unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	if (p->entry + p->npages == entry)
		return;
	if (p->entry != ~0UL)
		iommu_batch_flush(p);
	p->entry = entry;
}

/* Interrupts must be disabled.  */
static inline long iommu_batch_add(u64 phys_page)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return iommu_batch_flush(p);

	return 0;
}

/* Interrupts must be disabled.  */
static inline long iommu_batch_end(void)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return iommu_batch_flush(p);
}

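/* Allocate IO-page-aligned, zeroed memory on the device's NUMA node and
 * map it in the IOMMU, returning the CPU virtual address and storing
 * the bus address in @dma_addrp.
 */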
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
				   dma_addr_t *dma_addrp, gfp_t gfp)
{
	unsigned long flags, order, first_page, npages, n;
	struct iommu *iommu;
	struct page *page;
	void *ret;
	long entry;
	int nid;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	nid = dev->archdata.numa_node;
	page = alloc_pages_node(nid, gfp, order);
	if (unlikely(!page))
		return NULL;

	first_page = (unsigned long) page_address(page);
	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = dev->archdata.iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		goto range_alloc_fail;

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	iommu_batch_start(dev,
			  (HV_PCI_MAP_ATTR_READ |
			   HV_PCI_MAP_ATTR_WRITE),
			  entry);

	for (n = 0; n < npages; n++) {
		long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	iommu_range_free(iommu, *dma_addrp, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

range_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}

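/* Undo dma_4v_alloc_coherent(): release the IOMMU range, demap the
 * IOTSB entries through the hypervisor and free the backing pages.
 */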
static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
				 dma_addr_t dvma)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, order, npages, entry;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_range_free(iommu, dvma, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}

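/* Map a single page for streaming DMA.  Returns the bus address
 * covering @offset within @page, or DMA_ERROR_CODE on failure.
 */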
static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t sz,
				  enum dma_data_direction direction,
				  struct dma_attrs *attrs)
{
	struct iommu *iommu;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	u32 bus_addr, ret;
	unsigned long prot;
	long entry;

	iommu = dev->archdata.iommu;

	if (unlikely(direction == DMA_NONE))
		goto bad;

	oaddr = (unsigned long)(page_address(page) + offset);
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	local_irq_save(flags);

	iommu_batch_start(dev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = iommu_batch_add(base_paddr);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return DMA_ERROR_CODE;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	iommu_range_free(iommu, bus_addr, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return DMA_ERROR_CODE;
}

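/* Tear down a streaming mapping created by dma_4v_map_page(). */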
static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
			      size_t sz, enum dma_data_direction direction,
			      struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, npages;
	long entry;
	u32 devhandle;

	if (unlikely(direction == DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_range_free(iommu, bus_addr, npages);

	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

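/* Map a scatterlist for streaming DMA, merging entries into larger
 * segments whenever the allocated bus addresses are contiguous and the
 * device's max segment size and boundary allow it.  Returns the number
 * of DMA segments produced, or zero on failure.
 */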
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
			 int nelems, enum dma_data_direction direction,
			 struct dma_attrs *attrs)
{
	struct scatterlist *s, *outs, *segstart;
	unsigned long flags, handle, prot;
	dma_addr_t dma_next = 0, dma_addr;
	unsigned int max_seg_size;
	unsigned long seg_boundary_size;
	int outcount, incount, i;
	struct iommu *iommu;
	unsigned long base_shift;
	long err;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	if (nelems == 0 || !iommu)
		return 0;
	
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_batch_start(dev, prot, ~0UL);

	max_seg_size = dma_get_max_seg_size(dev);
	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
	for_each_sg(sglist, s, nelems, i) {
		unsigned long paddr, npages, entry, out_entry = 0, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
		entry = iommu_range_alloc(dev, iommu, npages, &handle);

		/* Handle failure */
		if (unlikely(entry == DMA_ERROR_CODE)) {
			if (printk_ratelimit())
				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
				       " npages %lx\n", iommu, paddr, npages);
			goto iommu_map_failed;
		}

		iommu_batch_new_entry(entry);

		/* Convert entry to a dma_addr_t */
		dma_addr = iommu->page_table_map_base +
			(entry << IO_PAGE_SHIFT);
		dma_addr |= (s->offset & ~IO_PAGE_MASK);

		/* Insert into HW table */
		paddr &= IO_PAGE_MASK;
		while (npages--) {
			err = iommu_batch_add(paddr);
			if (unlikely(err < 0L))
				goto iommu_map_failed;
			paddr += IO_PAGE_SIZE;
		}

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if ((dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size) ||
			    (is_span_boundary(out_entry, base_shift,
					      seg_boundary_size, outs, s))) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
			} else {
				outs->dma_length += s->length;
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
			out_entry = entry;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;
	}

	err = iommu_batch_end();

	if (unlikely(err < 0L))
		goto iommu_map_failed;

	spin_unlock_irqrestore(&iommu->lock, flags);

	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	return outcount;

iommu_map_failed:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IO_PAGE_MASK;
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IO_PAGE_SIZE);
			iommu_range_free(iommu, vaddr, npages);
			/* XXX demap? XXX */
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

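/* Unmap a scatterlist previously mapped by dma_4v_map_sg(). */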
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
			    int nelems, enum dma_data_direction direction,
			    struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct scatterlist *sg;
	struct iommu *iommu;
	unsigned long flags;
	u32 devhandle;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;
	
	spin_lock_irqsave(&iommu->lock, flags);

	sg = sglist;
	while (nelems--) {
		dma_addr_t dma_handle = sg->dma_address;
		unsigned int len = sg->dma_length;
		unsigned long npages, entry;

		if (!len)
			break;
		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
		iommu_range_free(iommu, dma_handle, npages);

		entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
		while (npages) {
			unsigned long num;

			num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
						    npages);
			entry += num;
			npages -= num;
		}

		sg = sg_next(sg);
	}

	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_map_ops sun4v_dma_ops = {
	.alloc_coherent			= dma_4v_alloc_coherent,
	.free_coherent			= dma_4v_free_coherent,
	.map_page			= dma_4v_map_page,
	.unmap_page			= dma_4v_unmap_page,
	.map_sg				= dma_4v_map_sg,
	.unmap_sg			= dma_4v_unmap_sg,
};

static void __devinit pci_sun4v_scan_bus(struct pci_pbm_info *pbm,
					 struct device *parent)
{
	struct property *prop;
	struct device_node *dp;

	dp = pbm->op->dev.of_node;
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);

	/* XXX register error interrupt handlers XXX */
}

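/* Scan the IOTSB for mappings installed by the firmware.  Entries whose
 * target pages the kernel will reuse are demapped; the rest are marked
 * as in-use in the arena so they are never handed out.  Returns the
 * number of entries preserved.
 */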
static unsigned long __devinit probe_existing_entries(struct pci_pbm_info *pbm,
						      struct iommu *iommu)
{
	struct iommu_arena *arena = &iommu->arena;
	unsigned long i, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
		if (ret == HV_EOK) {
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
		}
	}

	return cnt;
}

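/* Set up the software IOMMU state for a PBM: parse the "virtual-dma"
 * property (falling back to a 2GB default window), initialize the
 * arena allocator and import any entries already programmed by the
 * firmware.
 */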
static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
	struct iommu *iommu = pbm->iommu;
	unsigned long num_tsb_entries, sz;
	u32 dma_mask, dma_offset;
	const u32 *vdma;

	vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
	if (!vdma)
		vdma = vdma_default;

	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
		       vdma[0], vdma[1]);
		return -EINVAL;
	}

	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map.  */
	sz = (num_tsb_entries + 7) / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->arena.map) {
		printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
		return -ENOMEM;
	}
	iommu->arena.limit = num_tsb_entries;

	sz = probe_existing_entries(pbm, iommu);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);

	return 0;
}

#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;
	u64		reserved1;
	u64		stick;
	u64		req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 * 	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};

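/* sparc64_msiq_ops backend: thin wrappers around the sun4v MSI and MSI
 * event queue hypervisor calls, handed to the common sparc64 MSI layer
 * via sparc64_pbm_msi_init().
 */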
static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long *head)
{
	unsigned long err, limit;

	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -ENXIO;

	limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	if (unlikely(*head >= limit))
		return -EFBIG;

	return 0;
}

static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
				 unsigned long msiqid, unsigned long *head,
				 unsigned long *msi)
{
	struct pci_sun4v_msiq_entry *ep;
	unsigned long err, type;

	/* Note: void pointer arithmetic, 'head' is a byte offset  */
	ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				 (pbm->msiq_ent_count *
				  sizeof(struct pci_sun4v_msiq_entry))) +
	      *head);

	if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
		return 0;

	type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
	if (unlikely(type != MSIQ_TYPE_MSI32 &&
		     type != MSIQ_TYPE_MSI64))
		return -EINVAL;

	*msi = ep->msi_data;

	err = pci_sun4v_msi_setstate(pbm->devhandle,
				     ep->msi_data /* msi_num */,
				     HV_MSISTATE_IDLE);
	if (unlikely(err))
		return -ENXIO;

	/* Clear the entry.  */
	ep->version_type &= ~MSIQ_TYPE_MASK;

	(*head) += sizeof(struct pci_sun4v_msiq_entry);
	if (*head >=
	    (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
		*head = 0;

	return 1;
}

static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long head)
{
	unsigned long err;

	err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -EINVAL;

	return 0;
}

static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
			       unsigned long msi, int is_msi64)
{
	if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
				  (is_msi64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		return -ENXIO;
	if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
		return -ENXIO;
	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
		return -ENXIO;
	return 0;
}

static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
{
	unsigned long err, msiqid;

	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
	if (err)
		return -ENXIO;

	pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);

	return 0;
}

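/* Allocate all MSI event queues for this PBM as one physically
 * contiguous block, register each queue with the hypervisor and verify
 * the configuration that was actually installed.
 */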
static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}

static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long msiqid = pbm->msiq_first + i;

		(void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
	}

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);

	pages = (unsigned long) pbm->msi_queues;

	free_pages(pages, order);

	pbm->msi_queues = NULL;
}

static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
				    unsigned long msiqid,
				    unsigned long devino)
{
	unsigned int irq = sun4v_build_irq(pbm->devhandle, devino);

	if (!irq)
		return -ENOMEM;

	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		return -EINVAL;
	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		return -EINVAL;

	return irq;
}

static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
	.get_head	=	pci_sun4v_get_head,
	.dequeue_msi	=	pci_sun4v_dequeue_msi,
	.set_head	=	pci_sun4v_set_head,
	.msi_setup	=	pci_sun4v_msi_setup,
	.msi_teardown	=	pci_sun4v_msi_teardown,
	.msiq_alloc	=	pci_sun4v_msiq_alloc,
	.msiq_free	=	pci_sun4v_msiq_free,
	.msiq_build_irq	=	pci_sun4v_msiq_build_irq,
};

static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */

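/* Initialize one PCI Bus Module: set up config space accessors, probe
 * its resources and properties, bring up the IOMMU and MSI support,
 * then scan the bus.
 */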
static int __devinit pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
					struct platform_device *op, u32 devhandle)
{
	struct device_node *dp = op->dev.of_node;
	int err;

	pbm->numa_node = of_node_to_nid(dp);

	pbm->pci_ops = &sun4v_pci_ops;
	pbm->config_space_reg_bits = 12;

	pbm->index = pci_num_pbms++;

	pbm->op = op;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);

	pci_determine_mem_io_space(pbm);

	pci_get_pbm_props(pbm);

	err = pci_sun4v_iommu_init(pbm);
	if (err)
		return err;

	pci_sun4v_msi_init(pbm);

	pci_sun4v_scan_bus(pbm, &op->dev);

	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

	return 0;
}

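/* Platform device probe: negotiate the vPCI hypervisor API and allocate
 * the per-cpu IOMMU batch page lists on first use, then allocate and
 * initialize the PBM and its IOMMU for this "pci" node.
 */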
static int __devinit pci_sun4v_probe(struct platform_device *op)
{
	const struct linux_prom64_registers *regs;
	static int hvapi_negotiated = 0;
	struct pci_pbm_info *pbm;
	struct device_node *dp;
	struct iommu *iommu;
	u32 devhandle;
	int i, err;

	dp = op->dev.of_node;

	if (!hvapi_negotiated++) {
		err = sun4v_hvapi_register(HV_GRP_PCI,
					   vpci_major,
					   &vpci_minor);

		if (err) {
			printk(KERN_ERR PFX "Could not register hvapi, "
			       "err=%d\n", err);
			return err;
		}
		printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n",
		       vpci_major, vpci_minor);

		dma_ops = &sun4v_dma_ops;
	}

	regs = of_get_property(dp, "reg", NULL);
	err = -ENODEV;
	if (!regs) {
		printk(KERN_ERR PFX "Could not find config registers\n");
		goto out_err;
	}
	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	err = -ENOMEM;
	if (!iommu_batch_initialized) {
		for_each_possible_cpu(i) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);

			if (!page)
				goto out_err;

			per_cpu(iommu_batch, i).pglist = (u64 *) page;
		}
		iommu_batch_initialized = 1;
	}

	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
	if (!pbm) {
		printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
		goto out_err;
	}

	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
	if (!iommu) {
		printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
		goto out_free_controller;
	}

	pbm->iommu = iommu;

	err = pci_sun4v_pbm_init(pbm, op, devhandle);
	if (err)
		goto out_free_iommu;

	dev_set_drvdata(&op->dev, pbm);

	return 0;

out_free_iommu:
	kfree(pbm->iommu);

out_free_controller:
	kfree(pbm);

out_err:
	return err;
}

static const struct of_device_id pci_sun4v_match[] = {
	{
		.name = "pci",
		.compatible = "SUNW,sun4v-pci",
	},
	{},
};

static struct platform_driver pci_sun4v_driver = {
	.driver = {
		.name = DRIVER_NAME,
		.owner = THIS_MODULE,
		.of_match_table = pci_sun4v_match,
	},
	.probe		= pci_sun4v_probe,
};

static int __init pci_sun4v_init(void)
{
	return platform_driver_register(&pci_sun4v_driver);
}

subsys_initcall(pci_sun4v_init);