/* pci_sun4v.c: SUN4V specific PCI controller support.
 *
 * Copyright (C) 2006, 2007, 2008 David S. Miller (davem@davemloft.net)
 */

#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/export.h>
#include <linux/log2.h>
#include <linux/of_device.h>

#include <asm/iommu.h>
#include <asm/irq.h>
#include <asm/hypervisor.h>
#include <asm/prom.h>

#include "pci_impl.h"
#include "iommu_common.h"

#include "pci_sun4v.h"

#define DRIVER_NAME	"pci_sun4v"
#define PFX		DRIVER_NAME ": "

static unsigned long vpci_major = 1;
static unsigned long vpci_minor = 1;

#define PGLIST_NENTS	(PAGE_SIZE / sizeof(u64))

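/* Mappings are batched into per-cpu page lists so that a run of pages
 * can be handed to the hypervisor with a single pci_sun4v_iommu_map()
 * call instead of one call per page.
 */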
struct iommu_batch {
	struct device	*dev;		/* Device mapping is for.	*/
	unsigned long	prot;		/* IOMMU page protections	*/
	unsigned long	entry;		/* Index into IOTSB.		*/
	u64		*pglist;	/* List of physical pages	*/
	unsigned long	npages;		/* Number of pages in list.	*/
};

static DEFINE_PER_CPU(struct iommu_batch, iommu_batch);
static int iommu_batch_initialized;

/* Interrupts must be disabled.  */
static inline void iommu_batch_start(struct device *dev, unsigned long prot, unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	p->dev		= dev;
	p->prot		= prot;
	p->entry	= entry;
	p->npages	= 0;
}

/* Interrupts must be disabled.  */
static long iommu_batch_flush(struct iommu_batch *p)
{
	struct pci_pbm_info *pbm = p->dev->archdata.host_controller;
	unsigned long devhandle = pbm->devhandle;
	unsigned long prot = p->prot;
	unsigned long entry = p->entry;
	u64 *pglist = p->pglist;
	unsigned long npages = p->npages;

	while (npages != 0) {
		long num;

		num = pci_sun4v_iommu_map(devhandle, HV_PCI_TSBID(0, entry),
					  npages, prot, __pa(pglist));
		if (unlikely(num < 0)) {
			if (printk_ratelimit())
				printk("iommu_batch_flush: IOMMU map of "
				       "[%08lx:%08llx:%lx:%lx:%lx] failed with "
				       "status %ld\n",
				       devhandle, HV_PCI_TSBID(0, entry),
				       npages, prot, __pa(pglist), num);
			return -1;
		}

		entry += num;
		npages -= num;
		pglist += num;
	}

	p->entry = entry;
	p->npages = 0;

	return 0;
}

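/* Start batching at a new IOTSB index, flushing first if the new entry
 * does not simply extend the current batch.
 */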
static inline void iommu_batch_new_entry(unsigned long entry)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	if (p->entry + p->npages == entry)
		return;
	if (p->entry != ~0UL)
		iommu_batch_flush(p);
	p->entry = entry;
}

/* Interrupts must be disabled.  */
static inline long iommu_batch_add(u64 phys_page)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	p->pglist[p->npages++] = phys_page;
	if (p->npages == PGLIST_NENTS)
		return iommu_batch_flush(p);

	return 0;
}

/* Interrupts must be disabled.  */
static inline long iommu_batch_end(void)
{
	struct iommu_batch *p = &__get_cpu_var(iommu_batch);

	BUG_ON(p->npages >= PGLIST_NENTS);

	return iommu_batch_flush(p);
}

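/* Allocate a coherent buffer: grab pages from the device's NUMA node,
 * reserve a range of IOTSB entries, and batch-map the pages with both
 * read and write permission.
 */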
static void *dma_4v_alloc_coherent(struct device *dev, size_t size,
				   dma_addr_t *dma_addrp, gfp_t gfp,
				   struct dma_attrs *attrs)
{
	unsigned long flags, order, first_page, npages, n;
	struct iommu *iommu;
	struct page *page;
	void *ret;
	long entry;
	int nid;

	size = IO_PAGE_ALIGN(size);
	order = get_order(size);
	if (unlikely(order >= MAX_ORDER))
		return NULL;

	npages = size >> IO_PAGE_SHIFT;

	nid = dev->archdata.numa_node;
	page = alloc_pages_node(nid, gfp, order);
	if (unlikely(!page))
		return NULL;

	first_page = (unsigned long) page_address(page);
	memset((char *)first_page, 0, PAGE_SIZE << order);

	iommu = dev->archdata.iommu;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		goto range_alloc_fail;

	*dma_addrp = (iommu->page_table_map_base +
		      (entry << IO_PAGE_SHIFT));
	ret = (void *) first_page;
	first_page = __pa(first_page);

	local_irq_save(flags);

	iommu_batch_start(dev,
			  (HV_PCI_MAP_ATTR_READ |
			   HV_PCI_MAP_ATTR_WRITE),
			  entry);

	for (n = 0; n < npages; n++) {
		long err = iommu_batch_add(first_page + (n * PAGE_SIZE));
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}

	if (unlikely(iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	iommu_range_free(iommu, *dma_addrp, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

range_alloc_fail:
	free_pages(first_page, order);
	return NULL;
}

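/* Free a coherent buffer: return its IOTSB range to the arena, demap
 * the entries through the hypervisor, and release the pages.
 */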
static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu,
				 dma_addr_t dvma, struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, order, npages, entry;
	u32 devhandle;

	npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT;
	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;
	entry = ((dvma - iommu->page_table_map_base) >> IO_PAGE_SHIFT);

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_range_free(iommu, dvma, npages);

	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	order = get_order(size);
	if (order < 10)
		free_pages((unsigned long)cpu, order);
}

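/* Map a single page for streaming DMA.  The IOTSB range is reserved
 * under iommu->lock and then programmed via the per-cpu batch with
 * interrupts disabled.
 */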
static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t sz,
				  enum dma_data_direction direction,
				  struct dma_attrs *attrs)
{
	struct iommu *iommu;
	unsigned long flags, npages, oaddr;
	unsigned long i, base_paddr;
	u32 bus_addr, ret;
	unsigned long prot;
	long entry;

	iommu = dev->archdata.iommu;

	if (unlikely(direction == DMA_NONE))
		goto bad;

	oaddr = (unsigned long)(page_address(page) + offset);
	npages = IO_PAGE_ALIGN(oaddr + sz) - (oaddr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;

	spin_lock_irqsave(&iommu->lock, flags);
	entry = iommu_range_alloc(dev, iommu, npages, NULL);
	spin_unlock_irqrestore(&iommu->lock, flags);

	if (unlikely(entry == DMA_ERROR_CODE))
		goto bad;

	bus_addr = (iommu->page_table_map_base +
		    (entry << IO_PAGE_SHIFT));
	ret = bus_addr | (oaddr & ~IO_PAGE_MASK);
	base_paddr = __pa(oaddr & IO_PAGE_MASK);
	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	local_irq_save(flags);

	iommu_batch_start(dev, prot, entry);

	for (i = 0; i < npages; i++, base_paddr += IO_PAGE_SIZE) {
		long err = iommu_batch_add(base_paddr);
		if (unlikely(err < 0L))
			goto iommu_map_fail;
	}
	if (unlikely(iommu_batch_end() < 0L))
		goto iommu_map_fail;

	local_irq_restore(flags);

	return ret;

bad:
	if (printk_ratelimit())
		WARN_ON(1);
	return DMA_ERROR_CODE;

iommu_map_fail:
	/* Interrupts are disabled.  */
	spin_lock(&iommu->lock);
	iommu_range_free(iommu, bus_addr, npages);
	spin_unlock_irqrestore(&iommu->lock, flags);

	return DMA_ERROR_CODE;
}

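/* Unmap a streaming mapping.  The demap hypervisor call may only do
 * part of the range, so it is retried until every page is gone.
 */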
static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr,
			      size_t sz, enum dma_data_direction direction,
			      struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct iommu *iommu;
	unsigned long flags, npages;
	long entry;
	u32 devhandle;

	if (unlikely(direction == DMA_NONE)) {
		if (printk_ratelimit())
			WARN_ON(1);
		return;
	}

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;

	npages = IO_PAGE_ALIGN(bus_addr + sz) - (bus_addr & IO_PAGE_MASK);
	npages >>= IO_PAGE_SHIFT;
	bus_addr &= IO_PAGE_MASK;

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_range_free(iommu, bus_addr, npages);

	entry = (bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT;
	do {
		unsigned long num;

		num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
					    npages);
		entry += num;
		npages -= num;
	} while (npages != 0);

	spin_unlock_irqrestore(&iommu->lock, flags);
}

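/* Map a scatterlist, coalescing entries into larger DMA segments when
 * the addresses are contiguous and the device's segment size and
 * boundary limits allow it.
 */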
static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist,
			 int nelems, enum dma_data_direction direction,
			 struct dma_attrs *attrs)
{
	struct scatterlist *s, *outs, *segstart;
	unsigned long flags, handle, prot;
	dma_addr_t dma_next = 0, dma_addr;
	unsigned int max_seg_size;
	unsigned long seg_boundary_size;
	int outcount, incount, i;
	struct iommu *iommu;
	unsigned long base_shift;
	long err;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	if (nelems == 0 || !iommu)
		return 0;

	prot = HV_PCI_MAP_ATTR_READ;
	if (direction != DMA_TO_DEVICE)
		prot |= HV_PCI_MAP_ATTR_WRITE;

	outs = s = segstart = &sglist[0];
	outcount = 1;
	incount = nelems;
	handle = 0;

	/* Init first segment length for backout at failure */
	outs->dma_length = 0;

	spin_lock_irqsave(&iommu->lock, flags);

	iommu_batch_start(dev, prot, ~0UL);

	max_seg_size = dma_get_max_seg_size(dev);
	seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
				  IO_PAGE_SIZE) >> IO_PAGE_SHIFT;
	base_shift = iommu->page_table_map_base >> IO_PAGE_SHIFT;
	for_each_sg(sglist, s, nelems, i) {
		unsigned long paddr, npages, entry, out_entry = 0, slen;

		slen = s->length;
		/* Sanity check */
		if (slen == 0) {
			dma_next = 0;
			continue;
		}
		/* Allocate iommu entries for that segment */
		paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s);
		npages = iommu_num_pages(paddr, slen, IO_PAGE_SIZE);
		entry = iommu_range_alloc(dev, iommu, npages, &handle);

		/* Handle failure */
		if (unlikely(entry == DMA_ERROR_CODE)) {
			if (printk_ratelimit())
				printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx"
				       " npages %lx\n", iommu, paddr, npages);
			goto iommu_map_failed;
		}

		iommu_batch_new_entry(entry);

		/* Convert entry to a dma_addr_t */
		dma_addr = iommu->page_table_map_base +
			(entry << IO_PAGE_SHIFT);
		dma_addr |= (s->offset & ~IO_PAGE_MASK);

		/* Insert into HW table */
		paddr &= IO_PAGE_MASK;
		while (npages--) {
			err = iommu_batch_add(paddr);
			if (unlikely(err < 0L))
				goto iommu_map_failed;
			paddr += IO_PAGE_SIZE;
		}

		/* If we are in an open segment, try merging */
		if (segstart != s) {
			/* We cannot merge if:
			 * - allocated dma_addr isn't contiguous to previous allocation
			 */
			if ((dma_addr != dma_next) ||
			    (outs->dma_length + s->length > max_seg_size) ||
			    (is_span_boundary(out_entry, base_shift,
					      seg_boundary_size, outs, s))) {
				/* Can't merge: create a new segment */
				segstart = s;
				outcount++;
				outs = sg_next(outs);
			} else {
				outs->dma_length += s->length;
			}
		}

		if (segstart == s) {
			/* This is a new segment, fill entries */
			outs->dma_address = dma_addr;
			outs->dma_length = slen;
			out_entry = entry;
		}

		/* Calculate next page pointer for contiguous check */
		dma_next = dma_addr + slen;
	}

	err = iommu_batch_end();

	if (unlikely(err < 0L))
		goto iommu_map_failed;

	spin_unlock_irqrestore(&iommu->lock, flags);

	if (outcount < incount) {
		outs = sg_next(outs);
		outs->dma_address = DMA_ERROR_CODE;
		outs->dma_length = 0;
	}

	return outcount;

iommu_map_failed:
	for_each_sg(sglist, s, nelems, i) {
		if (s->dma_length != 0) {
			unsigned long vaddr, npages;

			vaddr = s->dma_address & IO_PAGE_MASK;
			npages = iommu_num_pages(s->dma_address, s->dma_length,
						 IO_PAGE_SIZE);
			iommu_range_free(iommu, vaddr, npages);
			/* XXX demap? XXX */
			s->dma_address = DMA_ERROR_CODE;
			s->dma_length = 0;
		}
		if (s == outs)
			break;
	}
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

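/* Unmap a scatterlist: free and demap each segment's IOTSB range until
 * a zero-length entry marks the end of the mapped segments.
 */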
static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist,
			    int nelems, enum dma_data_direction direction,
			    struct dma_attrs *attrs)
{
	struct pci_pbm_info *pbm;
	struct scatterlist *sg;
	struct iommu *iommu;
	unsigned long flags;
	u32 devhandle;

	BUG_ON(direction == DMA_NONE);

	iommu = dev->archdata.iommu;
	pbm = dev->archdata.host_controller;
	devhandle = pbm->devhandle;

	spin_lock_irqsave(&iommu->lock, flags);

	sg = sglist;
	while (nelems--) {
		dma_addr_t dma_handle = sg->dma_address;
		unsigned int len = sg->dma_length;
		unsigned long npages, entry;

		if (!len)
			break;
		npages = iommu_num_pages(dma_handle, len, IO_PAGE_SIZE);
		iommu_range_free(iommu, dma_handle, npages);

		entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT);
		while (npages) {
			unsigned long num;

			num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry),
						    npages);
			entry += num;
			npages -= num;
		}

		sg = sg_next(sg);
	}

	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct dma_map_ops sun4v_dma_ops = {
	.alloc				= dma_4v_alloc_coherent,
	.free				= dma_4v_free_coherent,
	.map_page			= dma_4v_map_page,
	.unmap_page			= dma_4v_unmap_page,
	.map_sg				= dma_4v_map_sg,
	.unmap_sg			= dma_4v_unmap_sg,
};

static void __devinit pci_sun4v_scan_bus(struct pci_pbm_info *pbm,
					 struct device *parent)
{
	struct property *prop;
	struct device_node *dp;

	dp = pbm->op->dev.of_node;
	prop = of_find_property(dp, "66mhz-capable", NULL);
	pbm->is_66mhz_capable = (prop != NULL);
	pbm->pci_bus = pci_scan_one_pbm(pbm, parent);

	/* XXX register error interrupt handlers XXX */
}

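/* Walk the IOTSB and claim entries the firmware already programmed.
 * Entries whose target page is not in the available memory list are
 * kept and marked busy in the arena; the rest are demapped.
 */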
static unsigned long __devinit probe_existing_entries(struct pci_pbm_info *pbm,
						      struct iommu *iommu)
{
	struct iommu_arena *arena = &iommu->arena;
	unsigned long i, cnt = 0;
	u32 devhandle;

	devhandle = pbm->devhandle;
	for (i = 0; i < arena->limit; i++) {
		unsigned long ret, io_attrs, ra;

		ret = pci_sun4v_iommu_getmap(devhandle,
					     HV_PCI_TSBID(0, i),
					     &io_attrs, &ra);
		if (ret == HV_EOK) {
			if (page_in_phys_avail(ra)) {
				pci_sun4v_iommu_demap(devhandle,
						      HV_PCI_TSBID(0, i), 1);
			} else {
				cnt++;
				__set_bit(i, arena->map);
			}
		}
	}

	return cnt;
}

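/* Size the software IOMMU arena from the "virtual-dma" property,
 * falling back to a 2GB window at 0x80000000, and import any entries
 * firmware left behind.
 */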
static int __devinit pci_sun4v_iommu_init(struct pci_pbm_info *pbm)
{
	static const u32 vdma_default[] = { 0x80000000, 0x80000000 };
	struct iommu *iommu = pbm->iommu;
	unsigned long num_tsb_entries, sz;
	u32 dma_mask, dma_offset;
	const u32 *vdma;

	vdma = of_get_property(pbm->op->dev.of_node, "virtual-dma", NULL);
	if (!vdma)
		vdma = vdma_default;

	if ((vdma[0] | vdma[1]) & ~IO_PAGE_MASK) {
		printk(KERN_ERR PFX "Strange virtual-dma[%08x:%08x].\n",
		       vdma[0], vdma[1]);
		return -EINVAL;
	}

	dma_mask = (roundup_pow_of_two(vdma[1]) - 1UL);
	num_tsb_entries = vdma[1] / IO_PAGE_SIZE;

	dma_offset = vdma[0];

	/* Setup initial software IOMMU state. */
	spin_lock_init(&iommu->lock);
	iommu->ctx_lowest_free = 1;
	iommu->page_table_map_base = dma_offset;
	iommu->dma_addr_mask = dma_mask;

	/* Allocate and initialize the free area map.  */
	sz = (num_tsb_entries + 7) / 8;
	sz = (sz + 7UL) & ~7UL;
	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
	if (!iommu->arena.map) {
		printk(KERN_ERR PFX "Error, kmalloc(arena.map) failed.\n");
		return -ENOMEM;
	}
	iommu->arena.limit = num_tsb_entries;

	sz = probe_existing_entries(pbm, iommu);
	if (sz)
		printk("%s: Imported %lu TSB entries from OBP\n",
		       pbm->name, sz);

	return 0;
}

#ifdef CONFIG_PCI_MSI
struct pci_sun4v_msiq_entry {
	u64		version_type;
#define MSIQ_VERSION_MASK		0xffffffff00000000UL
#define MSIQ_VERSION_SHIFT		32
#define MSIQ_TYPE_MASK			0x00000000000000ffUL
#define MSIQ_TYPE_SHIFT			0
#define MSIQ_TYPE_NONE			0x00
#define MSIQ_TYPE_MSG			0x01
#define MSIQ_TYPE_MSI32			0x02
#define MSIQ_TYPE_MSI64			0x03
#define MSIQ_TYPE_INTX			0x08
#define MSIQ_TYPE_NONE2			0xff

	u64		intx_sysino;
	u64		reserved1;
	u64		stick;
	u64		req_id;  /* bus/device/func */
#define MSIQ_REQID_BUS_MASK		0xff00UL
#define MSIQ_REQID_BUS_SHIFT		8
#define MSIQ_REQID_DEVICE_MASK		0x00f8UL
#define MSIQ_REQID_DEVICE_SHIFT		3
#define MSIQ_REQID_FUNC_MASK		0x0007UL
#define MSIQ_REQID_FUNC_SHIFT		0

	u64		msi_address;

	/* The format of this value is message type dependent.
	 * For MSI bits 15:0 are the data from the MSI packet.
	 * For MSI-X bits 31:0 are the data from the MSI packet.
	 * For MSG, the message code and message routing code where:
	 * 	bits 39:32 is the bus/device/fn of the msg target-id
	 *	bits 18:16 is the message routing code
	 *	bits 7:0 is the message code
	 * For INTx the low order 2-bits are:
	 *	00 - INTA
	 *	01 - INTB
	 *	10 - INTC
	 *	11 - INTD
	 */
	u64		msi_data;

	u64		reserved2;
};

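/* The helpers below implement the generic sparc64 MSI queue operations
 * on top of the sun4v PCI hypervisor calls.
 */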
static int pci_sun4v_get_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long *head)
{
	unsigned long err, limit;

	err = pci_sun4v_msiq_gethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -ENXIO;

	limit = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	if (unlikely(*head >= limit))
		return -EFBIG;

	return 0;
}

static int pci_sun4v_dequeue_msi(struct pci_pbm_info *pbm,
				 unsigned long msiqid, unsigned long *head,
				 unsigned long *msi)
{
	struct pci_sun4v_msiq_entry *ep;
	unsigned long err, type;

	/* Note: void pointer arithmetic, 'head' is a byte offset  */
	ep = (pbm->msi_queues + ((msiqid - pbm->msiq_first) *
				 (pbm->msiq_ent_count *
				  sizeof(struct pci_sun4v_msiq_entry))) +
	      *head);

	if ((ep->version_type & MSIQ_TYPE_MASK) == 0)
		return 0;

	type = (ep->version_type & MSIQ_TYPE_MASK) >> MSIQ_TYPE_SHIFT;
	if (unlikely(type != MSIQ_TYPE_MSI32 &&
		     type != MSIQ_TYPE_MSI64))
		return -EINVAL;

	*msi = ep->msi_data;

	err = pci_sun4v_msi_setstate(pbm->devhandle,
				     ep->msi_data /* msi_num */,
				     HV_MSISTATE_IDLE);
	if (unlikely(err))
		return -ENXIO;

	/* Clear the entry.  */
	ep->version_type &= ~MSIQ_TYPE_MASK;

	(*head) += sizeof(struct pci_sun4v_msiq_entry);
	if (*head >=
	    (pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry)))
		*head = 0;

	return 1;
}

static int pci_sun4v_set_head(struct pci_pbm_info *pbm, unsigned long msiqid,
			      unsigned long head)
{
	unsigned long err;

	err = pci_sun4v_msiq_sethead(pbm->devhandle, msiqid, head);
	if (unlikely(err))
		return -EINVAL;

	return 0;
}

static int pci_sun4v_msi_setup(struct pci_pbm_info *pbm, unsigned long msiqid,
			       unsigned long msi, int is_msi64)
{
	if (pci_sun4v_msi_setmsiq(pbm->devhandle, msi, msiqid,
				  (is_msi64 ?
				   HV_MSITYPE_MSI64 : HV_MSITYPE_MSI32)))
		return -ENXIO;
	if (pci_sun4v_msi_setstate(pbm->devhandle, msi, HV_MSISTATE_IDLE))
		return -ENXIO;
	if (pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_VALID))
		return -ENXIO;
	return 0;
}

static int pci_sun4v_msi_teardown(struct pci_pbm_info *pbm, unsigned long msi)
{
	unsigned long err, msiqid;

	err = pci_sun4v_msi_getmsiq(pbm->devhandle, msi, &msiqid);
	if (err)
		return -ENXIO;

	pci_sun4v_msi_setvalid(pbm->devhandle, msi, HV_MSIVALID_INVALID);

	return 0;
}

static int pci_sun4v_msiq_alloc(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);
	pages = __get_free_pages(GFP_KERNEL | __GFP_COMP, order);
	if (pages == 0UL) {
		printk(KERN_ERR "MSI: Cannot allocate MSI queues (o=%lu).\n",
		       order);
		return -ENOMEM;
	}
	memset((char *)pages, 0, PAGE_SIZE << order);
	pbm->msi_queues = (void *) pages;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long err, base = __pa(pages + (i * q_size));
		unsigned long ret1, ret2;

		err = pci_sun4v_msiq_conf(pbm->devhandle,
					  pbm->msiq_first + i,
					  base, pbm->msiq_ent_count);
		if (err) {
			printk(KERN_ERR "MSI: msiq register fails (err=%lu)\n",
			       err);
			goto h_error;
		}

		err = pci_sun4v_msiq_info(pbm->devhandle,
					  pbm->msiq_first + i,
					  &ret1, &ret2);
		if (err) {
			printk(KERN_ERR "MSI: Cannot read msiq (err=%lu)\n",
			       err);
			goto h_error;
		}
		if (ret1 != base || ret2 != pbm->msiq_ent_count) {
			printk(KERN_ERR "MSI: Bogus qconf "
			       "expected[%lx:%x] got[%lx:%lx]\n",
			       base, pbm->msiq_ent_count,
			       ret1, ret2);
			goto h_error;
		}
	}

	return 0;

h_error:
	free_pages(pages, order);
	return -EINVAL;
}

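/* Unconfigure every MSI queue owned by this PBM and free the pages
 * backing the queue entries.
 */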
static void pci_sun4v_msiq_free(struct pci_pbm_info *pbm)
{
	unsigned long q_size, alloc_size, pages, order;
	int i;

	for (i = 0; i < pbm->msiq_num; i++) {
		unsigned long msiqid = pbm->msiq_first + i;

		(void) pci_sun4v_msiq_conf(pbm->devhandle, msiqid, 0UL, 0);
	}

	q_size = pbm->msiq_ent_count * sizeof(struct pci_sun4v_msiq_entry);
	alloc_size = (pbm->msiq_num * q_size);
	order = get_order(alloc_size);

	pages = (unsigned long) pbm->msi_queues;

	free_pages(pages, order);

	pbm->msi_queues = NULL;
}

static int pci_sun4v_msiq_build_irq(struct pci_pbm_info *pbm,
				    unsigned long msiqid,
				    unsigned long devino)
{
	unsigned int irq = sun4v_build_irq(pbm->devhandle, devino);

	if (!irq)
		return -ENOMEM;

	if (pci_sun4v_msiq_setvalid(pbm->devhandle, msiqid, HV_MSIQ_VALID))
		return -EINVAL;
	if (pci_sun4v_msiq_setstate(pbm->devhandle, msiqid, HV_MSIQSTATE_IDLE))
		return -EINVAL;

	return irq;
}

static const struct sparc64_msiq_ops pci_sun4v_msiq_ops = {
	.get_head	=	pci_sun4v_get_head,
	.dequeue_msi	=	pci_sun4v_dequeue_msi,
	.set_head	=	pci_sun4v_set_head,
	.msi_setup	=	pci_sun4v_msi_setup,
	.msi_teardown	=	pci_sun4v_msi_teardown,
	.msiq_alloc	=	pci_sun4v_msiq_alloc,
	.msiq_free	=	pci_sun4v_msiq_free,
	.msiq_build_irq	=	pci_sun4v_msiq_build_irq,
};

static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
	sparc64_pbm_msi_init(pbm, &pci_sun4v_msiq_ops);
}
#else /* CONFIG_PCI_MSI */
static void pci_sun4v_msi_init(struct pci_pbm_info *pbm)
{
}
#endif /* !(CONFIG_PCI_MSI) */

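/* Common PBM setup: record the device handle, probe PCI resources,
 * initialize the IOMMU and MSI state, scan the bus, and link the PBM
 * into the global list.
 */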
static int __devinit pci_sun4v_pbm_init(struct pci_pbm_info *pbm,
					struct platform_device *op, u32 devhandle)
{
	struct device_node *dp = op->dev.of_node;
	int err;

	pbm->numa_node = of_node_to_nid(dp);

	pbm->pci_ops = &sun4v_pci_ops;
	pbm->config_space_reg_bits = 12;

	pbm->index = pci_num_pbms++;

	pbm->op = op;

	pbm->devhandle = devhandle;

	pbm->name = dp->full_name;

	printk("%s: SUN4V PCI Bus Module\n", pbm->name);
	printk("%s: On NUMA node %d\n", pbm->name, pbm->numa_node);

	pci_determine_mem_io_space(pbm);

	pci_get_pbm_props(pbm);

	err = pci_sun4v_iommu_init(pbm);
	if (err)
		return err;

	pci_sun4v_msi_init(pbm);

	pci_sun4v_scan_bus(pbm, &op->dev);

	pbm->next = pci_pbm_root;
	pci_pbm_root = pbm;

	return 0;
}

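/* Bind to one "SUNW,sun4v-pci" node: negotiate the PCI hypervisor API
 * group once, allocate the per-cpu IOMMU batch page lists on first use,
 * then build a PBM for the device handle taken from the "reg" property.
 */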
static int __devinit pci_sun4v_probe(struct platform_device *op)
{
	const struct linux_prom64_registers *regs;
	static int hvapi_negotiated = 0;
	struct pci_pbm_info *pbm;
	struct device_node *dp;
	struct iommu *iommu;
	u32 devhandle;
	int i, err;

	dp = op->dev.of_node;

	if (!hvapi_negotiated++) {
		err = sun4v_hvapi_register(HV_GRP_PCI,
					   vpci_major,
					   &vpci_minor);

		if (err) {
			printk(KERN_ERR PFX "Could not register hvapi, "
			       "err=%d\n", err);
			return err;
		}
		printk(KERN_INFO PFX "Registered hvapi major[%lu] minor[%lu]\n",
		       vpci_major, vpci_minor);

		dma_ops = &sun4v_dma_ops;
	}

	regs = of_get_property(dp, "reg", NULL);
	err = -ENODEV;
	if (!regs) {
		printk(KERN_ERR PFX "Could not find config registers\n");
		goto out_err;
	}
	devhandle = (regs->phys_addr >> 32UL) & 0x0fffffff;

	err = -ENOMEM;
	if (!iommu_batch_initialized) {
		for_each_possible_cpu(i) {
			unsigned long page = get_zeroed_page(GFP_KERNEL);

			if (!page)
				goto out_err;

			per_cpu(iommu_batch, i).pglist = (u64 *) page;
		}
		iommu_batch_initialized = 1;
	}

	pbm = kzalloc(sizeof(*pbm), GFP_KERNEL);
	if (!pbm) {
		printk(KERN_ERR PFX "Could not allocate pci_pbm_info\n");
		goto out_err;
	}

	iommu = kzalloc(sizeof(struct iommu), GFP_KERNEL);
	if (!iommu) {
		printk(KERN_ERR PFX "Could not allocate pbm iommu\n");
		goto out_free_controller;
	}

	pbm->iommu = iommu;

	err = pci_sun4v_pbm_init(pbm, op, devhandle);
	if (err)
		goto out_free_iommu;

	dev_set_drvdata(&op->dev, pbm);

	return 0;

out_free_iommu:
	kfree(pbm->iommu);

out_free_controller:
	kfree(pbm);

out_err:
	return err;
}

static const struct of_device_id pci_sun4v_match[] = {
	{
		.name = "pci",
		.compatible = "SUNW,sun4v-pci",
	},
	{},
};

static struct platform_driver pci_sun4v_driver = {
	.driver = {
		.name = DRIVER_NAME,
		.owner = THIS_MODULE,
		.of_match_table = pci_sun4v_match,
	},
	.probe		= pci_sun4v_probe,
};

static int __init pci_sun4v_init(void)
{
	return platform_driver_register(&pci_sun4v_driver);
}

subsys_initcall(pci_sun4v_init);