/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

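/*
 * Allocate a region/segment table from the region-table slab cache, with
 * all ZPCI_TABLE_ENTRIES entries marked invalid.
 */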
unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

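/*
 * Walk the CPU translation tables for dma_addr, allocating missing segment
 * and page tables on the way, and return a pointer to the page-table entry
 * (NULL if a table allocation failed).
 */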
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

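/* Set or invalidate a single page-table entry and update its protection bit. */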
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

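/*
 * Establish or invalidate the CPU translation entries for a range of pages
 * and, unless it can be skipped, refresh the device's I/O translations via
 * RPCIT. If the refresh of a new mapping fails, the entries that were just
 * validated are invalidated again (undo_cpu_trans).
 */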
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	dma_addr_t start_dma_addr = dma_addr;
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto no_refresh;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, it also is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if (!zdev->tlb_refresh &&
			(!s390_iommu_strict ||
			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
		goto no_refresh;

	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
				nr_pages * PAGE_SIZE);
undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}

no_refresh:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

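/*
 * Find a free range of iommu pages in the allocation bitmap, honoring the
 * device's DMA segment boundary. dma_alloc_iommu() searches from next_bit
 * first and retries from the start of the bitmap on a wrap-around.
 */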
static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
				       unsigned long start, int size)
{
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, 0, boundary_size, 0);
}

static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
{
	unsigned long offset, flags;
	int wrap = 0;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
	if (offset == -1) {
		/* wrap-around */
		offset = __dma_alloc_iommu(zdev, 0, size);
		wrap = 1;
	}

	if (offset != -1) {
		zdev->next_bit = offset + size;
		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
			/* global flush after wrap-around with lazy unmap */
			zpci_refresh_global(zdev);
	}
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return offset;
}

static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
{
	unsigned long flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;
	bitmap_clear(zdev->iommu_bitmap, offset, size);
	/*
	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
	 * until wrap-around.
	 */
	if (!s390_iommu_strict && offset >= zdev->next_bit)
		zdev->next_bit = offset + size;
out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
{
	struct {
		unsigned long rc;
		unsigned long addr;
	} __packed data = {rc, addr};

	zpci_err_hex(&data, sizeof(data));
}

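/*
 * map_page callback: reserve iommu pages for the buffer, set up the CPU
 * translation entries and return the resulting DMA address including the
 * offset into the first page; DMA_ERROR_CODE is returned on failure.
 */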
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long nr_pages, iommu_page_index;
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	dma_addr_t dma_addr;
	int ret;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
	if (iommu_page_index == -1) {
		ret = -ENOSPC;
		goto out_err;
	}

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;

	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
	if (dma_addr + size > zdev->end_dma) {
		ret = -ERANGE;
		goto out_free;
	}

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
	if (ret)
		goto out_free;

	atomic64_add(nr_pages, &zdev->mapped_pages);
	return dma_addr + (offset & ~PAGE_MASK);

out_free:
	dma_free_iommu(zdev, iommu_page_index, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_dma(ret, pa);
	return DMA_ERROR_CODE;
}

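/*
 * unmap_page callback: invalidate the translation entries for the range and
 * return the iommu pages to the allocation bitmap.
 */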
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long iommu_page_index;
	int npages, ret;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			       ZPCI_PTE_INVALID);
	if (ret) {
		zpci_err("unmap error:\n");
		zpci_err_dma(ret, dma_addr);
		return;
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
	dma_free_iommu(zdev, iommu_page_index, npages);
}

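/*
 * Coherent allocation: allocate and zero whole pages, then map them
 * bidirectionally through s390_dma_map_pages().
 */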
static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	memset((void *) pa, 0, size);

	map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, NULL);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
	free_pages((unsigned long) pa, get_order(size));
}

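/* map_sg callback: map each scatterlist element separately via map_page. */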
static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	int mapped_elements = 0;
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		struct page *page = sg_page(s);
		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
						    s->length, dir, NULL);
		if (!dma_mapping_error(dev, s->dma_address)) {
			s->dma_length = s->length;
			mapped_elements++;
		} else
			goto unmap;
	}
out:
	return mapped_elements;

unmap:
	for_each_sg(sg, s, mapped_elements, i) {
		if (s->dma_address)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, NULL);
		s->dma_address = 0;
		s->dma_length = 0;
	}
	mapped_elements = 0;
	goto out;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      struct dma_attrs *attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

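/*
 * Set up DMA translation for a PCI function: allocate the root translation
 * table and the iommu allocation bitmap and register the table with the
 * hardware via zpci_register_ioat().
 */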
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out_clean;
	}

	/*
	 * Restrict the iommu bitmap size to the minimum of the following:
	 * - main memory size
	 * - 3-level pagetable address limit minus start_dma offset
	 * - DMA address range allowed by the hardware (clp query pci fn)
	 *
	 * Also set zdev->end_dma to the actual end address of the usable
	 * range, instead of the theoretical maximum as reported by hardware.
	 */
	zdev->iommu_size = min3((u64) high_memory,
				ZPCI_TABLE_SIZE_RT - zdev->start_dma,
				zdev->end_dma - zdev->start_dma + 1);
	zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto out_reg;
	}

	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
				(u64) zdev->dma_table);
	if (rc)
		goto out_reg;
	return 0;

out_reg:
	dma_free_cpu_table(zdev->dma_table);
out_clean:
	return rc;
}

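/*
 * Tear down DMA translation for a PCI function: unregister the translation
 * table from the hardware, then free the translation tables and the iommu
 * bitmap.
 */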
void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	zpci_unregister_ioat(zdev, 0);
	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

struct dma_map_ops s390_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	/* if we support direct DMA this must be conditional */
	.is_phys	= 0,
	/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_dma_ops);

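/*
 * "s390_iommu=strict" selects strict mode: I/O translations are refreshed on
 * every unmap instead of lazily at each iommu bitmap wrap-around.
 */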
static int __init s390_iommu_setup(char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

__setup("s390_iommu=", s390_iommu_setup);