/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/iommu-helper.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/pci.h>
#include <asm/pci_dma.h>

static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
static int s390_iommu_strict;

static int zpci_refresh_global(struct zpci_dev *zdev)
{
	return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
				  zdev->iommu_pages * PAGE_SIZE);
}

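/*
 * Allocate one level of the CPU-side DMA translation table (used for
 * both the root region table and the segment tables) and mark all of
 * its entries invalid.
 */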
unsigned long *dma_alloc_cpu_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_region_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
		*entry = ZPCI_TABLE_INVALID;
	return table;
}

static void dma_free_cpu_table(void *table)
{
	kmem_cache_free(dma_region_table_cache, table);
}

static unsigned long *dma_alloc_page_table(void)
{
	unsigned long *table, *entry;

	table = kmem_cache_alloc(dma_page_table_cache, GFP_ATOMIC);
	if (!table)
		return NULL;

	for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
		*entry = ZPCI_PTE_INVALID;
	return table;
}

static void dma_free_page_table(void *table)
{
	kmem_cache_free(dma_page_table_cache, table);
}

static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
{
	unsigned long *sto;

	if (reg_entry_isvalid(*entry))
		sto = get_rt_sto(*entry);
	else {
		sto = dma_alloc_cpu_table();
		if (!sto)
			return NULL;

		set_rt_sto(entry, sto);
		validate_rt_entry(entry);
		entry_clr_protected(entry);
	}
	return sto;
}

static unsigned long *dma_get_page_table_origin(unsigned long *entry)
{
	unsigned long *pto;

	if (reg_entry_isvalid(*entry))
		pto = get_st_pto(*entry);
	else {
		pto = dma_alloc_page_table();
		if (!pto)
			return NULL;
		set_st_pto(entry, pto);
		validate_st_entry(entry);
		entry_clr_protected(entry);
	}
	return pto;
}

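/*
 * Walk the CPU-side translation tables for @dma_addr, allocating the
 * intermediate segment and page tables on demand, and return a pointer
 * to the page-table entry (or NULL if a table allocation failed).
 */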
unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
{
	unsigned long *sto, *pto;
	unsigned int rtx, sx, px;

	rtx = calc_rtx(dma_addr);
	sto = dma_get_seg_table_origin(&rto[rtx]);
	if (!sto)
		return NULL;

	sx = calc_sx(dma_addr);
	pto = dma_get_page_table_origin(&sto[sx]);
	if (!pto)
		return NULL;

	px = calc_px(dma_addr);
	return &pto[px];
}

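/*
 * Update a single page-table entry: either invalidate it or point it at
 * @page_addr, and set or clear the protection bit according to @flags.
 */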
void dma_update_cpu_trans(unsigned long *entry, void *page_addr, int flags)
{
	if (flags & ZPCI_PTE_INVALID) {
		invalidate_pt_entry(entry);
	} else {
		set_pt_pfaa(entry, page_addr);
		validate_pt_entry(entry);
	}

	if (flags & ZPCI_TABLE_PROTECTED)
		entry_set_protected(entry);
	else
		entry_clr_protected(entry);
}

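/*
 * Establish or invalidate CPU-side translation entries for @size bytes
 * starting at @pa/@dma_addr and, unless the refresh can be skipped (see
 * the comment below), make the update visible to the device via
 * zpci_refresh_trans(). If the refresh fails, newly validated entries
 * are rolled back.
 */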
static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
			    dma_addr_t dma_addr, size_t size, int flags)
{
	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
	dma_addr_t start_dma_addr = dma_addr;
	unsigned long irq_flags;
	unsigned long *entry;
	int i, rc = 0;

	if (!nr_pages)
		return -EINVAL;

	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
	if (!zdev->dma_table) {
		rc = -EINVAL;
		goto no_refresh;
	}

	for (i = 0; i < nr_pages; i++) {
		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
		if (!entry) {
			rc = -ENOMEM;
			goto undo_cpu_trans;
		}
		dma_update_cpu_trans(entry, page_addr, flags);
		page_addr += PAGE_SIZE;
		dma_addr += PAGE_SIZE;
	}

	/*
	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
	 * translations when previously invalid translation-table entries are
	 * validated. With lazy unmap, it also is skipped for previously valid
	 * entries, but a global rpcit is then required before any address can
	 * be re-used, i.e. after each iommu bitmap wrap-around.
	 */
	if (!zdev->tlb_refresh &&
			(!s390_iommu_strict ||
			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
		goto no_refresh;

	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
				nr_pages * PAGE_SIZE);
undo_cpu_trans:
	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
		flags = ZPCI_PTE_INVALID;
		while (i-- > 0) {
			page_addr -= PAGE_SIZE;
			dma_addr -= PAGE_SIZE;
			entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
			if (!entry)
				break;
			dma_update_cpu_trans(entry, page_addr, flags);
		}
	}

no_refresh:
	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
	return rc;
}

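/*
 * Free every page table reachable from a valid region-table entry, then
 * free the segment table itself.
 */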
void dma_free_seg_table(unsigned long entry)
{
	unsigned long *sto = get_rt_sto(entry);
	int sx;

	for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
		if (reg_entry_isvalid(sto[sx]))
			dma_free_page_table(get_st_pto(sto[sx]));

	dma_free_cpu_table(sto);
}

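/*
 * Tear down a complete CPU-side translation hierarchy starting from the
 * root region table.
 */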
void dma_cleanup_tables(unsigned long *table)
{
	int rtx;

	if (!table)
		return;

	for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
		if (reg_entry_isvalid(table[rtx]))
			dma_free_seg_table(table[rtx]);

	dma_free_cpu_table(table);
}

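/*
 * Find a free range of @size pages in the iommu bitmap, starting the
 * search at @start and honouring the device's segment boundary.
 * Returns the page index, or -1 if no suitable range is available.
 */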
static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
				       unsigned long start, int size)
{
	unsigned long boundary_size;

	boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1,
			      PAGE_SIZE) >> PAGE_SHIFT;
	return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
				start, size, 0, boundary_size, 0);
}

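/*
 * Allocate @size pages of DMA address space. The search continues from
 * the previous allocation (next_bit) and wraps around at most once;
 * with lazy unmap, a wrap-around triggers a global translation refresh
 * before freed addresses are handed out again.
 */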
static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
{
	unsigned long offset, flags;
	int wrap = 0;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
	if (offset == -1) {
		/* wrap-around */
		offset = __dma_alloc_iommu(zdev, 0, size);
		wrap = 1;
	}

	if (offset != -1) {
		zdev->next_bit = offset + size;
		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
			/* global flush after wrap-around with lazy unmap */
			zpci_refresh_global(zdev);
	}
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
	return offset;
}

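/*
 * Give @size pages at @offset back to the iommu bitmap. With lazy
 * unmap, next_bit is advanced past the freed range so it is not re-used
 * before the next wrap-around.
 */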
static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size)
{
	unsigned long flags;

	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
	if (!zdev->iommu_bitmap)
		goto out;
	bitmap_clear(zdev->iommu_bitmap, offset, size);
	/*
	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
	 * until wrap-around.
	 */
	if (!s390_iommu_strict && offset >= zdev->next_bit)
		zdev->next_bit = offset + size;
out:
	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
}

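/*
 * dma_map_ops map_page callback: allocate a DMA address range for the
 * buffer, set up the translation entries and return the DMA address,
 * or DMA_ERROR_CODE on failure.
 */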
static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction,
				     struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long nr_pages, iommu_page_index;
	unsigned long pa = page_to_phys(page) + offset;
	int flags = ZPCI_PTE_VALID;
	dma_addr_t dma_addr;

	/* This rounds up number of pages based on size and offset */
	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
	iommu_page_index = dma_alloc_iommu(zdev, nr_pages);
	if (iommu_page_index == -1)
		goto out_err;

	/* Use rounded up size */
	size = nr_pages * PAGE_SIZE;

	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
	if (dma_addr + size > zdev->end_dma)
		goto out_free;

	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
		flags |= ZPCI_TABLE_PROTECTED;

	if (!dma_update_trans(zdev, pa, dma_addr, size, flags)) {
		atomic64_add(nr_pages, &zdev->mapped_pages);
		return dma_addr + (offset & ~PAGE_MASK);
	}

out_free:
	dma_free_iommu(zdev, iommu_page_index, nr_pages);
out_err:
	zpci_err("map error:\n");
	zpci_err_hex(&pa, sizeof(pa));
	return DMA_ERROR_CODE;
}

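/*
 * dma_map_ops unmap_page callback: invalidate the translation entries
 * and return the DMA address range to the iommu bitmap.
 */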
static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
				 size_t size, enum dma_data_direction direction,
				 struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	unsigned long iommu_page_index;
	int npages;

	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
	dma_addr = dma_addr & PAGE_MASK;
	if (dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
			     ZPCI_PTE_INVALID)) {
		zpci_err("unmap error:\n");
		zpci_err_hex(&dma_addr, sizeof(dma_addr));
	}

	atomic64_add(npages, &zdev->unmapped_pages);
	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
	dma_free_iommu(zdev, iommu_page_index, npages);
}

static void *s390_dma_alloc(struct device *dev, size_t size,
			    dma_addr_t *dma_handle, gfp_t flag,
			    struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
	struct page *page;
	unsigned long pa;
	dma_addr_t map;

	size = PAGE_ALIGN(size);
	page = alloc_pages(flag, get_order(size));
	if (!page)
		return NULL;

	pa = page_to_phys(page);
	memset((void *) pa, 0, size);

	map = s390_dma_map_pages(dev, page, pa % PAGE_SIZE,
				 size, DMA_BIDIRECTIONAL, NULL);
	if (dma_mapping_error(dev, map)) {
		free_pages(pa, get_order(size));
		return NULL;
	}

	atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
	if (dma_handle)
		*dma_handle = map;
	return (void *) pa;
}

static void s390_dma_free(struct device *dev, size_t size,
			  void *pa, dma_addr_t dma_handle,
			  struct dma_attrs *attrs)
{
	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));

	size = PAGE_ALIGN(size);
	atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
	s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
	free_pages((unsigned long) pa, get_order(size));
}

static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
			   int nr_elements, enum dma_data_direction dir,
			   struct dma_attrs *attrs)
{
	int mapped_elements = 0;
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		struct page *page = sg_page(s);
		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
						    s->length, dir, NULL);
		if (!dma_mapping_error(dev, s->dma_address)) {
			s->dma_length = s->length;
			mapped_elements++;
		} else
			goto unmap;
	}
out:
	return mapped_elements;

unmap:
	for_each_sg(sg, s, mapped_elements, i) {
		if (s->dma_address)
			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
					     dir, NULL);
		s->dma_address = 0;
		s->dma_length = 0;
	}
	mapped_elements = 0;
	goto out;
}

static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
			      int nr_elements, enum dma_data_direction dir,
			      struct dma_attrs *attrs)
{
	struct scatterlist *s;
	int i;

	for_each_sg(sg, s, nr_elements, i) {
		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir, NULL);
		s->dma_address = 0;
		s->dma_length = 0;
	}
}

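/*
 * Set up DMA translation for a PCI function: allocate the root DMA
 * translation table and the iommu bitmap that manages the DMA address
 * aperture, then register the translation parameters with the function
 * via zpci_register_ioat().
 */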
int zpci_dma_init_device(struct zpci_dev *zdev)
{
	int rc;

	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	spin_lock_init(&zdev->iommu_bitmap_lock);
	spin_lock_init(&zdev->dma_table_lock);

	zdev->dma_table = dma_alloc_cpu_table();
	if (!zdev->dma_table) {
		rc = -ENOMEM;
		goto out_clean;
	}

	zdev->iommu_size = (unsigned long) high_memory - PAGE_OFFSET;
	zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
	zdev->iommu_bitmap = vzalloc(zdev->iommu_pages / 8);
	if (!zdev->iommu_bitmap) {
		rc = -ENOMEM;
		goto out_reg;
	}

	rc = zpci_register_ioat(zdev,
				0,
				zdev->start_dma + PAGE_OFFSET,
				zdev->start_dma + zdev->iommu_size - 1,
				(u64) zdev->dma_table);
	if (rc)
		goto out_reg;
	return 0;

out_reg:
	dma_free_cpu_table(zdev->dma_table);
out_clean:
	return rc;
}

void zpci_dma_exit_device(struct zpci_dev *zdev)
{
	/*
	 * At this point, if the device is part of an IOMMU domain, this would
	 * be a strong hint towards a bug in the IOMMU API (common) code and/or
	 * simultaneous access via IOMMU and DMA API. So let's issue a warning.
	 */
	WARN_ON(zdev->s390_domain);

	zpci_unregister_ioat(zdev, 0);
	dma_cleanup_tables(zdev->dma_table);
	zdev->dma_table = NULL;
	vfree(zdev->iommu_bitmap);
	zdev->iommu_bitmap = NULL;
	zdev->next_bit = 0;
}

static int __init dma_alloc_cpu_table_caches(void)
{
	dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
					ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
					0, NULL);
	if (!dma_region_table_cache)
		return -ENOMEM;

	dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
					ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
					0, NULL);
	if (!dma_page_table_cache) {
		kmem_cache_destroy(dma_region_table_cache);
		return -ENOMEM;
	}
	return 0;
}

int __init zpci_dma_init(void)
{
	return dma_alloc_cpu_table_caches();
}

void zpci_dma_exit(void)
{
	kmem_cache_destroy(dma_page_table_cache);
	kmem_cache_destroy(dma_region_table_cache);
}

#define PREALLOC_DMA_DEBUG_ENTRIES	(1 << 16)

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);

struct dma_map_ops s390_dma_ops = {
	.alloc		= s390_dma_alloc,
	.free		= s390_dma_free,
	.map_sg		= s390_dma_map_sg,
	.unmap_sg	= s390_dma_unmap_sg,
	.map_page	= s390_dma_map_pages,
	.unmap_page	= s390_dma_unmap_pages,
	/* if we support direct DMA this must be conditional */
	.is_phys	= 0,
	/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_dma_ops);

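/*
 * Kernel parameter "s390_iommu=strict": refresh the device's I/O
 * translations on every unmap instead of deferring the flush until the
 * iommu bitmap wraps around.
 */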
static int __init s390_iommu_setup(char *str)
{
	if (!strncmp(str, "strict", 6))
		s390_iommu_strict = 1;
	return 0;
}

__setup("s390_iommu=", s390_iommu_setup);