/*
 * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/*
 * GK20A does not have dedicated video memory, and to accurately represent this
 * fact Nouveau will not create a RAM device for it. Therefore its instmem
 * implementation must be done directly on top of system memory, while
 * preserving coherency for read and write operations.
 *
 * Instmem can be allocated through two means:
 * 1) If an IOMMU unit has been probed, the IOMMU API is used to make memory
 *    pages appear contiguous to the GPU. This is the preferred way.
 * 2) If no IOMMU unit is probed, the DMA API is used to allocate physically
 *    contiguous memory.
 *
 * In both cases CPU reads and writes are performed by creating a write-combined
 * mapping. The GPU L2 cache must thus be flushed/invalidated when required. To
 * be conservative we do this every time we acquire or release an instobj, but
 * ideally L2 management should be handled at a higher level.
 *
 * To improve performance, CPU mappings are not removed upon instobj release.
 * Instead they are placed into an LRU list to be recycled when the mapped space
 * goes beyond a certain threshold. At the moment this limit is 1MB.
 */
#include "priv.h"

#include <core/memory.h>
#include <core/mm.h>
#include <core/tegra.h>
#include <subdev/fb.h>
#include <subdev/ltc.h>

struct gk20a_instobj {
	struct nvkm_memory memory;
	struct nvkm_mem mem;
	struct gk20a_instmem *imem;

	/* CPU mapping */
	u32 *vaddr;
	struct list_head vaddr_node;
};
#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)

/*
 * Used for objects allocated using the DMA API
 */
struct gk20a_instobj_dma {
	struct gk20a_instobj base;

	u32 *cpuaddr;
	dma_addr_t handle;
	struct nvkm_mm_node r;
};
#define gk20a_instobj_dma(p) \
	container_of(gk20a_instobj(p), struct gk20a_instobj_dma, base)

/*
 * Used for objects flattened using the IOMMU API
 */
struct gk20a_instobj_iommu {
	struct gk20a_instobj base;

	/* will point to the higher half of pages */
	dma_addr_t *dma_addrs;
	/* array of base.mem->size pages (+ dma_addr_ts) */
	struct page *pages[];
};
#define gk20a_instobj_iommu(p) \
	container_of(gk20a_instobj(p), struct gk20a_instobj_iommu, base)

struct gk20a_instmem {
	struct nvkm_instmem base;

	/* protects vaddr_* and gk20a_instobj::vaddr* */
	spinlock_t lock;

	/* CPU mappings LRU */
	unsigned int vaddr_use;
	unsigned int vaddr_max;
	struct list_head vaddr_lru;

	/* Only used if an IOMMU is present */
	struct mutex *mm_mutex;
	struct nvkm_mm *mm;
	struct iommu_domain *domain;
	unsigned long iommu_pgshift;

	/* Only used by DMA API */
	struct dma_attrs attrs;

	void __iomem * (*cpu_map)(struct nvkm_memory *);
};
#define gk20a_instmem(p) container_of((p), struct gk20a_instmem, base)

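/* nvkm_memory accessors shared by the DMA and IOMMU implementations */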
static enum nvkm_memory_target
gk20a_instobj_target(struct nvkm_memory *memory)
{
	return NVKM_MEM_TARGET_HOST;
}

static u64
gk20a_instobj_addr(struct nvkm_memory *memory)
{
	return gk20a_instobj(memory)->mem.offset;
}

static u64
gk20a_instobj_size(struct nvkm_memory *memory)
{
	return (u64)gk20a_instobj(memory)->mem.size << 12;
}

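/*
 * Build a write-combined CPU mapping for a physically contiguous DMA
 * allocation.
 */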
static void __iomem *
gk20a_instobj_cpu_map_dma(struct nvkm_memory *memory)
{
	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
	struct device *dev = node->base.imem->base.subdev.device->dev;
	int npages = nvkm_memory_size(memory) >> 12;
	struct page *pages[npages];
	int i;

	/* phys_to_page does not exist on all platforms... */
	pages[0] = pfn_to_page(dma_to_phys(dev, node->handle) >> PAGE_SHIFT);
	for (i = 1; i < npages; i++)
		pages[i] = pages[0] + i;

	return vmap(pages, npages, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
}

static void __iomem *
gk20a_instobj_cpu_map_iommu(struct nvkm_memory *memory)
{
	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
	int npages = nvkm_memory_size(memory) >> 12;

	return vmap(node->pages, npages, VM_MAP,
		    pgprot_writecombine(PAGE_KERNEL));
}

/*
 * Must be called while holding gk20a_instmem::lock
 */
static void
gk20a_instmem_vaddr_gc(struct gk20a_instmem *imem, const u64 size)
{
	while (imem->vaddr_use + size > imem->vaddr_max) {
		struct gk20a_instobj *obj;

		/* no candidate that can be unmapped, abort... */
		if (list_empty(&imem->vaddr_lru))
			break;

		obj = list_first_entry(&imem->vaddr_lru, struct gk20a_instobj,
				       vaddr_node);
		list_del(&obj->vaddr_node);
		vunmap(obj->vaddr);
		obj->vaddr = NULL;
		imem->vaddr_use -= nvkm_memory_size(&obj->memory);
		nvkm_debug(&imem->base.subdev, "(GC) vaddr used: %x/%x\n",
			   imem->vaddr_use, imem->vaddr_max);
	}
}

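/*
 * Acquire a CPU mapping for an instobj. The GPU L2 cache is flushed first,
 * and an existing mapping is reused if it is still on the LRU list.
 */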
static void __iomem *
gk20a_instobj_acquire(struct nvkm_memory *memory)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);
	struct gk20a_instmem *imem = node->imem;
	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
	const u64 size = nvkm_memory_size(memory);
	unsigned long flags;

	nvkm_ltc_flush(ltc);

	spin_lock_irqsave(&imem->lock, flags);

	if (node->vaddr) {
		/* remove us from the LRU list since we cannot be unmapped */
		list_del(&node->vaddr_node);

		goto out;
	}

	/* try to free some address space if we reached the limit */
	gk20a_instmem_vaddr_gc(imem, size);

	node->vaddr = imem->cpu_map(memory);

	if (!node->vaddr) {
		nvkm_error(&imem->base.subdev, "cannot map instobj - "
			   "this is not going to end well...\n");
		goto out;
	}

	imem->vaddr_use += size;
	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
		   imem->vaddr_use, imem->vaddr_max);

out:
	spin_unlock_irqrestore(&imem->lock, flags);

	return node->vaddr;
}

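/*
 * Release an instobj: keep the CPU mapping and place it on the LRU list for
 * later recycling, then invalidate the GPU L2 cache.
 */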
static void
gk20a_instobj_release(struct nvkm_memory *memory)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);
	struct gk20a_instmem *imem = node->imem;
	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
	unsigned long flags;

	spin_lock_irqsave(&imem->lock, flags);

	/* add ourselves to the LRU list so our CPU mapping can be freed */
	list_add_tail(&node->vaddr_node, &imem->vaddr_lru);

	spin_unlock_irqrestore(&imem->lock, flags);

	wmb();
	nvkm_ltc_invalidate(ltc);
}

static u32
gk20a_instobj_rd32(struct nvkm_memory *memory, u64 offset)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);

	return node->vaddr[offset / 4];
}

static void
gk20a_instobj_wr32(struct nvkm_memory *memory, u64 offset, u32 data)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);

	node->vaddr[offset / 4] = data;
}

static void
gk20a_instobj_map(struct nvkm_memory *memory, struct nvkm_vma *vma, u64 offset)
{
	struct gk20a_instobj *node = gk20a_instobj(memory);

	nvkm_vm_map_at(vma, offset, &node->mem);
}

/*
 * Clear the CPU mapping of an instobj if it exists
 */
static void
gk20a_instobj_dtor(struct gk20a_instobj *node)
{
	struct gk20a_instmem *imem = node->imem;
	struct gk20a_instobj *obj;
	unsigned long flags;

	spin_lock_irqsave(&imem->lock, flags);

	if (!node->vaddr)
		goto out;

	list_for_each_entry(obj, &imem->vaddr_lru, vaddr_node) {
		if (obj == node) {
			list_del(&obj->vaddr_node);
			break;
		}
	}
	vunmap(node->vaddr);
	node->vaddr = NULL;
	imem->vaddr_use -= nvkm_memory_size(&node->memory);
	nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n",
		   imem->vaddr_use, imem->vaddr_max);

out:
	spin_unlock_irqrestore(&imem->lock, flags);
}

static void *
gk20a_instobj_dtor_dma(struct nvkm_memory *memory)
{
	struct gk20a_instobj_dma *node = gk20a_instobj_dma(memory);
	struct gk20a_instmem *imem = node->base.imem;
	struct device *dev = imem->base.subdev.device->dev;

	gk20a_instobj_dtor(&node->base);

	if (unlikely(!node->cpuaddr))
		goto out;

	dma_free_attrs(dev, node->base.mem.size << PAGE_SHIFT, node->cpuaddr,
		       node->handle, &imem->attrs);

out:
	return node;
}

static void *
gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
{
	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
	struct gk20a_instmem *imem = node->base.imem;
	struct device *dev = imem->base.subdev.device->dev;
	struct nvkm_mm_node *r;
	int i;

	gk20a_instobj_dtor(&node->base);

	if (unlikely(list_empty(&node->base.mem.regions)))
		goto out;

	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
			     rl_entry);

	/* clear bit 34 to unmap pages */
	r->offset &= ~BIT(34 - imem->iommu_pgshift);

	/* Unmap pages from GPU address space and free them */
	for (i = 0; i < node->base.mem.size; i++) {
		iommu_unmap(imem->domain,
			    (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
		dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
			       DMA_BIDIRECTIONAL);
		__free_page(node->pages[i]);
	}

	/* Release area from GPU address space */
	mutex_lock(imem->mm_mutex);
	nvkm_mm_free(imem->mm, &r);
	mutex_unlock(imem->mm_mutex);

out:
	return node;
}

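/* nvkm_memory implementations for the two allocation backends */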
static const struct nvkm_memory_func
gk20a_instobj_func_dma = {
	.dtor = gk20a_instobj_dtor_dma,
	.target = gk20a_instobj_target,
	.addr = gk20a_instobj_addr,
	.size = gk20a_instobj_size,
	.acquire = gk20a_instobj_acquire,
	.release = gk20a_instobj_release,
	.rd32 = gk20a_instobj_rd32,
	.wr32 = gk20a_instobj_wr32,
	.map = gk20a_instobj_map,
};

static const struct nvkm_memory_func
gk20a_instobj_func_iommu = {
	.dtor = gk20a_instobj_dtor_iommu,
	.target = gk20a_instobj_target,
	.addr = gk20a_instobj_addr,
	.size = gk20a_instobj_size,
	.acquire = gk20a_instobj_acquire,
	.release = gk20a_instobj_release,
	.rd32 = gk20a_instobj_rd32,
	.wr32 = gk20a_instobj_wr32,
	.map = gk20a_instobj_map,
};

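/*
 * Allocate instobj backing memory as a single physically contiguous chunk
 * using the DMA API.
 */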
static int
gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
		       struct gk20a_instobj **_node)
{
	struct gk20a_instobj_dma *node;
	struct nvkm_subdev *subdev = &imem->base.subdev;
	struct device *dev = subdev->device->dev;

	if (!(node = kzalloc(sizeof(*node), GFP_KERNEL)))
		return -ENOMEM;
	*_node = &node->base;

	nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);

	node->cpuaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
					&node->handle, GFP_KERNEL,
					&imem->attrs);
	if (!node->cpuaddr) {
		nvkm_error(subdev, "cannot allocate DMA memory\n");
		return -ENOMEM;
	}

	/* alignment check */
	if (unlikely(node->handle & (align - 1)))
		nvkm_warn(subdev,
			  "memory not aligned as requested: %pad (0x%x)\n",
			  &node->handle, align);

	/* present memory for being mapped using small pages */
	node->r.type = 12;
	node->r.offset = node->handle >> 12;
	node->r.length = (npages << PAGE_SHIFT) >> 12;

	node->base.mem.offset = node->handle;

	INIT_LIST_HEAD(&node->base.mem.regions);
	list_add_tail(&node->r.rl_entry, &node->base.mem.regions);

	return 0;
}

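/*
 * Allocate instobj backing memory as individual pages and make them appear
 * contiguous to the GPU by mapping them into the IOMMU address space.
 */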
static int
gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
			 struct gk20a_instobj **_node)
{
	struct gk20a_instobj_iommu *node;
	struct nvkm_subdev *subdev = &imem->base.subdev;
	struct device *dev = subdev->device->dev;
	struct nvkm_mm_node *r;
	int ret;
	int i;

	/*
	 * despite their variable size, instmem allocations are small enough
	 * (< 1 page) to be handled by kzalloc
	 */
	if (!(node = kzalloc(sizeof(*node) + ((sizeof(node->pages[0]) +
			     sizeof(*node->dma_addrs)) * npages), GFP_KERNEL)))
		return -ENOMEM;
	*_node = &node->base;
	node->dma_addrs = (void *)(node->pages + npages);

	nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory);

	/* Allocate backing memory */
	for (i = 0; i < npages; i++) {
		struct page *p = alloc_page(GFP_KERNEL);
		dma_addr_t dma_adr;

		if (p == NULL) {
			ret = -ENOMEM;
			goto free_pages;
		}
		node->pages[i] = p;
		dma_adr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
		if (dma_mapping_error(dev, dma_adr)) {
			nvkm_error(subdev, "DMA mapping error!\n");
			ret = -ENOMEM;
			goto free_pages;
		}
		node->dma_addrs[i] = dma_adr;
	}

	mutex_lock(imem->mm_mutex);
	/* Reserve area from GPU address space */
	ret = nvkm_mm_head(imem->mm, 0, 1, npages, npages,
			   align >> imem->iommu_pgshift, &r);
	mutex_unlock(imem->mm_mutex);
	if (ret) {
		nvkm_error(subdev, "IOMMU space is full!\n");
		goto free_pages;
	}

	/* Map into GPU address space */
	for (i = 0; i < npages; i++) {
		u32 offset = (r->offset + i) << imem->iommu_pgshift;

		ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
				PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
		if (ret < 0) {
			nvkm_error(subdev, "IOMMU mapping failure: %d\n", ret);

			while (i-- > 0) {
				offset -= PAGE_SIZE;
				iommu_unmap(imem->domain, offset, PAGE_SIZE);
			}
			goto release_area;
		}
	}

	/* Bit 34 tells that an address is to be resolved through the IOMMU */
	r->offset |= BIT(34 - imem->iommu_pgshift);

	node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift;

	INIT_LIST_HEAD(&node->base.mem.regions);
	list_add_tail(&r->rl_entry, &node->base.mem.regions);

	return 0;

release_area:
	mutex_lock(imem->mm_mutex);
	nvkm_mm_free(imem->mm, &r);
	mutex_unlock(imem->mm_mutex);

free_pages:
	for (i = 0; i < npages && node->pages[i] != NULL; i++) {
		dma_addr_t dma_addr = node->dma_addrs[i];
		if (dma_addr)
			dma_unmap_page(dev, dma_addr, PAGE_SIZE,
				       DMA_BIDIRECTIONAL);
		__free_page(node->pages[i]);
	}

	return ret;
}

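/*
 * Allocate a new instobj, using the IOMMU path if a domain is available and
 * falling back to the DMA API otherwise.
 */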
static int
gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
		  struct nvkm_memory **pmemory)
{
	struct gk20a_instmem *imem = gk20a_instmem(base);
	struct nvkm_subdev *subdev = &imem->base.subdev;
	struct gk20a_instobj *node = NULL;
	int ret;

	nvkm_debug(subdev, "%s (%s): size: %x align: %x\n", __func__,
		   imem->domain ? "IOMMU" : "DMA", size, align);

	/* Round size and align to page bounds */
	size = max(roundup(size, PAGE_SIZE), PAGE_SIZE);
	align = max(roundup(align, PAGE_SIZE), PAGE_SIZE);

	if (imem->domain)
		ret = gk20a_instobj_ctor_iommu(imem, size >> PAGE_SHIFT,
					       align, &node);
	else
		ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT,
					     align, &node);
	*pmemory = node ? &node->memory : NULL;
	if (ret)
		return ret;

	node->imem = imem;

	/* present memory for being mapped using small pages */
	node->mem.size = size >> 12;
	node->mem.memtype = 0;
	node->mem.page_shift = 12;

	nvkm_debug(subdev, "alloc size: 0x%x, align: 0x%x, gaddr: 0x%llx\n",
		   size, align, node->mem.offset);

	return 0;
}

static void *
gk20a_instmem_dtor(struct nvkm_instmem *base)
{
	struct gk20a_instmem *imem = gk20a_instmem(base);

	/* perform some sanity checks... */
	if (!list_empty(&imem->vaddr_lru))
		nvkm_warn(&base->subdev, "instobj LRU not empty!\n");

	if (imem->vaddr_use != 0)
		nvkm_warn(&base->subdev, "instobj vmap area not empty! "
			  "0x%x bytes still mapped\n", imem->vaddr_use);

	return imem;
}

static const struct nvkm_instmem_func
gk20a_instmem = {
	.dtor = gk20a_instmem_dtor,
	.memory_new = gk20a_instobj_new,
	.persistent = true,
	.zero = false,
};

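/*
 * Create the instmem subdev, selecting the IOMMU or DMA backend depending on
 * what the Tegra platform device provides.
 */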
int
gk20a_instmem_new(struct nvkm_device *device, int index,
		  struct nvkm_instmem **pimem)
{
	struct nvkm_device_tegra *tdev = device->func->tegra(device);
	struct gk20a_instmem *imem;

	if (!(imem = kzalloc(sizeof(*imem), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_instmem_ctor(&gk20a_instmem, device, index, &imem->base);
	spin_lock_init(&imem->lock);
	*pimem = &imem->base;

	/* do not allow more than 1MB of CPU-mapped instmem */
	imem->vaddr_use = 0;
	imem->vaddr_max = 0x100000;
	INIT_LIST_HEAD(&imem->vaddr_lru);

	if (tdev->iommu.domain) {
		imem->mm_mutex = &tdev->iommu.mutex;
		imem->mm = &tdev->iommu.mm;
		imem->domain = tdev->iommu.domain;
		imem->iommu_pgshift = tdev->iommu.pgshift;
		imem->cpu_map = gk20a_instobj_cpu_map_iommu;

		nvkm_info(&imem->base.subdev, "using IOMMU\n");
	} else {
		init_dma_attrs(&imem->attrs);
		/* We will access the memory through our own mapping */
		dma_set_attr(DMA_ATTR_NON_CONSISTENT, &imem->attrs);
		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &imem->attrs);
		dma_set_attr(DMA_ATTR_WRITE_COMBINE, &imem->attrs);
		dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &imem->attrs);
		imem->cpu_map = gk20a_instobj_cpu_map_dma;

		nvkm_info(&imem->base.subdev, "using DMA API\n");
	}

	return 0;
}