/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */

#include <linux/dma-mapping.h>
#include <linux/iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/dma-buf.h>
#include <linux/sizes.h>

#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
#include <drm/ttm/ttm_range_manager.h>

#include <drm/amdgpu_drm.h>

#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_res_cursor.h"
#include "bif/bif_4_1_d.h"

#define AMDGPU_TTM_VRAM_MAX_DW_READ	(size_t)128

static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem);
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm);

static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
				    unsigned int type,
				    uint64_t size_in_page)
{
	return ttm_range_man_init(&adev->mman.bdev, type,
				  false, size_in_page);
}

/**
 * amdgpu_evict_flags - Compute placement flags
 *
 * @bo: The buffer object to evict
 * @placement: Possible destination(s) for evicted BO
 *
 * Fill in placement data when ttm_bo_evict() is called
 */
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
				struct ttm_placement *placement)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	static const struct ttm_place placements = {
		.fpfn = 0,
		.lpfn = 0,
		.mem_type = TTM_PL_SYSTEM,
		.flags = 0
	};

	/* Don't handle scatter gather BOs */
	if (bo->type == ttm_bo_type_sg) {
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;
	}

	/* Object isn't an AMDGPU object so ignore */
	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
		placement->placement = &placements;
		placement->busy_placement = &placements;
		placement->num_placement = 1;
		placement->num_busy_placement = 1;
		return;
	}

	abo = ttm_to_amdgpu_bo(bo);
	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
		struct dma_fence *fence;
		struct dma_resv *resv = &bo->base._resv;

		rcu_read_lock();
		fence = rcu_dereference(resv->fence_excl);
		if (fence && !fence->ops->signaled)
			dma_fence_enable_sw_signaling(fence);

		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		rcu_read_unlock();
		return;
	}

	switch (bo->resource->mem_type) {
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;

	case TTM_PL_VRAM:
		if (!adev->mman.buffer_funcs_enabled) {
			/* Move to system memory */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			   amdgpu_bo_in_cpu_visible_vram(abo)) {

			/* Try evicting to the CPU inaccessible part of VRAM
			 * first, but only set GTT as busy placement, so this
			 * BO will be evicted to GTT rather than causing other
			 * BOs to be evicted from VRAM
			 */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
							 AMDGPU_GEM_DOMAIN_GTT);
			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
			abo->placements[0].lpfn = 0;
			abo->placement.busy_placement = &abo->placements[1];
			abo->placement.num_busy_placement = 1;
		} else {
			/* Move to GTT memory */
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
		}
		break;
	case TTM_PL_TT:
	case AMDGPU_PL_PREEMPT:
	default:
		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
		break;
	}
	*placement = abo->placement;
}

/**
 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 * @bo: buffer object to map
 * @mem: memory object to map
 * @mm_cur: range to map
 * @num_pages: number of pages to map
 * @window: which GART window to use
 * @ring: DMA ring to use for the copy
 * @tmz: if we should setup a TMZ enabled mapping
 * @addr: resulting address inside the MC address space
 *
 * Setup one of the GART windows to access a specific piece of memory or return
 * the physical address for local memory.
 */
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
				 struct ttm_resource *mem,
187 188 189 190
				 struct amdgpu_res_cursor *mm_cur,
				 unsigned num_pages, unsigned window,
				 struct amdgpu_ring *ring, bool tmz,
				 uint64_t *addr)
191 192 193 194 195 196
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
197
	void *cpu_addr;
198
	uint64_t flags;
199
	unsigned int i;
200 201 202 203
	int r;

	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
204
	BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
205 206

	/* Map only what can't be accessed directly */
207
	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
208 209
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
210 211 212 213 214 215
		return 0;
	}

	*addr = adev->gmc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;
216
	*addr += mm_cur->start & ~PAGE_MASK;
217 218

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
219
	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
220 221

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
222
				     AMDGPU_IB_POOL_DELAYED, &job);
223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;
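	/* The new PTEs are built at the tail of this IB and copied from there
	 * into the GART table entries of the selected window.
	 */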

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
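	/* Compute the PTE flags for this mapping; TMZ mappings additionally
	 * set the encryption bit.
	 */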

	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
	if (tmz)
		flags |= AMDGPU_PTE_TMZ;

241 242 243
	cpu_addr = &job->ibs[0].ptr[num_dw];

	if (mem->mem_type == TTM_PL_TT) {
244
		dma_addr_t *dma_addr;
245

246 247
		dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
		r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags,
248 249 250 251 252 253
				    cpu_addr);
		if (r)
			goto error_free;
	} else {
		dma_addr_t dma_address;

254
		dma_address = mm_cur->start;
255 256 257 258 259 260 261 262 263 264 265
		dma_address += adev->vm_manager.vram_base_offset;

		for (i = 0; i < num_pages; ++i) {
			r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
					    &dma_address, flags, cpu_addr);
			if (r)
				goto error_free;

			dma_address += PAGE_SIZE;
		}
	}
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280

	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

281
/**
282
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
283 284 285 286 287 288 289
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @tmz: if a secure copy should be used
 * @resv: resv object to sync to
 * @f: Returns the last fence if multiple jobs are submitted.
290 291 292 293 294 295 296
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be the same BO for a
 * move and different for a BO to BO copy.
 *
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
297 298
			       const struct amdgpu_copy_mem *src,
			       const struct amdgpu_copy_mem *dst,
299
			       uint64_t size, bool tmz,
300
			       struct dma_resv *resv,
301
			       struct dma_fence **f)
302
{
303 304 305
	const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
					AMDGPU_GPU_PAGE_SIZE);

306
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
307
	struct amdgpu_res_cursor src_mm, dst_mm;
308
	struct dma_fence *fence = NULL;
309
	int r = 0;
310

311
	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

316 317
	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
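	/* Both GART windows are shared, so window setup and the copies are
	 * serialized under the gtt_window_lock.
	 */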
318

319
	mutex_lock(&adev->mman.gtt_window_lock);
320 321 322
	while (src_mm.remaining) {
		uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
		uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
323
		struct dma_fence *next;
324 325
		uint32_t cur_size;
		uint64_t from, to;
326

327 328 329
		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
		 * begins at an offset, then adjust the size accordingly
		 */
330
		cur_size = max(src_page_offset, dst_page_offset);
331
		cur_size = min(min3(src_mm.size, dst_mm.size, size),
332
			       (uint64_t)(GTT_MAX_BYTES - cur_size));
333 334

		/* Map src to window 0 and dst to window 1. */
335
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
336
					  PFN_UP(cur_size + src_page_offset),
337
					  0, ring, tmz, &from);
338 339
		if (r)
			goto error;
340

341
		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
342
					  PFN_UP(cur_size + dst_page_offset),
343
					  1, ring, tmz, &to);
344 345
		if (r)
			goto error;
346

347
		r = amdgpu_copy_buffer(ring, from, to, cur_size,
348
				       resv, &next, false, true, tmz);
349 350 351
		if (r)
			goto error;

352
		dma_fence_put(fence);
353 354
		fence = next;

355 356
		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
357
	}
358
error:
359
	mutex_unlock(&adev->mman.gtt_window_lock);
360 361 362 363 364 365
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}

366
/*
367 368
 * amdgpu_move_blit - Copy an entire buffer to another buffer
 *
369 370
 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
 * help move buffers to and from VRAM.
371
 */
372
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
373
			    bool evict,
374 375
			    struct ttm_resource *new_mem,
			    struct ttm_resource *old_mem)
376 377
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
378
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
379 380 381 382 383 384 385 386 387 388 389 390 391
	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = bo;
	dst.bo = bo;
	src.mem = old_mem;
	dst.mem = new_mem;
	src.offset = 0;
	dst.offset = 0;
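	/* Copy the whole BO; encrypted BOs use a secure (TMZ) copy. */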

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->num_pages << PAGE_SHIFT,
392
				       amdgpu_bo_encrypted(abo),
393
				       bo->base.resv, &fence);
394 395
	if (r)
		goto error;
396

397 398
	/* clear the space being freed */
	if (old_mem->mem_type == TTM_PL_VRAM &&
399
	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
400 401 402 403 404 405 406 407 408 409 410 411
		struct dma_fence *wipe_fence = NULL;

		r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
				       NULL, &wipe_fence);
		if (r) {
			goto error;
		} else if (wipe_fence) {
			dma_fence_put(fence);
			fence = wipe_fence;
		}
	}

412 413
	/* Always block for VM page tables before committing the new location */
	if (bo->type == ttm_bo_type_kernel)
414
		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
415
	else
416
		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
417
	dma_fence_put(fence);
	return r;
419 420 421

error:
	if (fence)
422 423
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
424
	return r;
}

427
/*
428 429 430 431 432
 * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
 *
 * Called by amdgpu_bo_move()
 */
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
433
			       struct ttm_resource *mem)
434
{
435 436
	uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
	struct amdgpu_res_cursor cursor;
437 438 439 440 441 442 443

	if (mem->mem_type == TTM_PL_SYSTEM ||
	    mem->mem_type == TTM_PL_TT)
		return true;
	if (mem->mem_type != TTM_PL_VRAM)
		return false;

444 445
	amdgpu_res_first(mem, 0, mem_size, &cursor);

446
	/* ttm_resource_ioremap only supports contiguous memory */
447
	if (cursor.size != mem_size)
448 449
		return false;

450
	return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
451 452
}

453
/*
454 455 456 457
 * amdgpu_bo_move - Move a buffer object to a new memory location
 *
 * Called by ttm_bo_handle_move_mem()
 */
458 459
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
460 461
			  struct ttm_resource *new_mem,
			  struct ttm_place *hop)
{
	struct amdgpu_device *adev;
464
	struct amdgpu_bo *abo;
465
	struct ttm_resource *old_mem = bo->resource;
	int r;

468 469
	if (new_mem->mem_type == TTM_PL_TT ||
	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
470 471 472 473 474
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
		if (r)
			return r;
	}

475
	/* Can't move a pinned BO */
476
	abo = ttm_to_amdgpu_bo(bo);
477
	if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
478 479
		return -EINVAL;

480
	adev = amdgpu_ttm_adev(bo->bdev);
481

	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
483
		ttm_bo_move_null(bo, new_mem);
484
		goto out;
	}
486
	if (old_mem->mem_type == TTM_PL_SYSTEM &&
487 488
	    (new_mem->mem_type == TTM_PL_TT ||
	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
489
		ttm_bo_move_null(bo, new_mem);
490
		goto out;
	}
492 493
	if ((old_mem->mem_type == TTM_PL_TT ||
	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
494
	    new_mem->mem_type == TTM_PL_SYSTEM) {
495
		r = ttm_bo_wait_ctx(bo, ctx);
496
		if (r)
497
			return r;
498 499

		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
500
		ttm_resource_free(bo, &bo->resource);
501
		ttm_bo_assign_mem(bo, new_mem);
502
		goto out;
503
	}
504

505 506 507 508 509 510 511
	if (old_mem->mem_type == AMDGPU_PL_GDS ||
	    old_mem->mem_type == AMDGPU_PL_GWS ||
	    old_mem->mem_type == AMDGPU_PL_OA ||
	    new_mem->mem_type == AMDGPU_PL_GDS ||
	    new_mem->mem_type == AMDGPU_PL_GWS ||
	    new_mem->mem_type == AMDGPU_PL_OA) {
		/* Nothing to save here */
512
		ttm_bo_move_null(bo, new_mem);
513
		goto out;
514
	}
515

516 517 518 519 520 521 522 523
	if (adev->mman.buffer_funcs_enabled) {
		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
		      new_mem->mem_type == TTM_PL_VRAM) ||
		     (old_mem->mem_type == TTM_PL_VRAM &&
		      new_mem->mem_type == TTM_PL_SYSTEM))) {
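			/* A direct VRAM <-> system move is not possible here;
			 * ask TTM for a temporary bounce through GTT instead.
			 */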
			hop->fpfn = 0;
			hop->lpfn = 0;
			hop->mem_type = TTM_PL_TT;
524
			hop->flags = TTM_PL_FLAG_TEMPORARY;
525 526 527 528 529
			return -EMULTIHOP;
		}

		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
	} else {
530 531
		r = -ENODEV;
	}

	if (r) {
534 535 536 537
		/* Check that all memory is CPU accessible */
		if (!amdgpu_mem_visible(adev, old_mem) ||
		    !amdgpu_mem_visible(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
538
			return r;
		}
540 541 542

		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
543
			return r;
	}

546 547 548 549 550 551 552 553 554
	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

555
out:
	/* update statistics */
557
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
558
	amdgpu_bo_move_notify(bo, evict, new_mem);
	return 0;
}

562
/*
563 564 565 566
 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 *
 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 */
567 568
static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
				     struct ttm_resource *mem)
{
570
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
571
	size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;

	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
578
	case AMDGPU_PL_PREEMPT:
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		/* check if it's visible */
583
		if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
			return -EINVAL;
585

586
		if (adev->mman.aper_base_kaddr &&
587
		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
588 589 590
			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
					mem->bus.offset;
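		/* Turn the offset inside VRAM into a bus address within the
		 * CPU-visible aperture.
		 */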

591
		mem->bus.offset += adev->gmc.aper_base;
		mem->bus.is_iomem = true;
593 594 595 596
		if (adev->gmc.xgmi.connected_to_cpu)
			mem->bus.caching = ttm_cached;
		else
			mem->bus.caching = ttm_write_combined;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

604 605 606
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
607
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
608
	struct amdgpu_res_cursor cursor;
609

610 611
	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
			 &cursor);
612
	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
613 614
}

615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635
/**
 * amdgpu_ttm_domain_start - Returns GPU start address
 * @adev: amdgpu device object
 * @type: type of the memory
 *
 * Returns:
 * GPU start address of a memory domain
 */

uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
{
	switch (type) {
	case TTM_PL_TT:
		return adev->gmc.gart_start;
	case TTM_PL_VRAM:
		return adev->gmc.vram_start;
	}

	return 0;
}

/*
 * TTM backend functions.
 */
struct amdgpu_ttm_tt {
640
	struct ttm_tt	ttm;
641
	struct drm_gem_object	*gobj;
642 643
	u64			offset;
	uint64_t		userptr;
644
	struct task_struct	*usertask;
645
	uint32_t		userflags;
646
	bool			bound;
647
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
648
	struct hmm_range	*range;
649
#endif
};

652
#ifdef CONFIG_DRM_AMDGPU_USERPTR
653
/*
654 655
 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
 * memory and start HMM tracking CPU page table update
656
 *
657 658
 * Calling function must call amdgpu_ttm_tt_get_user_pages_done() once and only
 * once afterwards to stop HMM tracking
659
 */
660
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
662
	struct ttm_tt *ttm = bo->tbo.ttm;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
664
	unsigned long start = gtt->userptr;
665
	struct vm_area_struct *vma;
666
	struct mm_struct *mm;
667
	bool readonly;
668
	int r = 0;

670 671 672
	mm = bo->notifier.mm;
	if (unlikely(!mm)) {
		DRM_DEBUG_DRIVER("BO is not registered?\n");
673
		return -EFAULT;
674
	}
675

676 677 678 679
	/* Another get_user_pages is running at the same time?? */
	if (WARN_ON(gtt->range))
		return -EFAULT;

680
	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
681 682
		return -ESRCH;

683
	mmap_read_lock(mm);
684
	vma = find_vma(mm, start);
685
	mmap_read_unlock(mm);
686 687
	if (unlikely(!vma || start < vma->vm_start)) {
		r = -EFAULT;
688
		goto out_putmm;
689
	}
690
	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
691
		vma->vm_file)) {
692
		r = -EPERM;
693
		goto out_putmm;
694
	}
695

696 697 698 699 700
	readonly = amdgpu_ttm_tt_is_readonly(ttm);
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       ttm->num_pages, &gtt->range, readonly,
				       false);
out_putmm:
701
	mmput(mm);
702

703 704 705
	return r;
}

706
/*
707 708
 * amdgpu_ttm_tt_get_user_pages_done - stop HMM tracking of CPU page table changes
 * Check if the pages backing this ttm range have been invalidated
709
 *
710
 * Returns: true if pages are still valid
711
 */
712
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
713
{
714
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
715
	bool r = false;
716

717 718
	if (!gtt || !gtt->userptr)
		return false;
719

720
	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
721
		gtt->userptr, ttm->num_pages);
722

723
	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
724 725
		"No user pages to check\n");

726
	if (gtt->range) {
727 728 729 730
		/*
		 * FIXME: Must always hold notifier_lock for this, and must
		 * not ignore the return code.
		 */
731
		r = amdgpu_hmm_range_get_pages_done(gtt->range);
732
		gtt->range = NULL;
733
	}
734

735
	return !r;
736
}
737
#endif
738

739
/*
740
 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
741
 *
742
 * Called by amdgpu_cs_list_validate(). This creates the page list
743 744
 * that backs user memory and will ultimately be mapped into the device
 * address space.
745
 */
746
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
747
{
748
	unsigned long i;
749

750
	for (i = 0; i < ttm->num_pages; ++i)
751
		ttm->pages[i] = pages ? pages[i] : NULL;
752 753
}

754
/*
755
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
756 757 758
 *
 * Called by amdgpu_ttm_backend_bind()
 **/
759
static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
				     struct ttm_tt *ttm)
761
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
763 764 765 766
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
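	/* Read-only userptr BOs only ever need the device to read from
	 * system memory, so DMA_TO_DEVICE is sufficient for them.
	 */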
767
	int r;
768

769
	/* Allocate an SG array and squash pages into it */
	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      (u64)ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
	if (r)
		goto release_sg;

776
	/* Map SG to device */
777 778
	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
	if (r)
		goto release_sg;

781
	/* convert SG to linear array of pages and dma addresses */
782 783
	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
				       ttm->num_pages);

	return 0;

release_sg:
	kfree(ttm->sg);
789
	ttm->sg = NULL;
	return r;
}

793
/*
794 795
 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 */
796
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
					struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
806
	if (!ttm->sg || !ttm->sg->sgl)
		return;

809
	/* unmap the pages mapped to the device */
810
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
811
	sg_free_table(ttm->sg);
812

813
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
814 815 816 817 818
	if (gtt->range) {
		unsigned long i;

		for (i = 0; i < ttm->num_pages; i++) {
			if (ttm->pages[i] !=
819
			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
820 821 822 823 824
				break;
		}

		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
	}
825
#endif
}

828
static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
829 830 831 832 833 834 835 836
				struct ttm_buffer_object *tbo,
				uint64_t flags)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
	struct ttm_tt *ttm = tbo->ttm;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int r;

837 838 839
	if (amdgpu_bo_encrypted(abo))
		flags |= AMDGPU_PTE_TMZ;
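	/* The first page of GFX9 MQD BOs is kept at the default UC memory
	 * type; the remaining pages are switched to NC below.
	 */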

840
	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
841 842 843 844 845 846 847
		uint64_t page_idx = 1;

		r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
				ttm->pages, gtt->ttm.dma_address, flags);
		if (r)
			goto gart_bind_fail;

848 849 850 851
		/* The memory type of the first page defaults to UC. Now
		 * modify the memory type to NC from the second page of
		 * the BO onward.
		 */
852 853
		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
854 855 856 857 858 859 860 861 862 863 864 865 866

		r = amdgpu_gart_bind(adev,
				gtt->offset + (page_idx << PAGE_SHIFT),
				ttm->num_pages - page_idx,
				&ttm->pages[page_idx],
				&(gtt->ttm.dma_address[page_idx]), flags);
	} else {
		r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
				     ttm->pages, gtt->ttm.dma_address, flags);
	}

gart_bind_fail:
	if (r)
867
		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
868 869 870 871 872
			  ttm->num_pages, gtt->offset);

	return r;
}

873
/*
874 875 876 877 878
 * amdgpu_ttm_backend_bind - Bind GTT memory
 *
 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 * This handles binding GTT memory to the device address space.
 */
879
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
				   struct ttm_tt *ttm,
881
				   struct ttm_resource *bo_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = (void*)ttm;
885
	uint64_t flags;
886
	int r = 0;

888 889 890 891 892 893
	if (!bo_mem)
		return -EINVAL;

	if (gtt->bound)
		return 0;

894
	if (gtt->userptr) {
		r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
896 897 898 899
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914
	} else if (ttm->page_flags & TTM_PAGE_FLAG_SG) {
		if (!ttm->sg) {
			struct dma_buf_attachment *attach;
			struct sg_table *sgt;

			attach = gtt->gobj->import_attach;
			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);

			ttm->sg = sgt;
		}

		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
					       ttm->num_pages);
915
	}
916

	if (!ttm->num_pages) {
918
		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
		     ttm->num_pages, bo_mem, ttm);
	}

	if (bo_mem->mem_type == AMDGPU_PL_GDS ||
	    bo_mem->mem_type == AMDGPU_PL_GWS ||
	    bo_mem->mem_type == AMDGPU_PL_OA)
		return -EINVAL;

927 928
	if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
929
		return 0;
930
	}
931

932
	/* compute PTE flags relevant to this BO memory */
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
934 935

	/* bind pages into GART page tables */
936
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
938 939
		ttm->pages, gtt->ttm.dma_address, flags);

940
	if (r)
941
		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
942
			  ttm->num_pages, gtt->offset);
943
	gtt->bound = true;
944
	return r;
945 946
}

947
/*
948 949 950 951 952 953
 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
 * through AGP or GART aperture.
 *
 * If bo is accessible through AGP aperture, then use AGP aperture
 * to access bo; otherwise allocate logical space in GART aperture
 * and map bo to GART aperture.
954
 */
955
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
956
{
957
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
958
	struct ttm_operation_ctx ctx = { false, false };
959
	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
960 961
	struct ttm_placement placement;
	struct ttm_place placements;
962
	struct ttm_resource *tmp;
963
	uint64_t addr, flags;
964 965
	int r;

966
	if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
967 968
		return 0;
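	/* Try the AGP aperture first; BOs reachable through it do not need
	 * any GART space.
	 */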

969 970
	addr = amdgpu_gmc_agp_addr(bo);
	if (addr != AMDGPU_BO_INVALID_OFFSET) {
971
		bo->resource->start = addr >> PAGE_SHIFT;
972 973
		return 0;
	}
974

975 976 977 978 979 980 981 982 983 984 985 986 987
	/* allocate GART space */
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
	placements.mem_type = TTM_PL_TT;
	placements.flags = bo->resource->placement;

	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
	if (unlikely(r))
		return r;
988

989 990
	/* compute PTE flags for this buffer object */
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
991

992 993 994 995 996 997
	/* Bind pages */
	gtt->offset = (u64)tmp->start << PAGE_SHIFT;
	r = amdgpu_ttm_gart_bind(adev, bo, flags);
	if (unlikely(r)) {
		ttm_resource_free(bo, &tmp);
		return r;
998
	}
999

1000
	amdgpu_gart_invalidate_tlb(adev);
1001 1002 1003
	ttm_resource_free(bo, &bo->resource);
	ttm_bo_assign_mem(bo, tmp);

1004
	return 0;
}

1007
/*
1008 1009 1010 1011 1012
 * amdgpu_ttm_recover_gart - Rebind GTT pages
 *
 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
 * rebind GTT pages during a GPU reset.
 */
1013
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1014
{
1015
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1016
	uint64_t flags;
1017 1018
	int r;

1019
	if (!tbo->ttm)
1020 1021
		return 0;

1022
	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1023 1024
	r = amdgpu_ttm_gart_bind(adev, tbo, flags);

1025
	return r;
1026 1027
}

1028
/*
1029 1030 1031 1032 1033
 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
 *
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
1034
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
				      struct ttm_tt *ttm)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1039
	int r;

1041
	/* if the pages have userptr pinning then clear that first */
1042
	if (gtt->userptr) {
		amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
1044 1045 1046 1047 1048 1049 1050
	} else if (ttm->sg && gtt->gobj->import_attach) {
		struct dma_buf_attachment *attach;

		attach = gtt->gobj->import_attach;
		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
		ttm->sg = NULL;
	}
1051

1052 1053 1054
	if (!gtt->bound)
		return;
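	/* BOs that never received a GART address have no GART entries to
	 * tear down.
	 */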

1055
	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1056
		return;
1057

	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1060
	if (r)
1061
		DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
1062
			  gtt->ttm.num_pages, gtt->offset);
1063
	gtt->bound = false;
}

1066
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
				       struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

1071
	amdgpu_ttm_backend_unbind(bdev, ttm);
	ttm_tt_destroy_common(bdev, ttm);
1073 1074 1075
	if (gtt->usertask)
		put_task_struct(gtt->usertask);

1076
	ttm_tt_fini(&gtt->ttm);
	kfree(gtt);
}

1080 1081 1082 1083
/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
 * @bo: The buffer object to create a GTT ttm_tt object around
1084
 * @page_flags: Page flags to be added to the ttm_tt object
1085 1086 1087
 *
 * Called by ttm_tt_create().
 */
1088 1089
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
					   uint32_t page_flags)
{
1091
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_ttm_tt *gtt;
1093
	enum ttm_caching caching;

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL) {
		return NULL;
	}
1099
	gtt->gobj = &bo->base;
1100

1101 1102 1103 1104 1105
	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		caching = ttm_write_combined;
	else
		caching = ttm_cached;

1106
	/* allocate space for the uninitialized page entries */
1107
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
		kfree(gtt);
		return NULL;
	}
1111
	return &gtt->ttm;
}

1114
/*
1115 1116 1117 1118 1119
 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
 *
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
1120
static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
				  struct ttm_tt *ttm,
				  struct ttm_operation_ctx *ctx)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

1127
	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
	if (gtt && gtt->userptr) {
1129
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;

		ttm->page_flags |= TTM_PAGE_FLAG_SG;
		return 0;
	}

1137
	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
1138
		return 0;

1140
	return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
}

1143
/*
1144 1145 1146 1147 1148
 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
 *
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
1149
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
1150
				     struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1153
	struct amdgpu_device *adev;

	if (gtt && gtt->userptr) {
1156
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
		kfree(ttm->sg);
1158
		ttm->sg = NULL;
		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
1160 1161 1162 1163
		return;
	}

	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
		return;

	adev = amdgpu_ttm_adev(bdev);
1167
	return ttm_pool_free(&adev->mman.bdev.pool, ttm);
}

1170
/**
1171 1172
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
1173
 *
1174
 * @bo: The ttm_buffer_object to bind this userptr to
1175 1176 1177 1178 1179 1180
 * @addr:  The address in the current tasks VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 * to current task
 */
1181 1182
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
			      uint64_t addr, uint32_t flags)
{
1184
	struct amdgpu_ttm_tt *gtt;

1186 1187 1188 1189 1190 1191
	if (!bo->ttm) {
		/* TODO: We want a separate TTM object type for userptrs */
		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
		if (bo->ttm == NULL)
			return -ENOMEM;
	}

1193
	gtt = (void *)bo->ttm;
	gtt->userptr = addr;
	gtt->userflags = flags;
1196 1197 1198 1199 1200 1201

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

	return 0;
}

1205
/*
1206 1207
 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
 */
1208
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
1213
		return NULL;

1215 1216 1217 1218
	if (gtt->usertask == NULL)
		return NULL;

	return gtt->usertask->mm;
}

1221
/*
1222 1223
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lies inside an
 * address range for the current task.
1224 1225
 *
 */
1226 1227 1228 1229 1230 1231
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
				  unsigned long end)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned long size;

1232
	if (gtt == NULL || !gtt->userptr)
1233 1234
		return false;

1235 1236 1237
	/* Return false if no part of the ttm_tt object lies within
	 * the range
	 */
1238
	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
1239 1240 1241 1242 1243 1244
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

	return true;
}

1245
/*
1246
 * amdgpu_ttm_tt_is_userptr - Are the pages backed by userptr?
1247
 */
1248
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1249 1250 1251 1252 1253 1254
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL || !gtt->userptr)
		return false;

1255
	return true;
1256 1257
}

1258
/*
1259 1260
 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
 */
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

1271
/**
1272
 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1273 1274 1275
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
1276 1277
 *
 * Figure out the flags to use for a VM PDE (Page Directory Entry).
1278
 */
1279
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
{
1281
	uint64_t flags = 0;

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

1286 1287
	if (mem && (mem->mem_type == TTM_PL_TT ||
		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
		flags |= AMDGPU_PTE_SYSTEM;

1290
		if (ttm->caching == ttm_cached)
1291 1292
			flags |= AMDGPU_PTE_SNOOPED;
	}

1294 1295 1296 1297
	if (mem && mem->mem_type == TTM_PL_VRAM &&
			mem->bus.caching == ttm_cached)
		flags |= AMDGPU_PTE_SNOOPED;

1298 1299 1300 1301 1302 1303
	return flags;
}

/**
 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
 *
1304
 * @adev: amdgpu_device pointer
1305 1306
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
1307
 *
1308 1309 1310
 * Figure out the flags to use for a VM PTE (Page Table Entry).
 */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1311
				 struct ttm_resource *mem)
1312 1313 1314
{
	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

1315
	flags |= adev->gart.gart_pte_flags;
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

1324
/*
1325 1326
 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
 * object.
1327
 *
1328 1329 1330
 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
 * it can find space for a new object and by ttm_bo_force_list_clean() which is
1331 1332
 * used to clean out a memory space.
 */
1333 1334 1335
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
1336
	unsigned long num_pages = bo->resource->num_pages;
1337
	struct amdgpu_res_cursor cursor;
1338
	struct dma_resv_list *flist;
1339 1340 1341
	struct dma_fence *f;
	int i;

1342 1343 1344 1345
	/* Swapout? */
	if (bo->resource->mem_type == TTM_PL_SYSTEM)
		return true;

1346
	if (bo->type == ttm_bo_type_kernel &&
1347
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1348 1349
		return false;

1350 1351 1352 1353
	/* If bo is a KFD BO, check if the bo belongs to the current process.
	 * If true, then return false as any KFD process needs all its BOs to
	 * be resident to run successfully
	 */
1354
	flist = dma_resv_shared_list(bo->base.resv);
1355 1356 1357
	if (flist) {
		for (i = 0; i < flist->shared_count; ++i) {
			f = rcu_dereference_protected(flist->shared[i],
1358
				dma_resv_held(bo->base.resv));
1359 1360 1361 1362
			if (amdkfd_fence_check_mm(f, current->mm))
				return false;
		}
	}
1363

1364
	switch (bo->resource->mem_type) {
1365 1366 1367 1368 1369 1370 1371 1372 1373
	case AMDGPU_PL_PREEMPT:
		/* Preemptible BOs don't own system resources managed by the
		 * driver (pages, VRAM, GART space). They point to resources
		 * owned by someone else (e.g. pageable memory in user mode
		 * or a DMABuf). They are used in a preemptible context so we
		 * can guarantee no deadlocks and good QoS in case of MMU
		 * notifiers or DMABuf move notifiers from the resource owner.
		 */
		return false;
1374
	case TTM_PL_TT:
1375 1376 1377
		if (amdgpu_bo_is_amdgpu_bo(bo) &&
		    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
			return false;
1378
		return true;
1379

1380
	case TTM_PL_VRAM:
1381
		/* Check each drm MM node individually */
1382
		amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
1383 1384 1385 1386 1387
				 &cursor);
		while (cursor.remaining) {
			if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
			    && !(place->lpfn &&
				 place->lpfn <= PFN_DOWN(cursor.start)))
1388 1389
				return true;

1390
			amdgpu_res_next(&cursor, cursor.size);
1391
		}
1392
		return false;
1393

1394 1395
	default:
		break;
1396 1397 1398 1399 1400
	}

	return ttm_bo_eviction_valuable(bo, place);
}

1401
/**
1402
 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1403 1404 1405 1406 1407 1408 1409 1410 1411 1412
 *
 * @bo:  The buffer object to read/write
 * @offset:  Offset into buffer object
 * @buf:  Secondary buffer to write/read from
 * @len: Length in bytes of access
 * @write:  true if writing
 *
 * This is used to access VRAM that backs a buffer object via MMIO
 * access for debugging purposes.
 */
1413
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1414 1415
				    unsigned long offset, void *buf, int len,
				    int write)
1416
{
1417
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1418
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1419 1420
	struct amdgpu_res_cursor cursor;
	unsigned long flags;
1421 1422 1423
	uint32_t value = 0;
	int ret = 0;

1424
	if (bo->resource->mem_type != TTM_PL_VRAM)
1425 1426
		return -EIO;

1427
	amdgpu_res_first(bo->resource, offset, len, &cursor);
1428 1429 1430 1431
	while (cursor.remaining) {
		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
		uint64_t bytes = 4 - (cursor.start & 3);
		uint32_t shift = (cursor.start & 3) * 8;
1432 1433
		uint32_t mask = 0xffffffff << shift;
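		/* If the current resource node ends before the next 4-byte
		 * boundary, shrink the masked access accordingly.
		 */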

1434 1435 1436
		if (cursor.size < bytes) {
			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
			bytes = cursor.size;
1437 1438
		}

1439 1440 1441 1442
		if (mask != 0xffffffff) {
			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
1443
			value = RREG32_NO_KIQ(mmMM_DATA);
1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454
			if (write) {
				value &= ~mask;
				value |= (*(uint32_t *)buf << shift) & mask;
				WREG32_NO_KIQ(mmMM_DATA, value);
			}
			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
			if (!write) {
				value = (value & mask) >> shift;
				memcpy(buf, &value, bytes);
			}
		} else {
1455
			bytes = cursor.size & ~0x3ULL;
1456 1457 1458
			amdgpu_device_vram_access(adev, cursor.start,
						  (uint32_t *)buf, bytes,
						  write);
1459 1460 1461 1462
		}

		ret += bytes;
		buf = (uint8_t *)buf + bytes;
1463
		amdgpu_res_next(&cursor, bytes);
1464 1465 1466 1467 1468
	}

	return ret;
}

1469 1470 1471 1472 1473 1474
static void
amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
{
	amdgpu_bo_move_notify(bo, false, NULL);
}

1475
static struct ttm_device_funcs amdgpu_bo_driver = {
	.ttm_tt_create = &amdgpu_ttm_tt_create,
	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1479
	.ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
1480
	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,
1483
	.delete_mem_notify = &amdgpu_bo_delete_mem_notify,
1484
	.release_notify = &amdgpu_bo_release_notify,
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1486
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1487 1488
	.access_memory = &amdgpu_ttm_access_memory,
	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
};

1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
/*
 * Firmware Reservation functions
 */
/**
 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free fw reserved vram if it has been reserved.
 */
static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
{
1503 1504
	amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
		NULL, &adev->mman.fw_vram_usage_va);
1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515
}

/**
 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
 *
 * @adev: amdgpu_device pointer
 *
 * create bo vram reservation from fw.
 */
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
1516 1517
	uint64_t vram_size = adev->gmc.visible_vram_size;

1518 1519
	adev->mman.fw_vram_usage_va = NULL;
	adev->mman.fw_vram_usage_reserved_bo = NULL;
1520

1521 1522
	if (adev->mman.fw_vram_usage_size == 0 ||
	    adev->mman.fw_vram_usage_size > vram_size)
1523
		return 0;
1524

1525
	return amdgpu_bo_create_kernel_at(adev,
1526 1527
					  adev->mman.fw_vram_usage_start_offset,
					  adev->mman.fw_vram_usage_size,
1528
					  AMDGPU_GEM_DOMAIN_VRAM,
1529 1530
					  &adev->mman.fw_vram_usage_reserved_bo,
					  &adev->mman.fw_vram_usage_va);
1531
}
1532

1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554
/*
 * Memory training reservation functions
 */

/**
 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free memory training reserved vram if it has been reserved.
 */
static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
{
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;

	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
	ctx->c2p_bo = NULL;

	return 0;
}

1555
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
1556
{
1557
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1558

1559
	memset(ctx, 0, sizeof(*ctx));
1560

1561
	ctx->c2p_train_data_offset =
1562
		ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
1563 1564 1565 1566
	ctx->p2c_train_data_offset =
		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
	ctx->train_data_size =
		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1567

1568 1569 1570 1571
	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
			ctx->train_data_size,
			ctx->p2c_train_data_offset,
			ctx->c2p_train_data_offset);
1572 1573
}

1574 1575 1576
/*
 * reserve TMR memory at the top of VRAM which holds
 * IP Discovery data and is protected by PSP.
1577
 */
1578
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1579 1580 1581
{
	int ret;
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1582
	bool mem_train_support = false;
1583

1584
	if (!amdgpu_sriov_vf(adev)) {
1585
		if (amdgpu_atomfirmware_mem_training_supported(adev))
1586
			mem_train_support = true;
1587
		else
1588
			DRM_DEBUG("memory training is not supported!\n");
1589 1590
	}

1591 1592 1593 1594 1595 1596 1597
	/*
	 * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
	 * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc)
	 *
	 * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
	 * discovery data and G6 memory training data respectively
	 */
1598
	adev->mman.discovery_tmr_size =
1599
		amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1600 1601
	if (!adev->mman.discovery_tmr_size)
		adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
1602 1603 1604 1605 1606

	if (mem_train_support) {
		/* reserve vram for mem train according to TMR location */
		amdgpu_ttm_training_data_block_init(adev);
		ret = amdgpu_bo_create_kernel_at(adev,
1607 1608 1609 1610 1611
					 ctx->c2p_train_data_offset,
					 ctx->train_data_size,
					 AMDGPU_GEM_DOMAIN_VRAM,
					 &ctx->c2p_bo,
					 NULL);
1612 1613 1614 1615
		if (ret) {
			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
			amdgpu_ttm_training_reserve_vram_fini(adev);
			return ret;
1616
		}
1617
		ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1618
	}
1619 1620

	ret = amdgpu_bo_create_kernel_at(adev,
1621 1622
				adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
				adev->mman.discovery_tmr_size,
1623
				AMDGPU_GEM_DOMAIN_VRAM,
1624
				&adev->mman.discovery_memory,
1625
				NULL);
1626
	if (ret) {
1627
		DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1628
		amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
1629
		return ret;
1630 1631 1632 1633 1634
	}

	return 0;
}

1635
/*
1636 1637
 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
 * gtt/vram related fields.
1638 1639 1640 1641 1642 1643
 *
 * This initializes all of the memory space pools that the TTM layer
 * will need such as the GTT space (system memory mapped to the device),
 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
 * can be mapped per VMID.
 */
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
1646
	uint64_t gtt_size;
	int r;
1648
	u64 vis_vram_limit;

1650 1651
	mutex_init(&adev->mman.gtt_window_lock);

	/* No other users of the address space, so set it to 0 */
1653
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
1654 1655
			       adev_to_drm(adev)->anon_inode->i_mapping,
			       adev_to_drm(adev)->vma_offset_manager,
1656
			       adev->need_swiotlb,
1657
			       dma_addressing_limited(adev->dev));
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
	adev->mman.initialized = true;
1663

1664
	/* Initialize VRAM pool with all of VRAM divided into pages */
1665
	r = amdgpu_vram_mgr_init(adev);
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}
1670 1671 1672 1673

	/* Reduce size of CPU-visible VRAM if requested */
	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
	if (amdgpu_vis_vram_limit > 0 &&
1674 1675
	    vis_vram_limit <= adev->gmc.visible_vram_size)
		adev->gmc.visible_vram_size = vis_vram_limit;
1676

	/* Change the size here instead of the init above so only lpfn is affected */
1678
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
1679
#ifdef CONFIG_64BIT
1680
#ifdef CONFIG_X86
1681 1682 1683 1684 1685
	if (adev->gmc.xgmi.connected_to_cpu)
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);

	else
1686
#endif
1687 1688
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1689
#endif

1691 1692 1693 1694
	/*
	 *The reserved vram for firmware must be pinned to the specified
	 *place on the VRAM, so reserve it early.
	 */
1695
	r = amdgpu_ttm_fw_reserve_vram_init(adev);
1696 1697 1698 1699
	if (r) {
		return r;
	}

1700
	/*
1701 1702 1703
	 * only NAVI10 and onward ASICs support IP discovery.
	 * If IP discovery is enabled, a block of memory should be
	 * reserved for IP discovery.
1704
	 */
1705
	if (adev->mman.discovery_bin) {
1706
		r = amdgpu_ttm_reserve_tmr(adev);
1707 1708 1709
		if (r)
			return r;
	}
1710

1711 1712 1713 1714
	/* allocate memory as required for VGA
	 * This is used for VGA emulation and pre-OS scanout buffers to
	 * avoid display artifacts while transitioning between pre-OS
	 * and driver.  */
1715
	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
1716
				       AMDGPU_GEM_DOMAIN_VRAM,
1717
				       &adev->mman.stolen_vga_memory,
1718
				       NULL);
	if (r)
		return r;
1721 1722
	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
				       adev->mman.stolen_extended_size,
1723
				       AMDGPU_GEM_DOMAIN_VRAM,
1724
				       &adev->mman.stolen_extended_memory,
1725
				       NULL);
	if (r)
		return r;
1728 1729 1730 1731 1732 1733 1734
	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
				       adev->mman.stolen_reserved_size,
				       AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->mman.stolen_reserved_memory,
				       NULL);
	if (r)
		return r;
1735

	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1737
		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1738

1739 1740
	/* Compute GTT size, either based on 3/4 of the RAM size
	 * or whatever the user passed on module init */
1741 1742 1743 1744
	if (amdgpu_gtt_size == -1) {
		struct sysinfo si;

		si_meminfo(&si);
1745
		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
1746
			       adev->gmc.mc_vram_size),
1747 1748 1749
			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	}
	else
1750
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
1751 1752

	/* Initialize GTT memory pool */
	r = amdgpu_gtt_mgr_init(adev, gtt_size);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned)(gtt_size / (1024 * 1024)));

	/* Initialize preemptible memory pool */
	r = amdgpu_preempt_mgr_init(adev);
	if (r) {
		DRM_ERROR("Failed initializing PREEMPT heap.\n");
		return r;
	}

	/* Initialize various on-chip memory pools */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
		return r;
	}

	return 0;
}

/*
 * amdgpu_ttm_fini - De-initialize the TTM memory pools
 */
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
	if (!adev->mman.initialized)
		return;

	amdgpu_ttm_training_reserve_vram_fini(adev);
	/* return the stolen vga memory back to VRAM */
	amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
	/* return the IP Discovery TMR memory back to VRAM */
	amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
	if (adev->mman.stolen_reserved_size)
		amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
				      NULL, NULL);
	amdgpu_ttm_fw_reserve_vram_fini(adev);

	amdgpu_vram_mgr_fini(adev);
	amdgpu_gtt_mgr_fini(adev);
	amdgpu_preempt_mgr_fini(adev);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_device_fini(&adev->mman.bdev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
}

/**
 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
 *
 * @adev: amdgpu_device pointer
 * @enable: true when we can use buffer functions.
 *
 * Enable/disable use of buffer functions during suspend/resume. This should
 * only be called at bootup or when userspace isn't running.
 */
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	uint64_t size;
	int r;

	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
	    adev->mman.buffer_funcs_enabled == enable)
		return;

	if (enable) {
		struct amdgpu_ring *ring;
		struct drm_gpu_scheduler *sched;

		ring = adev->mman.buffer_funcs_ring;
		sched = &ring->sched;
		r = drm_sched_entity_init(&adev->mman.entity,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
			return;
		}
	} else {
		drm_sched_entity_destroy(&adev->mman.entity);
		dma_fence_put(man->move);
		man->move = NULL;
	}

	/* this just adjusts TTM's idea of VRAM size so that lpfn gets set to the correct value */
	if (enable)
		size = adev->gmc.real_vram_size;
	else
		size = adev->gmc.visible_vram_size;
	man->size = size >> PAGE_SHIFT;
	adev->mman.buffer_funcs_enabled = enable;
}

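/**
 * amdgpu_copy_buffer - copy a range of memory using the buffer functions ring
 *
 * @ring: ring the copy is submitted to, usually adev->mman.buffer_funcs_ring
 * @src_offset: source GPU address
 * @dst_offset: destination GPU address
 * @byte_count: number of bytes to copy
 * @resv: reservation object to synchronize with before the copy, may be NULL
 * @fence: returned fence of the copy job
 * @direct_submit: submit the IB directly to the ring instead of the entity
 * @vm_needs_flush: request a VM flush for the copy job
 * @tmz: treat the buffers as TMZ (secure) protected
 *
 * Splits the range into chunks of at most copy_max_bytes, emits one copy
 * packet per chunk into a single IB and submits it.
 */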
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
		       uint64_t dst_offset, uint32_t byte_count,
		       struct dma_resv *resv,
		       struct dma_fence **fence, bool direct_submit,
		       bool vm_needs_flush, bool tmz)
{
	enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
		AMDGPU_IB_POOL_DELAYED;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	uint32_t max_bytes;
	unsigned num_loops, num_dw;
	unsigned i;
	int r;

	if (direct_submit && !ring->sched.ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
	if (r)
		return r;

	if (vm_needs_flush) {
		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
					adev->gmc.pdb0_bo : adev->gart.bo);
		job->vm_needs_flush = true;
	}
	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_SYNC_ALWAYS,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
					dst_offset, cur_size_in_bytes, tmz);

		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
		byte_count -= cur_size_in_bytes;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	if (direct_submit)
		r = amdgpu_job_submit_direct(job, ring, fence);
	else
		r = amdgpu_job_submit(job, &adev->mman.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return r;

error_free:
	amdgpu_job_free(job);
	DRM_ERROR("Error scheduling IBs (%d)\n", r);
	return r;
}

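/**
 * amdgpu_fill_buffer - fill a buffer object with a 32-bit pattern
 *
 * @bo: buffer object to fill
 * @src_data: 32-bit value the backing memory is filled with
 * @resv: reservation object to synchronize with before the fill, may be NULL
 * @fence: returned fence of the fill job
 *
 * Walks the backing resource with an amdgpu_res_cursor and emits one fill
 * packet of at most fill_max_bytes per contiguous chunk.
 */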
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
		       uint32_t src_data,
		       struct dma_resv *resv,
		       struct dma_fence **fence)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct amdgpu_res_cursor cursor;
	unsigned int num_loops, num_dw;
	uint64_t num_bytes;
	struct amdgpu_job *job;
	int r;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to clear memory with ring turned off.\n");
		return -EINVAL;
	}

	if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) {
		DRM_ERROR("Trying to clear preemptible memory.\n");
		return -EINVAL;
	}

	if (bo->tbo.resource->mem_type == TTM_PL_TT) {
		r = amdgpu_ttm_alloc_gart(&bo->tbo);
		if (r)
			return r;
	}

	num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT;
	num_loops = 0;

	amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
	while (cursor.remaining) {
		num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes);
		amdgpu_res_next(&cursor, cursor.size);
	}
	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;

	/* for IB padding */
	num_dw += 64;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
				     &job);
	if (r)
		return r;

	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_SYNC_ALWAYS,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
	while (cursor.remaining) {
		uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes);
		uint64_t dst_addr = cursor.start;

		dst_addr += amdgpu_ttm_domain_start(adev,
						    bo->tbo.resource->mem_type);
		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
					cur_size);

		amdgpu_res_next(&cursor, cur_size);
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

#if defined(CONFIG_DEBUG_FS)

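/* Dump the state of the VRAM resource manager through debugfs */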
static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    TTM_PL_VRAM);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

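/* Dump the TTM page pool statistics through debugfs */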
static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;

	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}

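/* Equivalent debugfs dumps for the GTT, GDS, GWS and OA resource managers */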
static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    TTM_PL_TT);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_GDS);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_GWS);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_OA);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);

/*
 * amdgpu_ttm_vram_read - Linear read access to VRAM
 *
 * Accesses VRAM via MMIO for debugging purposes.
 */
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->gmc.mc_vram_size)
		return -ENXIO;

	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
	while (size) {
		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];

		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
		if (copy_to_user(buf, value, bytes))
			return -EFAULT;

		result += bytes;
		buf += bytes;
		*pos += bytes;
		size -= bytes;
	}

	return result;
}

/*
 * amdgpu_ttm_vram_write - Linear write access to VRAM
 *
 * Accesses VRAM via MMIO for debugging purposes.
 */
static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
				     size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->gmc.mc_vram_size)
		return -ENXIO;

	while (size) {
		unsigned long flags;
		uint32_t value;

		if (*pos >= adev->gmc.mc_vram_size)
			return result;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)*pos) | 0x80000000);
		WREG32_NO_KIQ(mmMM_INDEX_HI, *pos >> 31);
		WREG32_NO_KIQ(mmMM_DATA, value);
		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

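/* debugfs file operations for raw VRAM access (the amdgpu_vram file) */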
static const struct file_operations amdgpu_ttm_vram_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_vram_read,
	.write = amdgpu_ttm_vram_write,
	.llseek = default_llseek,
};

/*
 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
 *
 * This function is used to read memory that has been mapped to the
 * GPU and the known addresses are not physical addresses but instead
 * bus addresses (e.g., what you'd put in an IB or ring buffer).
 */
static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
				 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	struct iommu_domain *dom;
	ssize_t result = 0;
	int r;

	/* retrieve the IOMMU domain if any for this device */
	dom = iommu_get_domain_for_dev(adev->dev);

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;
		unsigned long pfn;
		struct page *p;
		void *ptr;

		bytes = bytes < size ? bytes : size;

		/* Translate the bus address to a physical address.  If
		 * the domain is NULL it means there is no IOMMU active
		 * and the address translation is the identity.
		 */
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;

		pfn = addr >> PAGE_SHIFT;
		if (!pfn_valid(pfn))
			return -EPERM;

		p = pfn_to_page(pfn);
		if (p->mapping != adev->mman.bdev.dev_mapping)
			return -EPERM;

		ptr = kmap(p);
		r = copy_to_user(buf, ptr + off, bytes);
		kunmap(p);
		if (r)
			return -EFAULT;

		size -= bytes;
		buf += bytes;
		*pos += bytes;
		result += bytes;
	}

	return result;
}

/*
 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
 *
 * This function is used to write memory that has been mapped to the
 * GPU and the known addresses are not physical addresses but instead
 * bus addresses (e.g., what you'd put in an IB or ring buffer).
 */
static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
				  size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	struct iommu_domain *dom;
	ssize_t result = 0;
	int r;

	dom = iommu_get_domain_for_dev(adev->dev);

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;
		unsigned long pfn;
		struct page *p;
		void *ptr;

		bytes = bytes < size ? bytes : size;

		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;

		pfn = addr >> PAGE_SHIFT;
		if (!pfn_valid(pfn))
			return -EPERM;

		p = pfn_to_page(pfn);
		if (p->mapping != adev->mman.bdev.dev_mapping)
			return -EPERM;

		ptr = kmap(p);
		r = copy_from_user(ptr + off, buf, bytes);
		kunmap(p);
		if (r)
			return -EFAULT;

		size -= bytes;
		buf += bytes;
		*pos += bytes;
		result += bytes;
	}

	return result;
}

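/* debugfs file operations for GPU mapped memory access (the amdgpu_iomem file) */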
static const struct file_operations amdgpu_ttm_iomem_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_iomem_read,
	.write = amdgpu_iomem_write,
	.llseek = default_llseek,
};

#endif

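/* Register the TTM debugfs files under the primary DRM minor */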
void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
			    &amdgpu_mm_vram_table_fops);
	debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev,
			    &amdgpu_mm_tt_table_fops);
	debugfs_create_file("amdgpu_gds_mm", 0444, root, adev,
			    &amdgpu_mm_gds_table_fops);
	debugfs_create_file("amdgpu_gws_mm", 0444, root, adev,
			    &amdgpu_mm_gws_table_fops);
	debugfs_create_file("amdgpu_oa_mm", 0444, root, adev,
			    &amdgpu_mm_oa_table_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);
#endif
}