amdgpu_ttm.c 61.2 KB
Newer Older
A
Alex Deucher 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
32

33
#include <linux/dma-mapping.h>
34 35 36
#include <linux/iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
37
#include <linux/sched/mm.h>
38 39 40 41
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
42
#include <linux/dma-buf.h>
43
#include <linux/sizes.h>
44
#include <linux/module.h>
45

46
#include <drm/drm_drv.h>
47 48 49
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
50
#include <drm/ttm/ttm_range_manager.h>
51

A
Alex Deucher 已提交
52
#include <drm/amdgpu_drm.h>
53

A
Alex Deucher 已提交
54
#include "amdgpu.h"
55
#include "amdgpu_object.h"
56
#include "amdgpu_trace.h"
57
#include "amdgpu_amdkfd.h"
58
#include "amdgpu_sdma.h"
59
#include "amdgpu_ras.h"
60
#include "amdgpu_atomfirmware.h"
61
#include "amdgpu_res_cursor.h"
A
Alex Deucher 已提交
62 63
#include "bif/bif_4_1_d.h"

64 65
MODULE_IMPORT_NS(DMA_BUF);

66 67
#define AMDGPU_TTM_VRAM_MAX_DW_READ	(size_t)128

68
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
69 70
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem);
71
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
72
				      struct ttm_tt *ttm);
73

74
static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
75
				    unsigned int type,
76
				    uint64_t size_in_page)
77
{
78
	return ttm_range_man_init(&adev->mman.bdev, type,
79
				  false, size_in_page);
A
Alex Deucher 已提交
80 81
}

82 83 84 85 86 87 88 89
/**
 * amdgpu_evict_flags - Compute placement flags
 *
 * @bo: The buffer object to evict
 * @placement: Possible destination(s) for evicted BO
 *
 * Fill in placement data when ttm_bo_evict() is called
 */
A
Alex Deucher 已提交
90 91 92
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
				struct ttm_placement *placement)
{
93
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
94
	struct amdgpu_bo *abo;
95
	static const struct ttm_place placements = {
A
Alex Deucher 已提交
96 97
		.fpfn = 0,
		.lpfn = 0,
98
		.mem_type = TTM_PL_SYSTEM,
99
		.flags = 0
A
Alex Deucher 已提交
100 101
	};

102
	/* Don't handle scatter gather BOs */
103 104 105 106 107 108
	if (bo->type == ttm_bo_type_sg) {
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;
	}

109
	/* Object isn't an AMDGPU object so ignore */
110
	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
A
Alex Deucher 已提交
111 112 113 114 115 116
		placement->placement = &placements;
		placement->busy_placement = &placements;
		placement->num_placement = 1;
		placement->num_busy_placement = 1;
		return;
	}
117

118
	abo = ttm_to_amdgpu_bo(bo);
119 120 121 122 123
	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;
	}
124 125

	switch (bo->resource->mem_type) {
126 127 128 129 130 131 132
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;

A
Alex Deucher 已提交
133
	case TTM_PL_VRAM:
134
		if (!adev->mman.buffer_funcs_enabled) {
135
			/* Move to system memory */
136
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
137
		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
138 139
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			   amdgpu_bo_in_cpu_visible_vram(abo)) {
140 141 142 143 144 145

			/* Try evicting to the CPU inaccessible part of VRAM
			 * first, but only set GTT as busy placement, so this
			 * BO will be evicted to GTT rather than causing other
			 * BOs to be evicted from VRAM
			 */
146
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
147 148
							AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
149
			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
150 151 152
			abo->placements[0].lpfn = 0;
			abo->placement.busy_placement = &abo->placements[1];
			abo->placement.num_busy_placement = 1;
153
		} else {
154
			/* Move to GTT memory */
155 156
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
157
		}
A
Alex Deucher 已提交
158 159
		break;
	case TTM_PL_TT:
160
	case AMDGPU_PL_PREEMPT:
A
Alex Deucher 已提交
161
	default:
162
		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
163
		break;
A
Alex Deucher 已提交
164
	}
165
	*placement = abo->placement;
A
Alex Deucher 已提交
166 167
}

168 169 170 171
/**
 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 * @bo: buffer object to map
 * @mem: memory object to map
172
 * @mm_cur: range to map
173 174 175 176 177 178 179 180 181 182
 * @num_pages: number of pages to map
 * @window: which GART window to use
 * @ring: DMA ring to use for the copy
 * @tmz: if we should setup a TMZ enabled mapping
 * @addr: resulting address inside the MC address space
 *
 * Setup one of the GART windows to access a specific piece of memory or return
 * the physical address for local memory.
 */
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
183
				 struct ttm_resource *mem,
184 185 186 187
				 struct amdgpu_res_cursor *mm_cur,
				 unsigned num_pages, unsigned window,
				 struct amdgpu_ring *ring, bool tmz,
				 uint64_t *addr)
188 189 190 191 192 193
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
194
	void *cpu_addr;
195
	uint64_t flags;
196
	unsigned int i;
197 198 199 200
	int r;

	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
201
	BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
202 203

	/* Map only what can't be accessed directly */
204
	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
205 206
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
207 208 209 210 211 212
		return 0;
	}

	*addr = adev->gmc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;
213
	*addr += mm_cur->start & ~PAGE_MASK;
214 215

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
216
	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
217 218

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
219
				     AMDGPU_IB_POOL_DELAYED, &job);
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
	if (tmz)
		flags |= AMDGPU_PTE_TMZ;

238 239 240
	cpu_addr = &job->ibs[0].ptr[num_dw];

	if (mem->mem_type == TTM_PL_TT) {
241
		dma_addr_t *dma_addr;
242

243 244
		dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
		r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags,
245 246 247 248 249 250
				    cpu_addr);
		if (r)
			goto error_free;
	} else {
		dma_addr_t dma_address;

251
		dma_address = mm_cur->start;
252 253 254 255 256 257 258 259 260 261 262
		dma_address += adev->vm_manager.vram_base_offset;

		for (i = 0; i < num_pages; ++i) {
			r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
					    &dma_address, flags, cpu_addr);
			if (r)
				goto error_free;

			dma_address += PAGE_SIZE;
		}
	}
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277

	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

278
/**
279
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
280 281 282 283 284 285 286
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @tmz: if a secure copy should be used
 * @resv: resv object to sync to
 * @f: Returns the last fence if multiple jobs are submitted.
287 288 289 290 291 292 293
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
 * move and different for a BO to BO copy.
 *
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
294 295
			       const struct amdgpu_copy_mem *src,
			       const struct amdgpu_copy_mem *dst,
296
			       uint64_t size, bool tmz,
297
			       struct dma_resv *resv,
298
			       struct dma_fence **f)
299
{
300 301 302
	const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
					AMDGPU_GPU_PAGE_SIZE);

303
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
304
	struct amdgpu_res_cursor src_mm, dst_mm;
305
	struct dma_fence *fence = NULL;
306
	int r = 0;
307

308
	if (!adev->mman.buffer_funcs_enabled) {
A
Alex Deucher 已提交
309 310 311 312
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

313 314
	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
315

316
	mutex_lock(&adev->mman.gtt_window_lock);
317 318 319
	while (src_mm.remaining) {
		uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
		uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
320
		struct dma_fence *next;
321 322
		uint32_t cur_size;
		uint64_t from, to;
323

324 325 326
		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
		 * begins at an offset, then adjust the size accordingly
		 */
327
		cur_size = max(src_page_offset, dst_page_offset);
328
		cur_size = min(min3(src_mm.size, dst_mm.size, size),
329
			       (uint64_t)(GTT_MAX_BYTES - cur_size));
330 331

		/* Map src to window 0 and dst to window 1. */
332
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
333
					  PFN_UP(cur_size + src_page_offset),
334
					  0, ring, tmz, &from);
335 336
		if (r)
			goto error;
337

338
		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
339
					  PFN_UP(cur_size + dst_page_offset),
340
					  1, ring, tmz, &to);
341 342
		if (r)
			goto error;
343

344
		r = amdgpu_copy_buffer(ring, from, to, cur_size,
345
				       resv, &next, false, true, tmz);
346 347 348
		if (r)
			goto error;

349
		dma_fence_put(fence);
350 351
		fence = next;

352 353
		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
354
	}
355
error:
356
	mutex_unlock(&adev->mman.gtt_window_lock);
357 358 359 360 361 362
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}

363
/*
364 365
 * amdgpu_move_blit - Copy an entire buffer to another buffer
 *
366 367
 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
 * help move buffers to and from VRAM.
368
 */
369
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
370
			    bool evict,
371 372
			    struct ttm_resource *new_mem,
			    struct ttm_resource *old_mem)
373 374
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
375
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
376 377 378 379 380 381 382 383 384 385 386 387 388
	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = bo;
	dst.bo = bo;
	src.mem = old_mem;
	dst.mem = new_mem;
	src.offset = 0;
	dst.offset = 0;

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->num_pages << PAGE_SHIFT,
389
				       amdgpu_bo_encrypted(abo),
390
				       bo->base.resv, &fence);
391 392
	if (r)
		goto error;
393

394 395
	/* clear the space being freed */
	if (old_mem->mem_type == TTM_PL_VRAM &&
396
	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
397 398 399 400 401 402 403 404 405 406 407 408
		struct dma_fence *wipe_fence = NULL;

		r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
				       NULL, &wipe_fence);
		if (r) {
			goto error;
		} else if (wipe_fence) {
			dma_fence_put(fence);
			fence = wipe_fence;
		}
	}

409 410
	/* Always block for VM page tables before committing the new location */
	if (bo->type == ttm_bo_type_kernel)
411
		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
412
	else
413
		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
414
	dma_fence_put(fence);
A
Alex Deucher 已提交
415
	return r;
416 417 418

error:
	if (fence)
419 420
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
421
	return r;
A
Alex Deucher 已提交
422 423
}

424
/*
425 426 427 428 429
 * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
 *
 * Called by amdgpu_bo_move()
 */
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
430
			       struct ttm_resource *mem)
431
{
432 433
	uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
	struct amdgpu_res_cursor cursor;
434 435 436 437 438 439 440

	if (mem->mem_type == TTM_PL_SYSTEM ||
	    mem->mem_type == TTM_PL_TT)
		return true;
	if (mem->mem_type != TTM_PL_VRAM)
		return false;

441 442
	amdgpu_res_first(mem, 0, mem_size, &cursor);

443
	/* ttm_resource_ioremap only supports contiguous memory */
444
	if (cursor.size != mem_size)
445 446
		return false;

447
	return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
448 449
}

450
/*
451 452 453 454
 * amdgpu_bo_move - Move a buffer object to a new memory location
 *
 * Called by ttm_bo_handle_move_mem()
 */
455 456
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
457 458
			  struct ttm_resource *new_mem,
			  struct ttm_place *hop)
A
Alex Deucher 已提交
459 460
{
	struct amdgpu_device *adev;
461
	struct amdgpu_bo *abo;
462
	struct ttm_resource *old_mem = bo->resource;
A
Alex Deucher 已提交
463 464
	int r;

465 466
	if (new_mem->mem_type == TTM_PL_TT ||
	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
467 468 469 470 471
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
		if (r)
			return r;
	}

472
	/* Can't move a pinned BO */
473
	abo = ttm_to_amdgpu_bo(bo);
474
	if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
475 476
		return -EINVAL;

477
	adev = amdgpu_ttm_adev(bo->bdev);
478

A
Alex Deucher 已提交
479
	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
480
		ttm_bo_move_null(bo, new_mem);
481
		goto out;
A
Alex Deucher 已提交
482
	}
483
	if (old_mem->mem_type == TTM_PL_SYSTEM &&
484 485
	    (new_mem->mem_type == TTM_PL_TT ||
	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
486
		ttm_bo_move_null(bo, new_mem);
487
		goto out;
A
Alex Deucher 已提交
488
	}
489 490
	if ((old_mem->mem_type == TTM_PL_TT ||
	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
491
	    new_mem->mem_type == TTM_PL_SYSTEM) {
492
		r = ttm_bo_wait_ctx(bo, ctx);
493
		if (r)
494
			return r;
495 496

		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
497
		ttm_resource_free(bo, &bo->resource);
498
		ttm_bo_assign_mem(bo, new_mem);
499
		goto out;
500
	}
501

502 503 504 505 506 507 508
	if (old_mem->mem_type == AMDGPU_PL_GDS ||
	    old_mem->mem_type == AMDGPU_PL_GWS ||
	    old_mem->mem_type == AMDGPU_PL_OA ||
	    new_mem->mem_type == AMDGPU_PL_GDS ||
	    new_mem->mem_type == AMDGPU_PL_GWS ||
	    new_mem->mem_type == AMDGPU_PL_OA) {
		/* Nothing to save here */
509
		ttm_bo_move_null(bo, new_mem);
510
		goto out;
511
	}
512

513 514 515 516 517 518 519 520 521
	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

522 523 524 525 526 527 528 529
	if (adev->mman.buffer_funcs_enabled) {
		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
		      new_mem->mem_type == TTM_PL_VRAM) ||
		     (old_mem->mem_type == TTM_PL_VRAM &&
		      new_mem->mem_type == TTM_PL_SYSTEM))) {
			hop->fpfn = 0;
			hop->lpfn = 0;
			hop->mem_type = TTM_PL_TT;
530
			hop->flags = TTM_PL_FLAG_TEMPORARY;
531 532 533 534 535
			return -EMULTIHOP;
		}

		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
	} else {
536 537
		r = -ENODEV;
	}
A
Alex Deucher 已提交
538 539

	if (r) {
540 541 542 543
		/* Check that all memory is CPU accessible */
		if (!amdgpu_mem_visible(adev, old_mem) ||
		    !amdgpu_mem_visible(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
544
			return r;
A
Alex Deucher 已提交
545
		}
546 547 548

		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
549
			return r;
A
Alex Deucher 已提交
550 551
	}

552
out:
A
Alex Deucher 已提交
553
	/* update statistics */
554
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
555
	amdgpu_bo_move_notify(bo, evict, new_mem);
A
Alex Deucher 已提交
556 557 558
	return 0;
}

559
/*
560 561 562 563
 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 *
 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 */
564 565
static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
				     struct ttm_resource *mem)
A
Alex Deucher 已提交
566
{
567
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
568
	size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
A
Alex Deucher 已提交
569 570 571 572 573 574

	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
575
	case AMDGPU_PL_PREEMPT:
A
Alex Deucher 已提交
576 577 578 579
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		/* check if it's visible */
580
		if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
A
Alex Deucher 已提交
581
			return -EINVAL;
582

583
		if (adev->mman.aper_base_kaddr &&
584
		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
585 586 587
			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
					mem->bus.offset;

588
		mem->bus.offset += adev->gmc.aper_base;
A
Alex Deucher 已提交
589 590 591 592 593 594 595 596
		mem->bus.is_iomem = true;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

597 598 599
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
600
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
601
	struct amdgpu_res_cursor cursor;
602

603 604
	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
			 &cursor);
605
	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
606 607
}

608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
/**
 * amdgpu_ttm_domain_start - Returns GPU start address
 * @adev: amdgpu device object
 * @type: type of the memory
 *
 * Returns:
 * GPU start address of a memory domain
 */

uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
{
	switch (type) {
	case TTM_PL_TT:
		return adev->gmc.gart_start;
	case TTM_PL_VRAM:
		return adev->gmc.vram_start;
	}

	return 0;
}

A
Alex Deucher 已提交
629 630 631 632
/*
 * TTM backend functions.
 */
struct amdgpu_ttm_tt {
633
	struct ttm_tt	ttm;
634
	struct drm_gem_object	*gobj;
635 636
	u64			offset;
	uint64_t		userptr;
637
	struct task_struct	*usertask;
638
	uint32_t		userflags;
639
	bool			bound;
640
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
641
	struct hmm_range	*range;
642
#endif
A
Alex Deucher 已提交
643 644
};

645
#ifdef CONFIG_DRM_AMDGPU_USERPTR
646
/*
647 648
 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
 * memory and start HMM tracking CPU page table update
649
 *
650 651
 * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
 * once afterwards to stop HMM tracking
652
 */
653
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
A
Alex Deucher 已提交
654
{
655
	struct ttm_tt *ttm = bo->tbo.ttm;
A
Alex Deucher 已提交
656
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
657
	unsigned long start = gtt->userptr;
658
	struct vm_area_struct *vma;
659
	struct mm_struct *mm;
660
	bool readonly;
661
	int r = 0;
A
Alex Deucher 已提交
662

663 664 665
	mm = bo->notifier.mm;
	if (unlikely(!mm)) {
		DRM_DEBUG_DRIVER("BO is not registered?\n");
666
		return -EFAULT;
667
	}
668

669 670 671 672
	/* Another get_user_pages is running at the same time?? */
	if (WARN_ON(gtt->range))
		return -EFAULT;

673
	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
674 675
		return -ESRCH;

676
	mmap_read_lock(mm);
677 678
	vma = vma_lookup(mm, start);
	if (unlikely(!vma)) {
679
		r = -EFAULT;
680
		goto out_unlock;
681
	}
682
	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
683
		vma->vm_file)) {
684
		r = -EPERM;
685
		goto out_unlock;
686
	}
687

688 689 690
	readonly = amdgpu_ttm_tt_is_readonly(ttm);
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       ttm->num_pages, &gtt->range, readonly,
691
				       true, NULL);
692
out_unlock:
693
	mmap_read_unlock(mm);
694 695 696
	if (r)
		pr_debug("failed %d to get user pages 0x%lx\n", r, start);

697
	mmput(mm);
698

699 700 701
	return r;
}

702
/*
703 704
 * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
 * Check if the pages backing this ttm range have been invalidated
705
 *
706
 * Returns: true if pages are still valid
707
 */
708
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
709
{
710
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
711
	bool r = false;
712

713 714
	if (!gtt || !gtt->userptr)
		return false;
715

716
	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
717
		gtt->userptr, ttm->num_pages);
718

719
	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
720 721
		"No user pages to check\n");

722
	if (gtt->range) {
723 724 725 726
		/*
		 * FIXME: Must always hold notifier_lock for this, and must
		 * not ignore the return code.
		 */
727
		r = amdgpu_hmm_range_get_pages_done(gtt->range);
728
		gtt->range = NULL;
729
	}
730

731
	return !r;
732
}
733
#endif
734

735
/*
736
 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
737
 *
738
 * Called by amdgpu_cs_list_validate(). This creates the page list
739 740
 * that backs user memory and will ultimately be mapped into the device
 * address space.
741
 */
742
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
743
{
744
	unsigned long i;
745

746
	for (i = 0; i < ttm->num_pages; ++i)
747
		ttm->pages[i] = pages ? pages[i] : NULL;
748 749
}

750
/*
751
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
752 753 754
 *
 * Called by amdgpu_ttm_backend_bind()
 **/
755
static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
D
Dave Airlie 已提交
756
				     struct ttm_tt *ttm)
757
{
D
Dave Airlie 已提交
758
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
759 760 761 762
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
763
	int r;
764

765
	/* Allocate an SG array and squash pages into it */
A
Alex Deucher 已提交
766
	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
X
xinhui pan 已提交
767
				      (u64)ttm->num_pages << PAGE_SHIFT,
A
Alex Deucher 已提交
768 769 770 771
				      GFP_KERNEL);
	if (r)
		goto release_sg;

772
	/* Map SG to device */
773 774
	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
	if (r)
A
Alex Deucher 已提交
775 776
		goto release_sg;

777
	/* convert SG to linear array of pages and dma addresses */
778 779
	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
				       ttm->num_pages);
A
Alex Deucher 已提交
780 781 782 783 784

	return 0;

release_sg:
	kfree(ttm->sg);
785
	ttm->sg = NULL;
A
Alex Deucher 已提交
786 787 788
	return r;
}

789
/*
790 791
 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 */
792
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
D
Dave Airlie 已提交
793
					struct ttm_tt *ttm)
A
Alex Deucher 已提交
794
{
D
Dave Airlie 已提交
795
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
796 797 798 799 800 801
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
802
	if (!ttm->sg || !ttm->sg->sgl)
A
Alex Deucher 已提交
803 804
		return;

805
	/* unmap the pages mapped to the device */
806
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
807
	sg_free_table(ttm->sg);
808

809
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
810 811 812 813 814
	if (gtt->range) {
		unsigned long i;

		for (i = 0; i < ttm->num_pages; i++) {
			if (ttm->pages[i] !=
815
			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
816 817 818 819 820
				break;
		}

		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
	}
821
#endif
A
Alex Deucher 已提交
822 823
}

824
static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
825 826 827 828 829 830 831 832
				struct ttm_buffer_object *tbo,
				uint64_t flags)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
	struct ttm_tt *ttm = tbo->ttm;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int r;

833 834 835
	if (amdgpu_bo_encrypted(abo))
		flags |= AMDGPU_PTE_TMZ;

836
	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
837 838 839
		uint64_t page_idx = 1;

		r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
840
				gtt->ttm.dma_address, flags);
841 842 843
		if (r)
			goto gart_bind_fail;

844 845 846 847
		/* The memory type of the first page defaults to UC. Now
		 * modify the memory type to NC from the second page of
		 * the BO onward.
		 */
848 849
		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
850 851 852 853 854 855 856

		r = amdgpu_gart_bind(adev,
				gtt->offset + (page_idx << PAGE_SHIFT),
				ttm->num_pages - page_idx,
				&(gtt->ttm.dma_address[page_idx]), flags);
	} else {
		r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
857
				     gtt->ttm.dma_address, flags);
858 859 860 861
	}

gart_bind_fail:
	if (r)
862
		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
863 864 865 866 867
			  ttm->num_pages, gtt->offset);

	return r;
}

868
/*
869 870 871 872 873
 * amdgpu_ttm_backend_bind - Bind GTT memory
 *
 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 * This handles binding GTT memory to the device address space.
 */
874
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
D
Dave Airlie 已提交
875
				   struct ttm_tt *ttm,
876
				   struct ttm_resource *bo_mem)
A
Alex Deucher 已提交
877
{
D
Dave Airlie 已提交
878
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
879
	struct amdgpu_ttm_tt *gtt = (void*)ttm;
880
	uint64_t flags;
881
	int r = 0;
A
Alex Deucher 已提交
882

883 884 885 886 887 888
	if (!bo_mem)
		return -EINVAL;

	if (gtt->bound)
		return 0;

889
	if (gtt->userptr) {
D
Dave Airlie 已提交
890
		r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
891 892 893 894
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
M
Matthew Auld 已提交
895
	} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
896 897 898 899 900 901 902 903 904 905 906 907 908 909
		if (!ttm->sg) {
			struct dma_buf_attachment *attach;
			struct sg_table *sgt;

			attach = gtt->gobj->import_attach;
			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);

			ttm->sg = sgt;
		}

		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
					       ttm->num_pages);
910
	}
911

A
Alex Deucher 已提交
912
	if (!ttm->num_pages) {
913
		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
A
Alex Deucher 已提交
914 915 916
		     ttm->num_pages, bo_mem, ttm);
	}

917 918
	if (bo_mem->mem_type != TTM_PL_TT ||
	    !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
919
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
920
		return 0;
921
	}
922

923
	/* compute PTE flags relevant to this BO memory */
C
Christian König 已提交
924
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
925 926

	/* bind pages into GART page tables */
927
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
C
Christian König 已提交
928
	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
929
		gtt->ttm.dma_address, flags);
930

931
	if (r)
932
		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
933
			  ttm->num_pages, gtt->offset);
934
	gtt->bound = true;
935
	return r;
936 937
}

938
/*
939 940 941 942 943 944
 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
 * through AGP or GART aperture.
 *
 * If bo is accessible through AGP aperture, then use AGP aperture
 * to access bo; otherwise allocate logical space in GART aperture
 * and map bo to GART aperture.
945
 */
946
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
947
{
948
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
949
	struct ttm_operation_ctx ctx = { false, false };
950
	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
951 952
	struct ttm_placement placement;
	struct ttm_place placements;
953
	struct ttm_resource *tmp;
954
	uint64_t addr, flags;
955 956
	int r;

957
	if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
958 959
		return 0;

960 961
	addr = amdgpu_gmc_agp_addr(bo);
	if (addr != AMDGPU_BO_INVALID_OFFSET) {
962
		bo->resource->start = addr >> PAGE_SHIFT;
963 964
		return 0;
	}
965

966 967 968 969 970 971 972 973 974
	/* allocate GART space */
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
	placements.mem_type = TTM_PL_TT;
	placements.flags = bo->resource->placement;
975

976 977 978
	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
	if (unlikely(r))
		return r;
979

980 981
	/* compute PTE flags for this buffer object */
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
982

983 984 985 986 987 988
	/* Bind pages */
	gtt->offset = (u64)tmp->start << PAGE_SHIFT;
	r = amdgpu_ttm_gart_bind(adev, bo, flags);
	if (unlikely(r)) {
		ttm_resource_free(bo, &tmp);
		return r;
989
	}
990

991
	amdgpu_gart_invalidate_tlb(adev);
992 993 994
	ttm_resource_free(bo, &bo->resource);
	ttm_bo_assign_mem(bo, tmp);

995
	return 0;
A
Alex Deucher 已提交
996 997
}

998
/*
999 1000 1001 1002 1003
 * amdgpu_ttm_recover_gart - Rebind GTT pages
 *
 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
 * rebind GTT pages during a GPU reset.
 */
1004
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1005
{
1006
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1007
	uint64_t flags;
1008 1009
	int r;

1010
	if (!tbo->ttm)
1011 1012
		return 0;

1013
	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1014 1015
	r = amdgpu_ttm_gart_bind(adev, tbo, flags);

1016
	return r;
1017 1018
}

1019
/*
1020 1021 1022 1023 1024
 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
 *
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
1025
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
D
Dave Airlie 已提交
1026
				      struct ttm_tt *ttm)
A
Alex Deucher 已提交
1027
{
D
Dave Airlie 已提交
1028
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
1029
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1030
	int r;
A
Alex Deucher 已提交
1031

1032
	/* if the pages have userptr pinning then clear that first */
1033
	if (gtt->userptr) {
D
Dave Airlie 已提交
1034
		amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
1035 1036 1037 1038 1039 1040 1041
	} else if (ttm->sg && gtt->gobj->import_attach) {
		struct dma_buf_attachment *attach;

		attach = gtt->gobj->import_attach;
		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
		ttm->sg = NULL;
	}
1042

1043 1044 1045
	if (!gtt->bound)
		return;

1046
	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1047
		return;
1048

A
Alex Deucher 已提交
1049
	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
C
Christian König 已提交
1050
	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1051
	if (r)
1052
		DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
1053
			  gtt->ttm.num_pages, gtt->offset);
1054
	gtt->bound = false;
A
Alex Deucher 已提交
1055 1056
}

1057
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
D
Dave Airlie 已提交
1058
				       struct ttm_tt *ttm)
A
Alex Deucher 已提交
1059 1060 1061
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

1062 1063 1064
	if (gtt->usertask)
		put_task_struct(gtt->usertask);

1065
	ttm_tt_fini(&gtt->ttm);
A
Alex Deucher 已提交
1066 1067 1068
	kfree(gtt);
}

1069 1070 1071 1072
/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
 * @bo: The buffer object to create a GTT ttm_tt object around
1073
 * @page_flags: Page flags to be added to the ttm_tt object
1074 1075 1076
 *
 * Called by ttm_tt_create().
 */
1077 1078
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
					   uint32_t page_flags)
A
Alex Deucher 已提交
1079
{
1080
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
A
Alex Deucher 已提交
1081
	struct amdgpu_ttm_tt *gtt;
1082
	enum ttm_caching caching;
A
Alex Deucher 已提交
1083 1084 1085 1086 1087

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL) {
		return NULL;
	}
1088
	gtt->gobj = &bo->base;
1089

1090 1091 1092 1093 1094
	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		caching = ttm_write_combined;
	else
		caching = ttm_cached;

1095
	/* allocate space for the uninitialized page entries */
1096
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
A
Alex Deucher 已提交
1097 1098 1099
		kfree(gtt);
		return NULL;
	}
1100
	return &gtt->ttm;
A
Alex Deucher 已提交
1101 1102
}

1103
/*
1104 1105 1106 1107 1108
 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
 *
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
1109
static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
D
Dave Airlie 已提交
1110 1111
				  struct ttm_tt *ttm,
				  struct ttm_operation_ctx *ctx)
A
Alex Deucher 已提交
1112
{
D
Dave Airlie 已提交
1113
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
1114
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1115 1116
	pgoff_t i;
	int ret;
A
Alex Deucher 已提交
1117

1118
	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1119
	if (gtt->userptr) {
1120
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
A
Alex Deucher 已提交
1121 1122 1123 1124 1125
		if (!ttm->sg)
			return -ENOMEM;
		return 0;
	}

M
Matthew Auld 已提交
1126
	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
1127
		return 0;
A
Alex Deucher 已提交
1128

1129 1130 1131 1132 1133 1134 1135 1136
	ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
	if (ret)
		return ret;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = bdev->dev_mapping;

	return 0;
A
Alex Deucher 已提交
1137 1138
}

1139
/*
1140 1141 1142 1143 1144
 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
 *
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
1145
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
1146
				     struct ttm_tt *ttm)
A
Alex Deucher 已提交
1147 1148
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1149
	struct amdgpu_device *adev;
1150
	pgoff_t i;
A
Alex Deucher 已提交
1151

1152 1153
	amdgpu_ttm_backend_unbind(bdev, ttm);

1154
	if (gtt->userptr) {
1155
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
A
Alex Deucher 已提交
1156
		kfree(ttm->sg);
X
xinhui pan 已提交
1157
		ttm->sg = NULL;
1158 1159 1160
		return;
	}

M
Matthew Auld 已提交
1161
	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
A
Alex Deucher 已提交
1162 1163
		return;

1164 1165 1166
	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = NULL;

D
Dave Airlie 已提交
1167
	adev = amdgpu_ttm_adev(bdev);
1168
	return ttm_pool_free(&adev->mman.bdev.pool, ttm);
A
Alex Deucher 已提交
1169 1170
}

1171
/**
1172 1173
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
1174
 *
1175
 * @bo: The ttm_buffer_object to bind this userptr to
1176 1177 1178 1179 1180 1181
 * @addr:  The address in the current tasks VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 * to current task
 */
1182 1183
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
			      uint64_t addr, uint32_t flags)
A
Alex Deucher 已提交
1184
{
1185
	struct amdgpu_ttm_tt *gtt;
A
Alex Deucher 已提交
1186

1187 1188 1189 1190 1191 1192
	if (!bo->ttm) {
		/* TODO: We want a separate TTM object type for userptrs */
		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
		if (bo->ttm == NULL)
			return -ENOMEM;
	}
A
Alex Deucher 已提交
1193

M
Matthew Auld 已提交
1194 1195
	/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
	bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
1196

1197
	gtt = (void *)bo->ttm;
A
Alex Deucher 已提交
1198 1199
	gtt->userptr = addr;
	gtt->userflags = flags;
1200 1201 1202 1203 1204 1205

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

A
Alex Deucher 已提交
1206 1207 1208
	return 0;
}

1209
/*
1210 1211
 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
 */
1212
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
A
Alex Deucher 已提交
1213 1214 1215 1216
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
1217
		return NULL;
A
Alex Deucher 已提交
1218

1219 1220 1221 1222
	if (gtt->usertask == NULL)
		return NULL;

	return gtt->usertask->mm;
A
Alex Deucher 已提交
1223 1224
}

1225
/*
1226 1227
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an
 * address range for the current task.
1228 1229
 *
 */
1230
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1231
				  unsigned long end, unsigned long *userptr)
1232 1233 1234 1235
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned long size;

1236
	if (gtt == NULL || !gtt->userptr)
1237 1238
		return false;

1239 1240 1241
	/* Return false if no part of the ttm_tt object lies within
	 * the range
	 */
1242
	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
1243 1244 1245
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

1246 1247
	if (userptr)
		*userptr = gtt->userptr;
1248 1249 1250
	return true;
}

1251
/*
1252
 * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
1253
 */
1254
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1255 1256 1257 1258 1259 1260
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL || !gtt->userptr)
		return false;

1261
	return true;
1262 1263
}

1264
/*
1265 1266
 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
 */
A
Alex Deucher 已提交
1267 1268 1269 1270 1271 1272 1273 1274 1275 1276
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

1277
/**
1278
 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1279 1280 1281
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
1282 1283
 *
 * Figure out the flags to use for a VM PDE (Page Directory Entry).
1284
 */
1285
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
A
Alex Deucher 已提交
1286
{
1287
	uint64_t flags = 0;
A
Alex Deucher 已提交
1288 1289 1290 1291

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

1292 1293
	if (mem && (mem->mem_type == TTM_PL_TT ||
		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
A
Alex Deucher 已提交
1294 1295
		flags |= AMDGPU_PTE_SYSTEM;

1296
		if (ttm->caching == ttm_cached)
1297 1298
			flags |= AMDGPU_PTE_SNOOPED;
	}
A
Alex Deucher 已提交
1299

1300 1301 1302 1303
	if (mem && mem->mem_type == TTM_PL_VRAM &&
			mem->bus.caching == ttm_cached)
		flags |= AMDGPU_PTE_SNOOPED;

1304 1305 1306 1307 1308 1309
	return flags;
}

/**
 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
 *
1310
 * @adev: amdgpu_device pointer
1311 1312
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
1313
 *
1314 1315 1316
 * Figure out the flags to use for a VM PTE (Page Table Entry).
 */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1317
				 struct ttm_resource *mem)
1318 1319 1320
{
	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

1321
	flags |= adev->gart.gart_pte_flags;
A
Alex Deucher 已提交
1322 1323 1324 1325 1326 1327 1328 1329
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

1330
/*
1331 1332
 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
 * object.
1333
 *
1334 1335 1336
 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
 * it can find space for a new object and by ttm_bo_force_list_clean() which is
1337 1338
 * used to clean out a memory space.
 */
1339 1340 1341
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
1342
	unsigned long num_pages = bo->resource->num_pages;
1343
	struct dma_resv_iter resv_cursor;
1344
	struct amdgpu_res_cursor cursor;
1345 1346
	struct dma_fence *f;

1347 1348 1349 1350
	/* Swapout? */
	if (bo->resource->mem_type == TTM_PL_SYSTEM)
		return true;

1351
	if (bo->type == ttm_bo_type_kernel &&
1352
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1353 1354
		return false;

1355 1356 1357 1358
	/* If bo is a KFD BO, check if the bo belongs to the current process.
	 * If true, then return false as any KFD process needs all its BOs to
	 * be resident to run successfully
	 */
1359 1360 1361
	dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
		if (amdkfd_fence_check_mm(f, current->mm))
			return false;
1362
	}
1363

1364
	switch (bo->resource->mem_type) {
1365 1366 1367 1368 1369 1370 1371 1372 1373
	case AMDGPU_PL_PREEMPT:
		/* Preemptible BOs don't own system resources managed by the
		 * driver (pages, VRAM, GART space). They point to resources
		 * owned by someone else (e.g. pageable memory in user mode
		 * or a DMABuf). They are used in a preemptible context so we
		 * can guarantee no deadlocks and good QoS in case of MMU
		 * notifiers or DMABuf move notifiers from the resource owner.
		 */
		return false;
1374
	case TTM_PL_TT:
1375 1376 1377
		if (amdgpu_bo_is_amdgpu_bo(bo) &&
		    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
			return false;
1378
		return true;
1379

1380
	case TTM_PL_VRAM:
1381
		/* Check each drm MM node individually */
1382
		amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
1383 1384 1385 1386 1387
				 &cursor);
		while (cursor.remaining) {
			if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
			    && !(place->lpfn &&
				 place->lpfn <= PFN_DOWN(cursor.start)))
1388 1389
				return true;

1390
			amdgpu_res_next(&cursor, cursor.size);
1391
		}
1392
		return false;
1393

1394 1395
	default:
		break;
1396 1397 1398 1399 1400
	}

	return ttm_bo_eviction_valuable(bo, place);
}

1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435
static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
				      void *buf, size_t size, bool write)
{
	while (size) {
		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
		uint64_t bytes = 4 - (pos & 0x3);
		uint32_t shift = (pos & 0x3) * 8;
		uint32_t mask = 0xffffffff << shift;
		uint32_t value = 0;

		if (size < bytes) {
			mask &= 0xffffffff >> (bytes - size) * 8;
			bytes = size;
		}

		if (mask != 0xffffffff) {
			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
			if (write) {
				value &= ~mask;
				value |= (*(uint32_t *)buf << shift) & mask;
				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
			} else {
				value = (value & mask) >> shift;
				memcpy(buf, &value, bytes);
			}
		} else {
			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
		}

		pos += bytes;
		buf += bytes;
		size -= bytes;
	}
}

1436
/**
1437
 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1438 1439 1440 1441 1442 1443 1444 1445 1446 1447
 *
 * @bo:  The buffer object to read/write
 * @offset:  Offset into buffer object
 * @buf:  Secondary buffer to write/read from
 * @len: Length in bytes of access
 * @write:  true if writing
 *
 * This is used to access VRAM that backs a buffer object via MMIO
 * access for debugging purposes.
 */
1448
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1449 1450
				    unsigned long offset, void *buf, int len,
				    int write)
1451
{
1452
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1453
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1454
	struct amdgpu_res_cursor cursor;
1455 1456
	int ret = 0;

1457
	if (bo->resource->mem_type != TTM_PL_VRAM)
1458 1459
		return -EIO;

1460
	amdgpu_res_first(bo->resource, offset, len, &cursor);
1461
	while (cursor.remaining) {
1462 1463 1464 1465 1466 1467 1468 1469 1470 1471
		size_t count, size = cursor.size;
		loff_t pos = cursor.start;

		count = amdgpu_device_aper_access(adev, pos, buf, size, write);
		size -= count;
		if (size) {
			/* using MM to access rest vram and handle un-aligned address */
			pos += count;
			buf += count;
			amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
1472 1473
		}

1474 1475 1476
		ret += cursor.size;
		buf += cursor.size;
		amdgpu_res_next(&cursor, cursor.size);
1477 1478 1479 1480 1481
	}

	return ret;
}

1482 1483 1484 1485 1486 1487
static void
amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
{
	amdgpu_bo_move_notify(bo, false, NULL);
}

1488
static struct ttm_device_funcs amdgpu_bo_driver = {
A
Alex Deucher 已提交
1489 1490 1491
	.ttm_tt_create = &amdgpu_ttm_tt_create,
	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1492
	.ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
1493
	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
A
Alex Deucher 已提交
1494 1495
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,
1496
	.delete_mem_notify = &amdgpu_bo_delete_mem_notify,
1497
	.release_notify = &amdgpu_bo_release_notify,
A
Alex Deucher 已提交
1498
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1499
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1500
	.access_memory = &amdgpu_ttm_access_memory,
A
Alex Deucher 已提交
1501 1502
};

1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514
/*
 * Firmware Reservation functions
 */
/**
 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free fw reserved vram if it has been reserved.
 */
static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
{
1515 1516
	amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
		NULL, &adev->mman.fw_vram_usage_va);
1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527
}

/**
 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
 *
 * @adev: amdgpu_device pointer
 *
 * create bo vram reservation from fw.
 */
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
1528 1529
	uint64_t vram_size = adev->gmc.visible_vram_size;

1530 1531
	adev->mman.fw_vram_usage_va = NULL;
	adev->mman.fw_vram_usage_reserved_bo = NULL;
1532

1533 1534
	if (adev->mman.fw_vram_usage_size == 0 ||
	    adev->mman.fw_vram_usage_size > vram_size)
1535
		return 0;
1536

1537
	return amdgpu_bo_create_kernel_at(adev,
1538 1539
					  adev->mman.fw_vram_usage_start_offset,
					  adev->mman.fw_vram_usage_size,
1540
					  AMDGPU_GEM_DOMAIN_VRAM,
1541 1542
					  &adev->mman.fw_vram_usage_reserved_bo,
					  &adev->mman.fw_vram_usage_va);
1543
}
1544

1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566
/*
 * Memoy training reservation functions
 */

/**
 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free memory training reserved vram if it has been reserved.
 */
static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
{
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;

	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
	ctx->c2p_bo = NULL;

	return 0;
}

1567
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
1568
{
1569
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1570

1571
	memset(ctx, 0, sizeof(*ctx));
1572

1573
	ctx->c2p_train_data_offset =
1574
		ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
1575 1576 1577 1578
	ctx->p2c_train_data_offset =
		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
	ctx->train_data_size =
		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1579

1580 1581 1582 1583
	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
			ctx->train_data_size,
			ctx->p2c_train_data_offset,
			ctx->c2p_train_data_offset);
1584 1585
}

1586 1587 1588
/*
 * reserve TMR memory at the top of VRAM which holds
 * IP Discovery data and is protected by PSP.
1589
 */
1590
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1591 1592 1593
{
	int ret;
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1594
	bool mem_train_support = false;
1595

1596
	if (!amdgpu_sriov_vf(adev)) {
1597
		if (amdgpu_atomfirmware_mem_training_supported(adev))
1598
			mem_train_support = true;
1599
		else
1600
			DRM_DEBUG("memory training does not support!\n");
1601 1602
	}

1603 1604 1605 1606 1607 1608 1609
	/*
	 * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
	 * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc)
	 *
	 * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
	 * discovery data and G6 memory training data respectively
	 */
1610
	adev->mman.discovery_tmr_size =
1611
		amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1612 1613
	if (!adev->mman.discovery_tmr_size)
		adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
1614 1615 1616 1617 1618

	if (mem_train_support) {
		/* reserve vram for mem train according to TMR location */
		amdgpu_ttm_training_data_block_init(adev);
		ret = amdgpu_bo_create_kernel_at(adev,
1619 1620 1621 1622 1623
					 ctx->c2p_train_data_offset,
					 ctx->train_data_size,
					 AMDGPU_GEM_DOMAIN_VRAM,
					 &ctx->c2p_bo,
					 NULL);
1624 1625 1626 1627
		if (ret) {
			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
			amdgpu_ttm_training_reserve_vram_fini(adev);
			return ret;
1628
		}
1629
		ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1630
	}
1631 1632

	ret = amdgpu_bo_create_kernel_at(adev,
1633 1634
				adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
				adev->mman.discovery_tmr_size,
1635
				AMDGPU_GEM_DOMAIN_VRAM,
1636
				&adev->mman.discovery_memory,
1637
				NULL);
1638
	if (ret) {
1639
		DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1640
		amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
1641
		return ret;
1642 1643 1644 1645 1646
	}

	return 0;
}

1647
/*
1648 1649
 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
 * gtt/vram related fields.
1650 1651 1652 1653 1654 1655
 *
 * This initializes all of the memory space pools that the TTM layer
 * will need such as the GTT space (system memory mapped to the device),
 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
 * can be mapped per VMID.
 */
A
Alex Deucher 已提交
1656 1657
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
1658
	uint64_t gtt_size;
A
Alex Deucher 已提交
1659
	int r;
1660
	u64 vis_vram_limit;
A
Alex Deucher 已提交
1661

1662 1663
	mutex_init(&adev->mman.gtt_window_lock);

A
Alex Deucher 已提交
1664
	/* No others user of address space so set it to 0 */
1665
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
1666 1667
			       adev_to_drm(adev)->anon_inode->i_mapping,
			       adev_to_drm(adev)->vma_offset_manager,
1668
			       adev->need_swiotlb,
1669
			       dma_addressing_limited(adev->dev));
A
Alex Deucher 已提交
1670 1671 1672 1673 1674
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
	adev->mman.initialized = true;
1675

1676
	/* Initialize VRAM pool with all of VRAM divided into pages */
1677
	r = amdgpu_vram_mgr_init(adev);
A
Alex Deucher 已提交
1678 1679 1680 1681
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}
1682 1683 1684 1685

	/* Reduce size of CPU-visible VRAM if requested */
	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
	if (amdgpu_vis_vram_limit > 0 &&
1686 1687
	    vis_vram_limit <= adev->gmc.visible_vram_size)
		adev->gmc.visible_vram_size = vis_vram_limit;
1688

A
Alex Deucher 已提交
1689
	/* Change the size here instead of the init above so only lpfn is affected */
1690
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
1691
#ifdef CONFIG_64BIT
1692
#ifdef CONFIG_X86
1693 1694 1695 1696 1697
	if (adev->gmc.xgmi.connected_to_cpu)
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);

	else
1698
#endif
1699 1700
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1701
#endif
A
Alex Deucher 已提交
1702

1703 1704 1705 1706
	/*
	 *The reserved vram for firmware must be pinned to the specified
	 *place on the VRAM, so reserve it early.
	 */
1707
	r = amdgpu_ttm_fw_reserve_vram_init(adev);
1708 1709 1710 1711
	if (r) {
		return r;
	}

1712
	/*
1713 1714 1715
	 * only NAVI10 and onwards ASIC support for IP discovery.
	 * If IP discovery enabled, a block of memory should be
	 * reserved for IP discovey.
1716
	 */
1717
	if (adev->mman.discovery_bin) {
1718
		r = amdgpu_ttm_reserve_tmr(adev);
1719 1720 1721
		if (r)
			return r;
	}
1722

1723 1724 1725 1726
	/* allocate memory as required for VGA
	 * This is used for VGA emulation and pre-OS scanout buffers to
	 * avoid display artifacts while transitioning between pre-OS
	 * and driver.  */
1727
	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
1728
				       AMDGPU_GEM_DOMAIN_VRAM,
1729
				       &adev->mman.stolen_vga_memory,
1730
				       NULL);
C
Christian König 已提交
1731 1732
	if (r)
		return r;
1733 1734
	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
				       adev->mman.stolen_extended_size,
1735
				       AMDGPU_GEM_DOMAIN_VRAM,
1736
				       &adev->mman.stolen_extended_memory,
1737
				       NULL);
C
Christian König 已提交
1738 1739
	if (r)
		return r;
1740 1741 1742 1743 1744 1745 1746
	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
				       adev->mman.stolen_reserved_size,
				       AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->mman.stolen_reserved_memory,
				       NULL);
	if (r)
		return r;
1747

A
Alex Deucher 已提交
1748
	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
1749
		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
1750

	/* Compute GTT size, either based on 3/4 of the size of RAM
	 * or whatever the user passed on module init */
	if (amdgpu_gtt_size == -1) {
		struct sysinfo si;

		si_meminfo(&si);
		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
			       adev->gmc.mc_vram_size),
			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	} else
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;

	/* Initialize GTT memory pool */
	r = amdgpu_gtt_mgr_init(adev, gtt_size);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned)(gtt_size / (1024 * 1024)));

	/* Initialize preemptible memory pool */
	r = amdgpu_preempt_mgr_init(adev);
	if (r) {
		DRM_ERROR("Failed initializing PREEMPT heap.\n");
		return r;
	}

	/* Initialize various on-chip memory pools */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing GWS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing OA heap.\n");
		return r;
	}

	return 0;
}

/*
 * amdgpu_ttm_fini - De-initialize the TTM memory pools
 */
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
	int idx;
	if (!adev->mman.initialized)
		return;

	amdgpu_ttm_training_reserve_vram_fini(adev);
	/* return the stolen vga memory back to VRAM */
	amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
	/* return the IP Discovery TMR memory back to VRAM */
	amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
	if (adev->mman.stolen_reserved_size)
		amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
				      NULL, NULL);
	amdgpu_ttm_fw_reserve_vram_fini(adev);

	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
		if (adev->mman.aper_base_kaddr)
			iounmap(adev->mman.aper_base_kaddr);
		adev->mman.aper_base_kaddr = NULL;

		drm_dev_exit(idx);
	}

	amdgpu_vram_mgr_fini(adev);
	amdgpu_gtt_mgr_fini(adev);
	amdgpu_preempt_mgr_fini(adev);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_device_fini(&adev->mman.bdev);
	adev->mman.initialized = false;
	DRM_INFO("amdgpu: ttm finalized\n");
}

/**
 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
 *
 * @adev: amdgpu_device pointer
 * @enable: true when we can use buffer functions.
 *
 * Enable/disable use of buffer functions during suspend/resume. This should
 * only be called at bootup or when userspace isn't running.
 */
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	uint64_t size;
	int r;

	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
	    adev->mman.buffer_funcs_enabled == enable)
		return;

	if (enable) {
		struct amdgpu_ring *ring;
		struct drm_gpu_scheduler *sched;

		ring = adev->mman.buffer_funcs_ring;
		sched = &ring->sched;
		r = drm_sched_entity_init(&adev->mman.entity,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
			return;
		}
	} else {
		drm_sched_entity_destroy(&adev->mman.entity);
		dma_fence_put(man->move);
		man->move = NULL;
	}

	/* this just adjusts TTM's idea of the size, which sets lpfn to the correct value */
	if (enable)
		size = adev->gmc.real_vram_size;
	else
		size = adev->gmc.visible_vram_size;
	man->size = size;
	adev->mman.buffer_funcs_enabled = enable;
}

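/**
 * amdgpu_copy_buffer - schedule a GPU copy between two buffer addresses
 *
 * @ring: ring the copy is submitted to, normally the buffer funcs ring
 * @src_offset: source address in the GPU address space
 * @dst_offset: destination address in the GPU address space
 * @byte_count: number of bytes to copy
 * @resv: optional reservation object to wait for before the copy starts
 * @fence: returned fence that signals completion of the copy
 * @direct_submit: submit the IB directly to the ring instead of the entity
 * @vm_needs_flush: request a VM flush before the copy is executed
 * @tmz: perform the copy as a TMZ protected transfer
 *
 * Splits the range according to the engine's copy_max_bytes limit and emits
 * one copy command per chunk in a single IB.
 *
 * Returns:
 * 0 on success, negative error code otherwise.
 */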
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
		       uint64_t dst_offset, uint32_t byte_count,
		       struct dma_resv *resv,
		       struct dma_fence **fence, bool direct_submit,
		       bool vm_needs_flush, bool tmz)
{
	enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
		AMDGPU_IB_POOL_DELAYED;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;

	uint32_t max_bytes;
	unsigned num_loops, num_dw;
	unsigned i;
	int r;

	if (direct_submit && !ring->sched.ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
	if (r)
		return r;

	if (vm_needs_flush) {
		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
					adev->gmc.pdb0_bo : adev->gart.bo);
		job->vm_needs_flush = true;
	}
	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_SYNC_ALWAYS,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
					dst_offset, cur_size_in_bytes, tmz);

		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
		byte_count -= cur_size_in_bytes;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	if (direct_submit)
		r = amdgpu_job_submit_direct(job, ring, fence);
	else
		r = amdgpu_job_submit(job, &adev->mman.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return r;

error_free:
	amdgpu_job_free(job);
	DRM_ERROR("Error scheduling IBs (%d)\n", r);
	return r;
}

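/**
 * amdgpu_fill_buffer - fill a buffer object with a 32bit pattern
 *
 * @bo: buffer object to fill
 * @src_data: 32bit value the memory is filled with
 * @resv: optional reservation object to wait for before the fill starts
 * @fence: returned fence that signals completion of the fill
 *
 * Walks the backing resource with an amdgpu_res_cursor and emits one fill
 * command per contiguous chunk, limited by the engine's fill_max_bytes.
 *
 * Returns:
 * 0 on success, negative error code otherwise.
 */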
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
		       uint32_t src_data,
		       struct dma_resv *resv,
		       struct dma_fence **fence)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

	struct amdgpu_res_cursor cursor;
	unsigned int num_loops, num_dw;
	uint64_t num_bytes;

	struct amdgpu_job *job;
	int r;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to clear memory with ring turned off.\n");
		return -EINVAL;
	}

	if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) {
		DRM_ERROR("Trying to clear preemptible memory.\n");
		return -EINVAL;
	}

	if (bo->tbo.resource->mem_type == TTM_PL_TT) {
		r = amdgpu_ttm_alloc_gart(&bo->tbo);
		if (r)
			return r;
	}

	num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT;
	num_loops = 0;

	amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
	while (cursor.remaining) {
		num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes);
		amdgpu_res_next(&cursor, cursor.size);
	}
	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;

	/* for IB padding */
	num_dw += 64;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
				     &job);
	if (r)
		return r;

	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_SYNC_ALWAYS,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

	amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
	while (cursor.remaining) {
		uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes);
		uint64_t dst_addr = cursor.start;

		dst_addr += amdgpu_ttm_domain_start(adev,
						    bo->tbo.resource->mem_type);
		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
					cur_size);

		amdgpu_res_next(&cursor, cur_size);
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_ttm_evict_resources - evict memory buffers
 * @adev: amdgpu device object
 * @mem_type: evicted BO's memory type
 *
 * Evicts all @mem_type buffers on the lru list of the memory type.
 *
 * Returns:
 * 0 for success or a negative error code on failure.
 */
int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
{
	struct ttm_resource_manager *man;

	switch (mem_type) {
	case TTM_PL_VRAM:
	case TTM_PL_TT:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_OA:
		man = ttm_manager_type(&adev->mman.bdev, mem_type);
		break;
	default:
		DRM_ERROR("Trying to evict invalid memory type\n");
		return -EINVAL;
	}

	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
}

#if defined(CONFIG_DEBUG_FS)

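/*
 * The amdgpu_mm_*_table_show() functions below dump the state of the
 * corresponding TTM resource manager (VRAM, TT, GDS, GWS, OA) through
 * debugfs; amdgpu_ttm_page_pool_show() does the same for the TTM page pool.
 */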
static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    TTM_PL_VRAM);
	struct drm_printer p = drm_seq_file_printer(m);

	ttm_resource_manager_debug(man, &p);
	return 0;
}

static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;

	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}

static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    TTM_PL_TT);
	struct drm_printer p = drm_seq_file_printer(m);

	ttm_resource_manager_debug(man, &p);
	return 0;
}

static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_GDS);
	struct drm_printer p = drm_seq_file_printer(m);

	ttm_resource_manager_debug(man, &p);
	return 0;
}

static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_GWS);
	struct drm_printer p = drm_seq_file_printer(m);

	ttm_resource_manager_debug(man, &p);
	return 0;
}

static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_OA);
	struct drm_printer p = drm_seq_file_printer(m);

	ttm_resource_manager_debug(man, &p);
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);

/*
 * amdgpu_ttm_vram_read - Linear read access to VRAM
 *
 * Accesses VRAM via MMIO for debugging purposes.
 */
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->gmc.mc_vram_size)
		return -ENXIO;

	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
	while (size) {
		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];

		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
		if (copy_to_user(buf, value, bytes))
			return -EFAULT;

		result += bytes;
		buf += bytes;
		*pos += bytes;
		size -= bytes;
	}

	return result;
}

/*
 * amdgpu_ttm_vram_write - Linear write access to VRAM
 *
 * Accesses VRAM via MMIO for debugging purposes.
 */
static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->gmc.mc_vram_size)
		return -ENXIO;

	while (size) {
		uint32_t value;

		if (*pos >= adev->gmc.mc_vram_size)
			return result;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		amdgpu_device_mm_access(adev, *pos, &value, 4, true);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

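/* debugfs file operations backing the amdgpu_vram entry */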
static const struct file_operations amdgpu_ttm_vram_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_vram_read,
	.write = amdgpu_ttm_vram_write,
	.llseek = default_llseek,
};

/*
 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
 *
 * This function is used to read memory that has been mapped to the
 * GPU and the known addresses are not physical addresses but instead
 * bus addresses (e.g., what you'd put in an IB or ring buffer).
 */
static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
				 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	struct iommu_domain *dom;
	ssize_t result = 0;
	int r;

	/* retrieve the IOMMU domain if any for this device */
	dom = iommu_get_domain_for_dev(adev->dev);

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;
		unsigned long pfn;
		struct page *p;
		void *ptr;

		bytes = bytes < size ? bytes : size;

		/* Translate the bus address to a physical address.  If
		 * the domain is NULL it means there is no IOMMU active
		 * and the address translation is the identity
		 */
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;

		pfn = addr >> PAGE_SHIFT;
		if (!pfn_valid(pfn))
			return -EPERM;

		p = pfn_to_page(pfn);
		if (p->mapping != adev->mman.bdev.dev_mapping)
			return -EPERM;

		ptr = kmap(p);
		r = copy_to_user(buf, ptr + off, bytes);
		kunmap(p);
		if (r)
			return -EFAULT;

		size -= bytes;
		*pos += bytes;
		result += bytes;
	}

	return result;
}

/*
 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
 *
 * This function is used to write memory that has been mapped to the
 * GPU and the known addresses are not physical addresses but instead
 * bus addresses (e.g., what you'd put in an IB or ring buffer).
 */
static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
				 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	struct iommu_domain *dom;
	ssize_t result = 0;
	int r;

	dom = iommu_get_domain_for_dev(adev->dev);

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;
		unsigned long pfn;
		struct page *p;
		void *ptr;

		bytes = bytes < size ? bytes : size;

		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;

		pfn = addr >> PAGE_SHIFT;
		if (!pfn_valid(pfn))
			return -EPERM;

		p = pfn_to_page(pfn);
		if (p->mapping != adev->mman.bdev.dev_mapping)
			return -EPERM;

		ptr = kmap(p);
		r = copy_from_user(ptr + off, buf, bytes);
		kunmap(p);
		if (r)
			return -EFAULT;

		size -= bytes;
		*pos += bytes;
		result += bytes;
	}

	return result;
}

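/* debugfs file operations backing the amdgpu_iomem entry */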
static const struct file_operations amdgpu_ttm_iomem_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_iomem_read,
	.write = amdgpu_iomem_write,
	.llseek = default_llseek
};

#endif

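/*
 * amdgpu_ttm_debugfs_init - register the TTM related debugfs files
 *
 * Creates the debugfs entries for raw VRAM/IOMEM access and for dumping the
 * state of the individual memory managers and the TTM page pool.
 */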
void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
			    &amdgpu_mm_vram_table_fops);
	debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev,
			    &amdgpu_mm_tt_table_fops);
	debugfs_create_file("amdgpu_gds_mm", 0444, root, adev,
			    &amdgpu_mm_gds_table_fops);
	debugfs_create_file("amdgpu_gws_mm", 0444, root, adev,
			    &amdgpu_mm_gws_table_fops);
	debugfs_create_file("amdgpu_oa_mm", 0444, root, adev,
			    &amdgpu_mm_oa_table_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);
#endif
}