amdgpu_ttm.c 63.1 KB
Newer Older
A
Alex Deucher 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
32

33
#include <linux/dma-mapping.h>
34 35 36
#include <linux/iommu.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
37
#include <linux/sched/mm.h>
38 39 40 41
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
42
#include <linux/dma-buf.h>
43
#include <linux/sizes.h>
44
#include <linux/module.h>
45

46
#include <drm/drm_drv.h>
47 48 49
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
50
#include <drm/ttm/ttm_range_manager.h>
51

A
Alex Deucher 已提交
52
#include <drm/amdgpu_drm.h>
53
#include <drm/drm_drv.h>
54

A
Alex Deucher 已提交
55
#include "amdgpu.h"
56
#include "amdgpu_object.h"
57
#include "amdgpu_trace.h"
58
#include "amdgpu_amdkfd.h"
59
#include "amdgpu_sdma.h"
60
#include "amdgpu_ras.h"
61
#include "amdgpu_atomfirmware.h"
62
#include "amdgpu_res_cursor.h"
A
Alex Deucher 已提交
63 64
#include "bif/bif_4_1_d.h"

65 66
MODULE_IMPORT_NS(DMA_BUF);

67 68
#define AMDGPU_TTM_VRAM_MAX_DW_READ	(size_t)128

69
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
70 71
				   struct ttm_tt *ttm,
				   struct ttm_resource *bo_mem);
72
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
73
				      struct ttm_tt *ttm);
74

75
static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
76
				    unsigned int type,
77
				    uint64_t size_in_page)
78
{
79
	return ttm_range_man_init(&adev->mman.bdev, type,
80
				  false, size_in_page);
A
Alex Deucher 已提交
81 82
}

83 84 85 86 87 88 89 90
/**
 * amdgpu_evict_flags - Compute placement flags
 *
 * @bo: The buffer object to evict
 * @placement: Possible destination(s) for evicted BO
 *
 * Fill in placement data when ttm_bo_evict() is called
 */
A
Alex Deucher 已提交
91 92 93
static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
				struct ttm_placement *placement)
{
94
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
95
	struct amdgpu_bo *abo;
96
	static const struct ttm_place placements = {
A
Alex Deucher 已提交
97 98
		.fpfn = 0,
		.lpfn = 0,
99
		.mem_type = TTM_PL_SYSTEM,
100
		.flags = 0
A
Alex Deucher 已提交
101 102
	};

103
	/* Don't handle scatter gather BOs */
104 105 106 107 108 109
	if (bo->type == ttm_bo_type_sg) {
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;
	}

110
	/* Object isn't an AMDGPU object so ignore */
111
	if (!amdgpu_bo_is_amdgpu_bo(bo)) {
A
Alex Deucher 已提交
112 113 114 115 116 117
		placement->placement = &placements;
		placement->busy_placement = &placements;
		placement->num_placement = 1;
		placement->num_busy_placement = 1;
		return;
	}
118

119
	abo = ttm_to_amdgpu_bo(bo);
120 121 122 123 124
	if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;
	}
125 126

	switch (bo->resource->mem_type) {
127 128 129 130 131 132 133
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_OA:
		placement->num_placement = 0;
		placement->num_busy_placement = 0;
		return;

A
Alex Deucher 已提交
134
	case TTM_PL_VRAM:
135
		if (!adev->mman.buffer_funcs_enabled) {
136
			/* Move to system memory */
137
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
138
		} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
139 140
			   !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
			   amdgpu_bo_in_cpu_visible_vram(abo)) {
141 142 143 144 145 146

			/* Try evicting to the CPU inaccessible part of VRAM
			 * first, but only set GTT as busy placement, so this
			 * BO will be evicted to GTT rather than causing other
			 * BOs to be evicted from VRAM
			 */
147
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
148 149
							AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
150
			abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
151 152 153
			abo->placements[0].lpfn = 0;
			abo->placement.busy_placement = &abo->placements[1];
			abo->placement.num_busy_placement = 1;
154
		} else {
155
			/* Move to GTT memory */
156 157
			amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT |
							AMDGPU_GEM_DOMAIN_CPU);
158
		}
A
Alex Deucher 已提交
159 160
		break;
	case TTM_PL_TT:
161
	case AMDGPU_PL_PREEMPT:
A
Alex Deucher 已提交
162
	default:
163
		amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
164
		break;
A
Alex Deucher 已提交
165
	}
166
	*placement = abo->placement;
A
Alex Deucher 已提交
167 168
}

169 170 171 172
/**
 * amdgpu_ttm_map_buffer - Map memory into the GART windows
 * @bo: buffer object to map
 * @mem: memory object to map
173
 * @mm_cur: range to map
174 175 176 177 178 179 180 181 182 183
 * @num_pages: number of pages to map
 * @window: which GART window to use
 * @ring: DMA ring to use for the copy
 * @tmz: if we should setup a TMZ enabled mapping
 * @addr: resulting address inside the MC address space
 *
 * Setup one of the GART windows to access a specific piece of memory or return
 * the physical address for local memory.
 */
static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
184
				 struct ttm_resource *mem,
185 186 187 188
				 struct amdgpu_res_cursor *mm_cur,
				 unsigned num_pages, unsigned window,
				 struct amdgpu_ring *ring, bool tmz,
				 uint64_t *addr)
189 190 191 192 193 194
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;
	unsigned num_dw, num_bytes;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
195
	void *cpu_addr;
196
	uint64_t flags;
197
	unsigned int i;
198 199 200 201
	int r;

	BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
	       AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
202
	BUG_ON(mem->mem_type == AMDGPU_PL_PREEMPT);
203 204

	/* Map only what can't be accessed directly */
205
	if (!tmz && mem->start != AMDGPU_BO_INVALID_OFFSET) {
206 207
		*addr = amdgpu_ttm_domain_start(adev, mem->mem_type) +
			mm_cur->start;
208 209 210 211 212 213
		return 0;
	}

	*addr = adev->gmc.gart_start;
	*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
		AMDGPU_GPU_PAGE_SIZE;
214
	*addr += mm_cur->start & ~PAGE_MASK;
215 216

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
217
	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
218 219

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
220
				     AMDGPU_IB_POOL_DELAYED, &job);
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
	if (r)
		return r;

	src_addr = num_dw * 4;
	src_addr += job->ibs[0].gpu_addr;

	dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo);
	dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8;
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr,
				dst_addr, num_bytes, false);

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, mem);
	if (tmz)
		flags |= AMDGPU_PTE_TMZ;

239 240 241
	cpu_addr = &job->ibs[0].ptr[num_dw];

	if (mem->mem_type == TTM_PL_TT) {
242
		dma_addr_t *dma_addr;
243

244 245
		dma_addr = &bo->ttm->dma_address[mm_cur->start >> PAGE_SHIFT];
		r = amdgpu_gart_map(adev, 0, num_pages, dma_addr, flags,
246 247 248 249 250 251
				    cpu_addr);
		if (r)
			goto error_free;
	} else {
		dma_addr_t dma_address;

252
		dma_address = mm_cur->start;
253 254 255 256 257 258 259 260 261 262 263
		dma_address += adev->vm_manager.vram_base_offset;

		for (i = 0; i < num_pages; ++i) {
			r = amdgpu_gart_map(adev, i << PAGE_SHIFT, 1,
					    &dma_address, flags, cpu_addr);
			if (r)
				goto error_free;

			dma_address += PAGE_SIZE;
		}
	}
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278

	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r)
		goto error_free;

	dma_fence_put(fence);

	return r;

error_free:
	amdgpu_job_free(job);
	return r;
}

279
/**
280
 * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
281 282 283 284 285 286 287
 * @adev: amdgpu device
 * @src: buffer/address where to read from
 * @dst: buffer/address where to write to
 * @size: number of bytes to copy
 * @tmz: if a secure copy should be used
 * @resv: resv object to sync to
 * @f: Returns the last fence if multiple jobs are submitted.
288 289 290 291 292 293 294
 *
 * The function copies @size bytes from {src->mem + src->offset} to
 * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
 * move and different for a BO to BO copy.
 *
 */
int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
295 296
			       const struct amdgpu_copy_mem *src,
			       const struct amdgpu_copy_mem *dst,
297
			       uint64_t size, bool tmz,
298
			       struct dma_resv *resv,
299
			       struct dma_fence **f)
300
{
301 302 303
	const uint32_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
					AMDGPU_GPU_PAGE_SIZE);

304
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
305
	struct amdgpu_res_cursor src_mm, dst_mm;
306
	struct dma_fence *fence = NULL;
307
	int r = 0;
308

309
	if (!adev->mman.buffer_funcs_enabled) {
A
Alex Deucher 已提交
310 311 312 313
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

314 315
	amdgpu_res_first(src->mem, src->offset, size, &src_mm);
	amdgpu_res_first(dst->mem, dst->offset, size, &dst_mm);
316

317
	mutex_lock(&adev->mman.gtt_window_lock);
318 319 320
	while (src_mm.remaining) {
		uint32_t src_page_offset = src_mm.start & ~PAGE_MASK;
		uint32_t dst_page_offset = dst_mm.start & ~PAGE_MASK;
321
		struct dma_fence *next;
322 323
		uint32_t cur_size;
		uint64_t from, to;
324

325 326 327
		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
		 * begins at an offset, then adjust the size accordingly
		 */
328
		cur_size = max(src_page_offset, dst_page_offset);
329
		cur_size = min(min3(src_mm.size, dst_mm.size, size),
330
			       (uint64_t)(GTT_MAX_BYTES - cur_size));
331 332

		/* Map src to window 0 and dst to window 1. */
333
		r = amdgpu_ttm_map_buffer(src->bo, src->mem, &src_mm,
334
					  PFN_UP(cur_size + src_page_offset),
335
					  0, ring, tmz, &from);
336 337
		if (r)
			goto error;
338

339
		r = amdgpu_ttm_map_buffer(dst->bo, dst->mem, &dst_mm,
340
					  PFN_UP(cur_size + dst_page_offset),
341
					  1, ring, tmz, &to);
342 343
		if (r)
			goto error;
344

345
		r = amdgpu_copy_buffer(ring, from, to, cur_size,
346
				       resv, &next, false, true, tmz);
347 348 349
		if (r)
			goto error;

350
		dma_fence_put(fence);
351 352
		fence = next;

353 354
		amdgpu_res_next(&src_mm, cur_size);
		amdgpu_res_next(&dst_mm, cur_size);
355
	}
356
error:
357
	mutex_unlock(&adev->mman.gtt_window_lock);
358 359 360 361 362 363
	if (f)
		*f = dma_fence_get(fence);
	dma_fence_put(fence);
	return r;
}

364
/*
365 366
 * amdgpu_move_blit - Copy an entire buffer to another buffer
 *
367 368
 * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
 * help move buffers to and from VRAM.
369
 */
370
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
371
			    bool evict,
372 373
			    struct ttm_resource *new_mem,
			    struct ttm_resource *old_mem)
374 375
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
376
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
377 378 379 380 381 382 383 384 385 386 387 388 389
	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = bo;
	dst.bo = bo;
	src.mem = old_mem;
	dst.mem = new_mem;
	src.offset = 0;
	dst.offset = 0;

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
				       new_mem->num_pages << PAGE_SHIFT,
390
				       amdgpu_bo_encrypted(abo),
391
				       bo->base.resv, &fence);
392 393
	if (r)
		goto error;
394

395 396
	/* clear the space being freed */
	if (old_mem->mem_type == TTM_PL_VRAM &&
397
	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) {
398 399 400 401 402 403 404 405 406 407 408 409
		struct dma_fence *wipe_fence = NULL;

		r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON,
				       NULL, &wipe_fence);
		if (r) {
			goto error;
		} else if (wipe_fence) {
			dma_fence_put(fence);
			fence = wipe_fence;
		}
	}

410 411
	/* Always block for VM page tables before committing the new location */
	if (bo->type == ttm_bo_type_kernel)
412
		r = ttm_bo_move_accel_cleanup(bo, fence, true, false, new_mem);
413
	else
414
		r = ttm_bo_move_accel_cleanup(bo, fence, evict, true, new_mem);
415
	dma_fence_put(fence);
A
Alex Deucher 已提交
416
	return r;
417 418 419

error:
	if (fence)
420 421
		dma_fence_wait(fence, false);
	dma_fence_put(fence);
422
	return r;
A
Alex Deucher 已提交
423 424
}

425
/*
426 427 428 429 430
 * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
 *
 * Called by amdgpu_bo_move()
 */
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
431
			       struct ttm_resource *mem)
432
{
433 434
	uint64_t mem_size = (u64)mem->num_pages << PAGE_SHIFT;
	struct amdgpu_res_cursor cursor;
435 436 437 438 439 440 441

	if (mem->mem_type == TTM_PL_SYSTEM ||
	    mem->mem_type == TTM_PL_TT)
		return true;
	if (mem->mem_type != TTM_PL_VRAM)
		return false;

442 443
	amdgpu_res_first(mem, 0, mem_size, &cursor);

444
	/* ttm_resource_ioremap only supports contiguous memory */
445
	if (cursor.size != mem_size)
446 447
		return false;

448
	return cursor.start + cursor.size <= adev->gmc.visible_vram_size;
449 450
}

451
/*
452 453 454 455
 * amdgpu_bo_move - Move a buffer object to a new memory location
 *
 * Called by ttm_bo_handle_move_mem()
 */
456 457
static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
			  struct ttm_operation_ctx *ctx,
458 459
			  struct ttm_resource *new_mem,
			  struct ttm_place *hop)
A
Alex Deucher 已提交
460 461
{
	struct amdgpu_device *adev;
462
	struct amdgpu_bo *abo;
463
	struct ttm_resource *old_mem = bo->resource;
A
Alex Deucher 已提交
464 465
	int r;

466 467
	if (new_mem->mem_type == TTM_PL_TT ||
	    new_mem->mem_type == AMDGPU_PL_PREEMPT) {
468 469 470 471 472
		r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
		if (r)
			return r;
	}

473
	/* Can't move a pinned BO */
474
	abo = ttm_to_amdgpu_bo(bo);
475
	if (WARN_ON_ONCE(abo->tbo.pin_count > 0))
476 477
		return -EINVAL;

478
	adev = amdgpu_ttm_adev(bo->bdev);
479

A
Alex Deucher 已提交
480
	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
481
		ttm_bo_move_null(bo, new_mem);
482
		goto out;
A
Alex Deucher 已提交
483
	}
484
	if (old_mem->mem_type == TTM_PL_SYSTEM &&
485 486
	    (new_mem->mem_type == TTM_PL_TT ||
	     new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
487
		ttm_bo_move_null(bo, new_mem);
488
		goto out;
A
Alex Deucher 已提交
489
	}
490 491
	if ((old_mem->mem_type == TTM_PL_TT ||
	     old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
492
	    new_mem->mem_type == TTM_PL_SYSTEM) {
493
		r = ttm_bo_wait_ctx(bo, ctx);
494
		if (r)
495
			return r;
496 497

		amdgpu_ttm_backend_unbind(bo->bdev, bo->ttm);
498
		ttm_resource_free(bo, &bo->resource);
499
		ttm_bo_assign_mem(bo, new_mem);
500
		goto out;
501
	}
502

503 504 505 506 507 508 509
	if (old_mem->mem_type == AMDGPU_PL_GDS ||
	    old_mem->mem_type == AMDGPU_PL_GWS ||
	    old_mem->mem_type == AMDGPU_PL_OA ||
	    new_mem->mem_type == AMDGPU_PL_GDS ||
	    new_mem->mem_type == AMDGPU_PL_GWS ||
	    new_mem->mem_type == AMDGPU_PL_OA) {
		/* Nothing to save here */
510
		ttm_bo_move_null(bo, new_mem);
511
		goto out;
512
	}
513

514 515 516 517 518 519 520 521 522
	if (bo->type == ttm_bo_type_device &&
	    new_mem->mem_type == TTM_PL_VRAM &&
	    old_mem->mem_type != TTM_PL_VRAM) {
		/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
		 * accesses the BO after it's moved.
		 */
		abo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	}

523 524 525 526 527 528 529 530
	if (adev->mman.buffer_funcs_enabled) {
		if (((old_mem->mem_type == TTM_PL_SYSTEM &&
		      new_mem->mem_type == TTM_PL_VRAM) ||
		     (old_mem->mem_type == TTM_PL_VRAM &&
		      new_mem->mem_type == TTM_PL_SYSTEM))) {
			hop->fpfn = 0;
			hop->lpfn = 0;
			hop->mem_type = TTM_PL_TT;
531
			hop->flags = TTM_PL_FLAG_TEMPORARY;
532 533 534 535 536
			return -EMULTIHOP;
		}

		r = amdgpu_move_blit(bo, evict, new_mem, old_mem);
	} else {
537 538
		r = -ENODEV;
	}
A
Alex Deucher 已提交
539 540

	if (r) {
541 542 543 544
		/* Check that all memory is CPU accessible */
		if (!amdgpu_mem_visible(adev, old_mem) ||
		    !amdgpu_mem_visible(adev, new_mem)) {
			pr_err("Move buffer fallback to memcpy unavailable\n");
545
			return r;
A
Alex Deucher 已提交
546
		}
547 548 549

		r = ttm_bo_move_memcpy(bo, ctx, new_mem);
		if (r)
550
			return r;
A
Alex Deucher 已提交
551 552
	}

553
out:
A
Alex Deucher 已提交
554
	/* update statistics */
555
	atomic64_add(bo->base.size, &adev->num_bytes_moved);
556
	amdgpu_bo_move_notify(bo, evict, new_mem);
A
Alex Deucher 已提交
557 558 559
	return 0;
}

560
/*
561 562 563 564
 * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault
 *
 * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault()
 */
565 566
static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
				     struct ttm_resource *mem)
A
Alex Deucher 已提交
567
{
568
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
569
	size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
A
Alex Deucher 已提交
570 571 572 573 574 575

	switch (mem->mem_type) {
	case TTM_PL_SYSTEM:
		/* system memory */
		return 0;
	case TTM_PL_TT:
576
	case AMDGPU_PL_PREEMPT:
A
Alex Deucher 已提交
577 578 579 580
		break;
	case TTM_PL_VRAM:
		mem->bus.offset = mem->start << PAGE_SHIFT;
		/* check if it's visible */
581
		if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
A
Alex Deucher 已提交
582
			return -EINVAL;
583

584
		if (adev->mman.aper_base_kaddr &&
585
		    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
586 587 588
			mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
					mem->bus.offset;

589
		mem->bus.offset += adev->gmc.aper_base;
A
Alex Deucher 已提交
590 591 592 593 594 595 596 597
		mem->bus.is_iomem = true;
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

598 599 600
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
					   unsigned long page_offset)
{
601
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
602
	struct amdgpu_res_cursor cursor;
603

604 605
	amdgpu_res_first(bo->resource, (u64)page_offset << PAGE_SHIFT, 0,
			 &cursor);
606
	return (adev->gmc.aper_base + cursor.start) >> PAGE_SHIFT;
607 608
}

609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
/**
 * amdgpu_ttm_domain_start - Returns GPU start address
 * @adev: amdgpu device object
 * @type: type of the memory
 *
 * Returns:
 * GPU start address of a memory domain
 */

uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
{
	switch (type) {
	case TTM_PL_TT:
		return adev->gmc.gart_start;
	case TTM_PL_VRAM:
		return adev->gmc.vram_start;
	}

	return 0;
}

A
Alex Deucher 已提交
630 631 632 633
/*
 * TTM backend functions.
 */
struct amdgpu_ttm_tt {
634
	struct ttm_tt	ttm;
635
	struct drm_gem_object	*gobj;
636 637
	u64			offset;
	uint64_t		userptr;
638
	struct task_struct	*usertask;
639
	uint32_t		userflags;
640
	bool			bound;
641
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
642
	struct hmm_range	*range;
643
#endif
A
Alex Deucher 已提交
644 645
};

646
#ifdef CONFIG_DRM_AMDGPU_USERPTR
647
/*
648 649
 * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
 * memory and start HMM tracking CPU page table update
650
 *
651 652
 * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
 * once afterwards to stop HMM tracking
653
 */
654
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
A
Alex Deucher 已提交
655
{
656
	struct ttm_tt *ttm = bo->tbo.ttm;
A
Alex Deucher 已提交
657
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
658
	unsigned long start = gtt->userptr;
659
	struct vm_area_struct *vma;
660
	struct mm_struct *mm;
661
	bool readonly;
662
	int r = 0;
A
Alex Deucher 已提交
663

664 665 666
	mm = bo->notifier.mm;
	if (unlikely(!mm)) {
		DRM_DEBUG_DRIVER("BO is not registered?\n");
667
		return -EFAULT;
668
	}
669

670 671 672 673
	/* Another get_user_pages is running at the same time?? */
	if (WARN_ON(gtt->range))
		return -EFAULT;

674
	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
675 676
		return -ESRCH;

677
	mmap_read_lock(mm);
678 679
	vma = vma_lookup(mm, start);
	if (unlikely(!vma)) {
680
		r = -EFAULT;
681
		goto out_unlock;
682
	}
683
	if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
684
		vma->vm_file)) {
685
		r = -EPERM;
686
		goto out_unlock;
687
	}
688

689 690 691
	readonly = amdgpu_ttm_tt_is_readonly(ttm);
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       ttm->num_pages, &gtt->range, readonly,
692
				       true, NULL);
693
out_unlock:
694
	mmap_read_unlock(mm);
695 696 697
	if (r)
		pr_debug("failed %d to get user pages 0x%lx\n", r, start);

698
	mmput(mm);
699

700 701 702
	return r;
}

703
/*
704 705
 * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
 * Check if the pages backing this ttm range have been invalidated
706
 *
707
 * Returns: true if pages are still valid
708
 */
709
bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
710
{
711
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
712
	bool r = false;
713

714 715
	if (!gtt || !gtt->userptr)
		return false;
716

717
	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
718
		gtt->userptr, ttm->num_pages);
719

720
	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
721 722
		"No user pages to check\n");

723
	if (gtt->range) {
724 725 726 727
		/*
		 * FIXME: Must always hold notifier_lock for this, and must
		 * not ignore the return code.
		 */
728
		r = amdgpu_hmm_range_get_pages_done(gtt->range);
729
		gtt->range = NULL;
730
	}
731

732
	return !r;
733
}
734
#endif
735

736
/*
737
 * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
738
 *
739
 * Called by amdgpu_cs_list_validate(). This creates the page list
740 741
 * that backs user memory and will ultimately be mapped into the device
 * address space.
742
 */
743
void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages)
744
{
745
	unsigned long i;
746

747
	for (i = 0; i < ttm->num_pages; ++i)
748
		ttm->pages[i] = pages ? pages[i] : NULL;
749 750
}

751
/*
752
 * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
753 754 755
 *
 * Called by amdgpu_ttm_backend_bind()
 **/
756
static int amdgpu_ttm_tt_pin_userptr(struct ttm_device *bdev,
D
Dave Airlie 已提交
757
				     struct ttm_tt *ttm)
758
{
D
Dave Airlie 已提交
759
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
760 761 762 763
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
764
	int r;
765

766
	/* Allocate an SG array and squash pages into it */
A
Alex Deucher 已提交
767
	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
X
xinhui pan 已提交
768
				      (u64)ttm->num_pages << PAGE_SHIFT,
A
Alex Deucher 已提交
769 770 771 772
				      GFP_KERNEL);
	if (r)
		goto release_sg;

773
	/* Map SG to device */
774 775
	r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
	if (r)
A
Alex Deucher 已提交
776 777
		goto release_sg;

778
	/* convert SG to linear array of pages and dma addresses */
779 780
	drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
				       ttm->num_pages);
A
Alex Deucher 已提交
781 782 783 784 785

	return 0;

release_sg:
	kfree(ttm->sg);
786
	ttm->sg = NULL;
A
Alex Deucher 已提交
787 788 789
	return r;
}

790
/*
791 792
 * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages
 */
793
static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
D
Dave Airlie 已提交
794
					struct ttm_tt *ttm)
A
Alex Deucher 已提交
795
{
D
Dave Airlie 已提交
796
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
797 798 799 800 801 802
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* double check that we don't free the table twice */
803
	if (!ttm->sg || !ttm->sg->sgl)
A
Alex Deucher 已提交
804 805
		return;

806
	/* unmap the pages mapped to the device */
807
	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
808
	sg_free_table(ttm->sg);
809

810
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
811 812 813 814 815
	if (gtt->range) {
		unsigned long i;

		for (i = 0; i < ttm->num_pages; i++) {
			if (ttm->pages[i] !=
816
			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
817 818 819 820 821
				break;
		}

		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
	}
822
#endif
A
Alex Deucher 已提交
823 824
}

825
static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
826 827 828 829 830 831 832 833
				struct ttm_buffer_object *tbo,
				uint64_t flags)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(tbo);
	struct ttm_tt *ttm = tbo->ttm;
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	int r;

834 835 836
	if (amdgpu_bo_encrypted(abo))
		flags |= AMDGPU_PTE_TMZ;

837
	if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
838 839 840
		uint64_t page_idx = 1;

		r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
841
				gtt->ttm.dma_address, flags);
842 843 844
		if (r)
			goto gart_bind_fail;

845 846 847 848
		/* The memory type of the first page defaults to UC. Now
		 * modify the memory type to NC from the second page of
		 * the BO onward.
		 */
849 850
		flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
		flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
851 852 853 854 855 856 857

		r = amdgpu_gart_bind(adev,
				gtt->offset + (page_idx << PAGE_SHIFT),
				ttm->num_pages - page_idx,
				&(gtt->ttm.dma_address[page_idx]), flags);
	} else {
		r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
858
				     gtt->ttm.dma_address, flags);
859 860 861 862
	}

gart_bind_fail:
	if (r)
863
		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
864 865 866 867 868
			  ttm->num_pages, gtt->offset);

	return r;
}

869
/*
870 871 872 873 874
 * amdgpu_ttm_backend_bind - Bind GTT memory
 *
 * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem().
 * This handles binding GTT memory to the device address space.
 */
875
static int amdgpu_ttm_backend_bind(struct ttm_device *bdev,
D
Dave Airlie 已提交
876
				   struct ttm_tt *ttm,
877
				   struct ttm_resource *bo_mem)
A
Alex Deucher 已提交
878
{
D
Dave Airlie 已提交
879
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
880
	struct amdgpu_ttm_tt *gtt = (void*)ttm;
881
	uint64_t flags;
882
	int r = 0;
A
Alex Deucher 已提交
883

884 885 886 887 888 889
	if (!bo_mem)
		return -EINVAL;

	if (gtt->bound)
		return 0;

890
	if (gtt->userptr) {
D
Dave Airlie 已提交
891
		r = amdgpu_ttm_tt_pin_userptr(bdev, ttm);
892 893 894 895
		if (r) {
			DRM_ERROR("failed to pin userptr\n");
			return r;
		}
M
Matthew Auld 已提交
896
	} else if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) {
897 898 899 900 901 902 903 904 905 906 907 908 909 910
		if (!ttm->sg) {
			struct dma_buf_attachment *attach;
			struct sg_table *sgt;

			attach = gtt->gobj->import_attach;
			sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL);
			if (IS_ERR(sgt))
				return PTR_ERR(sgt);

			ttm->sg = sgt;
		}

		drm_prime_sg_to_dma_addr_array(ttm->sg, gtt->ttm.dma_address,
					       ttm->num_pages);
911
	}
912

A
Alex Deucher 已提交
913
	if (!ttm->num_pages) {
914
		WARN(1, "nothing to bind %u pages for mreg %p back %p!\n",
A
Alex Deucher 已提交
915 916 917
		     ttm->num_pages, bo_mem, ttm);
	}

918 919
	if (bo_mem->mem_type != TTM_PL_TT ||
	    !amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
920
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
921
		return 0;
922
	}
923

924
	/* compute PTE flags relevant to this BO memory */
C
Christian König 已提交
925
	flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
926 927

	/* bind pages into GART page tables */
928
	gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
C
Christian König 已提交
929
	r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
930
		gtt->ttm.dma_address, flags);
931

932
	if (r)
933
		DRM_ERROR("failed to bind %u pages at 0x%08llX\n",
934
			  ttm->num_pages, gtt->offset);
935
	gtt->bound = true;
936
	return r;
937 938
}

939
/*
940 941 942 943 944 945
 * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either
 * through AGP or GART aperture.
 *
 * If bo is accessible through AGP aperture, then use AGP aperture
 * to access bo; otherwise allocate logical space in GART aperture
 * and map bo to GART aperture.
946
 */
947
int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
948
{
949
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
950
	struct ttm_operation_ctx ctx = { false, false };
951
	struct amdgpu_ttm_tt *gtt = (void *)bo->ttm;
952 953
	struct ttm_placement placement;
	struct ttm_place placements;
954
	struct ttm_resource *tmp;
955
	uint64_t addr, flags;
956 957
	int r;

958
	if (bo->resource->start != AMDGPU_BO_INVALID_OFFSET)
959 960
		return 0;

961 962
	addr = amdgpu_gmc_agp_addr(bo);
	if (addr != AMDGPU_BO_INVALID_OFFSET) {
963
		bo->resource->start = addr >> PAGE_SHIFT;
964 965
		return 0;
	}
966

967 968 969 970 971 972 973 974 975
	/* allocate GART space */
	placement.num_placement = 1;
	placement.placement = &placements;
	placement.num_busy_placement = 1;
	placement.busy_placement = &placements;
	placements.fpfn = 0;
	placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
	placements.mem_type = TTM_PL_TT;
	placements.flags = bo->resource->placement;
976

977 978 979
	r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx);
	if (unlikely(r))
		return r;
980

981 982
	/* compute PTE flags for this buffer object */
	flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, tmp);
983

984 985 986 987 988 989
	/* Bind pages */
	gtt->offset = (u64)tmp->start << PAGE_SHIFT;
	r = amdgpu_ttm_gart_bind(adev, bo, flags);
	if (unlikely(r)) {
		ttm_resource_free(bo, &tmp);
		return r;
990
	}
991

992
	amdgpu_gart_invalidate_tlb(adev);
993 994 995
	ttm_resource_free(bo, &bo->resource);
	ttm_bo_assign_mem(bo, tmp);

996
	return 0;
A
Alex Deucher 已提交
997 998
}

999
/*
1000 1001 1002 1003 1004
 * amdgpu_ttm_recover_gart - Rebind GTT pages
 *
 * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to
 * rebind GTT pages during a GPU reset.
 */
1005
int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
1006
{
1007
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
1008
	uint64_t flags;
1009 1010
	int r;

1011
	if (!tbo->ttm)
1012 1013
		return 0;

1014
	flags = amdgpu_ttm_tt_pte_flags(adev, tbo->ttm, tbo->resource);
1015 1016
	r = amdgpu_ttm_gart_bind(adev, tbo, flags);

1017
	return r;
1018 1019
}

1020
/*
1021 1022 1023 1024 1025
 * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages
 *
 * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
 * ttm_tt_destroy().
 */
1026
static void amdgpu_ttm_backend_unbind(struct ttm_device *bdev,
D
Dave Airlie 已提交
1027
				      struct ttm_tt *ttm)
A
Alex Deucher 已提交
1028
{
D
Dave Airlie 已提交
1029
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
1030
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1031
	int r;
A
Alex Deucher 已提交
1032

1033
	/* if the pages have userptr pinning then clear that first */
1034
	if (gtt->userptr) {
D
Dave Airlie 已提交
1035
		amdgpu_ttm_tt_unpin_userptr(bdev, ttm);
1036 1037 1038 1039 1040 1041 1042
	} else if (ttm->sg && gtt->gobj->import_attach) {
		struct dma_buf_attachment *attach;

		attach = gtt->gobj->import_attach;
		dma_buf_unmap_attachment(attach, ttm->sg, DMA_BIDIRECTIONAL);
		ttm->sg = NULL;
	}
1043

1044 1045 1046
	if (!gtt->bound)
		return;

1047
	if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
1048
		return;
1049

A
Alex Deucher 已提交
1050
	/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
C
Christian König 已提交
1051
	r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
1052
	if (r)
1053
		DRM_ERROR("failed to unbind %u pages at 0x%08llX\n",
1054
			  gtt->ttm.num_pages, gtt->offset);
1055
	gtt->bound = false;
A
Alex Deucher 已提交
1056 1057
}

1058
static void amdgpu_ttm_backend_destroy(struct ttm_device *bdev,
D
Dave Airlie 已提交
1059
				       struct ttm_tt *ttm)
A
Alex Deucher 已提交
1060 1061 1062
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

1063 1064 1065
	if (gtt->usertask)
		put_task_struct(gtt->usertask);

1066
	ttm_tt_fini(&gtt->ttm);
A
Alex Deucher 已提交
1067 1068 1069
	kfree(gtt);
}

1070 1071 1072 1073
/**
 * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO
 *
 * @bo: The buffer object to create a GTT ttm_tt object around
1074
 * @page_flags: Page flags to be added to the ttm_tt object
1075 1076 1077
 *
 * Called by ttm_tt_create().
 */
1078 1079
static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
					   uint32_t page_flags)
A
Alex Deucher 已提交
1080
{
1081
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
A
Alex Deucher 已提交
1082
	struct amdgpu_ttm_tt *gtt;
1083
	enum ttm_caching caching;
A
Alex Deucher 已提交
1084 1085 1086 1087 1088

	gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
	if (gtt == NULL) {
		return NULL;
	}
1089
	gtt->gobj = &bo->base;
1090

1091 1092 1093 1094 1095
	if (abo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		caching = ttm_write_combined;
	else
		caching = ttm_cached;

1096
	/* allocate space for the uninitialized page entries */
1097
	if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
A
Alex Deucher 已提交
1098 1099 1100
		kfree(gtt);
		return NULL;
	}
1101
	return &gtt->ttm;
A
Alex Deucher 已提交
1102 1103
}

1104
/*
1105 1106 1107 1108 1109
 * amdgpu_ttm_tt_populate - Map GTT pages visible to the device
 *
 * Map the pages of a ttm_tt object to an address space visible
 * to the underlying device.
 */
1110
static int amdgpu_ttm_tt_populate(struct ttm_device *bdev,
D
Dave Airlie 已提交
1111 1112
				  struct ttm_tt *ttm,
				  struct ttm_operation_ctx *ctx)
A
Alex Deucher 已提交
1113
{
D
Dave Airlie 已提交
1114
	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
A
Alex Deucher 已提交
1115
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1116 1117
	pgoff_t i;
	int ret;
A
Alex Deucher 已提交
1118

1119
	/* user pages are bound by amdgpu_ttm_tt_pin_userptr() */
1120
	if (gtt->userptr) {
1121
		ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
A
Alex Deucher 已提交
1122 1123 1124 1125 1126
		if (!ttm->sg)
			return -ENOMEM;
		return 0;
	}

M
Matthew Auld 已提交
1127
	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
1128
		return 0;
A
Alex Deucher 已提交
1129

1130 1131 1132 1133 1134 1135 1136 1137
	ret = ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx);
	if (ret)
		return ret;

	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = bdev->dev_mapping;

	return 0;
A
Alex Deucher 已提交
1138 1139
}

1140
/*
1141 1142 1143 1144 1145
 * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays
 *
 * Unmaps pages of a ttm_tt object from the device address space and
 * unpopulates the page array backing it.
 */
1146
static void amdgpu_ttm_tt_unpopulate(struct ttm_device *bdev,
1147
				     struct ttm_tt *ttm)
A
Alex Deucher 已提交
1148 1149
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
1150
	struct amdgpu_device *adev;
1151
	pgoff_t i;
A
Alex Deucher 已提交
1152

1153 1154
	amdgpu_ttm_backend_unbind(bdev, ttm);

1155
	if (gtt->userptr) {
1156
		amdgpu_ttm_tt_set_user_pages(ttm, NULL);
A
Alex Deucher 已提交
1157
		kfree(ttm->sg);
X
xinhui pan 已提交
1158
		ttm->sg = NULL;
1159 1160 1161
		return;
	}

M
Matthew Auld 已提交
1162
	if (ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
A
Alex Deucher 已提交
1163 1164
		return;

1165 1166 1167
	for (i = 0; i < ttm->num_pages; ++i)
		ttm->pages[i]->mapping = NULL;

D
Dave Airlie 已提交
1168
	adev = amdgpu_ttm_adev(bdev);
1169
	return ttm_pool_free(&adev->mman.bdev.pool, ttm);
A
Alex Deucher 已提交
1170 1171
}

1172
/**
1173 1174
 * amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
 * task
1175
 *
1176
 * @bo: The ttm_buffer_object to bind this userptr to
1177 1178 1179 1180 1181 1182
 * @addr:  The address in the current tasks VM space to use
 * @flags: Requirements of userptr object.
 *
 * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
 * to current task
 */
1183 1184
int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
			      uint64_t addr, uint32_t flags)
A
Alex Deucher 已提交
1185
{
1186
	struct amdgpu_ttm_tt *gtt;
A
Alex Deucher 已提交
1187

1188 1189 1190 1191 1192 1193
	if (!bo->ttm) {
		/* TODO: We want a separate TTM object type for userptrs */
		bo->ttm = amdgpu_ttm_tt_create(bo, 0);
		if (bo->ttm == NULL)
			return -ENOMEM;
	}
A
Alex Deucher 已提交
1194

M
Matthew Auld 已提交
1195 1196
	/* Set TTM_TT_FLAG_EXTERNAL before populate but after create. */
	bo->ttm->page_flags |= TTM_TT_FLAG_EXTERNAL;
1197

1198
	gtt = (void *)bo->ttm;
A
Alex Deucher 已提交
1199 1200
	gtt->userptr = addr;
	gtt->userflags = flags;
1201 1202 1203 1204 1205 1206

	if (gtt->usertask)
		put_task_struct(gtt->usertask);
	gtt->usertask = current->group_leader;
	get_task_struct(gtt->usertask);

A
Alex Deucher 已提交
1207 1208 1209
	return 0;
}

1210
/*
1211 1212
 * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object
 */
1213
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
A
Alex Deucher 已提交
1214 1215 1216 1217
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
1218
		return NULL;
A
Alex Deucher 已提交
1219

1220 1221 1222 1223
	if (gtt->usertask == NULL)
		return NULL;

	return gtt->usertask->mm;
A
Alex Deucher 已提交
1224 1225
}

1226
/*
1227 1228
 * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an
 * address range for the current task.
1229 1230
 *
 */
1231
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
1232
				  unsigned long end, unsigned long *userptr)
1233 1234 1235 1236
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;
	unsigned long size;

1237
	if (gtt == NULL || !gtt->userptr)
1238 1239
		return false;

1240 1241 1242
	/* Return false if no part of the ttm_tt object lies within
	 * the range
	 */
1243
	size = (unsigned long)gtt->ttm.num_pages * PAGE_SIZE;
1244 1245 1246
	if (gtt->userptr > end || gtt->userptr + size <= start)
		return false;

1247 1248
	if (userptr)
		*userptr = gtt->userptr;
1249 1250 1251
	return true;
}

1252
/*
1253
 * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr?
1254
 */
1255
bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm)
1256 1257 1258 1259 1260 1261
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL || !gtt->userptr)
		return false;

1262
	return true;
1263 1264
}

1265
/*
1266 1267
 * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only?
 */
A
Alex Deucher 已提交
1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct amdgpu_ttm_tt *gtt = (void *)ttm;

	if (gtt == NULL)
		return false;

	return !!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
}

1278
/**
1279
 * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object
1280 1281 1282
 *
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
1283 1284
 *
 * Figure out the flags to use for a VM PDE (Page Directory Entry).
1285
 */
1286
uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem)
A
Alex Deucher 已提交
1287
{
1288
	uint64_t flags = 0;
A
Alex Deucher 已提交
1289 1290 1291 1292

	if (mem && mem->mem_type != TTM_PL_SYSTEM)
		flags |= AMDGPU_PTE_VALID;

1293 1294
	if (mem && (mem->mem_type == TTM_PL_TT ||
		    mem->mem_type == AMDGPU_PL_PREEMPT)) {
A
Alex Deucher 已提交
1295 1296
		flags |= AMDGPU_PTE_SYSTEM;

1297
		if (ttm->caching == ttm_cached)
1298 1299
			flags |= AMDGPU_PTE_SNOOPED;
	}
A
Alex Deucher 已提交
1300

1301 1302 1303 1304
	if (mem && mem->mem_type == TTM_PL_VRAM &&
			mem->bus.caching == ttm_cached)
		flags |= AMDGPU_PTE_SNOOPED;

1305 1306 1307 1308 1309 1310
	return flags;
}

/**
 * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object
 *
1311
 * @adev: amdgpu_device pointer
1312 1313
 * @ttm: The ttm_tt object to compute the flags for
 * @mem: The memory registry backing this ttm_tt object
1314
 *
1315 1316 1317
 * Figure out the flags to use for a VM PTE (Page Table Entry).
 */
uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
1318
				 struct ttm_resource *mem)
1319 1320 1321
{
	uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem);

1322
	flags |= adev->gart.gart_pte_flags;
A
Alex Deucher 已提交
1323 1324 1325 1326 1327 1328 1329 1330
	flags |= AMDGPU_PTE_READABLE;

	if (!amdgpu_ttm_tt_is_readonly(ttm))
		flags |= AMDGPU_PTE_WRITEABLE;

	return flags;
}

1331
/*
1332 1333
 * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
 * object.
1334
 *
1335 1336 1337
 * Return true if eviction is sensible. Called by ttm_mem_evict_first() on
 * behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
 * it can find space for a new object and by ttm_bo_force_list_clean() which is
1338 1339
 * used to clean out a memory space.
 */
1340 1341 1342
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
					    const struct ttm_place *place)
{
1343
	unsigned long num_pages = bo->resource->num_pages;
1344
	struct dma_resv_iter resv_cursor;
1345
	struct amdgpu_res_cursor cursor;
1346 1347
	struct dma_fence *f;

1348 1349 1350 1351
	/* Swapout? */
	if (bo->resource->mem_type == TTM_PL_SYSTEM)
		return true;

1352
	if (bo->type == ttm_bo_type_kernel &&
1353
	    !amdgpu_vm_evictable(ttm_to_amdgpu_bo(bo)))
1354 1355
		return false;

1356 1357 1358 1359
	/* If bo is a KFD BO, check if the bo belongs to the current process.
	 * If true, then return false as any KFD process needs all its BOs to
	 * be resident to run successfully
	 */
1360 1361 1362
	dma_resv_for_each_fence(&resv_cursor, bo->base.resv, true, f) {
		if (amdkfd_fence_check_mm(f, current->mm))
			return false;
1363
	}
1364

1365
	switch (bo->resource->mem_type) {
1366 1367 1368 1369 1370 1371 1372 1373 1374
	case AMDGPU_PL_PREEMPT:
		/* Preemptible BOs don't own system resources managed by the
		 * driver (pages, VRAM, GART space). They point to resources
		 * owned by someone else (e.g. pageable memory in user mode
		 * or a DMABuf). They are used in a preemptible context so we
		 * can guarantee no deadlocks and good QoS in case of MMU
		 * notifiers or DMABuf move notifiers from the resource owner.
		 */
		return false;
1375
	case TTM_PL_TT:
1376 1377 1378
		if (amdgpu_bo_is_amdgpu_bo(bo) &&
		    amdgpu_bo_encrypted(ttm_to_amdgpu_bo(bo)))
			return false;
1379
		return true;
1380

1381
	case TTM_PL_VRAM:
1382
		/* Check each drm MM node individually */
1383
		amdgpu_res_first(bo->resource, 0, (u64)num_pages << PAGE_SHIFT,
1384 1385 1386 1387 1388
				 &cursor);
		while (cursor.remaining) {
			if (place->fpfn < PFN_DOWN(cursor.start + cursor.size)
			    && !(place->lpfn &&
				 place->lpfn <= PFN_DOWN(cursor.start)))
1389 1390
				return true;

1391
			amdgpu_res_next(&cursor, cursor.size);
1392
		}
1393
		return false;
1394

1395 1396
	default:
		break;
1397 1398 1399 1400 1401
	}

	return ttm_bo_eviction_valuable(bo, place);
}

1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436
static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
				      void *buf, size_t size, bool write)
{
	while (size) {
		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
		uint64_t bytes = 4 - (pos & 0x3);
		uint32_t shift = (pos & 0x3) * 8;
		uint32_t mask = 0xffffffff << shift;
		uint32_t value = 0;

		if (size < bytes) {
			mask &= 0xffffffff >> (bytes - size) * 8;
			bytes = size;
		}

		if (mask != 0xffffffff) {
			amdgpu_device_mm_access(adev, aligned_pos, &value, 4, false);
			if (write) {
				value &= ~mask;
				value |= (*(uint32_t *)buf << shift) & mask;
				amdgpu_device_mm_access(adev, aligned_pos, &value, 4, true);
			} else {
				value = (value & mask) >> shift;
				memcpy(buf, &value, bytes);
			}
		} else {
			amdgpu_device_mm_access(adev, aligned_pos, buf, 4, write);
		}

		pos += bytes;
		buf += bytes;
		size -= bytes;
	}
}

1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465
static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
					unsigned long offset, void *buf, int len, int write)
{
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
	struct amdgpu_job *job;
	struct dma_fence *fence;
	uint64_t src_addr, dst_addr;
	unsigned int num_dw;
	int r, idx;

	if (len != PAGE_SIZE)
		return -EINVAL;

	if (!adev->mman.sdma_access_ptr)
		return -EACCES;

	r = drm_dev_enter(adev_to_drm(adev), &idx);
	if (r)
		return r;

	if (write)
		memcpy(adev->mman.sdma_access_ptr, buf, len);

	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		goto out;

1466 1467 1468 1469 1470
	src_addr = amdgpu_bo_gpu_offset(abo);
	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
	if (write)
		swap(src_addr, dst_addr);

1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492
	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);

	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);

	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
	if (r) {
		amdgpu_job_free(job);
		goto out;
	}

	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
		r = -ETIMEDOUT;
	dma_fence_put(fence);

	if (!(r || write))
		memcpy(buf, adev->mman.sdma_access_ptr, len);
out:
	drm_dev_exit(idx);
	return r;
}

1493
/**
1494
 * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
1495 1496 1497 1498 1499 1500 1501 1502 1503 1504
 *
 * @bo:  The buffer object to read/write
 * @offset:  Offset into buffer object
 * @buf:  Secondary buffer to write/read from
 * @len: Length in bytes of access
 * @write:  true if writing
 *
 * This is used to access VRAM that backs a buffer object via MMIO
 * access for debugging purposes.
 */
1505
static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
1506 1507
				    unsigned long offset, void *buf, int len,
				    int write)
1508
{
1509
	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
1510
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
1511
	struct amdgpu_res_cursor cursor;
1512 1513
	int ret = 0;

1514
	if (bo->resource->mem_type != TTM_PL_VRAM)
1515 1516
		return -EIO;

1517
	if (amdgpu_device_has_timeouts_enabled(adev) &&
1518 1519 1520
			!amdgpu_ttm_access_memory_sdma(bo, offset, buf, len, write))
		return len;

1521
	amdgpu_res_first(bo->resource, offset, len, &cursor);
1522
	while (cursor.remaining) {
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532
		size_t count, size = cursor.size;
		loff_t pos = cursor.start;

		count = amdgpu_device_aper_access(adev, pos, buf, size, write);
		size -= count;
		if (size) {
			/* using MM to access rest vram and handle un-aligned address */
			pos += count;
			buf += count;
			amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
1533 1534
		}

1535 1536 1537
		ret += cursor.size;
		buf += cursor.size;
		amdgpu_res_next(&cursor, cursor.size);
1538 1539 1540 1541 1542
	}

	return ret;
}

1543 1544 1545 1546 1547 1548
static void
amdgpu_bo_delete_mem_notify(struct ttm_buffer_object *bo)
{
	amdgpu_bo_move_notify(bo, false, NULL);
}

1549
static struct ttm_device_funcs amdgpu_bo_driver = {
A
Alex Deucher 已提交
1550 1551 1552
	.ttm_tt_create = &amdgpu_ttm_tt_create,
	.ttm_tt_populate = &amdgpu_ttm_tt_populate,
	.ttm_tt_unpopulate = &amdgpu_ttm_tt_unpopulate,
1553
	.ttm_tt_destroy = &amdgpu_ttm_backend_destroy,
1554
	.eviction_valuable = amdgpu_ttm_bo_eviction_valuable,
A
Alex Deucher 已提交
1555 1556
	.evict_flags = &amdgpu_evict_flags,
	.move = &amdgpu_bo_move,
1557
	.delete_mem_notify = &amdgpu_bo_delete_mem_notify,
1558
	.release_notify = &amdgpu_bo_release_notify,
A
Alex Deucher 已提交
1559
	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
1560
	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
1561 1562
	.access_memory = &amdgpu_ttm_access_memory,
	.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
A
Alex Deucher 已提交
1563 1564
};

1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576
/*
 * Firmware Reservation functions
 */
/**
 * amdgpu_ttm_fw_reserve_vram_fini - free fw reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free fw reserved vram if it has been reserved.
 */
static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
{
1577 1578
	amdgpu_bo_free_kernel(&adev->mman.fw_vram_usage_reserved_bo,
		NULL, &adev->mman.fw_vram_usage_va);
1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589
}

/**
 * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
 *
 * @adev: amdgpu_device pointer
 *
 * create bo vram reservation from fw.
 */
static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
{
1590 1591
	uint64_t vram_size = adev->gmc.visible_vram_size;

1592 1593
	adev->mman.fw_vram_usage_va = NULL;
	adev->mman.fw_vram_usage_reserved_bo = NULL;
1594

1595 1596
	if (adev->mman.fw_vram_usage_size == 0 ||
	    adev->mman.fw_vram_usage_size > vram_size)
1597
		return 0;
1598

1599
	return amdgpu_bo_create_kernel_at(adev,
1600 1601
					  adev->mman.fw_vram_usage_start_offset,
					  adev->mman.fw_vram_usage_size,
1602
					  AMDGPU_GEM_DOMAIN_VRAM,
1603 1604
					  &adev->mman.fw_vram_usage_reserved_bo,
					  &adev->mman.fw_vram_usage_va);
1605
}
1606

1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628
/*
 * Memoy training reservation functions
 */

/**
 * amdgpu_ttm_training_reserve_vram_fini - free memory training reserved vram
 *
 * @adev: amdgpu_device pointer
 *
 * free memory training reserved vram if it has been reserved.
 */
static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
{
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;

	ctx->init = PSP_MEM_TRAIN_NOT_SUPPORT;
	amdgpu_bo_free_kernel(&ctx->c2p_bo, NULL, NULL);
	ctx->c2p_bo = NULL;

	return 0;
}

1629
static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
1630
{
1631
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1632

1633
	memset(ctx, 0, sizeof(*ctx));
1634

1635
	ctx->c2p_train_data_offset =
1636
		ALIGN((adev->gmc.mc_vram_size - adev->mman.discovery_tmr_size - SZ_1M), SZ_1M);
1637 1638 1639 1640
	ctx->p2c_train_data_offset =
		(adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
	ctx->train_data_size =
		GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
1641

1642 1643 1644 1645
	DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
			ctx->train_data_size,
			ctx->p2c_train_data_offset,
			ctx->c2p_train_data_offset);
1646 1647
}

1648 1649 1650
/*
 * reserve TMR memory at the top of VRAM which holds
 * IP Discovery data and is protected by PSP.
1651
 */
1652
static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
1653 1654 1655
{
	int ret;
	struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
1656
	bool mem_train_support = false;
1657

1658
	if (!amdgpu_sriov_vf(adev)) {
1659
		if (amdgpu_atomfirmware_mem_training_supported(adev))
1660
			mem_train_support = true;
1661
		else
1662
			DRM_DEBUG("memory training does not support!\n");
1663 1664
	}

1665 1666 1667 1668 1669 1670 1671
	/*
	 * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
	 * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc)
	 *
	 * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
	 * discovery data and G6 memory training data respectively
	 */
1672
	adev->mman.discovery_tmr_size =
1673
		amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
1674 1675
	if (!adev->mman.discovery_tmr_size)
		adev->mman.discovery_tmr_size = DISCOVERY_TMR_OFFSET;
1676 1677 1678 1679 1680

	if (mem_train_support) {
		/* reserve vram for mem train according to TMR location */
		amdgpu_ttm_training_data_block_init(adev);
		ret = amdgpu_bo_create_kernel_at(adev,
1681 1682 1683 1684 1685
					 ctx->c2p_train_data_offset,
					 ctx->train_data_size,
					 AMDGPU_GEM_DOMAIN_VRAM,
					 &ctx->c2p_bo,
					 NULL);
1686 1687 1688 1689
		if (ret) {
			DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
			amdgpu_ttm_training_reserve_vram_fini(adev);
			return ret;
1690
		}
1691
		ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
1692
	}
1693 1694

	ret = amdgpu_bo_create_kernel_at(adev,
1695 1696
				adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
				adev->mman.discovery_tmr_size,
1697
				AMDGPU_GEM_DOMAIN_VRAM,
1698
				&adev->mman.discovery_memory,
1699
				NULL);
1700
	if (ret) {
1701
		DRM_ERROR("alloc tmr failed(%d)!\n", ret);
1702
		amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
1703
		return ret;
1704 1705 1706 1707 1708
	}

	return 0;
}

1709
/*
1710 1711
 * amdgpu_ttm_init - Init the memory management (ttm) as well as various
 * gtt/vram related fields.
1712 1713 1714 1715 1716 1717
 *
 * This initializes all of the memory space pools that the TTM layer
 * will need such as the GTT space (system memory mapped to the device),
 * VRAM (on-board memory), and on-chip memories (GDS, GWS, OA) which
 * can be mapped per VMID.
 */
A
Alex Deucher 已提交
1718 1719
int amdgpu_ttm_init(struct amdgpu_device *adev)
{
1720
	uint64_t gtt_size;
A
Alex Deucher 已提交
1721
	int r;
1722
	u64 vis_vram_limit;
A
Alex Deucher 已提交
1723

1724 1725
	mutex_init(&adev->mman.gtt_window_lock);

A
Alex Deucher 已提交
1726
	/* No others user of address space so set it to 0 */
1727
	r = ttm_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->dev,
1728 1729
			       adev_to_drm(adev)->anon_inode->i_mapping,
			       adev_to_drm(adev)->vma_offset_manager,
1730
			       adev->need_swiotlb,
1731
			       dma_addressing_limited(adev->dev));
A
Alex Deucher 已提交
1732 1733 1734 1735 1736
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
	adev->mman.initialized = true;
1737

1738
	/* Initialize VRAM pool with all of VRAM divided into pages */
1739
	r = amdgpu_vram_mgr_init(adev);
A
Alex Deucher 已提交
1740 1741 1742 1743
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}
1744 1745 1746 1747

	/* Reduce size of CPU-visible VRAM if requested */
	vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
	if (amdgpu_vis_vram_limit > 0 &&
1748 1749
	    vis_vram_limit <= adev->gmc.visible_vram_size)
		adev->gmc.visible_vram_size = vis_vram_limit;
1750

A
Alex Deucher 已提交
1751
	/* Change the size here instead of the init above so only lpfn is affected */
1752
	amdgpu_ttm_set_buffer_funcs_status(adev, false);
1753
#ifdef CONFIG_64BIT
1754
#ifdef CONFIG_X86
1755 1756 1757 1758 1759
	if (adev->gmc.xgmi.connected_to_cpu)
		adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);

	else
1760
#endif
1761 1762
		adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
				adev->gmc.visible_vram_size);
1763
#endif
A
Alex Deucher 已提交
1764

	/*
	 * The reserved vram for firmware must be pinned to the specified
	 * place on the VRAM, so reserve it early.
	 */
	r = amdgpu_ttm_fw_reserve_vram_init(adev);
	if (r)
		return r;

	/*
	 * Only NAVI10 and onwards ASICs support IP discovery.
	 * If IP discovery is enabled, a block of memory should be
	 * reserved for the IP discovery data.
	 */
	if (adev->mman.discovery_bin) {
		r = amdgpu_ttm_reserve_tmr(adev);
		if (r)
			return r;
	}

	/* Allocate memory as required for VGA.
	 * This is used for VGA emulation and pre-OS scanout buffers to
	 * avoid display artifacts while transitioning between pre-OS
	 * and driver.
	 */
	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
				       AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->mman.stolen_vga_memory,
				       NULL);
	if (r)
		return r;
	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
				       adev->mman.stolen_extended_size,
				       AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->mman.stolen_extended_memory,
				       NULL);
	if (r)
		return r;
	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
				       adev->mman.stolen_reserved_size,
				       AMDGPU_GEM_DOMAIN_VRAM,
				       &adev->mman.stolen_reserved_memory,
				       NULL);
	if (r)
		return r;

	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));

	/* Compute GTT size, either based on 3/4 of the RAM size
	 * or whatever the user passed on module init.
	 */
	if (amdgpu_gtt_size == -1) {
		struct sysinfo si;

		si_meminfo(&si);
		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
			       adev->gmc.mc_vram_size),
			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
	} else {
		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
	}

	/* Initialize GTT memory pool */
	r = amdgpu_gtt_mgr_init(adev, gtt_size);
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("amdgpu: %uM of GTT memory ready.\n",
		 (unsigned)(gtt_size / (1024 * 1024)));

	/* Initialize preemptible memory pool */
	r = amdgpu_preempt_mgr_init(adev);
	if (r) {
		DRM_ERROR("Failed initializing PREEMPT heap.\n");
		return r;
	}

	/* Initialize various on-chip memory pools */
	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GDS, adev->gds.gds_size);
	if (r) {
		DRM_ERROR("Failed initializing GDS heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_GWS, adev->gds.gws_size);
	if (r) {
		DRM_ERROR("Failed initializing gws heap.\n");
		return r;
	}

	r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_OA, adev->gds.oa_size);
	if (r) {
		DRM_ERROR("Failed initializing oa heap.\n");
		return r;
	}

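	/* GTT bounce buffer used by the debugfs VRAM access path; if this
	 * allocation fails, debug VRAM access falls back to the slower
	 * MM (page-by-page) path, as the warning below states.
	 */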
	if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT,
				&adev->mman.sdma_access_bo, NULL,
				&adev->mman.sdma_access_ptr))
		DRM_WARN("Debug VRAM access will use slowpath MM access\n");

	return 0;
}

/*
 * amdgpu_ttm_fini - De-initialize the TTM memory pools
 */
void amdgpu_ttm_fini(struct amdgpu_device *adev)
{
	int idx;

	if (!adev->mman.initialized)
		return;

	amdgpu_ttm_training_reserve_vram_fini(adev);
	/* return the stolen vga memory back to VRAM */
	amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
	amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
	/* return the IP Discovery TMR memory back to VRAM */
	amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
	if (adev->mman.stolen_reserved_size)
		amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
				      NULL, NULL);
	amdgpu_ttm_fw_reserve_vram_fini(adev);

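	/* Unmap the CPU mapping of the VRAM aperture only while the
	 * device is still present; drm_dev_enter() fails once the device
	 * has been unplugged.
	 */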
	if (drm_dev_enter(adev_to_drm(adev), &idx)) {

		if (adev->mman.aper_base_kaddr)
			iounmap(adev->mman.aper_base_kaddr);
		adev->mman.aper_base_kaddr = NULL;

		drm_dev_exit(idx);
	}

	amdgpu_vram_mgr_fini(adev);
	amdgpu_gtt_mgr_fini(adev);
	amdgpu_preempt_mgr_fini(adev);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS);
	ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA);
	ttm_device_fini(&adev->mman.bdev);
	adev->mman.initialized = false;
	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
					&adev->mman.sdma_access_ptr);
	DRM_INFO("amdgpu: ttm finalized\n");
}

/**
 * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
 *
 * @adev: amdgpu_device pointer
 * @enable: true when we can use buffer functions.
 *
 * Enable/disable use of buffer functions during suspend/resume. This should
 * only be called at bootup or when userspace isn't running.
 */
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
	uint64_t size;
	int r;

	if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
	    adev->mman.buffer_funcs_enabled == enable)
		return;

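	/* Enabling creates a scheduler entity on the kernel priority queue
	 * of the buffer-funcs ring (typically SDMA) for buffer moves;
	 * disabling tears it down and drops the VRAM manager's move fence.
	 */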
	if (enable) {
		struct amdgpu_ring *ring;
		struct drm_gpu_scheduler *sched;

		ring = adev->mman.buffer_funcs_ring;
		sched = &ring->sched;
		r = drm_sched_entity_init(&adev->mman.entity,
					  DRM_SCHED_PRIORITY_KERNEL, &sched,
					  1, NULL);
		if (r) {
			DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
				  r);
			return;
		}
	} else {
		drm_sched_entity_destroy(&adev->mman.entity);
		dma_fence_put(man->move);
		man->move = NULL;
	}

	/* this just adjusts TTM's idea of the size, which sets lpfn to the correct value */
	if (enable)
		size = adev->gmc.real_vram_size;
	else
		size = adev->gmc.visible_vram_size;
	man->size = size >> PAGE_SHIFT;
	adev->mman.buffer_funcs_enabled = enable;
}

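/**
 * amdgpu_copy_buffer - schedule a copy between two GPU addresses
 *
 * @ring: ring with buffer functions, typically an SDMA ring
 * @src_offset: source GPU address
 * @dst_offset: destination GPU address
 * @byte_count: number of bytes to copy
 * @resv: optional reservation object to sync to before the copy
 * @fence: returned fence, signaled when the copy has completed
 * @direct_submit: submit the IB directly to the ring, bypassing the scheduler
 * @vm_needs_flush: flush the VM using the GART page directory before the copy
 * @tmz: perform the copy as a TMZ (secure) transfer
 *
 * Splits the copy into chunks the buffer functions can handle and submits
 * them as a single IB.  Returns 0 on success or a negative error code.
 */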
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
		       uint64_t dst_offset, uint32_t byte_count,
		       struct dma_resv *resv,
		       struct dma_fence **fence, bool direct_submit,
		       bool vm_needs_flush, bool tmz)
{
	enum amdgpu_ib_pool_type pool = direct_submit ? AMDGPU_IB_POOL_DIRECT :
		AMDGPU_IB_POOL_DELAYED;
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_job *job;

	uint32_t max_bytes;
	unsigned num_loops, num_dw;
	unsigned i;
	int r;

	if (direct_submit && !ring->sched.ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
	num_loops = DIV_ROUND_UP(byte_count, max_bytes);
	num_dw = ALIGN(num_loops * adev->mman.buffer_funcs->copy_num_dw, 8);

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, &job);
	if (r)
		return r;

	if (vm_needs_flush) {
		job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo ?
					adev->gmc.pdb0_bo : adev->gart.bo);
		job->vm_needs_flush = true;
	}
	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_SYNC_ALWAYS,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

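	/* Emit one copy packet per chunk of at most copy_max_bytes until
	 * the whole byte_count has been covered.
	 */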
	for (i = 0; i < num_loops; i++) {
		uint32_t cur_size_in_bytes = min(byte_count, max_bytes);

		amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
					dst_offset, cur_size_in_bytes, tmz);

		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
		byte_count -= cur_size_in_bytes;
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	if (direct_submit)
		r = amdgpu_job_submit_direct(job, ring, fence);
	else
		r = amdgpu_job_submit(job, &adev->mman.entity,
				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return r;

error_free:
	amdgpu_job_free(job);
	DRM_ERROR("Error scheduling IBs (%d)\n", r);
	return r;
}

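/**
 * amdgpu_fill_buffer - schedule a fill of a buffer object
 *
 * @bo: destination buffer object, backed by VRAM or GART-bound GTT
 * @src_data: 32 bit value to fill the buffer with
 * @resv: optional reservation object to sync to before the fill
 * @fence: returned fence, signaled when the fill has completed
 *
 * Walks the backing resource with an amdgpu_res_cursor and emits one fill
 * command per contiguous chunk.  Returns 0 on success or a negative error
 * code.
 */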
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
		       uint32_t src_data,
		       struct dma_resv *resv,
		       struct dma_fence **fence)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;

	struct amdgpu_res_cursor cursor;
	unsigned int num_loops, num_dw;
	uint64_t num_bytes;

	struct amdgpu_job *job;
	int r;

	if (!adev->mman.buffer_funcs_enabled) {
		DRM_ERROR("Trying to clear memory with ring turned off.\n");
		return -EINVAL;
	}

	if (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT) {
		DRM_ERROR("Trying to clear preemptible memory.\n");
		return -EINVAL;
	}

	if (bo->tbo.resource->mem_type == TTM_PL_TT) {
		r = amdgpu_ttm_alloc_gart(&bo->tbo);
		if (r)
			return r;
	}

	num_bytes = bo->tbo.resource->num_pages << PAGE_SHIFT;
	num_loops = 0;

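	/* First pass over the resource: count the fill commands needed so
	 * the IB can be sized before it is allocated.
	 */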
	amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
	while (cursor.remaining) {
		num_loops += DIV_ROUND_UP_ULL(cursor.size, max_bytes);
		amdgpu_res_next(&cursor, cursor.size);
	}
	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;

	/* for IB padding */
	num_dw += 64;

	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED,
				     &job);
	if (r)
		return r;

	if (resv) {
		r = amdgpu_sync_resv(adev, &job->sync, resv,
				     AMDGPU_SYNC_ALWAYS,
				     AMDGPU_FENCE_OWNER_UNDEFINED);
		if (r) {
			DRM_ERROR("sync failed (%d).\n", r);
			goto error_free;
		}
	}

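	/* Second pass: emit one fill packet per chunk, translating the
	 * resource offset into a GPU address in the BO's memory domain.
	 */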
	amdgpu_res_first(bo->tbo.resource, 0, num_bytes, &cursor);
	while (cursor.remaining) {
		uint32_t cur_size = min_t(uint64_t, cursor.size, max_bytes);
		uint64_t dst_addr = cursor.start;

		dst_addr += amdgpu_ttm_domain_start(adev,
						    bo->tbo.resource->mem_type);
		amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data, dst_addr,
					cur_size);

		amdgpu_res_next(&cursor, cur_size);
	}

	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
	WARN_ON(job->ibs[0].length_dw > num_dw);
	r = amdgpu_job_submit(job, &adev->mman.entity,
			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
	if (r)
		goto error_free;

	return 0;

error_free:
	amdgpu_job_free(job);
	return r;
}

/**
 * amdgpu_ttm_evict_resources - evict memory buffers
 * @adev: amdgpu device object
 * @mem_type: evicted BO's memory type
 *
 * Evicts all @mem_type buffers on the lru list of the memory type.
 *
 * Returns:
 * 0 for success or a negative error code on failure.
 */
int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type)
{
	struct ttm_resource_manager *man;

	switch (mem_type) {
	case TTM_PL_VRAM:
	case TTM_PL_TT:
	case AMDGPU_PL_GWS:
	case AMDGPU_PL_GDS:
	case AMDGPU_PL_OA:
		man = ttm_manager_type(&adev->mman.bdev, mem_type);
		break;
	default:
		DRM_ERROR("Trying to evict invalid memory type\n");
		return -EINVAL;
	}

	return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
}

#if defined(CONFIG_DEBUG_FS)

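/*
 * The *_table_show() helpers below each dump the state of one TTM
 * resource manager (VRAM, GTT, GDS, GWS, OA) through the manager's
 * debug callback into the seq_file.
 */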
static int amdgpu_mm_vram_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    TTM_PL_VRAM);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_ttm_page_pool_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;

	return ttm_pool_debugfs(&adev->mman.bdev.pool, m);
}

static int amdgpu_mm_tt_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    TTM_PL_TT);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_mm_gds_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_GDS);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_mm_gws_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_GWS);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

static int amdgpu_mm_oa_table_show(struct seq_file *m, void *unused)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)m->private;
	struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev,
							    AMDGPU_PL_OA);
	struct drm_printer p = drm_seq_file_printer(m);

	man->func->debug(man, &p);
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_vram_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_tt_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gds_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_gws_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_mm_oa_table);
DEFINE_SHOW_ATTRIBUTE(amdgpu_ttm_page_pool);

/*
 * amdgpu_ttm_vram_read - Linear read access to VRAM
 *
 * Accesses VRAM via MMIO for debugging purposes.
 */
static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->gmc.mc_vram_size)
		return -ENXIO;

	size = min(size, (size_t)(adev->gmc.mc_vram_size - *pos));
	while (size) {
		size_t bytes = min(size, AMDGPU_TTM_VRAM_MAX_DW_READ * 4);
		uint32_t value[AMDGPU_TTM_VRAM_MAX_DW_READ];

		amdgpu_device_vram_access(adev, *pos, value, bytes, false);
		if (copy_to_user(buf, value, bytes))
			return -EFAULT;

		result += bytes;
		buf += bytes;
		*pos += bytes;
		size -= bytes;
	}

	return result;
}

/*
 * amdgpu_ttm_vram_write - Linear write access to VRAM
 *
 * Accesses VRAM via MMIO for debugging purposes.
 */
static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
				    size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	ssize_t result = 0;
	int r;

	if (size & 0x3 || *pos & 0x3)
		return -EINVAL;

	if (*pos >= adev->gmc.mc_vram_size)
		return -ENXIO;

	while (size) {
		uint32_t value;

		if (*pos >= adev->gmc.mc_vram_size)
			return result;

		r = get_user(value, (uint32_t *)buf);
		if (r)
			return r;

		amdgpu_device_mm_access(adev, *pos, &value, 4, true);

		result += 4;
		buf += 4;
		*pos += 4;
		size -= 4;
	}

	return result;
}

static const struct file_operations amdgpu_ttm_vram_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_ttm_vram_read,
	.write = amdgpu_ttm_vram_write,
	.llseek = default_llseek,
};

/*
 * amdgpu_iomem_read - Virtual read access to GPU mapped memory
 *
 * This function is used to read memory that has been mapped to the
 * GPU and the known addresses are not physical addresses but instead
 * bus addresses (e.g., what you'd put in an IB or ring buffer).
 */
static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
				 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	struct iommu_domain *dom;
	ssize_t result = 0;
	int r;

	/* retrieve the IOMMU domain if any for this device */
	dom = iommu_get_domain_for_dev(adev->dev);

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;
		unsigned long pfn;
		struct page *p;
		void *ptr;

		bytes = bytes < size ? bytes : size;

		/* Translate the bus address to a physical address.  If
		 * the domain is NULL it means there is no IOMMU active
		 * and the address translation is the identity
		 */
		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;

		pfn = addr >> PAGE_SHIFT;
		if (!pfn_valid(pfn))
			return -EPERM;

		p = pfn_to_page(pfn);
		if (p->mapping != adev->mman.bdev.dev_mapping)
			return -EPERM;

		ptr = kmap(p);
		r = copy_to_user(buf, ptr + off, bytes);
		kunmap(p);
		if (r)
			return -EFAULT;

		size -= bytes;
		*pos += bytes;
		result += bytes;
	}

	return result;
}

/*
 * amdgpu_iomem_write - Virtual write access to GPU mapped memory
 *
 * This function is used to write memory that has been mapped to the
 * GPU and the known addresses are not physical addresses but instead
 * bus addresses (e.g., what you'd put in an IB or ring buffer).
 */
static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
				 size_t size, loff_t *pos)
{
	struct amdgpu_device *adev = file_inode(f)->i_private;
	struct iommu_domain *dom;
	ssize_t result = 0;
	int r;

	dom = iommu_get_domain_for_dev(adev->dev);

	while (size) {
		phys_addr_t addr = *pos & PAGE_MASK;
		loff_t off = *pos & ~PAGE_MASK;
		size_t bytes = PAGE_SIZE - off;
		unsigned long pfn;
		struct page *p;
		void *ptr;

		bytes = bytes < size ? bytes : size;

		addr = dom ? iommu_iova_to_phys(dom, addr) : addr;

		pfn = addr >> PAGE_SHIFT;
		if (!pfn_valid(pfn))
			return -EPERM;

		p = pfn_to_page(pfn);
		if (p->mapping != adev->mman.bdev.dev_mapping)
			return -EPERM;

		ptr = kmap(p);
		r = copy_from_user(ptr + off, buf, bytes);
		kunmap(p);
		if (r)
			return -EFAULT;

		size -= bytes;
		*pos += bytes;
		result += bytes;
	}

	return result;
}

static const struct file_operations amdgpu_ttm_iomem_fops = {
	.owner = THIS_MODULE,
	.read = amdgpu_iomem_read,
	.write = amdgpu_iomem_write,
	.llseek = default_llseek
};

#endif

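/*
 * amdgpu_ttm_debugfs_init - register the TTM debugfs entries
 *
 * Creates the VRAM/IOMEM access files and the per-manager state dumps
 * under the device's primary debugfs directory; compiles to a no-op
 * when CONFIG_DEBUG_FS is not set.
 */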
void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = adev_to_drm(adev)->primary;
	struct dentry *root = minor->debugfs_root;

	debugfs_create_file_size("amdgpu_vram", 0444, root, adev,
				 &amdgpu_ttm_vram_fops, adev->gmc.mc_vram_size);
	debugfs_create_file("amdgpu_iomem", 0444, root, adev,
			    &amdgpu_ttm_iomem_fops);
	debugfs_create_file("amdgpu_vram_mm", 0444, root, adev,
			    &amdgpu_mm_vram_table_fops);
	debugfs_create_file("amdgpu_gtt_mm", 0444, root, adev,
			    &amdgpu_mm_tt_table_fops);
	debugfs_create_file("amdgpu_gds_mm", 0444, root, adev,
			    &amdgpu_mm_gds_table_fops);
	debugfs_create_file("amdgpu_gws_mm", 0444, root, adev,
			    &amdgpu_mm_gws_table_fops);
	debugfs_create_file("amdgpu_oa_mm", 0444, root, adev,
			    &amdgpu_mm_oa_table_fops);
	debugfs_create_file("ttm_page_pool", 0444, root, adev,
			    &amdgpu_ttm_page_pool_fops);
#endif
}