/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);

	amdgpu_bo_kunmap(bo);

	drm_gem_object_release(&bo->gem_base);
	amdgpu_bo_unref(&bo->parent);
	if (!list_empty(&bo->shadow_list)) {
		mutex_lock(&adev->shadow_list_lock);
		list_del_init(&bo->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}
	kfree(bo->metadata);
	kfree(bo);
}

bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &amdgpu_ttm_bo_destroy)
		return true;
	return false;
}

void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
	struct ttm_placement *placement = &abo->placement;
	struct ttm_place *places = abo->placements;
	u64 flags = abo->flags;
	u32 c = 0;

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;

		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
			TTM_PL_FLAG_VRAM;

		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			places[c].lpfn = visible_pfn;
		else
			places[c].flags |= TTM_PL_FLAG_TOPDOWN;

		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		places[c].fpfn = 0;
		if (flags & AMDGPU_GEM_CREATE_SHADOW)
			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
		else
			places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_TT;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_SYSTEM;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_OA) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
		c++;
	}

	if (!c) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
		c++;
	}

	placement->num_placement = c;
	placement->placement = places;

	placement->num_busy_placement = c;
	placement->busy_placement = places;
}
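
/*
 * The placement list built above is what ttm_bo_validate() consumes when a
 * BO is placed or moved.  A minimal sketch of that pattern, mirroring
 * amdgpu_bo_validate() further down ("bo" is assumed to be a reserved
 * amdgpu_bo):
 *
 *	amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
 *	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 */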

/**
 * amdgpu_bo_create_reserved - create reserved BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: resulting BO
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use, and returns it still
 * reserved.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
			      unsigned long size, int align,
			      u32 domain, struct amdgpu_bo **bo_ptr,
			      u64 *gpu_addr, void **cpu_addr)
{
	bool free = false;
	int r;

	if (!*bo_ptr) {
		r = amdgpu_bo_create(adev, size, align, true, domain,
				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
				     NULL, NULL, 0, bo_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
				r);
			return r;
		}
		free = true;
	}

	r = amdgpu_bo_reserve(*bo_ptr, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
		goto error_free;
	}

	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
		goto error_unreserve;
	}

	if (cpu_addr) {
		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
			goto error_unreserve;
		}
	}

	return 0;

error_unreserve:
	amdgpu_bo_unreserve(*bo_ptr);

error_free:
216 217
	if (free)
		amdgpu_bo_unref(bo_ptr);
218 219 220 221

	return r;
}
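
/*
 * Example (illustrative sketch, not part of the driver): the "reserved"
 * variant is useful when the caller wants to fill the buffer through the
 * CPU mapping before dropping the reservation.  "adev" and the error
 * handling policy are assumptions here.
 *
 *	struct amdgpu_bo *bo = NULL;
 *	u64 gpu_addr;
 *	void *cpu_addr;
 *
 *	r = amdgpu_bo_create_reserved(adev, PAGE_SIZE, PAGE_SIZE,
 *				      AMDGPU_GEM_DOMAIN_VRAM, &bo,
 *				      &gpu_addr, &cpu_addr);
 *	if (r)
 *		return r;
 *	memset(cpu_addr, 0, PAGE_SIZE);
 *	amdgpu_bo_unreserve(bo);
 */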

/**
 * amdgpu_bo_create_kernel - create BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: resulting BO
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
			    unsigned long size, int align,
			    u32 domain, struct amdgpu_bo **bo_ptr,
			    u64 *gpu_addr, void **cpu_addr)
{
	int r;

	r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
				      gpu_addr, cpu_addr);

	if (r)
		return r;

	amdgpu_bo_unreserve(*bo_ptr);

	return 0;
}
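
/*
 * Example (illustrative sketch): typical lifetime of a kernel-internal
 * scratch buffer, from allocation to teardown.  The names and sizes are
 * assumptions, not taken from this file.
 *
 *	struct amdgpu_bo *scratch = NULL;
 *	u64 scratch_gpu_addr;
 *	void *scratch_cpu_addr;
 *
 *	r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
 *				    AMDGPU_GEM_DOMAIN_GTT, &scratch,
 *				    &scratch_gpu_addr, &scratch_cpu_addr);
 *	...
 *	amdgpu_bo_free_kernel(&scratch, &scratch_gpu_addr,
 *			      &scratch_cpu_addr);
 */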

/**
 * amdgpu_bo_free_kernel - free BO for kernel use
 *
 * @bo: amdgpu BO to free
 *
 * unmaps and unpins a BO for kernel internal use.
 */
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
			   void **cpu_addr)
{
	if (*bo == NULL)
		return;

	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
		if (cpu_addr)
			amdgpu_bo_kunmap(*bo);

		amdgpu_bo_unpin(*bo);
		amdgpu_bo_unreserve(*bo);
	}
	amdgpu_bo_unref(bo);

	if (gpu_addr)
		*gpu_addr = 0;

	if (cpu_addr)
		*cpu_addr = NULL;
}

static int amdgpu_bo_do_create(struct amdgpu_device *adev,
			       unsigned long size, int byte_align,
			       bool kernel, u32 domain, u64 flags,
			       struct sg_table *sg,
			       struct reservation_object *resv,
			       uint64_t init_value,
			       struct amdgpu_bo **bo_ptr)
{
	struct amdgpu_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align;
	u64 initial_bytes_moved, bytes_moved;
	size_t acc_size;
	int r;

	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	INIT_LIST_HEAD(&bo->shadow_list);
	INIT_LIST_HEAD(&bo->va);
	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT |
					 AMDGPU_GEM_DOMAIN_CPU |
					 AMDGPU_GEM_DOMAIN_GDS |
					 AMDGPU_GEM_DOMAIN_GWS |
					 AMDGPU_GEM_DOMAIN_OA);
	bo->allowed_domains = bo->preferred_domains;
	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

	bo->flags = flags;

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */

#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#endif

	bo->tbo.bdev = &adev->mman.bdev;
	amdgpu_ttm_placement_from_domain(bo, domain);

	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
	/* Kernel allocations are uninterruptible */
	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
				 &bo->placement, page_align, !kernel, NULL,
				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
372 373 374
	if (unlikely(r != 0))
		return r;

	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
		      initial_bytes_moved;
	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
		amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
	else
		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);

	if (kernel)
		bo->tbo.priority = 1;

	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
		struct dma_fence *fence;

		r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
		if (unlikely(r))
			goto fail_unreserve;

		amdgpu_bo_fence(bo, fence, false);
		dma_fence_put(bo->tbo.moving);
		bo->tbo.moving = dma_fence_get(fence);
		dma_fence_put(fence);
	}
	if (!resv)
		amdgpu_bo_unreserve(bo);
	*bo_ptr = bo;

	trace_amdgpu_bo_create(bo);

	/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
	if (type == ttm_bo_type_device)
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	return 0;

fail_unreserve:
	if (!resv)
		ww_mutex_unlock(&bo->tbo.resv->lock);
	amdgpu_bo_unref(&bo);
	return r;
}

static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
				   unsigned long size, int byte_align,
				   struct amdgpu_bo *bo)
{
	int r;

	if (bo->shadow)
		return 0;

	r = amdgpu_bo_do_create(adev, size, byte_align, true,
				AMDGPU_GEM_DOMAIN_GTT,
				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
				AMDGPU_GEM_CREATE_SHADOW,
				NULL, bo->tbo.resv, 0,
				&bo->shadow);
	if (!r) {
		bo->shadow->parent = amdgpu_bo_ref(bo);
		mutex_lock(&adev->shadow_list_lock);
		list_add_tail(&bo->shadow_list, &adev->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}

	return r;
}

/* init_value will only take effect when flags contains
 * AMDGPU_GEM_CREATE_VRAM_CLEARED.
 */
int amdgpu_bo_create(struct amdgpu_device *adev,
		     unsigned long size, int byte_align,
		     bool kernel, u32 domain, u64 flags,
		     struct sg_table *sg,
		     struct reservation_object *resv,
		     uint64_t init_value,
		     struct amdgpu_bo **bo_ptr)
{
	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
	int r;

	r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
				parent_flags, sg, resv, init_value, bo_ptr);
	if (r)
		return r;

	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
		if (!resv)
			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
							NULL));

		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));

		if (!resv)
			reservation_object_unlock((*bo_ptr)->tbo.resv);

		if (r)
			amdgpu_bo_unref(bo_ptr);
	}

	return r;
}
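
/*
 * Example (illustrative sketch): allocating a userspace-visible VRAM BO that
 * is cleared at creation time; init_value is the fill pattern handed to
 * amdgpu_fill_buffer().  "adev", "size" and the error handling are
 * assumptions.
 *
 *	struct amdgpu_bo *bo;
 *
 *	r = amdgpu_bo_create(adev, size, PAGE_SIZE, false,
 *			     AMDGPU_GEM_DOMAIN_VRAM,
 *			     AMDGPU_GEM_CREATE_VRAM_CLEARED,
 *			     NULL, NULL, 0, &bo);
 */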

int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_bo *bo,
			       struct reservation_object *resv,
			       struct dma_fence **fence,
			       bool direct)

{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct, false);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

int amdgpu_bo_validate(struct amdgpu_bo *bo)
{
	uint32_t domain;
	int r;

	if (bo->pin_count)
		return 0;

	domain = bo->preferred_domains;

retry:
	amdgpu_ttm_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_bo *bo,
				  struct reservation_object *resv,
				  struct dma_fence **fence,
				  bool direct)

{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct, false);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
	void *kptr;
	long r;

	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		return -EPERM;

	kptr = amdgpu_bo_kptr(bo);
	if (kptr) {
		if (ptr)
			*ptr = kptr;
		return 0;
	}

	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
						MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;

	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r)
		return r;

	if (ptr)
		*ptr = amdgpu_bo_kptr(bo);

	return 0;
}

void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
{
	bool is_iomem;

	return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
}

void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
	if (bo->kmap.bo)
		ttm_bo_kunmap(&bo->kmap);
}
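
/*
 * Example (illustrative sketch): a typical CPU access sequence using the
 * mapping helpers above.  The caller, the error handling and the data being
 * written are assumptions, not part of this file.
 *
 *	void *cpu_ptr;
 *
 *	r = amdgpu_bo_reserve(bo, false);
 *	if (r)
 *		return r;
 *	r = amdgpu_bo_kmap(bo, &cpu_ptr);
 *	if (!r) {
 *		memcpy(cpu_ptr, data, size);
 *		amdgpu_bo_kunmap(bo);
 *	}
 *	amdgpu_bo_unreserve(bo);
 */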

struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;

	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r, i;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
		return -EPERM;

	if (WARN_ON_ONCE(min_offset > max_offset))
		return -EINVAL;

	/* A shared bo cannot be migrated to VRAM */
	if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
		return -EINVAL;

	if (bo->pin_count) {
		uint32_t mem_type = bo->tbo.mem.mem_type;

		if (domain != amdgpu_mem_type_to_domain(mem_type))
			return -EINVAL;

		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = amdgpu_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
			WARN_ON_ONCE(max_offset <
				     (amdgpu_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}

	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	/* force to pin into visible video ram */
	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
		bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	amdgpu_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		unsigned fpfn, lpfn;

		fpfn = min_offset >> PAGE_SHIFT;
		lpfn = max_offset >> PAGE_SHIFT;

		if (fpfn > bo->placements[i].fpfn)
			bo->placements[i].fpfn = fpfn;
		if (!bo->placements[i].lpfn ||
		    (lpfn && lpfn < bo->placements[i].lpfn))
			bo->placements[i].lpfn = lpfn;
		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p pin failed\n", bo);
		goto error;
	}

	bo->pin_count = 1;
	if (gpu_addr != NULL) {
		r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
		if (unlikely(r)) {
			dev_err(adev->dev, "%p bind failed\n", bo);
			goto error;
		}
		*gpu_addr = amdgpu_bo_gpu_offset(bo);
	}
	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
		adev->vram_pin_size += amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size += amdgpu_bo_size(bo);
	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		adev->gart_pin_size += amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
{
	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
}

int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r, i;

	if (!bo->pin_count) {
		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
		goto error;
	}

	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
		adev->vram_pin_size -= amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size -= amdgpu_bo_size(bo);
	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
		adev->gart_pin_size -= amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
	if (0 && (adev->flags & AMD_IS_APU)) {
		/* Useless to evict on IGP chips */
		return 0;
	}
	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}

static const char *amdgpu_vram_names[] = {
	"UNKNOWN",
	"GDDR1",
	"DDR2",
	"GDDR3",
	"GDDR4",
	"GDDR5",
	"HBM",
	"DDR3"
};

int amdgpu_bo_init(struct amdgpu_device *adev)
{
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(adev->mc.aper_base,
				   adev->mc.aper_size);

	/* Add an MTRR for the VRAM */
	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
					      adev->mc.aper_size);
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 adev->mc.mc_vram_size >> 20,
		 (unsigned long long)adev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %s\n",
		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
	return amdgpu_ttm_init(adev);
}

void amdgpu_bo_fini(struct amdgpu_device *adev)
{
	amdgpu_ttm_fini(adev);
	arch_phys_wc_del(adev->mc.vram_mtrr);
	arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
}

int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
			     struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}

int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	if (adev->family <= AMDGPU_FAMILY_CZ &&
	    AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
		return -EINVAL;

	bo->tiling_flags = tiling_flags;
	return 0;
}

void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
}

int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
			    uint32_t metadata_size, uint64_t flags)
{
	void *buffer;

	if (!metadata_size) {
		if (bo->metadata_size) {
			kfree(bo->metadata);
			bo->metadata = NULL;
			bo->metadata_size = 0;
		}
		return 0;
	}

	if (metadata == NULL)
		return -EINVAL;

	buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	kfree(bo->metadata);
	bo->metadata_flags = flags;
	bo->metadata = buffer;
	bo->metadata_size = metadata_size;

	return 0;
}

int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
			   size_t buffer_size, uint32_t *metadata_size,
			   uint64_t *flags)
{
	if (!buffer && !metadata_size)
		return -EINVAL;

	if (buffer) {
		if (buffer_size < bo->metadata_size)
			return -EINVAL;

		if (bo->metadata_size)
			memcpy(buffer, bo->metadata, bo->metadata_size);
	}

	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (flags)
		*flags = bo->metadata_flags;

	return 0;
}

void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
			   bool evict,
			   struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	struct ttm_mem_reg *old_mem = &bo->mem;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return;

	abo = ttm_to_amdgpu_bo(bo);
	amdgpu_vm_bo_invalidate(adev, abo, evict);

	amdgpu_bo_kunmap(abo);

	/* remember the eviction */
	if (evict)
		atomic64_inc(&adev->num_evictions);

	/* update statistics */
	if (!new_mem)
		return;

	/* move_notify is called before move happens */
	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}

int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	unsigned long offset, size;
	int r;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return 0;

	abo = ttm_to_amdgpu_bo(bo);

	/* Remember that this BO was accessed by the CPU */
	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= adev->mc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (abo->pin_count > 0)
		return -EINVAL;

	/* hurrah the memory is not visible ! */
	atomic64_inc(&adev->num_vram_cpu_page_faults);
	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT);

	/* Avoid costly evictions; only set GTT as a busy placement */
	abo->placement.num_busy_placement = 1;
	abo->placement.busy_placement = &abo->placements[1];

	r = ttm_bo_validate(bo, &abo->placement, false, false);
	if (unlikely(r != 0))
		return r;

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if (bo->mem.mem_type == TTM_PL_VRAM &&
	    (offset + size) > adev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

/**
 * amdgpu_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
		     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, fence);
	else
		reservation_object_add_excl_fence(resv, fence);
}
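
/*
 * Example (sketch, mirroring amdgpu_bo_backup_to_shadow() above): after
 * submitting a copy that produced "fence", attach it as a shared fence so
 * that later users of the BO wait for the copy to complete:
 *
 *	amdgpu_bo_fence(bo, fence, true);
 */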

/**
 * amdgpu_bo_gpu_offset - return GPU offset of bo
 * @bo:	amdgpu object for which we query the offset
 *
 * Returns current GPU offset of the object.
 *
 * Note: object should either be pinned or reserved when calling this
 * function, it might be useful to add check for this for debugging.
 */
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
		     !bo->pin_count);
	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));

	return bo->tbo.offset;
}