/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"

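/**
 * amdgpu_ttm_bo_destroy - TTM destroy callback for amdgpu BOs
 * @tbo: TTM buffer object being freed
 *
 * Called by TTM when the last reference to the buffer is dropped: unmaps the
 * kernel mapping, releases the GEM object, drops the parent reference,
 * removes the BO from the shadow list and frees the metadata and the BO
 * structure itself.
 */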
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
	struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);

	amdgpu_bo_kunmap(bo);

	drm_gem_object_release(&bo->gem_base);
	amdgpu_bo_unref(&bo->parent);
	if (!list_empty(&bo->shadow_list)) {
		mutex_lock(&adev->shadow_list_lock);
		list_del_init(&bo->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}
	kfree(bo->metadata);
	kfree(bo);
}

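/**
 * amdgpu_ttm_bo_is_amdgpu_bo - check if a buffer object belongs to amdgpu
 * @bo: TTM buffer object to check
 *
 * Returns true if @bo uses the amdgpu destroy callback and can therefore
 * safely be cast to a struct amdgpu_bo, false otherwise.
 */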
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &amdgpu_ttm_bo_destroy)
		return true;
	return false;
}

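/**
 * amdgpu_ttm_placement_from_domain - set TTM placements from an amdgpu domain
 * @abo: buffer object whose placement is updated
 * @domain: bitmask of AMDGPU_GEM_DOMAIN_* flags
 *
 * Translates the requested domains (VRAM, GTT, CPU, GDS, GWS, OA) and the
 * BO creation flags into a TTM placement list. Falls back to a cached
 * system placement when no domain bit is set.
 */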
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
	struct ttm_placement *placement = &abo->placement;
	struct ttm_place *places = abo->placements;
	u64 flags = abo->flags;
	u32 c = 0;

	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;

		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
			TTM_PL_FLAG_VRAM;

		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
			places[c].lpfn = visible_pfn;
		else
			places[c].flags |= TTM_PL_FLAG_TOPDOWN;

		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
		places[c].fpfn = 0;
		if (flags & AMDGPU_GEM_CREATE_SHADOW)
			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
		else
			places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_TT;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_CPU) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_SYSTEM;
		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
			places[c].flags |= TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED;
		else
			places[c].flags |= TTM_PL_FLAG_CACHED;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GDS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GDS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_GWS) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_GWS;
		c++;
	}

	if (domain & AMDGPU_GEM_DOMAIN_OA) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_FLAG_UNCACHED | AMDGPU_PL_FLAG_OA;
		c++;
	}

	if (!c) {
		places[c].fpfn = 0;
		places[c].lpfn = 0;
		places[c].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
		c++;
	}

	placement->num_placement = c;
	placement->placement = places;

	placement->num_busy_placement = c;
	placement->busy_placement = places;
}

/**
 * amdgpu_bo_create_reserved - create reserved BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: resulting BO
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use, and returns it still
 * reserved.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
			      unsigned long size, int align,
			      u32 domain, struct amdgpu_bo **bo_ptr,
			      u64 *gpu_addr, void **cpu_addr)
{
	bool free = false;
	int r;

	if (!*bo_ptr) {
		r = amdgpu_bo_create(adev, size, align, true, domain,
				     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
				     AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
				     NULL, NULL, 0, bo_ptr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
				r);
			return r;
		}
		free = true;
	}

	r = amdgpu_bo_reserve(*bo_ptr, false);
	if (r) {
		dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
		goto error_free;
	}

	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
	if (r) {
		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
		goto error_unreserve;
	}

	if (cpu_addr) {
		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
			goto error_unreserve;
		}
	}

	return 0;

error_unreserve:
	amdgpu_bo_unreserve(*bo_ptr);

error_free:
	if (free)
		amdgpu_bo_unref(bo_ptr);

	return r;
}

/**
 * amdgpu_bo_create_kernel - create BO for kernel use
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @align: alignment for the new BO
 * @domain: where to place it
 * @bo_ptr: resulting BO
 * @gpu_addr: GPU addr of the pinned BO
 * @cpu_addr: optional CPU address mapping
 *
 * Allocates and pins a BO for kernel internal use.
 *
 * Returns 0 on success, negative error code otherwise.
 */
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
			    unsigned long size, int align,
			    u32 domain, struct amdgpu_bo **bo_ptr,
			    u64 *gpu_addr, void **cpu_addr)
{
	int r;

	r = amdgpu_bo_create_reserved(adev, size, align, domain, bo_ptr,
				      gpu_addr, cpu_addr);

	if (r)
		return r;

	amdgpu_bo_unreserve(*bo_ptr);

	return 0;
}

/**
 * amdgpu_bo_free_kernel - free BO for kernel use
 *
 * @bo: amdgpu BO to free
 *
 * Unmaps, unpins and unreferences a BO used for kernel internal use.
 */
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
			   void **cpu_addr)
{
	if (*bo == NULL)
		return;

	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
		if (cpu_addr)
			amdgpu_bo_kunmap(*bo);

		amdgpu_bo_unpin(*bo);
		amdgpu_bo_unreserve(*bo);
	}
	amdgpu_bo_unref(bo);

	if (gpu_addr)
		*gpu_addr = 0;

	if (cpu_addr)
		*cpu_addr = NULL;
}

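/**
 * amdgpu_bo_do_create - allocate a single buffer object
 *
 * @adev: amdgpu device object
 * @size: size for the new BO
 * @byte_align: alignment for the new BO
 * @kernel: true for a kernel (pinned, uninterruptible) allocation
 * @domain: where to place it
 * @flags: AMDGPU_GEM_CREATE_* creation flags
 * @sg: optional scatter/gather table for imported buffers
 * @resv: optional reservation object to use for the BO
 * @init_value: fill value used with AMDGPU_GEM_CREATE_VRAM_CLEARED
 * @bo_ptr: resulting BO
 *
 * Internal helper that allocates a BO without creating its shadow.
 *
 * Returns 0 on success, negative error code otherwise.
 */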
static int amdgpu_bo_do_create(struct amdgpu_device *adev,
			       unsigned long size, int byte_align,
			       bool kernel, u32 domain, u64 flags,
			       struct sg_table *sg,
			       struct reservation_object *resv,
			       uint64_t init_value,
			       struct amdgpu_bo **bo_ptr)
{
	struct amdgpu_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align;
	u64 initial_bytes_moved, bytes_moved;
	size_t acc_size;
	int r;

	page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	INIT_LIST_HEAD(&bo->shadow_list);
	INIT_LIST_HEAD(&bo->va);
	bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT |
					 AMDGPU_GEM_DOMAIN_CPU |
					 AMDGPU_GEM_DOMAIN_GDS |
					 AMDGPU_GEM_DOMAIN_GWS |
					 AMDGPU_GEM_DOMAIN_OA);
	bo->allowed_domains = bo->preferred_domains;
	if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
		bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;

	bo->flags = flags;

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */

#ifndef CONFIG_COMPILE_TEST
#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining
#endif

	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
			      "better performance thanks to write-combining\n");
	bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#else
	/* For architectures that don't support WC memory,
	 * mask out the WC flag from the BO
	 */
	if (!drm_arch_can_wc_memory())
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC;
#endif

	bo->tbo.bdev = &adev->mman.bdev;
	amdgpu_ttm_placement_from_domain(bo, domain);

	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
	/* Kernel allocations are uninterruptible */
	r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
				 &bo->placement, page_align, !kernel, NULL,
				 acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
	if (unlikely(r != 0))
		return r;

	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
		      initial_bytes_moved;
	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
		amdgpu_cs_report_moved_bytes(adev, bytes_moved, bytes_moved);
	else
		amdgpu_cs_report_moved_bytes(adev, bytes_moved, 0);

	if (kernel)
		bo->tbo.priority = 1;

	if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
	    bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
		struct dma_fence *fence;

		r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
		if (unlikely(r))
			goto fail_unreserve;

		amdgpu_bo_fence(bo, fence, false);
		dma_fence_put(bo->tbo.moving);
		bo->tbo.moving = dma_fence_get(fence);
		dma_fence_put(fence);
	}
	if (!resv)
		amdgpu_bo_unreserve(bo);
	*bo_ptr = bo;

	trace_amdgpu_bo_create(bo);

	/* Treat CPU_ACCESS_REQUIRED only as a hint if given by UMD */
	if (type == ttm_bo_type_device)
		bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	return 0;

fail_unreserve:
	if (!resv)
		ww_mutex_unlock(&bo->tbo.resv->lock);
	amdgpu_bo_unref(&bo);
	return r;
}

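/**
 * amdgpu_bo_create_shadow - create a GTT shadow for a buffer object
 *
 * @adev: amdgpu device object
 * @size: size of the parent BO
 * @byte_align: alignment for the shadow BO
 * @bo: parent BO that gets the shadow
 *
 * Allocates a GTT BO that mirrors @bo so its contents can be backed up and
 * restored later (e.g. after VRAM contents are lost), and links it into the
 * device shadow list.
 *
 * Returns 0 on success, negative error code otherwise.
 */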
static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
				   unsigned long size, int byte_align,
				   struct amdgpu_bo *bo)
{
	int r;

	if (bo->shadow)
		return 0;

	r = amdgpu_bo_do_create(adev, size, byte_align, true,
				AMDGPU_GEM_DOMAIN_GTT,
				AMDGPU_GEM_CREATE_CPU_GTT_USWC |
				AMDGPU_GEM_CREATE_SHADOW,
				NULL, bo->tbo.resv, 0,
				&bo->shadow);
	if (!r) {
		bo->shadow->parent = amdgpu_bo_ref(bo);
		mutex_lock(&adev->shadow_list_lock);
		list_add_tail(&bo->shadow_list, &adev->shadow_list);
		mutex_unlock(&adev->shadow_list_lock);
	}

	return r;
}

/* init_value will only take effect when flags contains
 * AMDGPU_GEM_CREATE_VRAM_CLEARED.
 */
int amdgpu_bo_create(struct amdgpu_device *adev,
		     unsigned long size, int byte_align,
		     bool kernel, u32 domain, u64 flags,
		     struct sg_table *sg,
		     struct reservation_object *resv,
		     uint64_t init_value,
		     struct amdgpu_bo **bo_ptr)
{
	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
	int r;

	r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
				parent_flags, sg, resv, init_value, bo_ptr);
	if (r)
		return r;

	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
		if (!resv)
			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
							NULL));

		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));

		if (!resv)
			reservation_object_unlock((*bo_ptr)->tbo.resv);

		if (r)
			amdgpu_bo_unref(bo_ptr);
	}

	return r;
}

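/**
 * amdgpu_bo_backup_to_shadow - copy a buffer object into its shadow
 *
 * @adev: amdgpu device object
 * @ring: ring used for the copy
 * @bo: BO to back up (must have a shadow)
 * @resv: reservation object to sync the copy to
 * @fence: returned fence of the copy operation
 * @direct: submit the copy directly instead of through the scheduler
 *
 * Returns 0 on success, negative error code otherwise.
 */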
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
			       struct amdgpu_ring *ring,
			       struct amdgpu_bo *bo,
			       struct reservation_object *resv,
			       struct dma_fence **fence,
			       bool direct)

{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct, false);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

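/**
 * amdgpu_bo_validate - validate a buffer object into its preferred domains
 *
 * @bo: BO to validate
 *
 * Revalidates an unpinned BO into its preferred domains, falling back to the
 * allowed domains if the preferred placement runs out of memory. Pinned BOs
 * are left untouched.
 *
 * Returns 0 on success, negative error code otherwise.
 */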
int amdgpu_bo_validate(struct amdgpu_bo *bo)
{
	uint32_t domain;
	int r;

	if (bo->pin_count)
		return 0;

	domain = bo->preferred_domains;

retry:
	amdgpu_ttm_placement_from_domain(bo, domain);
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
		domain = bo->allowed_domains;
		goto retry;
	}

	return r;
}

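/**
 * amdgpu_bo_restore_from_shadow - copy a shadow back into its buffer object
 *
 * @adev: amdgpu device object
 * @ring: ring used for the copy
 * @bo: BO to restore (must have a shadow)
 * @resv: reservation object to sync the copy to
 * @fence: returned fence of the copy operation
 * @direct: submit the copy directly instead of through the scheduler
 *
 * Returns 0 on success, negative error code otherwise.
 */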
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
				  struct amdgpu_ring *ring,
				  struct amdgpu_bo *bo,
				  struct reservation_object *resv,
				  struct dma_fence **fence,
				  bool direct)

{
	struct amdgpu_bo *shadow = bo->shadow;
	uint64_t bo_addr, shadow_addr;
	int r;

	if (!shadow)
		return -EINVAL;

	bo_addr = amdgpu_bo_gpu_offset(bo);
	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);

	r = reservation_object_reserve_shared(bo->tbo.resv);
	if (r)
		goto err;

	r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
			       amdgpu_bo_size(bo), resv, fence,
			       direct, false);
	if (!r)
		amdgpu_bo_fence(bo, *fence, true);

err:
	return r;
}

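/**
 * amdgpu_bo_kmap - map a buffer object into kernel address space
 *
 * @bo: BO to map, must be reserved
 * @ptr: optional location where the kernel virtual address is stored
 *
 * Waits for any pending exclusive fence, then creates (or reuses) a kernel
 * mapping of the BO. Not allowed for BOs created with
 * AMDGPU_GEM_CREATE_NO_CPU_ACCESS.
 *
 * Returns 0 on success, negative error code otherwise.
 */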
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
	void *kptr;
	long r;

	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
		return -EPERM;

	kptr = amdgpu_bo_kptr(bo);
	if (kptr) {
		if (ptr)
			*ptr = kptr;
		return 0;
	}

	r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
						MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;

	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r)
		return r;

	if (ptr)
		*ptr = amdgpu_bo_kptr(bo);

	return 0;
}

void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
{
	bool is_iomem;

	return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
}

void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
	if (bo->kmap.bo)
		ttm_bo_kunmap(&bo->kmap);
}

struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
	struct ttm_buffer_object *tbo;

	if ((*bo) == NULL)
		return;

	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

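/**
 * amdgpu_bo_pin_restricted - pin a buffer object within an address range
 *
 * @bo: BO to pin, must be reserved
 * @domain: domain to pin the BO into
 * @min_offset: lowest acceptable GPU offset of the BO
 * @max_offset: highest acceptable GPU offset, 0 for no restriction
 * @gpu_addr: optional location where the pinned GPU address is stored
 *
 * Pins the BO into @domain, restricted to the given offset range, and
 * updates the device pin size accounting. Pinning an already pinned BO only
 * increases the pin count.
 *
 * Returns 0 on success, negative error code otherwise.
 */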
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
			     u64 min_offset, u64 max_offset,
			     u64 *gpu_addr)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r, i;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
		return -EPERM;

	if (WARN_ON_ONCE(min_offset > max_offset))
		return -EINVAL;

	/* A shared bo cannot be migrated to VRAM */
	if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
		return -EINVAL;

	if (bo->pin_count) {
		uint32_t mem_type = bo->tbo.mem.mem_type;

		if (domain != amdgpu_mem_type_to_domain(mem_type))
			return -EINVAL;

		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = amdgpu_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
			WARN_ON_ONCE(max_offset <
				     (amdgpu_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}

	bo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
	/* force to pin into visible video ram */
	if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
		bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	amdgpu_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		unsigned fpfn, lpfn;

		fpfn = min_offset >> PAGE_SHIFT;
		lpfn = max_offset >> PAGE_SHIFT;

		if (fpfn > bo->placements[i].fpfn)
			bo->placements[i].fpfn = fpfn;
		if (!bo->placements[i].lpfn ||
		    (lpfn && lpfn < bo->placements[i].lpfn))
			bo->placements[i].lpfn = lpfn;
		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p pin failed\n", bo);
		goto error;
	}

	r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p bind failed\n", bo);
		goto error;
	}

	bo->pin_count = 1;
	if (gpu_addr != NULL)
		*gpu_addr = amdgpu_bo_gpu_offset(bo);

	domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
		adev->vram_pin_size += amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size += amdgpu_bo_size(bo);
	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		adev->gart_pin_size += amdgpu_bo_size(bo);
	}

error:
	return r;
}

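/**
 * amdgpu_bo_pin - pin a buffer object without placement restrictions
 *
 * @bo: BO to pin, must be reserved
 * @domain: domain to pin the BO into
 * @gpu_addr: optional location where the pinned GPU address is stored
 *
 * Convenience wrapper around amdgpu_bo_pin_restricted() with no offset
 * limits.
 *
 * Returns 0 on success, negative error code otherwise.
 */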
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
{
	return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
}

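/**
 * amdgpu_bo_unpin - unpin a buffer object
 *
 * @bo: BO to unpin, must be reserved
 *
 * Decreases the pin count; once it reaches zero the no-evict flag is
 * cleared, the BO is revalidated and the device pin size accounting is
 * updated.
 *
 * Returns 0 on success, negative error code otherwise.
 */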
int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r, i;

	if (!bo->pin_count) {
		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (unlikely(r)) {
		dev_err(adev->dev, "%p validate failed for unpin\n", bo);
		goto error;
	}

	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
		adev->vram_pin_size -= amdgpu_bo_size(bo);
		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
			adev->invisible_pin_size -= amdgpu_bo_size(bo);
	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
		adev->gart_pin_size -= amdgpu_bo_size(bo);
	}

error:
	return r;
}

int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (adev->flags & AMD_IS_APU)) {
		/* Useless to evict on IGP chips */
		return 0;
	}
	return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}

static const char *amdgpu_vram_names[] = {
	"UNKNOWN",
	"GDDR1",
	"DDR2",
	"GDDR3",
	"GDDR4",
	"GDDR5",
	"HBM",
	"DDR3"
};

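/**
 * amdgpu_bo_init - initialize memory management
 *
 * @adev: amdgpu device object
 *
 * Sets up write-combined CPU access to the VRAM aperture and initializes
 * the TTM memory manager.
 *
 * Returns 0 on success, negative error code otherwise.
 */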
int amdgpu_bo_init(struct amdgpu_device *adev)
{
	/* reserve PAT memory space to WC for VRAM */
	arch_io_reserve_memtype_wc(adev->mc.aper_base,
				   adev->mc.aper_size);

	/* Add an MTRR for the VRAM */
	adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
					      adev->mc.aper_size);
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		 adev->mc.mc_vram_size >> 20,
		 (unsigned long long)adev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %s\n",
		 adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
	return amdgpu_ttm_init(adev);
}

void amdgpu_bo_fini(struct amdgpu_device *adev)
{
	amdgpu_ttm_fini(adev);
	arch_phys_wc_del(adev->mc.vram_mtrr);
	arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
}

int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
			     struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}

int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	if (adev->family <= AMDGPU_FAMILY_CZ &&
	    AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT) > 6)
		return -EINVAL;

	bo->tiling_flags = tiling_flags;
	return 0;
}

void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
}

int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
			    uint32_t metadata_size, uint64_t flags)
{
	void *buffer;

	if (!metadata_size) {
		if (bo->metadata_size) {
			kfree(bo->metadata);
			bo->metadata = NULL;
			bo->metadata_size = 0;
		}
		return 0;
	}

	if (metadata == NULL)
		return -EINVAL;

	buffer = kmemdup(metadata, metadata_size, GFP_KERNEL);
	if (buffer == NULL)
		return -ENOMEM;

	kfree(bo->metadata);
	bo->metadata_flags = flags;
	bo->metadata = buffer;
	bo->metadata_size = metadata_size;

	return 0;
}

int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
			   size_t buffer_size, uint32_t *metadata_size,
			   uint64_t *flags)
{
	if (!buffer && !metadata_size)
		return -EINVAL;

	if (buffer) {
		if (buffer_size < bo->metadata_size)
			return -EINVAL;

		if (bo->metadata_size)
			memcpy(buffer, bo->metadata, bo->metadata_size);
	}

	if (metadata_size)
		*metadata_size = bo->metadata_size;
	if (flags)
		*flags = bo->metadata_flags;

	return 0;
}

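/**
 * amdgpu_bo_move_notify - notification about a BO being moved
 *
 * @bo: TTM buffer object that is moved
 * @evict: true if the move is an eviction
 * @new_mem: new memory placement of the BO (may be NULL)
 *
 * Invalidates the VM mappings of the BO, drops its kernel mapping and
 * updates the eviction and move statistics.
 */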
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
			   bool evict,
			   struct ttm_mem_reg *new_mem)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	struct ttm_mem_reg *old_mem = &bo->mem;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return;

	abo = ttm_to_amdgpu_bo(bo);
	amdgpu_vm_bo_invalidate(adev, abo, evict);

	amdgpu_bo_kunmap(abo);

	/* remember the eviction */
	if (evict)
		atomic64_inc(&adev->num_evictions);

	/* update statistics */
	if (!new_mem)
		return;

	/* move_notify is called before move happens */
	trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}

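/**
 * amdgpu_bo_fault_reserve_notify - notification about a CPU fault on a BO
 *
 * @bo: TTM buffer object that is being faulted in
 *
 * Called with the BO reserved when the CPU faults on it. If the BO lies
 * outside of the CPU visible part of VRAM it is revalidated into visible
 * VRAM, with GTT as the busy fallback placement.
 *
 * Returns 0 on success, negative error code otherwise.
 */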
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
	struct amdgpu_bo *abo;
	unsigned long offset, size;
	int r;

	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
		return 0;

	abo = ttm_to_amdgpu_bo(bo);

	/* Remember that this BO was accessed by the CPU */
	abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;

	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= adev->mc.visible_vram_size)
		return 0;

	/* Can't move a pinned BO to visible VRAM */
	if (abo->pin_count > 0)
		return -EINVAL;

	/* hurrah, the memory is not visible! */
	atomic64_inc(&adev->num_vram_cpu_page_faults);
	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
					 AMDGPU_GEM_DOMAIN_GTT);

	/* Avoid costly evictions; only set GTT as a busy placement */
	abo->placement.num_busy_placement = 1;
	abo->placement.busy_placement = &abo->placements[1];

	r = ttm_bo_validate(bo, &abo->placement, false, false);
	if (unlikely(r != 0))
		return r;

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if (bo->mem.mem_type == TTM_PL_VRAM &&
	    (offset + size) > adev->mc.visible_vram_size)
		return -EINVAL;

A
}

/**
 * amdgpu_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
		     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, fence);
	else
		reservation_object_add_excl_fence(resv, fence);
}

/**
 * amdgpu_bo_gpu_offset - return GPU offset of bo
 * @bo:	amdgpu object for which we query the offset
 *
 * Returns current GPU offset of the object.
 *
 * Note: object should either be pinned or reserved when calling this
 * function, it might be useful to add check for this for debugging.
 */
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT &&
		     !amdgpu_ttm_is_bound(bo->tbo.ttm));
	WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) &&
		     !bo->pin_count);
	WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET);
	WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM &&
		     !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS));

	return bo->tbo.offset;
}