/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"

int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions are calling it.
 */
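
/*
 * Typical caller pattern (sketch; radeon_bo_set_tiling_flags() below
 * follows it):
 *
 *	r = radeon_bo_reserve(bo, false);
 *	if (unlikely(r != 0))
 *		return r;
 *	... touch or validate the BO ...
 *	radeon_bo_unreserve(bo);
 */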

static void radeon_update_memory_usage(struct radeon_bo *bo,
				       unsigned mem_type, int sign)
{
	struct radeon_device *rdev = bo->rdev;
	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

	switch (mem_type) {
	case TTM_PL_TT:
		if (sign > 0)
			atomic64_add(size, &rdev->gtt_usage);
		else
			atomic64_sub(size, &rdev->gtt_usage);
		break;
	case TTM_PL_VRAM:
		if (sign > 0)
			atomic64_add(size, &rdev->vram_usage);
		else
			atomic64_sub(size, &rdev->vram_usage);
		break;
	}
}

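/* TTM destroy callback: drop the memory-usage accounting, the MMU notifier
 * registration, the GEM bookkeeping and the surface register, then free the BO.
 */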
static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
	radeon_mn_unregister(bo);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON(!list_empty(&bo->va));
	drm_gem_object_release(&bo->gem_base);
	kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

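/**
 * radeon_ttm_placement_from_domain - fill TTM placements from a radeon domain mask
 * @rbo: buffer object to set up
 * @domain: mask of RADEON_GEM_DOMAIN_VRAM, _GTT and _CPU
 *
 * Translates the requested GEM domains (plus the BO's caching flags) into
 * the TTM placement list used for validation. Falls back to cached system
 * memory when no domain bit is set.
 */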
void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.fpfn = 0;
	rbo->placement.lpfn = 0;
	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM)
		rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
					TTM_PL_FLAG_VRAM;
	if (domain & RADEON_GEM_DOMAIN_GTT) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT;
		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
			   (rbo->rdev->flags & RADEON_IS_AGP)) {
			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;
		} else {
			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
		}
	}
	if (domain & RADEON_GEM_DOMAIN_CPU) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM;
		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
			   (rbo->rdev->flags & RADEON_IS_AGP)) {
			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;
		} else {
			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
		}
	}
	if (!c)
		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	/*
	 * Use two-ended allocation depending on the buffer size to
	 * improve fragmentation quality. 512KB was measured as the
	 * best threshold.
	 */
	if (rbo->tbo.mem.size > 512 * 1024) {
		for (i = 0; i < c; i++) {
			rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN;
		}
	}
}

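/**
 * radeon_bo_create - allocate and initialize a radeon buffer object
 * @rdev: radeon device the BO belongs to
 * @size: requested size in bytes (rounded up to a page multiple)
 * @byte_align: byte alignment of the buffer
 * @kernel: true for kernel-internal allocations (uses the uninterruptible
 *	    kernel BO type)
 * @domain: initial RADEON_GEM_DOMAIN_* placement
 * @flags: RADEON_GEM_* caching flags
 * @sg: optional scatter/gather table for imported buffers
 * @bo_ptr: where to store the resulting BO on success
 *
 * Returns 0 on success, negative error code on failure.
 */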
int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel, u32 domain,
		     u32 flags, struct sg_table *sg, struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size_t acc_size;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
				       sizeof(struct radeon_bo));

	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
	                               RADEON_GEM_DOMAIN_GTT |
	                               RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, NULL,
			acc_size, sg, &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}

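/**
 * radeon_bo_kmap - map a buffer object into kernel address space
 * @bo: buffer object to map (reserved by the caller, per the rule above)
 * @ptr: optional location to store the kernel virtual address
 *
 * Repeated calls reuse the existing mapping. Returns 0 on success or a
 * negative error code.
 */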
int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	int r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;
	struct radeon_device *rdev;

	if ((*bo) == NULL)
		return;
	rdev = (*bo)->rdev;
	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

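/**
 * radeon_bo_pin_restricted - pin a BO into a domain below a maximum offset
 * @bo: buffer object to pin
 * @domain: RADEON_GEM_DOMAIN_* to pin into
 * @max_offset: highest acceptable GPU offset (0 means no restriction)
 * @gpu_addr: optional location to return the pinned GPU offset
 *
 * Increments the pin count if the BO is already pinned, otherwise validates
 * it into @domain with TTM_PL_FLAG_NO_EVICT set. Userptr BOs cannot be
 * pinned and return -EPERM.
 */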
int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	radeon_ttm_placement_from_domain(bo, domain);
	if (domain == RADEON_GEM_DOMAIN_VRAM) {
		/* force to pin into visible video ram */
		bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
	}
	if (max_offset) {
		u64 lpfn = max_offset >> PAGE_SHIFT;

		if (!bo->placement.lpfn)
			bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT;

		if (lpfn < bo->placement.lpfn)
			bo->placement.lpfn = lpfn;
	}
	for (i = 0; i < bo->placement.num_placement; i++)
		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		bo->pin_count = 1;
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

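/**
 * radeon_bo_unpin - decrement a BO's pin count and allow eviction again
 * @bo: buffer object to unpin
 *
 * Only when the pin count drops to zero is TTM_PL_FLAG_NO_EVICT cleared and
 * the BO revalidated; the pin-size accounting is updated accordingly.
 */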
int radeon_bo_unpin(struct radeon_bo *bo)
{
	int r, i;

	if (!bo->pin_count) {
		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++)
		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
	}
	return r;
}

int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (rdev->flags & RADEON_IS_IGP)) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
}

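/**
 * radeon_bo_force_delete - force-free GEM objects userspace leaked
 * @rdev: radeon device to clean up
 *
 * Complains about and force-frees any BO still on the device's GEM object
 * list.
 */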
void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		mutex_lock(&rdev->ddev->struct_mutex);
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
			*((unsigned long *)&bo->gem_base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_unreference(&bo->gem_base);
		mutex_unlock(&rdev->ddev->struct_mutex);
	}
}

int radeon_bo_init(struct radeon_device *rdev)
{
	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
	}
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		rdev->mc.mc_vram_size >> 20,
		(unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
			rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
	arch_phys_wc_del(rdev->mc.vram_mtrr);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&rdev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

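	/*
	 * For example: with 2048 MiB of VRAM and 512 MiB currently in use,
	 * half_vram is 1024 MiB and half_free_vram is 512 MiB, so the
	 * threshold for this IB is 256 MiB. Once usage reaches half of VRAM
	 * the threshold bottoms out at the 1 MiB floor below.
	 */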
	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

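/**
 * radeon_bo_list_validate - reserve and validate all BOs of a CS
 * @rdev: radeon device
 * @ticket: ww_acquire ticket used for the reservation
 * @head: list of radeon_cs_reloc entries to validate
 * @ring: ring index the command submission targets
 *
 * Reserves every BO on @head and validates it into its preferred domain,
 * falling back to the allowed domains (and limiting the number of bytes
 * moved per IB, see radeon_bo_get_threshold_for_moves()) when necessary.
 */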
int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct radeon_cs_reloc *lobj;
	struct radeon_bo *bo;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	r = ttm_eu_reserve_buffers(ticket, head);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((lobj->allowed_domains & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}
	return 0;
}

int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
			 struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}

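/* Assign (or steal) one of the RADEON_GEM_MAX_SURFACES surface registers so
 * the hardware applies the BO's tiling; a no-op for untiled BOs.
 */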
int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		reg = &rdev->surface_regs[bo->surface_reg];
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.mem.start << PAGE_SHIFT,
			       bo->tbo.num_pages << PAGE_SHIFT);
	return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
				uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
				bool force_drop)
{
	if (!force_drop)
		lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

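/* TTM move notify callback: drop the surface register, invalidate VM
 * mappings and update the per-domain memory usage statistics.
 */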
void radeon_bo_move_notify(struct ttm_buffer_object *bo,
			   struct ttm_mem_reg *new_mem)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);

	/* update statistics */
	if (!new_mem)
		return;

	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

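/* TTM fault callback: make sure a faulting VRAM BO is CPU accessible by
 * revalidating it into visible VRAM, or into GTT as a last resort.
 */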
int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size;
	int r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
	r = ttm_bo_validate(bo, &rbo->placement, false, false);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &rbo->placement, false, false);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > rdev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

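/**
 * radeon_bo_wait - wait for the pending work on a BO to finish
 * @bo: buffer object to wait for
 * @mem_type: optional location to return the BO's current memory type
 * @no_wait: if true, only test instead of blocking
 *
 * Returns 0 on success or a negative error code (e.g. -EBUSY when @no_wait
 * is set and the BO is still busy).
 */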
int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
	int r;

	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
	if (unlikely(r != 0))
		return r;
	spin_lock(&bo->tbo.bdev->fence_lock);
	if (mem_type)
		*mem_type = bo->tbo.mem.mem_type;
	if (bo->tbo.sync_obj)
		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
	spin_unlock(&bo->tbo.bdev->fence_lock);
	ttm_bo_unreserve(&bo->tbo);
	return r;
}