/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"


int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it.
 */

static void radeon_update_memory_usage(struct radeon_bo *bo,
				       unsigned mem_type, int sign)
{
	struct radeon_device *rdev = bo->rdev;
	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

	switch (mem_type) {
	case TTM_PL_TT:
		if (sign > 0)
			atomic64_add(size, &rdev->gtt_usage);
		else
			atomic64_sub(size, &rdev->gtt_usage);
		break;
	case TTM_PL_VRAM:
		if (sign > 0)
			atomic64_add(size, &rdev->vram_usage);
		else
			atomic64_sub(size, &rdev->vram_usage);
		break;
	}
}
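/* Note (added cross-reference): the vram_usage counter updated here feeds
 * radeon_bo_get_threshold_for_moves() further down, which throttles buffer
 * moves during command submission based on current VRAM use.
 */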

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
	radeon_mn_unregister(bo);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON(!list_empty(&bo->va));
	drm_gem_object_release(&bo->gem_base);
	kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
						     TTM_PL_FLAG_UNCACHED |
						     TTM_PL_FLAG_VRAM;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
					     TTM_PL_FLAG_UNCACHED |
					     TTM_PL_FLAG_VRAM;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
			   (rbo->rdev->flags & RADEON_IS_AGP)) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_TT;
		}
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
		    rbo->rdev->flags & RADEON_IS_AGP) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_SYSTEM;
		}
	}
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
					     TTM_PL_FLAG_SYSTEM;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}

	/*
	 * Use two-ended allocation depending on the buffer size to
	 * improve fragmentation quality.
	 * 512kb was measured as the most optimal number.
	 */
	if (rbo->tbo.mem.size > 512 * 1024) {
		for (i = 0; i < c; i++) {
			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
		}
	}
}
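/* Illustrative note: the placements array is filled in priority order, so
 * for e.g. domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT a
 * write-combined BO gets its VRAM placement(s) first and a GTT placement as
 * fallback; TTM then tries the entries in that order when validating.
 */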

int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct reservation_object *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size_t acc_size;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
				       sizeof(struct radeon_bo));

	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
	                               RADEON_GEM_DOMAIN_GTT |
	                               RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, NULL,
			acc_size, sg, resv, &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}

int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	int r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}
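/* Sketch of a typical CPU-access sequence for a kernel BO that is already
 * reserved and pinned (error handling omitted; src/len are caller data):
 *
 *	void *ptr;
 *
 *	r = radeon_bo_kmap(bo, &ptr);
 *	if (!r) {
 *		memcpy(ptr, src, len);
 *		radeon_bo_kunmap(bo);
 *	}
 */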

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;
	struct radeon_device *rdev;

	if ((*bo) == NULL)
		return;
	rdev = (*bo)->rdev;
	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		bo->pin_count = 1;
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}
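/* Sketch of the usual pinning pattern seen in callers (the BO must be
 * reserved around pin/unpin, and stay pinned while gpu_addr is in use):
 *
 *	r = radeon_bo_reserve(bo, false);
 *	if (!r) {
 *		r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
 *		radeon_bo_unreserve(bo);
 *	}
 */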

int radeon_bo_unpin(struct radeon_bo *bo)
{
	int r, i;

	if (!bo->pin_count) {
		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
	}
	return r;
}

int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (rdev->flags & RADEON_IS_IGP)) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
}

void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		mutex_lock(&rdev->ddev->struct_mutex);
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
			*((unsigned long *)&bo->gem_base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_unreference(&bo->gem_base);
		mutex_unlock(&rdev->ddev->struct_mutex);
	}
}

int radeon_bo_init(struct radeon_device *rdev)
{
	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
	}
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		rdev->mc.mc_vram_size >> 20,
		(unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
			rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
	arch_phys_wc_del(rdev->mc.vram_mtrr);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&rdev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}
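/* Worked example with round numbers: on a 2048 MB VRAM board with 512 MB
 * currently in use, half_vram is 1024 MB, half_free_vram is 512 MB and the
 * per-IB threshold is 256 MB.  At 50% usage or more the max() above clamps
 * the threshold to the 1 MB minimum.
 */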

int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct radeon_cs_reloc *lobj;
	struct radeon_bo *bo;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	r = ttm_eu_reserve_buffers(ticket, head, true);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}
	return 0;
}

int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
			     struct vm_area_struct *vma)
{
	return ttm_fbdev_mmap(vma, &bo->tbo);
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		reg = &rdev->surface_regs[bo->surface_reg];
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.mem.start << PAGE_SHIFT,
			       bo->tbo.num_pages << PAGE_SHIFT);
	return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
				uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
				bool force_drop)
{
	if (!force_drop)
		lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo,
			   struct ttm_mem_reg *new_mem)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);

	/* update statistics */
	if (!new_mem)
		return;

	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, false, false);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &rbo->placement, false, false);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > rdev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
	int r;

	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
	if (unlikely(r != 0))
		return r;
	if (mem_type)
		*mem_type = bo->tbo.mem.mem_type;

	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
	ttm_bo_unreserve(&bo->tbo);
	return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
                     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, &fence->base);
	else
		reservation_object_add_excl_fence(resv, &fence->base);
}