/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"


int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it under the BO's reservation lock.
 */

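/**
 * radeon_update_memory_usage - track per-domain memory usage
 *
 * @bo: buffer object whose size is accounted
 * @mem_type: TTM memory type the BO occupies (TTM_PL_TT or TTM_PL_VRAM)
 * @sign: +1 to add the BO's size to the counter, -1 to subtract it
 */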
static void radeon_update_memory_usage(struct radeon_bo *bo,
				       unsigned mem_type, int sign)
{
	struct radeon_device *rdev = bo->rdev;
	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

	switch (mem_type) {
	case TTM_PL_TT:
		if (sign > 0)
			atomic64_add(size, &rdev->gtt_usage);
		else
			atomic64_sub(size, &rdev->gtt_usage);
		break;
	case TTM_PL_VRAM:
		if (sign > 0)
			atomic64_add(size, &rdev->vram_usage);
		else
			atomic64_sub(size, &rdev->vram_usage);
		break;
	}
}

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON(!list_empty(&bo->va));
	drm_gem_object_release(&bo->gem_base);
	kfree(bo);
}

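/**
 * radeon_ttm_bo_is_radeon_bo - check whether a TTM BO is a radeon BO
 *
 * @bo: TTM buffer object to check
 *
 * Returns true if the destroy callback matches radeon_ttm_bo_destroy.
 */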
bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

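/**
 * radeon_ttm_placement_from_domain - fill TTM placements from a domain mask
 *
 * @rbo: radeon BO whose placement list is rebuilt
 * @domain: mask of RADEON_GEM_DOMAIN_* flags
 *
 * Builds the VRAM/GTT/CPU placement entries in priority order and falls
 * back to cached system memory when no domain bit is set.
 */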
void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
						     TTM_PL_FLAG_UNCACHED |
						     TTM_PL_FLAG_VRAM;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
					     TTM_PL_FLAG_UNCACHED |
					     TTM_PL_FLAG_VRAM;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
			   (rbo->rdev->flags & RADEON_IS_AGP)) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_TT;
		}
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
		    rbo->rdev->flags & RADEON_IS_AGP) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_SYSTEM;
		}
	}
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
					     TTM_PL_FLAG_SYSTEM;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}
}

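/**
 * radeon_bo_create - allocate and initialize a radeon BO
 *
 * @rdev: radeon device
 * @size: size in bytes, rounded up to the page size
 * @byte_align: requested byte alignment
 * @kernel: true to create a ttm_bo_type_kernel allocation
 * @domain: initial RADEON_GEM_DOMAIN_* placement
 * @flags: RADEON_GEM_* creation flags
 * @sg: optional scatter/gather table for imported buffers
 * @resv: optional reservation object to reuse
 * @bo_ptr: where the resulting BO is stored on success
 */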
int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct reservation_object *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size_t acc_size;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
				       sizeof(struct radeon_bo));

	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
	                               RADEON_GEM_DOMAIN_GTT |
	                               RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~RADEON_GEM_GTT_WC;
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */

#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining

	DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
		      "better performance thanks to write-combining\n");
	bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, NULL,
			acc_size, sg, resv, &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}

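/**
 * radeon_bo_kmap - map a BO into kernel address space
 *
 * @bo: BO to map; the kernel virtual address is cached in bo->kptr
 * @ptr: optional location that receives the kernel virtual address
 */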
int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	int r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

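/**
 * radeon_bo_kunmap - tear down the kernel mapping created by radeon_bo_kmap
 *
 * @bo: BO to unmap; does nothing if the BO was never mapped
 */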
void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}

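/**
 * radeon_bo_ref - take an additional reference on a BO
 *
 * @bo: BO to reference, may be NULL
 */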
struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

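/**
 * radeon_bo_unref - drop a reference and clear the caller's pointer
 *
 * @bo: pointer to the BO pointer; set to NULL once the reference is dropped
 */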
void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;
	struct radeon_device *rdev;

	if ((*bo) == NULL)
		return;
	rdev = (*bo)->rdev;
	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

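/**
 * radeon_bo_pin_restricted - pin a BO into a domain below a maximum offset
 *
 * @bo: BO to pin
 * @domain: RADEON_GEM_DOMAIN_* to pin into
 * @max_offset: highest acceptable GPU offset, or 0 for no restriction
 * @gpu_addr: optional location that receives the pinned GPU address
 *
 * Nested pins only increment the pin count; the placement is validated
 * with TTM_PL_FLAG_NO_EVICT on the first pin.
 */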
int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		bo->pin_count = 1;
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

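/**
 * radeon_bo_unpin - drop one pin reference and make the BO evictable again
 *
 * @bo: BO to unpin; the placement is revalidated once the count hits zero
 */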
int radeon_bo_unpin(struct radeon_bo *bo)
{
	int r, i;

	if (!bo->pin_count) {
		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
	}
	return r;
}

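/**
 * radeon_bo_evict_vram - evict all buffers from VRAM
 *
 * @rdev: radeon device whose VRAM manager is emptied
 */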
int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (rdev->flags & RADEON_IS_IGP)) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
}

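/**
 * radeon_bo_force_delete - release GEM objects userspace left behind
 *
 * @rdev: radeon device; every BO still on the gem.objects list is
 * removed from the list and unreferenced
 */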
void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		mutex_lock(&rdev->ddev->struct_mutex);
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
			*((unsigned long *)&bo->gem_base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_unreference(&bo->gem_base);
		mutex_unlock(&rdev->ddev->struct_mutex);
	}
}

int radeon_bo_init(struct radeon_device *rdev)
{
	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
	}
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		rdev->mc.mc_vram_size >> 20,
		(unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
			rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
	arch_phys_wc_del(rdev->mc.vram_mtrr);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&rdev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.
	 *
	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct radeon_bo_list *lobj;
	struct list_head duplicates;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		struct radeon_bo *bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}

	list_for_each_entry(lobj, &duplicates, tv.head) {
		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
		lobj->tiling_flags = lobj->robj->tiling_flags;
	}

	return 0;
}

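/**
 * radeon_bo_get_surface_reg - bind the BO's tiling state to a surface register
 *
 * @bo: BO that needs a surface register; its reservation must be held
 *
 * Reuses the BO's existing register, grabs a free one, or steals one from
 * an unpinned BO when all registers are in use.
 */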
int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		reg = &rdev->surface_regs[bo->surface_reg];
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.mem.start << PAGE_SHIFT,
			       bo->tbo.num_pages << PAGE_SHIFT);
	return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

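/**
 * radeon_bo_set_tiling_flags - validate and store tiling parameters
 *
 * @bo: BO to update
 * @tiling_flags: RADEON_TILING_* flags (bank width/height, tile split, ...)
 * @pitch: surface pitch
 *
 * On CHIP_CEDAR and newer the encoded bank and tile-split fields are
 * range checked before the values are stored.
 */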
int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
				uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}

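/**
 * radeon_bo_check_tiling - keep the surface register in sync with placement
 *
 * @bo: BO to check
 * @has_moved: true if the BO just changed placement
 * @force_drop: true to drop the surface register unconditionally
 */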
int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
				bool force_drop)
{
	if (!force_drop)
		lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo,
			   struct ttm_mem_reg *new_mem)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);

	/* update statistics */
	if (!new_mem)
		return;

	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

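/**
 * radeon_bo_fault_reserve_notify - handle a CPU fault on an invisible BO
 *
 * @bo: faulting TTM BO
 *
 * If the BO lies outside CPU-visible VRAM it is revalidated into the
 * visible range, falling back to GTT when that fails with -ENOMEM.
 */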
int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, false, false);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &rbo->placement, false, false);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > rdev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

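/**
 * radeon_bo_wait - wait for the fences on a BO to signal
 *
 * @bo: BO to wait for
 * @mem_type: optional location that receives the current memory type
 * @no_wait: return without blocking if the BO is still busy
 */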
int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
	int r;

	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
	if (unlikely(r != 0))
		return r;
	if (mem_type)
		*mem_type = bo->tbo.mem.mem_type;

	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
	ttm_bo_unreserve(&bo->tbo);
	return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
                     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, &fence->base);
	else
		reservation_object_add_excl_fence(resv, &fence->base);
}