/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"


int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it.
 */
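/*
 * A minimal sketch of that pattern (illustrative only, not part of the
 * driver; error handling trimmed):
 *
 *	r = radeon_bo_reserve(bo, false);
 *	if (likely(r == 0)) {
 *		... access or modify BO state (kmap, pin, tiling, ...) ...
 *		radeon_bo_unreserve(bo);
 *	}
 */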

static void radeon_update_memory_usage(struct radeon_bo *bo,
				       unsigned mem_type, int sign)
{
	struct radeon_device *rdev = bo->rdev;
	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

	switch (mem_type) {
	case TTM_PL_TT:
		if (sign > 0)
			atomic64_add(size, &rdev->gtt_usage);
		else
			atomic64_sub(size, &rdev->gtt_usage);
		break;
	case TTM_PL_VRAM:
		if (sign > 0)
			atomic64_add(size, &rdev->vram_usage);
		else
			atomic64_sub(size, &rdev->vram_usage);
		break;
	}
}

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
	struct radeon_bo *bo;

	bo = container_of(tbo, struct radeon_bo, tbo);

	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);

	mutex_lock(&bo->rdev->gem.mutex);
	list_del_init(&bo->list);
	mutex_unlock(&bo->rdev->gem.mutex);
	radeon_bo_clear_surface_reg(bo);
	WARN_ON(!list_empty(&bo->va));
	drm_gem_object_release(&bo->gem_base);
	kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
	if (bo->destroy == &radeon_ttm_bo_destroy)
		return true;
	return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
	u32 c = 0, i;

	rbo->placement.placement = rbo->placements;
	rbo->placement.busy_placement = rbo->placements;
	if (domain & RADEON_GEM_DOMAIN_VRAM) {
		/* Try placing BOs which don't need CPU access outside of the
		 * CPU accessible part of VRAM
		 */
		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
			rbo->placements[c].fpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
						     TTM_PL_FLAG_UNCACHED |
						     TTM_PL_FLAG_VRAM;
		}

		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
					     TTM_PL_FLAG_UNCACHED |
					     TTM_PL_FLAG_VRAM;
	}

	if (domain & RADEON_GEM_DOMAIN_GTT) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
			   (rbo->rdev->flags & RADEON_IS_AGP)) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_TT;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_TT;
		}
	}

	if (domain & RADEON_GEM_DOMAIN_CPU) {
		if (rbo->flags & RADEON_GEM_GTT_UC) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;

		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
		    rbo->rdev->flags & RADEON_IS_AGP) {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
				TTM_PL_FLAG_UNCACHED |
				TTM_PL_FLAG_SYSTEM;
		} else {
			rbo->placements[c].fpfn = 0;
			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
						     TTM_PL_FLAG_SYSTEM;
		}
	}
	if (!c) {
		rbo->placements[c].fpfn = 0;
		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
					     TTM_PL_FLAG_SYSTEM;
	}

	rbo->placement.num_placement = c;
	rbo->placement.num_busy_placement = c;

	for (i = 0; i < c; ++i) {
		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !rbo->placements[i].fpfn)
			rbo->placements[i].lpfn =
				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			rbo->placements[i].lpfn = 0;
	}
}
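
/*
 * Illustrative example (not from the original source): for a BO created
 * with domain RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, no special
 * flags and a non-AGP board, the function above produces
 *
 *	placements[0].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM
 *	placements[1].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT
 *
 * so TTM tries VRAM first and falls back to cached GTT when VRAM is full.
 */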

int radeon_bo_create(struct radeon_device *rdev,
		     unsigned long size, int byte_align, bool kernel,
		     u32 domain, u32 flags, struct sg_table *sg,
		     struct reservation_object *resv,
		     struct radeon_bo **bo_ptr)
{
	struct radeon_bo *bo;
	enum ttm_bo_type type;
	unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
	size_t acc_size;
	int r;

	size = ALIGN(size, PAGE_SIZE);

	if (kernel) {
		type = ttm_bo_type_kernel;
	} else if (sg) {
		type = ttm_bo_type_sg;
	} else {
		type = ttm_bo_type_device;
	}
	*bo_ptr = NULL;

	acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
				       sizeof(struct radeon_bo));

	bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
	if (bo == NULL)
		return -ENOMEM;
	r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
	if (unlikely(r)) {
		kfree(bo);
		return r;
	}
	bo->rdev = rdev;
	bo->surface_reg = -1;
	INIT_LIST_HEAD(&bo->list);
	INIT_LIST_HEAD(&bo->va);
	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
	                               RADEON_GEM_DOMAIN_GTT |
	                               RADEON_GEM_DOMAIN_CPU);

	bo->flags = flags;
	/* PCI GART is always snooped */
	if (!(rdev->flags & RADEON_IS_PCIE))
		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
	 */
	bo->flags &= ~RADEON_GEM_GTT_WC;
#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
	/* Don't try to enable write-combining when it can't work, or things
	 * may be slow
	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
	 */

#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
	 thanks to write-combining

	DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
		      "better performance thanks to write-combining\n");
	bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

	radeon_ttm_placement_from_domain(bo, domain);
	/* Kernel allocations are uninterruptible */
	down_read(&rdev->pm.mclk_lock);
	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
			&bo->placement, page_align, !kernel, NULL,
			acc_size, sg, resv, &radeon_ttm_bo_destroy);
	up_read(&rdev->pm.mclk_lock);
	if (unlikely(r != 0)) {
		return r;
	}
	*bo_ptr = bo;

	trace_radeon_bo_create(bo);

	return 0;
}
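
/*
 * Minimal usage sketch for the helpers in this file (illustrative only,
 * not part of the driver; error handling trimmed). It creates a small
 * kernel BO, pins it into GTT and maps it for CPU access:
 *
 *	struct radeon_bo *bo;
 *	u64 gpu_addr;
 *	void *cpu_ptr;
 *	int r;
 *
 *	r = radeon_bo_create(rdev, PAGE_SIZE, PAGE_SIZE, true,
 *			     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &bo);
 *	r = radeon_bo_reserve(bo, false);
 *	r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
 *	r = radeon_bo_kmap(bo, &cpu_ptr);
 *	radeon_bo_unreserve(bo);
 *
 *	... use cpu_ptr and gpu_addr ...
 *
 *	radeon_bo_reserve(bo, false);
 *	radeon_bo_kunmap(bo);
 *	radeon_bo_unpin(bo);
 *	radeon_bo_unreserve(bo);
 *	radeon_bo_unref(&bo);
 */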

int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
	bool is_iomem;
	int r;

	if (bo->kptr) {
		if (ptr) {
			*ptr = bo->kptr;
		}
		return 0;
	}
	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
	if (r) {
		return r;
	}
	bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
	if (ptr) {
		*ptr = bo->kptr;
	}
	radeon_bo_check_tiling(bo, 0, 0);
	return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
	if (bo->kptr == NULL)
		return;
	bo->kptr = NULL;
	radeon_bo_check_tiling(bo, 0, 0);
	ttm_bo_kunmap(&bo->kmap);
}

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
	if (bo == NULL)
		return NULL;

	ttm_bo_reference(&bo->tbo);
	return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
	struct ttm_buffer_object *tbo;
	struct radeon_device *rdev;

	if ((*bo) == NULL)
		return;
	rdev = (*bo)->rdev;
	tbo = &((*bo)->tbo);
	ttm_bo_unref(&tbo);
	if (tbo == NULL)
		*bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
			     u64 *gpu_addr)
{
	int r, i;

	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
		return -EPERM;

	if (bo->pin_count) {
		bo->pin_count++;
		if (gpu_addr)
			*gpu_addr = radeon_bo_gpu_offset(bo);

		if (max_offset != 0) {
			u64 domain_start;

			if (domain == RADEON_GEM_DOMAIN_VRAM)
				domain_start = bo->rdev->mc.vram_start;
			else
				domain_start = bo->rdev->mc.gtt_start;
			WARN_ON_ONCE(max_offset <
				     (radeon_bo_gpu_offset(bo) - domain_start));
		}

		return 0;
	}
	radeon_ttm_placement_from_domain(bo, domain);
	for (i = 0; i < bo->placement.num_placement; i++) {
		/* force to pin into visible video ram */
		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
			bo->placements[i].lpfn =
				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
		else
			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		bo->pin_count = 1;
		if (gpu_addr != NULL)
			*gpu_addr = radeon_bo_gpu_offset(bo);
		if (domain == RADEON_GEM_DOMAIN_VRAM)
			bo->rdev->vram_pin_size += radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size += radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p pin failed\n", bo);
	}
	return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
	return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

int radeon_bo_unpin(struct radeon_bo *bo)
{
	int r, i;

	if (!bo->pin_count) {
		dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
		return 0;
	}
	bo->pin_count--;
	if (bo->pin_count)
		return 0;
	for (i = 0; i < bo->placement.num_placement; i++) {
		bo->placements[i].lpfn = 0;
		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
	}
	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
	if (likely(r == 0)) {
		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
		else
			bo->rdev->gart_pin_size -= radeon_bo_size(bo);
	} else {
		dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
	}
	return r;
}

int radeon_bo_evict_vram(struct radeon_device *rdev)
{
	/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correctly */
	if (0 && (rdev->flags & RADEON_IS_IGP)) {
		if (rdev->mc.igp_sideport_enabled == false)
			/* Useless to evict on IGP chips */
			return 0;
	}
	return ttm_bo_evict_mm(&rdev->mman.bdev, TTM_PL_VRAM);
}

void radeon_bo_force_delete(struct radeon_device *rdev)
{
	struct radeon_bo *bo, *n;

	if (list_empty(&rdev->gem.objects)) {
		return;
	}
	dev_err(rdev->dev, "Userspace still has active objects !\n");
	list_for_each_entry_safe(bo, n, &rdev->gem.objects, list) {
		dev_err(rdev->dev, "%p %p %lu %lu force free\n",
			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
			*((unsigned long *)&bo->gem_base.refcount));
		mutex_lock(&bo->rdev->gem.mutex);
		list_del_init(&bo->list);
		mutex_unlock(&bo->rdev->gem.mutex);
		/* this should unref the ttm bo */
		drm_gem_object_unreference_unlocked(&bo->gem_base);
	}
}

int radeon_bo_init(struct radeon_device *rdev)
{
	/* Add an MTRR for the VRAM */
	if (!rdev->fastfb_working) {
		rdev->mc.vram_mtrr = arch_phys_wc_add(rdev->mc.aper_base,
						      rdev->mc.aper_size);
	}
	DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
		rdev->mc.mc_vram_size >> 20,
		(unsigned long long)rdev->mc.aper_size >> 20);
	DRM_INFO("RAM width %dbits %cDR\n",
			rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
	return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
	radeon_ttm_fini(rdev);
	arch_phys_wc_del(rdev->mc.vram_mtrr);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
	u64 real_vram_size = rdev->mc.real_vram_size;
	u64 vram_usage = atomic64_read(&rdev->vram_usage);

	/* This function is based on the current VRAM usage.
	 *
	 * - If all of VRAM is free, allow relocating the number of bytes that
	 *   is equal to 1/4 of the size of VRAM for this IB.

	 * - If more than one half of VRAM is occupied, only allow relocating
	 *   1 MB of data for this IB.
	 *
	 * - From 0 to one half of used VRAM, the threshold decreases
	 *   linearly.
	 *         __________________
	 * 1/4 of -|\               |
	 * VRAM    | \              |
	 *         |  \             |
	 *         |   \            |
	 *         |    \           |
	 *         |     \          |
	 *         |      \         |
	 *         |       \________|1 MB
	 *         |----------------|
	 *    VRAM 0 %             100 %
	 *         used            used
	 *
	 * Note: It's a threshold, not a limit. The threshold must be crossed
	 * for buffer relocations to stop, so any buffer of an arbitrary size
	 * can be moved as long as the threshold isn't crossed before
	 * the relocation takes place. We don't want to disable buffer
	 * relocations completely.
	 *
	 * The idea is that buffers should be placed in VRAM at creation time
	 * and TTM should only do a minimum number of relocations during
	 * command submission. In practice, you need to submit at least
	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
	 *
	 * Also, things can get pretty crazy under memory pressure and actual
	 * VRAM usage can change a lot, so playing safe even at 50% does
	 * consistently increase performance.
	 */

	u64 half_vram = real_vram_size >> 1;
	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
	u64 bytes_moved_threshold = half_free_vram >> 1;
	return max(bytes_moved_threshold, 1024*1024ull);
}

int radeon_bo_list_validate(struct radeon_device *rdev,
			    struct ww_acquire_ctx *ticket,
			    struct list_head *head, int ring)
{
	struct radeon_bo_list *lobj;
	struct list_head duplicates;
	int r;
	u64 bytes_moved = 0, initial_bytes_moved;
	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

	INIT_LIST_HEAD(&duplicates);
	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
	if (unlikely(r != 0)) {
		return r;
	}

	list_for_each_entry(lobj, head, tv.head) {
		struct radeon_bo *bo = lobj->robj;
		if (!bo->pin_count) {
			u32 domain = lobj->prefered_domains;
			u32 allowed = lobj->allowed_domains;
			u32 current_domain =
				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

			/* Check if this buffer will be moved and don't move it
			 * if we have moved too many buffers for this IB already.
			 *
			 * Note that this allows moving at least one buffer of
			 * any size, because it doesn't take the current "bo"
			 * into account. We don't want to disallow buffer moves
			 * completely.
			 */
			if ((allowed & current_domain) != 0 &&
			    (domain & current_domain) == 0 && /* will be moved */
			    bytes_moved > bytes_moved_threshold) {
				/* don't move it */
				domain = current_domain;
			}

		retry:
			radeon_ttm_placement_from_domain(bo, domain);
			if (ring == R600_RING_TYPE_UVD_INDEX)
				radeon_uvd_force_into_uvd_segment(bo, allowed);

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}
		}
		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
		lobj->tiling_flags = bo->tiling_flags;
	}

	list_for_each_entry(lobj, &duplicates, tv.head) {
		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
		lobj->tiling_flags = lobj->robj->tiling_flags;
	}

	return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;
	struct radeon_bo *old_object;
	int steal;
	int i;

	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!bo->tiling_flags)
		return 0;

	if (bo->surface_reg >= 0) {
		reg = &rdev->surface_regs[bo->surface_reg];
		i = bo->surface_reg;
		goto out;
	}

	steal = -1;
	for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

		reg = &rdev->surface_regs[i];
		if (!reg->bo)
			break;

		old_object = reg->bo;
		if (old_object->pin_count == 0)
			steal = i;
	}

	/* if we are all out */
	if (i == RADEON_GEM_MAX_SURFACES) {
		if (steal == -1)
			return -ENOMEM;
		/* find someone with a surface reg and nuke their BO */
		reg = &rdev->surface_regs[steal];
		old_object = reg->bo;
		/* blow away the mapping */
		DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
		ttm_bo_unmap_virtual(&old_object->tbo);
		old_object->surface_reg = -1;
		i = steal;
	}

	bo->surface_reg = i;
	reg->bo = bo;

out:
	radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
			       bo->tbo.mem.start << PAGE_SHIFT,
			       bo->tbo.num_pages << PAGE_SHIFT);
	return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
	struct radeon_device *rdev = bo->rdev;
	struct radeon_surface_reg *reg;

	if (bo->surface_reg == -1)
		return;

	reg = &rdev->surface_regs[bo->surface_reg];
	radeon_clear_surface_reg(rdev, bo->surface_reg);

	reg->bo = NULL;
	bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
				uint32_t tiling_flags, uint32_t pitch)
{
	struct radeon_device *rdev = bo->rdev;
	int r;

	if (rdev->family >= CHIP_CEDAR) {
		unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

		bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
		bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
		mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
		tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
		stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
		switch (bankw) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (bankh) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		switch (mtaspect) {
		case 0:
		case 1:
		case 2:
		case 4:
		case 8:
			break;
		default:
			return -EINVAL;
		}
		if (tilesplit > 6) {
			return -EINVAL;
		}
		if (stilesplit > 6) {
			return -EINVAL;
		}
	}
	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	bo->tiling_flags = tiling_flags;
	bo->pitch = pitch;
	radeon_bo_unreserve(bo);
	return 0;
}

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
				uint32_t *tiling_flags,
				uint32_t *pitch)
{
	lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (tiling_flags)
		*tiling_flags = bo->tiling_flags;
	if (pitch)
		*pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
				bool force_drop)
{
	if (!force_drop)
		lockdep_assert_held(&bo->tbo.resv->lock.base);

	if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
		return 0;

	if (force_drop) {
		radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
		if (!has_moved)
			return 0;

		if (bo->surface_reg >= 0)
			radeon_bo_clear_surface_reg(bo);
		return 0;
	}

	if ((bo->surface_reg >= 0) && !has_moved)
		return 0;

	return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo,
			   struct ttm_mem_reg *new_mem)
{
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return;

	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 1);
	radeon_vm_bo_invalidate(rbo->rdev, rbo);

	/* update statistics */
	if (!new_mem)
		return;

	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
	struct radeon_device *rdev;
	struct radeon_bo *rbo;
	unsigned long offset, size, lpfn;
	int i, r;

	if (!radeon_ttm_bo_is_radeon_bo(bo))
		return 0;
	rbo = container_of(bo, struct radeon_bo, tbo);
	radeon_bo_check_tiling(rbo, 0, 0);
	rdev = rbo->rdev;
	if (bo->mem.mem_type != TTM_PL_VRAM)
		return 0;

	size = bo->mem.num_pages << PAGE_SHIFT;
	offset = bo->mem.start << PAGE_SHIFT;
	if ((offset + size) <= rdev->mc.visible_vram_size)
		return 0;

	/* hurrah the memory is not visible ! */
	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
	lpfn =	rdev->mc.visible_vram_size >> PAGE_SHIFT;
	for (i = 0; i < rbo->placement.num_placement; i++) {
		/* Force into visible VRAM */
		if ((rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
		    (!rbo->placements[i].lpfn || rbo->placements[i].lpfn > lpfn))
			rbo->placements[i].lpfn = lpfn;
	}
	r = ttm_bo_validate(bo, &rbo->placement, false, false);
	if (unlikely(r == -ENOMEM)) {
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
		return ttm_bo_validate(bo, &rbo->placement, false, false);
	} else if (unlikely(r != 0)) {
		return r;
	}

	offset = bo->mem.start << PAGE_SHIFT;
	/* this should never happen */
	if ((offset + size) > rdev->mc.visible_vram_size)
		return -EINVAL;

	return 0;
}

int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
	int r;

	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
	if (unlikely(r != 0))
		return r;
	if (mem_type)
		*mem_type = bo->tbo.mem.mem_type;

	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
	ttm_bo_unreserve(&bo->tbo);
	return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
                     bool shared)
{
	struct reservation_object *resv = bo->tbo.resv;

	if (shared)
		reservation_object_add_shared_fence(resv, &fence->base);
	else
		reservation_object_add_excl_fence(resv, &fence->base);
}