radeon_ttm.c 29.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Thomas Hellstrom <thomas-at-tungstengraphics-dot-com>
 *    Dave Airlie
 */
#include <ttm/ttm_bo_api.h>
#include <ttm/ttm_bo_driver.h>
#include <ttm/ttm_placement.h>
#include <ttm/ttm_module.h>
36
#include <ttm/ttm_page_alloc.h>
37 38
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
39
#include <linux/seq_file.h>
40
#include <linux/slab.h>
41
#include <linux/swiotlb.h>
42 43
#include <linux/swap.h>
#include <linux/pagemap.h>
44
#include <linux/debugfs.h>
45 46 47 48 49
#include "radeon_reg.h"
#include "radeon.h"

#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)

50
static int radeon_ttm_debugfs_init(struct radeon_device *rdev);
51
static void radeon_ttm_debugfs_fini(struct radeon_device *rdev);
52

53 54 55 56 57 58 59 60 61 62 63 64 65 66
/*
 * Map a TTM buffer-object device back to the radeon device that embeds it:
 * @bdev is the bdev member of struct radeon_mman, which in turn is the mman
 * member of struct radeon_device.
 */
static struct radeon_device *radeon_get_rdev(struct ttm_bo_device *bdev)
{
	struct radeon_mman *mman = container_of(bdev, struct radeon_mman, bdev);

	return container_of(mman, struct radeon_device, mman);
}


/*
 * Global memory.
 */
67
static int radeon_ttm_mem_global_init(struct drm_global_reference *ref)
68 69 70 71
{
	return ttm_mem_global_init(ref->object);
}

72
static void radeon_ttm_mem_global_release(struct drm_global_reference *ref)
73 74 75 76 77 78
{
	ttm_mem_global_release(ref->object);
}

static int radeon_ttm_global_init(struct radeon_device *rdev)
{
79
	struct drm_global_reference *global_ref;
80 81 82 83
	int r;

	rdev->mman.mem_global_referenced = false;
	global_ref = &rdev->mman.mem_global_ref;
84
	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
85 86 87
	global_ref->size = sizeof(struct ttm_mem_global);
	global_ref->init = &radeon_ttm_mem_global_init;
	global_ref->release = &radeon_ttm_mem_global_release;
88
	r = drm_global_item_ref(global_ref);
89
	if (r != 0) {
90 91
		DRM_ERROR("Failed setting up TTM memory accounting "
			  "subsystem.\n");
92 93
		return r;
	}
94 95 96 97

	rdev->mman.bo_global_ref.mem_glob =
		rdev->mman.mem_global_ref.object;
	global_ref = &rdev->mman.bo_global_ref.ref;
98
	global_ref->global_type = DRM_GLOBAL_TTM_BO;
99
	global_ref->size = sizeof(struct ttm_bo_global);
100 101
	global_ref->init = &ttm_bo_global_init;
	global_ref->release = &ttm_bo_global_release;
102
	r = drm_global_item_ref(global_ref);
103 104
	if (r != 0) {
		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
105
		drm_global_item_unref(&rdev->mman.mem_global_ref);
106 107 108
		return r;
	}

109 110 111 112 113 114 115
	rdev->mman.mem_global_referenced = true;
	return 0;
}

static void radeon_ttm_global_fini(struct radeon_device *rdev)
{
	if (rdev->mman.mem_global_referenced) {
116 117
		drm_global_item_unref(&rdev->mman.bo_global_ref.ref);
		drm_global_item_unref(&rdev->mman.mem_global_ref);
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
		rdev->mman.mem_global_referenced = false;
	}
}

/*
 * TTM invalidate_caches callback (see radeon_bo_driver).  This driver has
 * nothing to do here; both arguments are ignored and 0 is always returned.
 */
static int radeon_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
{
	return 0;
}

/*
 * TTM init_mem_type callback: describe memory pool @type in @man.
 *
 * Returns 0 on success, -EINVAL for a pool this driver does not know or, on
 * AGP builds, when the device is flagged AGP but has no AGP state attached.
 */
static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
				struct ttm_mem_type_manager *man)
{
	struct radeon_device *rdev = radeon_get_rdev(bdev);

	if (type == TTM_PL_SYSTEM) {
		/* System memory */
		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_MASK_CACHING;
		man->default_caching = TTM_PL_FLAG_CACHED;
		return 0;
	}

	if (type == TTM_PL_VRAM) {
		/* "On-card" video ram */
		man->func = &ttm_bo_manager_func;
		man->gpu_offset = rdev->mc.vram_start;
		man->flags = TTM_MEMTYPE_FLAG_FIXED |
			     TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
		man->default_caching = TTM_PL_FLAG_WC;
		return 0;
	}

	if (type != TTM_PL_TT) {
		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
		return -EINVAL;
	}

	/* GTT: start from the non-AGP defaults. */
	man->func = &ttm_bo_manager_func;
	man->gpu_offset = rdev->mc.gtt_start;
	man->available_caching = TTM_PL_MASK_CACHING;
	man->default_caching = TTM_PL_FLAG_CACHED;
	man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		if (!rdev->ddev->agp) {
			DRM_ERROR("AGP is not enabled for memory type %u\n",
				  (unsigned)type);
			return -EINVAL;
		}
		/* A usable aperture drops the CMA flag set above. */
		if (!rdev->ddev->agp->cant_use_aperture)
			man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
		man->available_caching = TTM_PL_FLAG_UNCACHED |
					 TTM_PL_FLAG_WC;
		man->default_caching = TTM_PL_FLAG_WC;
	}
#endif
	return 0;
}

178 179
static void radeon_evict_flags(struct ttm_buffer_object *bo,
				struct ttm_placement *placement)
180
{
181 182 183 184 185 186
	static struct ttm_place placements = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
	};

187 188 189 190 191 192 193 194 195 196
	struct radeon_bo *rbo;

	if (!radeon_ttm_bo_is_radeon_bo(bo)) {
		placement->placement = &placements;
		placement->busy_placement = &placements;
		placement->num_placement = 1;
		placement->num_busy_placement = 1;
		return;
	}
	rbo = container_of(bo, struct radeon_bo, tbo);
197
	switch (bo->mem.mem_type) {
198
	case TTM_PL_VRAM:
199
		if (rbo->rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready == false)
200 201 202
			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
		else
			radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
203 204
		break;
	case TTM_PL_TT:
205
	default:
206
		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU);
207
	}
208
	*placement = rbo->placement;
209 210 211 212
}

static int radeon_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
213 214 215
	struct radeon_bo *rbo = container_of(bo, struct radeon_bo, tbo);

	return drm_vma_node_verify_access(&rbo->gem_base.vma_node, filp);
216 217 218 219 220 221 222 223 224 225 226 227 228
}

/*
 * "Move" @bo without copying anything: adopt @new_mem as the current
 * placement.  The old placement must not own an mm_node (BUG otherwise).
 * @new_mem gives up its mm_node, which now belongs to bo->mem.
 */
static void radeon_move_null(struct ttm_buffer_object *bo,
			     struct ttm_mem_reg *new_mem)
{
	BUG_ON(bo->mem.mm_node != NULL);

	bo->mem = *new_mem;
	new_mem->mm_node = NULL;
}

/*
 * Copy the contents of @bo from @old_mem to @new_mem with the GPU copy ring
 * and hand the resulting fence to TTM.
 *
 * Both placements must be VRAM or GTT.  Returns 0 on success, -EINVAL for an
 * unsupported placement or when the copy ring is not ready, the error from
 * radeon_copy() if the copy could not be queued, or the result of
 * ttm_bo_move_accel_cleanup().
 */
static int radeon_move_blit(struct ttm_buffer_object *bo,
			bool evict, bool no_wait_gpu,
			struct ttm_mem_reg *new_mem,
			struct ttm_mem_reg *old_mem)
{
	struct radeon_device *rdev;
	uint64_t old_start, new_start;
	struct radeon_fence *fence;
	int r, ridx;

	rdev = radeon_get_rdev(bo->bdev);
	ridx = radeon_copy_ring_index(rdev);
	old_start = old_mem->start << PAGE_SHIFT;
	new_start = new_mem->start << PAGE_SHIFT;

	/* Turn the pool-relative offsets into GPU addresses. */
	switch (old_mem->mem_type) {
	case TTM_PL_VRAM:
		old_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		old_start += rdev->mc.gtt_start;
		break;
	default:
		DRM_ERROR("Unknown placement %d\n", old_mem->mem_type);
		return -EINVAL;
	}
	switch (new_mem->mem_type) {
	case TTM_PL_VRAM:
		new_start += rdev->mc.vram_start;
		break;
	case TTM_PL_TT:
		new_start += rdev->mc.gtt_start;
		break;
	default:
		/* Report the destination type; this used to print the source. */
		DRM_ERROR("Unknown placement %d\n", new_mem->mem_type);
		return -EINVAL;
	}
	if (!rdev->ring[ridx].ready) {
		DRM_ERROR("Trying to move memory with ring turned off.\n");
		return -EINVAL;
	}

	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);

	/* sync other rings */
	fence = bo->sync_obj;
	r = radeon_copy(rdev, old_start, new_start,
			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
			&fence);
	if (r) {
		/*
		 * Nothing was copied, so do not tell TTM the move happened;
		 * the caller can fall back to another method.
		 * NOTE(review): assumes radeon_copy() hands back no fence
		 * reference on failure, so there is nothing to unref here --
		 * confirm against radeon_copy().
		 */
		return r;
	}
	r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
				      evict, no_wait_gpu, new_mem);
	radeon_fence_unref(&fence);
	return r;
}

/*
 * Move @bo from VRAM to system memory in two steps: blit VRAM -> a temporary
 * GTT placement, then let TTM move the pages from GTT to @new_mem.
 *
 * Returns 0 on success or the first error hit.  The temporary GTT node is
 * released on every path once it has been allocated.
 */
static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
				bool evict, bool interruptible,
				bool no_wait_gpu,
				struct ttm_mem_reg *new_mem)
{
	struct ttm_place gtt_place = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT,
	};
	struct ttm_placement gtt_placement = {
		.num_placement = 1,
		.placement = &gtt_place,
		.num_busy_placement = 1,
		.busy_placement = &gtt_place,
	};
	struct ttm_mem_reg *old_mem = &bo->mem;
	struct ttm_mem_reg tmp_mem = *new_mem;
	int r;

	tmp_mem.mm_node = NULL;

	r = ttm_bo_mem_space(bo, &gtt_placement, &tmp_mem,
			     interruptible, no_wait_gpu);
	if (unlikely(r))
		return r;

	r = ttm_tt_set_placement_caching(bo->ttm, tmp_mem.placement);
	if (unlikely(r))
		goto out_cleanup;

	r = ttm_tt_bind(bo->ttm, &tmp_mem);
	if (unlikely(r))
		goto out_cleanup;

	r = radeon_move_blit(bo, true, no_wait_gpu, &tmp_mem, old_mem);
	if (unlikely(r))
		goto out_cleanup;

	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, new_mem);

out_cleanup:
	ttm_bo_mem_put(bo, &tmp_mem);
	return r;
}

/*
 * Move @bo from system memory to VRAM in two steps: let TTM move the pages
 * into a temporary GTT placement, then blit GTT -> @new_mem.
 *
 * Returns 0 on success or the first error hit.  The temporary GTT node is
 * released on every path once it has been allocated.
 */
static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
				bool evict, bool interruptible,
				bool no_wait_gpu,
				struct ttm_mem_reg *new_mem)
{
	struct ttm_place gtt_place = {
		.fpfn = 0,
		.lpfn = 0,
		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT,
	};
	struct ttm_placement gtt_placement = {
		.num_placement = 1,
		.placement = &gtt_place,
		.num_busy_placement = 1,
		.busy_placement = &gtt_place,
	};
	struct ttm_mem_reg *old_mem = &bo->mem;
	struct ttm_mem_reg tmp_mem = *new_mem;
	int r;

	tmp_mem.mm_node = NULL;

	r = ttm_bo_mem_space(bo, &gtt_placement, &tmp_mem,
			     interruptible, no_wait_gpu);
	if (unlikely(r))
		return r;

	r = ttm_bo_move_ttm(bo, true, no_wait_gpu, &tmp_mem);
	if (likely(!r))
		r = radeon_move_blit(bo, true, no_wait_gpu, new_mem, old_mem);

	ttm_bo_mem_put(bo, &tmp_mem);
	return r;
}

/*
 * TTM move callback: relocate @bo to @new_mem.
 *
 * Moves that need no data transfer (unpopulated system BO, or system <-> GTT)
 * only swap the placement.  Otherwise the GPU copy path is tried when the
 * copy ring is ready and a copy function exists; if it is unavailable or
 * fails, ttm_bo_move_memcpy() is used.  The moved byte count is only
 * accounted when a copy succeeded.
 *
 * Returns 0 on success or the error from ttm_bo_move_memcpy().
 */
static int radeon_bo_move(struct ttm_buffer_object *bo,
			bool evict, bool interruptible,
			bool no_wait_gpu,
			struct ttm_mem_reg *new_mem)
{
	struct radeon_device *rdev = radeon_get_rdev(bo->bdev);
	struct ttm_mem_reg *old_mem = &bo->mem;
	uint32_t src = old_mem->mem_type;
	uint32_t dst = new_mem->mem_type;
	bool gpu_copied = false;
	int r;

	if (src == TTM_PL_SYSTEM && bo->ttm == NULL) {
		radeon_move_null(bo, new_mem);
		return 0;
	}

	if ((src == TTM_PL_TT && dst == TTM_PL_SYSTEM) ||
	    (src == TTM_PL_SYSTEM && dst == TTM_PL_TT)) {
		/* bind is enough */
		radeon_move_null(bo, new_mem);
		return 0;
	}

	if (rdev->ring[radeon_copy_ring_index(rdev)].ready &&
	    rdev->asic->copy.copy != NULL) {
		if (src == TTM_PL_VRAM && dst == TTM_PL_SYSTEM)
			r = radeon_move_vram_ram(bo, evict, interruptible,
						 no_wait_gpu, new_mem);
		else if (src == TTM_PL_SYSTEM && dst == TTM_PL_VRAM)
			r = radeon_move_ram_vram(bo, evict, interruptible,
						 no_wait_gpu, new_mem);
		else
			r = radeon_move_blit(bo, evict, no_wait_gpu,
					     new_mem, old_mem);

		gpu_copied = (r == 0);
	}

	if (!gpu_copied) {
		/* use memcpy */
		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
		if (r)
			return r;
	}

	/* update statistics */
	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &rdev->num_bytes_moved);
	return 0;
}

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
/*
 * TTM io_mem_reserve callback: fill in mem->bus so @mem can be CPU-mapped.
 *
 * Returns 0 on success and -EINVAL when the pool is not mappable, the pool
 * type is unknown, or a VRAM range extends past the CPU-visible part of VRAM.
 */
static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
	struct radeon_device *rdev = radeon_get_rdev(bdev);

	/* Defaults: plain system memory, no bus window. */
	mem->bus.addr = NULL;
	mem->bus.offset = 0;
	mem->bus.size = mem->num_pages << PAGE_SHIFT;
	mem->bus.base = 0;
	mem->bus.is_iomem = false;

	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
		return -EINVAL;

	if (mem->mem_type == TTM_PL_SYSTEM) {
		/* system memory */
		return 0;
	}

	if (mem->mem_type == TTM_PL_TT) {
#if __OS_HAS_AGP
		if (rdev->flags & RADEON_IS_AGP) {
			/* RADEON_IS_AGP is set only if AGP is active */
			mem->bus.offset = mem->start << PAGE_SHIFT;
			mem->bus.base = rdev->mc.agp_base;
			mem->bus.is_iomem = !rdev->ddev->agp->cant_use_aperture;
		}
#endif
		return 0;
	}

	if (mem->mem_type != TTM_PL_VRAM)
		return -EINVAL;

	mem->bus.offset = mem->start << PAGE_SHIFT;
	/* check if it's visible */
	if ((mem->bus.offset + mem->bus.size) > rdev->mc.visible_vram_size)
		return -EINVAL;
	mem->bus.base = rdev->mc.aper_base;
	mem->bus.is_iomem = true;
#ifdef __alpha__
	/*
	 * Alpha: use bus.addr to hold the ioremap() return,
	 * so we can modify bus.base below.
	 */
	if (mem->placement & TTM_PL_FLAG_WC)
		mem->bus.addr =
			ioremap_wc(mem->bus.base + mem->bus.offset,
				   mem->bus.size);
	else
		mem->bus.addr =
			ioremap_nocache(mem->bus.base + mem->bus.offset,
					mem->bus.size);

	/*
	 * Alpha: Use just the bus offset plus
	 * the hose/domain memory base for bus.base.
	 * It then can be used to build PTEs for VRAM
	 * access, as done in ttm_bo_vm_fault().
	 */
	mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
		rdev->ddev->hose->dense_mem_base;
#endif
	return 0;
}

/*
 * TTM io_mem_free callback (see radeon_bo_driver).  Intentionally empty:
 * this function releases nothing.
 * NOTE(review): on Alpha radeon_ttm_io_mem_reserve() stores an ioremap()
 * result in bus.addr and nothing here unmaps it -- confirm whether that
 * mapping is released elsewhere.
 */
static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
{
}

491
/*
 * TTM sync_obj_wait callback: wait on the radeon fence behind @sync_obj.
 * @lazy is not used.
 */
static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
{
	struct radeon_fence *fence = (struct radeon_fence *)sync_obj;

	return radeon_fence_wait(fence, interruptible);
}

496
/* TTM sync_obj_flush callback: nothing to flush, always reports success. */
static int radeon_sync_obj_flush(void *sync_obj)
{
	return 0;
}

/* TTM sync_obj_unref callback: forward to radeon_fence_unref(). */
static void radeon_sync_obj_unref(void **sync_obj)
{
	struct radeon_fence **fence = (struct radeon_fence **)sync_obj;

	radeon_fence_unref(fence);
}

/* TTM sync_obj_ref callback: forward to radeon_fence_ref(). */
static void *radeon_sync_obj_ref(void *sync_obj)
{
	struct radeon_fence *fence = (struct radeon_fence *)sync_obj;

	return radeon_fence_ref(fence);
}

511
/* TTM sync_obj_signaled callback: forward to radeon_fence_signaled(). */
static bool radeon_sync_obj_signaled(void *sync_obj)
{
	struct radeon_fence *fence = (struct radeon_fence *)sync_obj;

	return radeon_fence_signaled(fence);
}

516 517 518 519
/*
 * TTM backend functions.
 */
struct radeon_ttm_tt {
520
	struct ttm_dma_tt		ttm;
521 522
	struct radeon_device		*rdev;
	u64				offset;
523 524 525 526

	uint64_t			userptr;
	struct mm_struct		*usermm;
	uint32_t			userflags;
527 528
};

529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
/* prepare the sg table with the user pages */
static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
	struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
	struct radeon_ttm_tt *gtt = (void *)ttm;
	unsigned pinned = 0, nents;
	int r;

	int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	if (current->mm != gtt->usermm)
		return -EPERM;

544 545 546 547 548 549 550 551 552 553
	if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) {
		/* check that we only pin down anonymous memory
		   to prevent problems with writeback */
		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
		struct vm_area_struct *vma;
		vma = find_vma(gtt->usermm, gtt->userptr);
		if (!vma || vma->vm_file || vma->vm_end < end)
			return -EPERM;
	}

554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618
	do {
		unsigned num_pages = ttm->num_pages - pinned;
		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
		struct page **pages = ttm->pages + pinned;

		r = get_user_pages(current, current->mm, userptr, num_pages,
				   write, 0, pages, NULL);
		if (r < 0)
			goto release_pages;

		pinned += r;

	} while (pinned < ttm->num_pages);

	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
				      ttm->num_pages << PAGE_SHIFT,
				      GFP_KERNEL);
	if (r)
		goto release_sg;

	r = -ENOMEM;
	nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
	if (nents != ttm->sg->nents)
		goto release_sg;

	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
					 gtt->ttm.dma_address, ttm->num_pages);

	return 0;

release_sg:
	kfree(ttm->sg);

release_pages:
	release_pages(ttm->pages, pinned, 0);
	return r;
}

static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
{
	struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
	struct radeon_ttm_tt *gtt = (void *)ttm;
	struct scatterlist *sg;
	int i;

	int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
	enum dma_data_direction direction = write ?
		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;

	/* free the sg table and pages again */
	dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);

	for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) {
		struct page *page = sg_page(sg);

		if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY))
			set_page_dirty(page);

		mark_page_accessed(page);
		page_cache_release(page);
	}

	sg_free_table(ttm->sg);
}

619 620 621
static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
				   struct ttm_mem_reg *bo_mem)
{
622
	struct radeon_ttm_tt *gtt = (void*)ttm;
623 624
	uint32_t flags = RADEON_GART_PAGE_VALID | RADEON_GART_PAGE_READ |
		RADEON_GART_PAGE_WRITE;
625 626
	int r;

627 628 629 630 631
	if (gtt->userptr) {
		radeon_ttm_tt_pin_userptr(ttm);
		flags &= ~RADEON_GART_PAGE_WRITE;
	}

632 633 634 635 636
	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
	if (!ttm->num_pages) {
		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
		     ttm->num_pages, bo_mem, ttm);
	}
637 638 639 640
	if (ttm->caching_state == tt_cached)
		flags |= RADEON_GART_PAGE_SNOOP;
	r = radeon_gart_bind(gtt->rdev, gtt->offset, ttm->num_pages,
			     ttm->pages, gtt->ttm.dma_address, flags);
641 642 643 644 645 646 647 648 649 650
	if (r) {
		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
			  ttm->num_pages, (unsigned)gtt->offset);
		return r;
	}
	return 0;
}

static int radeon_ttm_backend_unbind(struct ttm_tt *ttm)
{
651
	struct radeon_ttm_tt *gtt = (void *)ttm;
652 653

	radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages);
654 655 656 657

	if (gtt->userptr)
		radeon_ttm_tt_unpin_userptr(ttm);

658 659 660 661 662
	return 0;
}

static void radeon_ttm_backend_destroy(struct ttm_tt *ttm)
{
663
	struct radeon_ttm_tt *gtt = (void *)ttm;
664

665
	ttm_dma_tt_fini(&gtt->ttm);
666 667 668 669 670 671 672 673 674
	kfree(gtt);
}

/* Backend operations installed on every ttm_tt made by radeon_ttm_tt_create(). */
static struct ttm_backend_func radeon_backend_func = {
	.bind		= radeon_ttm_backend_bind,
	.unbind		= radeon_ttm_backend_unbind,
	.destroy	= radeon_ttm_backend_destroy,
};

675
/*
 * TTM ttm_tt_create callback.
 *
 * AGP devices (AGP builds only) get a ttm_tt from ttm_agp_tt_create().  All
 * others get a zeroed radeon_ttm_tt wired to radeon_backend_func.
 * Returns NULL when allocation or ttm_dma_tt_init() fails.
 */
static struct ttm_tt *radeon_ttm_tt_create(struct ttm_bo_device *bdev,
				    unsigned long size, uint32_t page_flags,
				    struct page *dummy_read_page)
{
	struct radeon_device *rdev = radeon_get_rdev(bdev);
	struct radeon_ttm_tt *gtt;

#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP)
		return ttm_agp_tt_create(bdev, rdev->ddev->agp->bridge,
					 size, page_flags, dummy_read_page);
#endif

	gtt = kzalloc(sizeof(*gtt), GFP_KERNEL);
	if (!gtt)
		return NULL;

	gtt->rdev = rdev;
	gtt->ttm.ttm.func = &radeon_backend_func;

	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags,
			    dummy_read_page) != 0) {
		kfree(gtt);
		return NULL;
	}

	return &gtt->ttm.ttm;
}

703 704 705
static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
{
	struct radeon_device *rdev;
706
	struct radeon_ttm_tt *gtt = (void *)ttm;
707 708
	unsigned i;
	int r;
709
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
710 711 712 713

	if (ttm->state != tt_unpopulated)
		return 0;

714 715 716 717 718 719 720 721 722 723
	if (gtt->userptr) {
		ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
		if (!ttm->sg)
			return -ENOMEM;

		ttm->page_flags |= TTM_PAGE_FLAG_SG;
		ttm->state = tt_unbound;
		return 0;
	}

724 725 726 727 728 729 730
	if (slave && ttm->sg) {
		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
						 gtt->ttm.dma_address, ttm->num_pages);
		ttm->state = tt_unbound;
		return 0;
	}

731
	rdev = radeon_get_rdev(ttm->bdev);
J
Jerome Glisse 已提交
732 733 734 735 736
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		return ttm_agp_tt_populate(ttm);
	}
#endif
737 738 739

#ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
740
		return ttm_dma_populate(&gtt->ttm, rdev->dev);
741 742 743 744 745 746 747 748 749
	}
#endif

	r = ttm_pool_populate(ttm);
	if (r) {
		return r;
	}

	for (i = 0; i < ttm->num_pages; i++) {
750 751 752 753
		gtt->ttm.dma_address[i] = pci_map_page(rdev->pdev, ttm->pages[i],
						       0, PAGE_SIZE,
						       PCI_DMA_BIDIRECTIONAL);
		if (pci_dma_mapping_error(rdev->pdev, gtt->ttm.dma_address[i])) {
754
			while (--i) {
755
				pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i],
756
					       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
757
				gtt->ttm.dma_address[i] = 0;
758 759 760 761 762 763 764 765 766 767 768
			}
			ttm_pool_unpopulate(ttm);
			return -EFAULT;
		}
	}
	return 0;
}

static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
{
	struct radeon_device *rdev;
769
	struct radeon_ttm_tt *gtt = (void *)ttm;
770
	unsigned i;
771 772
	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);

773 774 775 776 777 778
	if (gtt->userptr) {
		kfree(ttm->sg);
		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
		return;
	}

779 780
	if (slave)
		return;
781 782

	rdev = radeon_get_rdev(ttm->bdev);
J
Jerome Glisse 已提交
783 784 785 786 787 788
#if __OS_HAS_AGP
	if (rdev->flags & RADEON_IS_AGP) {
		ttm_agp_tt_unpopulate(ttm);
		return;
	}
#endif
789 790 791

#ifdef CONFIG_SWIOTLB
	if (swiotlb_nr_tbl()) {
792
		ttm_dma_unpopulate(&gtt->ttm, rdev->dev);
793 794 795 796 797
		return;
	}
#endif

	for (i = 0; i < ttm->num_pages; i++) {
798 799
		if (gtt->ttm.dma_address[i]) {
			pci_unmap_page(rdev->pdev, gtt->ttm.dma_address[i],
800 801 802 803 804 805
				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
		}
	}

	ttm_pool_unpopulate(ttm);
}
806

807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
/*
 * Turn @ttm into a userptr object: record the user address, the calling
 * task's mm and the RADEON_GEM_USERPTR_* @flags.
 * Returns 0, or -EINVAL when @ttm is NULL.
 */
int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
			      uint32_t flags)
{
	struct radeon_ttm_tt *gtt = (void *)ttm;

	if (!gtt)
		return -EINVAL;

	gtt->userflags = flags;
	gtt->usermm = current->mm;
	gtt->userptr = addr;
	return 0;
}

/* True when @ttm is non-NULL and has a (non-zero) user address attached. */
bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm)
{
	struct radeon_ttm_tt *gtt = (void *)ttm;

	return gtt != NULL && gtt->userptr != 0;
}

/* True when @ttm is non-NULL and carries RADEON_GEM_USERPTR_READONLY. */
bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
	struct radeon_ttm_tt *gtt = (void *)ttm;

	return gtt != NULL &&
	       (gtt->userflags & RADEON_GEM_USERPTR_READONLY) != 0;
}

841
static struct ttm_bo_driver radeon_bo_driver = {
842
	.ttm_tt_create = &radeon_ttm_tt_create,
843 844
	.ttm_tt_populate = &radeon_ttm_tt_populate,
	.ttm_tt_unpopulate = &radeon_ttm_tt_unpopulate,
845 846 847 848 849 850 851 852 853 854
	.invalidate_caches = &radeon_invalidate_caches,
	.init_mem_type = &radeon_init_mem_type,
	.evict_flags = &radeon_evict_flags,
	.move = &radeon_bo_move,
	.verify_access = &radeon_verify_access,
	.sync_obj_signaled = &radeon_sync_obj_signaled,
	.sync_obj_wait = &radeon_sync_obj_wait,
	.sync_obj_flush = &radeon_sync_obj_flush,
	.sync_obj_unref = &radeon_sync_obj_unref,
	.sync_obj_ref = &radeon_sync_obj_ref,
855 856
	.move_notify = &radeon_bo_move_notify,
	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
857 858
	.io_mem_reserve = &radeon_ttm_io_mem_reserve,
	.io_mem_free = &radeon_ttm_io_mem_free,
859 860 861 862 863 864 865 866 867 868 869 870
};

int radeon_ttm_init(struct radeon_device *rdev)
{
	int r;

	r = radeon_ttm_global_init(rdev);
	if (r) {
		return r;
	}
	/* No others user of address space so set it to 0 */
	r = ttm_bo_device_init(&rdev->mman.bdev,
871
			       rdev->mman.bo_global_ref.ref.object,
872 873 874
			       &radeon_bo_driver,
			       rdev->ddev->anon_inode->i_mapping,
			       DRM_FILE_PAGE_OFFSET,
D
Dave Airlie 已提交
875
			       rdev->need_dma32);
876 877 878 879
	if (r) {
		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
		return r;
	}
880
	rdev->mman.initialized = true;
881
	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_VRAM,
882
				rdev->mc.real_vram_size >> PAGE_SHIFT);
883 884 885 886
	if (r) {
		DRM_ERROR("Failed initializing VRAM heap.\n");
		return r;
	}
887 888 889
	/* Change the size here instead of the init above so only lpfn is affected */
	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

890
	r = radeon_bo_create(rdev, 256 * 1024, PAGE_SIZE, true,
891
			     RADEON_GEM_DOMAIN_VRAM, 0,
892
			     NULL, &rdev->stollen_vga_memory);
893 894 895
	if (r) {
		return r;
	}
896 897 898 899 900
	r = radeon_bo_reserve(rdev->stollen_vga_memory, false);
	if (r)
		return r;
	r = radeon_bo_pin(rdev->stollen_vga_memory, RADEON_GEM_DOMAIN_VRAM, NULL);
	radeon_bo_unreserve(rdev->stollen_vga_memory);
901
	if (r) {
902
		radeon_bo_unref(&rdev->stollen_vga_memory);
903 904 905
		return r;
	}
	DRM_INFO("radeon: %uM of VRAM memory ready\n",
906
		 (unsigned) (rdev->mc.real_vram_size / (1024 * 1024)));
907
	r = ttm_bo_init_mm(&rdev->mman.bdev, TTM_PL_TT,
908
				rdev->mc.gtt_size >> PAGE_SHIFT);
909 910 911 912 913
	if (r) {
		DRM_ERROR("Failed initializing GTT heap.\n");
		return r;
	}
	DRM_INFO("radeon: %uM of GTT memory ready.\n",
914
		 (unsigned)(rdev->mc.gtt_size / (1024 * 1024)));
915 916 917 918 919 920

	r = radeon_ttm_debugfs_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init debugfs\n");
		return r;
	}
921 922 923 924 925
	return 0;
}

void radeon_ttm_fini(struct radeon_device *rdev)
{
926 927
	int r;

928 929
	if (!rdev->mman.initialized)
		return;
930
	radeon_ttm_debugfs_fini(rdev);
931
	if (rdev->stollen_vga_memory) {
932 933 934 935 936 937
		r = radeon_bo_reserve(rdev->stollen_vga_memory, false);
		if (r == 0) {
			radeon_bo_unpin(rdev->stollen_vga_memory);
			radeon_bo_unreserve(rdev->stollen_vga_memory);
		}
		radeon_bo_unref(&rdev->stollen_vga_memory);
938 939 940 941 942 943
	}
	ttm_bo_clean_mm(&rdev->mman.bdev, TTM_PL_VRAM);
	ttm_bo_clean_mm(&rdev->mman.bdev, TTM_PL_TT);
	ttm_bo_device_release(&rdev->mman.bdev);
	radeon_gart_fini(rdev);
	radeon_ttm_global_fini(rdev);
944
	rdev->mman.initialized = false;
945 946 947
	DRM_INFO("radeon: ttm finalized\n");
}

948 949 950 951 952 953 954 955 956 957 958 959 960 961
/*
 * Set the size, in bytes, of the VRAM pool as seen by TTM.  No-op until the
 * BO device is initialised.
 *
 * This should only be called at bootup or when userspace isn't running.
 */
void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
{
	if (!rdev->mman.initialized)
		return;

	/* this just adjusts TTM size idea, which sets lpfn to the correct value */
	rdev->mman.bdev.man[TTM_PL_VRAM].size = size >> PAGE_SHIFT;
}

962
static struct vm_operations_struct radeon_ttm_vm_ops;
963
static const struct vm_operations_struct *ttm_vm_ops = NULL;
964 965 966 967

static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo;
968
	struct radeon_device *rdev;
969 970
	int r;

971
	bo = (struct ttm_buffer_object *)vma->vm_private_data;	
972 973 974
	if (bo == NULL) {
		return VM_FAULT_NOPAGE;
	}
975
	rdev = radeon_get_rdev(bo->bdev);
976
	down_read(&rdev->pm.mclk_lock);
977
	r = ttm_vm_ops->fault(vma, vmf);
978
	up_read(&rdev->pm.mclk_lock);
979 980 981 982 983 984 985 986 987 988 989 990 991
	return r;
}

int radeon_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct drm_file *file_priv;
	struct radeon_device *rdev;
	int r;

	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET)) {
		return drm_mmap(filp, vma);
	}

992
	file_priv = filp->private_data;
993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009
	rdev = file_priv->minor->dev->dev_private;
	if (rdev == NULL) {
		return -EINVAL;
	}
	r = ttm_bo_mmap(filp, vma, &rdev->mman.bdev);
	if (unlikely(r != 0)) {
		return r;
	}
	if (unlikely(ttm_vm_ops == NULL)) {
		ttm_vm_ops = vma->vm_ops;
		radeon_ttm_vm_ops = *ttm_vm_ops;
		radeon_ttm_vm_ops.fault = &radeon_ttm_fault;
	}
	vma->vm_ops = &radeon_ttm_vm_ops;
	return 0;
}

1010
#if defined(CONFIG_DEBUG_FS)
1011

1012 1013 1014
static int radeon_mm_dump_table(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
1015
	unsigned ttm_pl = *(int *)node->info_ent->data;
1016 1017
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
1018
	struct drm_mm *mm = (struct drm_mm *)rdev->mman.bdev.man[ttm_pl].priv;
1019 1020 1021 1022 1023 1024 1025 1026
	int ret;
	struct ttm_bo_global *glob = rdev->mman.bdev.glob;

	spin_lock(&glob->lru_lock);
	ret = drm_mm_dump_table(m, mm);
	spin_unlock(&glob->lru_lock);
	return ret;
}
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039

/* Pool selectors passed to radeon_mm_dump_table() via the list entries below. */
static int ttm_pl_vram = TTM_PL_VRAM;
static int ttm_pl_tt = TTM_PL_TT;

/*
 * debugfs info entries registered by radeon_ttm_debugfs_init().  The SWIOTLB
 * pool entry must stay last: the init code drops it by decrementing the
 * entry count when swiotlb_nr_tbl() reports no table.
 */
static struct drm_info_list radeon_ttm_debugfs_list[] = {
	{"radeon_vram_mm", radeon_mm_dump_table, 0, &ttm_pl_vram},
	{"radeon_gtt_mm", radeon_mm_dump_table, 0, &ttm_pl_tt},
	{"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL},
#ifdef CONFIG_SWIOTLB
	{"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL}
#endif
};

1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
/*
 * open() for the radeon_vram debugfs file: set the inode size to
 * mc.mc_vram_size and stash the device in the file's private data.
 */
static int radeon_ttm_vram_open(struct inode *inode, struct file *filep)
{
	struct radeon_device *rdev = inode->i_private;

	filep->private_data = inode->i_private;
	i_size_write(inode, rdev->mc.mc_vram_size);
	return 0;
}

/*
 * read() for the radeon_vram debugfs file: copy VRAM to user space one
 * 32-bit word at a time through the MM_INDEX/MM_DATA register pair.
 *
 * @size and *@pos must both be multiples of 4 (-EINVAL otherwise).  Reading
 * stops at mc.mc_vram_size.  Returns the number of bytes copied, or the
 * put_user() error if a store to user space fails.
 */
static ssize_t radeon_ttm_vram_read(struct file *f, char __user *buf,
				    size_t size, loff_t *pos)
{
	struct radeon_device *rdev = f->private_data;
	ssize_t copied = 0;

	if ((size & 0x3) || (*pos & 0x3))
		return -EINVAL;

	for (; size; size -= 4, buf += 4, *pos += 4, copied += 4) {
		unsigned long irq_flags;
		uint32_t word;
		int err;

		if (*pos >= rdev->mc.mc_vram_size)
			break;

		/* Index and data registers are shared: access them atomically. */
		spin_lock_irqsave(&rdev->mmio_idx_lock, irq_flags);
		WREG32(RADEON_MM_INDEX, ((uint32_t)*pos) | 0x80000000);
		if (rdev->family >= CHIP_CEDAR)
			WREG32(EVERGREEN_MM_INDEX_HI, *pos >> 31);
		word = RREG32(RADEON_MM_DATA);
		spin_unlock_irqrestore(&rdev->mmio_idx_lock, irq_flags);

		err = put_user(word, (uint32_t *)buf);
		if (err)
			return err;
	}

	return copied;
}

/* File operations of the read-only radeon_vram debugfs file. */
static const struct file_operations radeon_ttm_vram_fops = {
	.owner	= THIS_MODULE,
	.open	= radeon_ttm_vram_open,
	.read	= radeon_ttm_vram_read,
	.llseek	= default_llseek,
};

1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
/*
 * open() for the radeon_gtt debugfs file: set the inode size to mc.gtt_size
 * and stash the device in the file's private data.
 */
static int radeon_ttm_gtt_open(struct inode *inode, struct file *filep)
{
	struct radeon_device *rdev = inode->i_private;

	filep->private_data = inode->i_private;
	i_size_write(inode, rdev->mc.gtt_size);
	return 0;
}

/*
 * read() for the radeon_gtt debugfs file: copy the pages currently in the
 * GART page array to user space, page by page.  Slots without a page read
 * back as zeroes.
 *
 * Reading stops after the last GART page.  Returns the number of bytes
 * copied, or -EFAULT if the user buffer cannot be written.
 */
static ssize_t radeon_ttm_gtt_read(struct file *f, char __user *buf,
				   size_t size, loff_t *pos)
{
	struct radeon_device *rdev = f->private_data;
	ssize_t copied = 0;

	while (size) {
		loff_t p = *pos / PAGE_SIZE;
		unsigned off = *pos & ~PAGE_MASK;
		/* Never cross a page boundary in a single copy. */
		size_t chunk = min_t(size_t, size, PAGE_SIZE - off);
		struct page *page;
		int r;

		if (p >= rdev->gart.num_cpu_pages)
			break;

		page = rdev->gart.pages[p];
		if (!page) {
			r = clear_user(buf, chunk);
		} else {
			void *ptr = kmap(page);

			ptr += off;
			r = copy_to_user(buf, ptr, chunk);
			kunmap(rdev->gart.pages[p]);
		}

		if (r)
			return -EFAULT;

		copied += chunk;
		buf += chunk;
		*pos += chunk;
		size -= chunk;
	}

	return copied;
}

/* File operations of the read-only radeon_gtt debugfs file. */
static const struct file_operations radeon_ttm_gtt_fops = {
	.owner	= THIS_MODULE,
	.open	= radeon_ttm_gtt_open,
	.read	= radeon_ttm_gtt_read,
	.llseek	= default_llseek,
};

1146 1147 1148 1149
#endif

/*
 * Create the radeon_vram and radeon_gtt debugfs files and register the info
 * entries from radeon_ttm_debugfs_list.  The trailing SWIOTLB entry is left
 * out when no SWIOTLB table exists.
 *
 * Returns 0 without doing anything when CONFIG_DEBUG_FS is off; otherwise
 * the error from debugfs_create_file() or the result of
 * radeon_debugfs_add_files().
 */
static int radeon_ttm_debugfs_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	struct drm_minor *minor = rdev->ddev->primary;
	struct dentry *root = minor->debugfs_root;
	unsigned count = ARRAY_SIZE(radeon_ttm_debugfs_list);
	struct dentry *ent;

	ent = debugfs_create_file("radeon_vram", S_IFREG | S_IRUGO, root,
				  rdev, &radeon_ttm_vram_fops);
	if (IS_ERR(ent))
		return PTR_ERR(ent);
	rdev->mman.vram = ent;

	ent = debugfs_create_file("radeon_gtt", S_IFREG | S_IRUGO, root,
				  rdev, &radeon_ttm_gtt_fops);
	if (IS_ERR(ent))
		return PTR_ERR(ent);
	rdev->mman.gtt = ent;

#ifdef CONFIG_SWIOTLB
	/* The DMA pool entry is the last one in the list; skip it. */
	if (!swiotlb_nr_tbl())
		--count;
#endif

	return radeon_debugfs_add_files(rdev, radeon_ttm_debugfs_list, count);
#else
	return 0;
#endif
}
1181 1182 1183 1184 1185 1186 1187

/*
 * Remove the radeon_vram and radeon_gtt debugfs files and clear the stored
 * dentries.  Compiles to nothing without CONFIG_DEBUG_FS.
 */
static void radeon_ttm_debugfs_fini(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	struct dentry *vram_ent = rdev->mman.vram;
	struct dentry *gtt_ent = rdev->mman.gtt;

	debugfs_remove(vram_ent);
	rdev->mman.vram = NULL;

	debugfs_remove(gtt_ent);
	rdev->mman.gtt = NULL;
#endif
}