i915_gem.c 131.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
C
Chris Wilson 已提交
32
#include "i915_trace.h"
33
#include "intel_drv.h"
34
#include <linux/slab.h>
35
#include <linux/swap.h>
J
Jesse Barnes 已提交
36
#include <linux/pci.h>
37
#include <linux/intel-gtt.h>
38

39
static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
40 41 42

static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
						  bool pipelined);
43 44 45 46 47 48 49 50
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						     uint64_t offset,
						     uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
51 52
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj,
					  bool interruptible);
53
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
54
				       unsigned alignment, bool mappable);
55
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
56 57 58
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);
59
static void i915_gem_free_object_tail(struct drm_gem_object *obj);
60

61 62 63 64 65 66 67
static int
i915_gem_object_get_pages(struct drm_gem_object *obj,
			  gfp_t gfpmask);

static void
i915_gem_object_put_pages(struct drm_gem_object *obj);

68 69 70 71
static int i915_gem_inactive_shrink(struct shrinker *shrinker,
				    int nr_to_scan,
				    gfp_t gfp_mask);

72

73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
/* some bookkeeping */
/* Account for one newly created object of @size bytes. */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_memory += size;
	dev_priv->mm.object_count++;
}

/* Undo i915_gem_info_add_obj() for an object of @size bytes. */
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_memory -= size;
	dev_priv->mm.object_count--;
}

static void i915_gem_info_add_gtt(struct drm_i915_private *dev_priv,
89
				  struct drm_gem_object *obj)
90
{
91
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
92
	dev_priv->mm.gtt_count++;
93 94 95 96 97 98 99
	dev_priv->mm.gtt_memory += obj->size;
	if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
		dev_priv->mm.mappable_gtt_used +=
			min_t(size_t, obj->size,
			      dev_priv->mm.gtt_mappable_end
					- obj_priv->gtt_offset);
	}
100 101 102
}

static void i915_gem_info_remove_gtt(struct drm_i915_private *dev_priv,
103
				     struct drm_gem_object *obj)
104
{
105
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
106
	dev_priv->mm.gtt_count--;
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
	dev_priv->mm.gtt_memory -= obj->size;
	if (obj_priv->gtt_offset < dev_priv->mm.gtt_mappable_end) {
		dev_priv->mm.mappable_gtt_used -=
			min_t(size_t, obj->size,
			      dev_priv->mm.gtt_mappable_end
					- obj_priv->gtt_offset);
	}
}

/**
 * Update the mappable working set counters. Call _only_ when one of
 * (pin|fault)_mappable has changed, and update that flag _before_ calling.
 * @mappable: new state of the flag that changed (either pin_ or fault_).
 *
 * The object counts as mappable while either flag is set, so the counters
 * only move when the combined state actually flips.
 */
static void
i915_gem_info_update_mappable(struct drm_i915_private *dev_priv,
			      struct drm_gem_object *obj,
			      bool mappable)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	if (!mappable) {
		/* The other flag is still set: combined state unchanged. */
		if (obj_priv->pin_mappable || obj_priv->fault_mappable)
			return;

		dev_priv->mm.gtt_mappable_count--;
		dev_priv->mm.gtt_mappable_memory -= obj->size;
		return;
	}

	/* Both set now, so the other flag already made it mappable. */
	if (obj_priv->pin_mappable && obj_priv->fault_mappable)
		return;

	dev_priv->mm.gtt_mappable_count++;
	dev_priv->mm.gtt_mappable_memory += obj->size;
}

/*
 * Account for a pin of @obj.  When @mappable is set the object is also
 * flagged pin_mappable and the mappable counters are refreshed.
 */
static void i915_gem_info_add_pin(struct drm_i915_private *dev_priv,
				  struct drm_gem_object *obj,
				  bool mappable)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	dev_priv->mm.pin_count++;
	dev_priv->mm.pin_memory += obj->size;

	if (!mappable)
		return;

	/* The flag must be updated before the counters are recomputed. */
	obj_priv->pin_mappable = true;
	i915_gem_info_update_mappable(dev_priv, obj, true);
}

static void i915_gem_info_remove_pin(struct drm_i915_private *dev_priv,
157
				     struct drm_gem_object *obj)
158
{
159
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
160
	dev_priv->mm.pin_count--;
161 162 163 164 165
	dev_priv->mm.pin_memory -= obj->size;
	if (obj_priv->pin_mappable) {
		obj_priv->pin_mappable = false;
		i915_gem_info_update_mappable(dev_priv, obj, false);
	}
166 167
}

168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
/*
 * If the GPU is marked wedged, wait (interruptibly) on error_completion.
 *
 * Returns 0 when the GPU is not wedged (either initially or after the
 * wait), the error from wait_for_completion_interruptible() if the wait
 * was interrupted, or -EIO if the GPU is still wedged after the wait.
 */
int
i915_gem_check_is_wedged(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *reset_done = &dev_priv->error_completion;
	unsigned long irqflags;
	int err;

	if (atomic_read(&dev_priv->mm.wedged) == 0)
		return 0;

	err = wait_for_completion_interruptible(reset_done);
	if (err)
		return err;

	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&reset_done->wait.lock, irqflags);
		reset_done->done++;
		spin_unlock_irqrestore(&reset_done->wait.lock, irqflags);
		return -EIO;
	}

	/* Success, we reset the GPU! */
	return 0;
}

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
/*
 * Take dev->struct_mutex interruptibly, refusing to do so while the GPU
 * is wedged.
 *
 * Returns 0 with the mutex held, or a negative error with the mutex not
 * held: the error from i915_gem_check_is_wedged() or
 * mutex_lock_interruptible(), or -EAGAIN if the GPU turned out to be
 * wedged once the lock had been acquired.
 */
static int i915_mutex_lock_interruptible(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int err;

	err = i915_gem_check_is_wedged(dev);
	if (err == 0)
		err = mutex_lock_interruptible(&dev->struct_mutex);
	if (err)
		return err;

	if (!atomic_read(&dev_priv->mm.wedged)) {
		WARN_ON(i915_verify_lists(dev));
		return 0;
	}

	/* Wedged while we were waiting for the lock: back out. */
	mutex_unlock(&dev->struct_mutex);
	return -EAGAIN;
}
219

220 221 222 223 224 225 226 227
static inline bool
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj_priv)
{
	return obj_priv->gtt_space &&
		!obj_priv->active &&
		obj_priv->pin_count == 0;
}

228 229
int i915_gem_do_init(struct drm_device *dev,
		     unsigned long start,
D
Daniel Vetter 已提交
230
		     unsigned long mappable_end,
J
Jesse Barnes 已提交
231
		     unsigned long end)
232 233 234
{
	drm_i915_private_t *dev_priv = dev->dev_private;

J
Jesse Barnes 已提交
235 236 237
	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
238 239 240
		return -EINVAL;
	}

J
Jesse Barnes 已提交
241 242
	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);
243

244
	dev_priv->mm.gtt_total = end - start;
245
	dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start;
D
Daniel Vetter 已提交
246
	dev_priv->mm.gtt_mappable_end = mappable_end;
J
Jesse Barnes 已提交
247 248 249

	return 0;
}
250

J
Jesse Barnes 已提交
251 252 253 254 255 256 257 258
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
D
Daniel Vetter 已提交
259
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end, args->gtt_end);
260 261
	mutex_unlock(&dev->struct_mutex);

J
Jesse Barnes 已提交
262
	return ret;
263 264
}

265 266 267 268
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
269
	struct drm_i915_private *dev_priv = dev->dev_private;
270 271 272 273 274
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

275 276 277 278
	mutex_lock(&dev->struct_mutex);
	args->aper_size = dev_priv->mm.gtt_total;
	args->aper_available_size = args->aper_size - dev_priv->mm.pin_memory;
	mutex_unlock(&dev->struct_mutex);
279 280 281 282

	return 0;
}

283 284 285 286 287 288 289 290 291 292

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
293 294
	int ret;
	u32 handle;
295 296 297 298

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
299
	obj = i915_gem_alloc_object(dev, args->size);
300 301 302 303
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
304
	if (ret) {
305 306 307
		drm_gem_object_release(obj);
		i915_gem_info_remove_obj(dev->dev_private, obj->size);
		kfree(obj);
308
		return ret;
309
	}
310

311 312 313 314
	/* drop reference from allocate - handle holds it now */
	drm_gem_object_unreference(obj);
	trace_i915_gem_object_create(obj);

315
	args->handle = handle;
316 317 318
	return 0;
}

319 320 321 322 323 324 325 326 327 328
static bool
i915_gem_object_cpu_accessible(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;

	return obj->gtt_space == NULL ||
		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
}

329 330 331 332 333 334
/*
 * Copy @length bytes, starting @page_offset into the backing page that
 * contains byte offset @page_base, to user space using an atomic kmap.
 *
 * Returns the result of __copy_to_user_inatomic(); callers treat any
 * non-zero value as a fault.
 */
static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	struct page *page = pages[page_base >> PAGE_SHIFT];
	char *kaddr;
	int unread;

	kaddr = kmap_atomic(page);
	unread = __copy_to_user_inatomic(data, kaddr + page_offset, length);
	kunmap_atomic(kaddr);

	return unread;
}

345 346 347
static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->dev->dev_private;
348
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
349 350 351 352 353

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj_priv->tiling_mode != I915_TILING_NONE;
}

354
/*
 * Copy @length bytes from @src_page (+ @src_offset) to @dst_page
 * (+ @dst_offset), mapping both pages with kmap() for the duration.
 */
static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *to = kmap(dst_page);
	char *from = kmap(src_page);

	memcpy(to + dst_offset, from + src_offset, length);

	/* Unmap in the reverse order of mapping. */
	kunmap(src_page);
	kunmap(dst_page);
}

372
/*
 * Copy @length bytes between a GPU backing page and a CPU page, applying
 * the bit-17 swizzle on the GPU side.
 *
 * @is_read: non-zero copies GPU page -> CPU page, zero copies
 *           CPU page -> GPU page.
 *
 * If bit 17 of the GPU page's physical address is clear the plain
 * slow_shmem_copy() path is used.  Otherwise the data is copied in chunks
 * that never cross a 64-byte boundary of the GPU offset, and each chunk
 * is accessed at gpu_offset ^ 64.
 */
static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			slow_shmem_copy(cpu_page, cpu_offset,
					gpu_page, gpu_offset, length);
		else
			slow_shmem_copy(gpu_page, gpu_offset,
					cpu_page, cpu_offset, length);
		return;
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}

421 422 423 424 425 426 427 428 429 430
/**
 * This is the fast shmem pread path, which attempts to copy_from_user directly
 * from the backing pages of the object to the user's address space.  On a
 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
431
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
432 433 434 435 436 437 438 439
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

440
	obj_priv = to_intel_bo(obj);
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

456 457 458 459
		if (fast_shmem_read(obj_priv->pages,
				    page_base, page_offset,
				    user_data, page_length))
			return -EFAULT;
460 461 462 463 464 465

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

466
	return 0;
467 468
}

469 470 471 472 473
static int
i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
{
	int ret;

474
	ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
475 476 477 478 479 480 481

	/* If we've insufficient memory to map in the pages, attempt
	 * to make some space by throwing out some old buffers.
	 */
	if (ret == -ENOMEM) {
		struct drm_device *dev = obj->dev;

482
		ret = i915_gem_evict_something(dev, obj->size,
483 484
					       i915_gem_get_gtt_alignment(obj),
					       false);
485 486 487
		if (ret)
			return ret;

488
		ret = i915_gem_object_get_pages(obj, 0);
489 490 491 492 493
	}

	return ret;
}

494 495 496 497 498 499 500 501 502 503 504
/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
505
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
506 507 508 509 510 511 512 513 514 515
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index,  data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
516
	int do_bit17_swizzling;
517 518 519 520 521 522 523 524 525 526 527

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

528
	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
529 530 531
	if (user_pages == NULL)
		return -ENOMEM;

532
	mutex_unlock(&dev->struct_mutex);
533 534
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
535
				      num_pages, 1, 0, user_pages, NULL);
536
	up_read(&mm->mmap_sem);
537
	mutex_lock(&dev->struct_mutex);
538 539
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
540
		goto out;
541 542
	}

543 544 545
	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
546
	if (ret)
547
		goto out;
548

549
	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
550

551
	obj_priv = to_intel_bo(obj);
552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

574
		if (do_bit17_swizzling) {
575
			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
576
					      shmem_page_offset,
577 578 579 580 581 582 583 584 585 586
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					obj_priv->pages[shmem_page_index],
					shmem_page_offset,
					page_length);
587
		}
588 589 590 591 592 593

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

594
out:
595 596 597 598
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
599
	drm_free_large(user_pages);
600 601 602 603

	return ret;
}

604 605 606 607 608 609 610 611 612 613 614 615
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
616
	int ret = 0;
617

618
	ret = i915_mutex_lock_interruptible(dev);
619
	if (ret)
620
		return ret;
621 622

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
623 624 625
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
626
	}
627
	obj_priv = to_intel_bo(obj);
628

629 630
	/* Bounds check source.  */
	if (args->offset > obj->size || args->size > obj->size - args->offset) {
C
Chris Wilson 已提交
631
		ret = -EINVAL;
632
		goto out;
C
Chris Wilson 已提交
633 634
	}

635 636 637
	if (args->size == 0)
		goto out;

C
Chris Wilson 已提交
638 639 640 641
	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size)) {
		ret = -EFAULT;
642
		goto out;
643 644
	}

645 646 647 648 649
	ret = fault_in_pages_writeable((char __user *)(uintptr_t)args->data_ptr,
				       args->size);
	if (ret) {
		ret = -EFAULT;
		goto out;
650
	}
651

652 653 654 655 656 657 658 659 660 661 662 663
	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto out;

	ret = i915_gem_object_set_cpu_read_domain_range(obj,
							args->offset,
							args->size);
	if (ret)
		goto out_put;

	ret = -EFAULT;
	if (!i915_gem_object_needs_bit17_swizzle(obj))
664
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
665 666
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
667

668 669
out_put:
	i915_gem_object_put_pages(obj);
670
out:
671
	drm_gem_object_unreference(obj);
672
unlock:
673
	mutex_unlock(&dev->struct_mutex);
674
	return ret;
675 676
}

677 678
/* This is the fast write path which cannot handle
 * page faults in the source data
679
 */
680 681 682 683 684 685

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
686 687
{
	char *vaddr_atomic;
688
	unsigned long unwritten;
689

P
Peter Zijlstra 已提交
690
	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
691 692
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
P
Peter Zijlstra 已提交
693
	io_mapping_unmap_atomic(vaddr_atomic);
694
	return unwritten;
695 696 697 698 699 700
}

/* Here's the write path which can sleep for
 * page faults
 */

701
static inline void
702 703 704 705
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
706
{
707 708
	char __iomem *dst_vaddr;
	char *src_vaddr;
709

710 711 712 713 714 715 716 717 718
	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
719 720
}

721 722 723 724 725 726
/*
 * Copy @length bytes from user space into the backing page that contains
 * byte offset @page_base, starting @page_offset into that page, using an
 * atomic kmap.
 *
 * Returns the result of __copy_from_user_inatomic(); callers treat any
 * non-zero value as a fault.
 */
static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	struct page *page = pages[page_base >> PAGE_SHIFT];
	char *kaddr;
	int unwritten;

	kaddr = kmap_atomic(page);
	unwritten = __copy_from_user_inatomic(kaddr + page_offset, data, length);
	kunmap_atomic(kaddr);

	return unwritten;
}

737 738 739 740
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
741
static int
742 743 744
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
745
{
746
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
747
	drm_i915_private_t *dev_priv = dev->dev_private;
748
	ssize_t remain;
749
	loff_t offset, page_base;
750
	char __user *user_data;
751
	int page_offset, page_length;
752 753 754 755

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

756
	obj_priv = to_intel_bo(obj);
757 758 759 760 761
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
762 763 764
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
765
		 */
766 767 768 769 770 771 772
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
773 774
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
775
		 */
776 777 778 779
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
				    page_offset, user_data, page_length))

			return -EFAULT;
780

781 782 783
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
784 785
	}

786
	return 0;
787 788
}

789 790 791 792 793 794 795
/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
796
static int
797 798 799
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
800
{
801
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
802 803 804 805 806 807 808 809
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
810
	int ret;
811 812 813 814 815 816 817 818 819 820 821 822
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

823
	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
824 825 826
	if (user_pages == NULL)
		return -ENOMEM;

827
	mutex_unlock(&dev->struct_mutex);
828 829 830 831
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
832
	mutex_lock(&dev->struct_mutex);
833 834 835 836
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}
837

838 839
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
840
		goto out_unpin_pages;
841

842
	obj_priv = to_intel_bo(obj);
843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

865 866 867 868 869
		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);
870 871 872 873 874 875 876 877 878

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
879
	drm_free_large(user_pages);
880 881 882 883

	return ret;
}

884 885 886 887
/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
888
static int
889 890 891
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
892
{
893
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
894 895 896 897 898 899 900
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
901

902
	obj_priv = to_intel_bo(obj);
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

919
		if (fast_shmem_write(obj_priv->pages,
920
				       page_base, page_offset,
921 922
				       user_data, page_length))
			return -EFAULT;
923 924 925 926 927 928

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

929
	return 0;
930 931 932 933 934 935 936 937 938 939 940 941 942 943
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
944
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
945 946 947 948 949 950 951 952 953 954
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index,  data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
955
	int do_bit17_swizzling;
956 957 958 959 960 961 962 963 964 965 966

	remain = args->size;

	/* Pin the user pages containing the data.  We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

967
	user_pages = drm_malloc_ab(num_pages, sizeof(struct page *));
968 969 970
	if (user_pages == NULL)
		return -ENOMEM;

971
	mutex_unlock(&dev->struct_mutex);
972 973 974 975
	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
976
	mutex_lock(&dev->struct_mutex);
977 978
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
979
		goto out;
980 981
	}

982
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
983
	if (ret)
984
		goto out;
985

986
	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
987

988
	obj_priv = to_intel_bo(obj);
989
	offset = args->offset;
990
	obj_priv->dirty = 1;
991

992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

1012
		if (do_bit17_swizzling) {
1013
			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
1014 1015 1016
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
1017 1018 1019 1020 1021 1022 1023 1024
					      page_length,
					      0);
		} else {
			slow_shmem_copy(obj_priv->pages[shmem_page_index],
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
1025
		}
1026 1027 1028 1029

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
1030 1031
	}

1032
out:
1033 1034
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
1035
	drm_free_large(user_pages);
1036

1037
	return ret;
1038 1039 1040 1041 1042 1043 1044 1045 1046
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
1047
		      struct drm_file *file)
1048 1049 1050 1051 1052 1053
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

1054
	ret = i915_mutex_lock_interruptible(dev);
1055
	if (ret)
1056
		return ret;
1057 1058 1059 1060 1061

	obj = drm_gem_object_lookup(dev, file, args->handle);
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
1062
	}
1063
	obj_priv = to_intel_bo(obj);
1064

1065

1066 1067
	/* Bounds check destination. */
	if (args->offset > obj->size || args->size > obj->size - args->offset) {
C
Chris Wilson 已提交
1068
		ret = -EINVAL;
1069
		goto out;
C
Chris Wilson 已提交
1070 1071
	}

1072 1073 1074
	if (args->size == 0)
		goto out;

C
Chris Wilson 已提交
1075 1076 1077 1078
	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size)) {
		ret = -EFAULT;
1079
		goto out;
1080 1081
	}

1082 1083 1084 1085 1086
	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret) {
		ret = -EFAULT;
		goto out;
1087 1088 1089 1090 1091 1092 1093 1094
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
1095
	if (obj_priv->phys_obj)
1096
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
1097
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
1098
		 obj_priv->gtt_space &&
1099
		 obj->write_domain != I915_GEM_DOMAIN_CPU) {
1100
		ret = i915_gem_object_pin(obj, 0, true);
1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113
		if (ret)
			goto out;

		ret = i915_gem_object_set_to_gtt_domain(obj, 1);
		if (ret)
			goto out_unpin;

		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args, file);

out_unpin:
		i915_gem_object_unpin(obj);
1114
	} else {
1115 1116 1117
		ret = i915_gem_object_get_pages_or_evict(obj);
		if (ret)
			goto out;
1118

1119 1120 1121
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			goto out_put;
1122

1123 1124 1125 1126 1127 1128 1129 1130 1131
		ret = -EFAULT;
		if (!i915_gem_object_needs_bit17_swizzle(obj))
			ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file);
		if (ret == -EFAULT)
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file);

out_put:
		i915_gem_object_put_pages(obj);
	}
1132

1133
out:
1134
	drm_gem_object_unreference(obj);
1135
unlock:
1136
	mutex_unlock(&dev->struct_mutex);
1137 1138 1139 1140
	return ret;
}

/**
1141 1142
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
1143 1144 1145 1146 1147
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
1148
	struct drm_i915_private *dev_priv = dev->dev_private;
1149 1150
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
1151
	struct drm_i915_gem_object *obj_priv;
1152 1153
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
1154 1155 1156 1157 1158
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

1159
	/* Only handle setting domains to types used by the CPU. */
1160
	if (write_domain & I915_GEM_GPU_DOMAINS)
1161 1162
		return -EINVAL;

1163
	if (read_domains & I915_GEM_GPU_DOMAINS)
1164 1165 1166 1167 1168 1169 1170 1171
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

1172
	ret = i915_mutex_lock_interruptible(dev);
1173
	if (ret)
1174
		return ret;
1175

1176
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1177 1178 1179
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
1180
	}
1181
	obj_priv = to_intel_bo(obj);
1182

1183 1184
	intel_mark_busy(dev, obj);

1185 1186
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1187

1188 1189 1190 1191
		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1192 1193 1194
			struct drm_i915_fence_reg *reg =
				&dev_priv->fence_regs[obj_priv->fence_reg];
			list_move_tail(&reg->lru_list,
1195 1196 1197
				       &dev_priv->mm.fence_list);
		}

1198 1199 1200 1201 1202 1203
		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
1204
	} else {
1205
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1206 1207
	}

1208 1209
	/* Maintain LRU order of "inactive" objects */
	if (ret == 0 && i915_gem_object_is_inactive(obj_priv))
1210
		list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1211

1212
	drm_gem_object_unreference(obj);
1213
unlock:
1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

1232
	ret = i915_mutex_lock_interruptible(dev);
1233
	if (ret)
1234
		return ret;
1235

1236 1237
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
1238 1239
		ret = -ENOENT;
		goto unlock;
1240 1241 1242
	}

	/* Pinned buffers may be scanout, so flush the cache */
1243
	if (to_intel_bo(obj)->pin_count)
1244 1245
		i915_gem_object_flush_cpu_write_domain(obj);

1246
	drm_gem_object_unreference(obj);
1247
unlock:
1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file_priv)
{
1263
	struct drm_i915_private *dev_priv = dev->dev_private;
1264 1265 1266 1267 1268 1269 1270 1271 1272 1273
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
1274
		return -ENOENT;
1275

1276 1277 1278 1279 1280
	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		drm_gem_object_unreference_unlocked(obj);
		return -E2BIG;
	}

1281 1282 1283 1284 1285 1286 1287
	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
1288
	drm_gem_object_unreference_unlocked(obj);
1289 1290 1291 1292 1293 1294 1295 1296
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316
/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
1317
	drm_i915_private_t *dev_priv = dev->dev_private;
1318
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1319 1320 1321
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
1322
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1323 1324 1325 1326 1327 1328 1329

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
1330
	BUG_ON(obj_priv->pin_count && !obj_priv->pin_mappable);
1331 1332 1333
	if (!i915_gem_object_cpu_accessible(obj_priv))
		i915_gem_object_unbind(obj);

1334
	if (!obj_priv->gtt_space) {
1335
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
1336 1337
		if (ret)
			goto unlock;
1338 1339

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
1340 1341
		if (ret)
			goto unlock;
1342 1343
	}

1344 1345 1346 1347 1348
	if (!obj_priv->fault_mappable) {
		obj_priv->fault_mappable = true;
		i915_gem_info_update_mappable(dev_priv, obj, true);
	}

1349
	/* Need a new fence register? */
1350
	if (obj_priv->tiling_mode != I915_TILING_NONE) {
1351
		ret = i915_gem_object_get_fence_reg(obj, true);
1352 1353
		if (ret)
			goto unlock;
1354
	}
1355

1356
	if (i915_gem_object_is_inactive(obj_priv))
1357
		list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
1358

1359 1360 1361 1362 1363
	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1364
unlock:
1365 1366 1367
	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
1368 1369 1370
	case 0:
	case -ERESTARTSYS:
		return VM_FAULT_NOPAGE;
1371 1372 1373 1374
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	default:
1375
		return VM_FAULT_SIGBUS;
1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395
	}
}

/**
 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
 * @obj: obj in question
 *
 * GEM memory mapping works by handing back to userspace a fake mmap offset
 * it can use in a subsequent mmap(2) call.  The DRM core code then looks
 * up the object based on the offset and sets up the various memory mapping
 * structures.
 *
 * This routine allocates and attaches a fake offset for @obj.
 */
static int
i915_gem_create_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
	struct drm_map_list *list;
1396
	struct drm_local_map *map;
1397 1398 1399 1400
	int ret = 0;

	/* Set the object up for mmap'ing */
	list = &obj->map_list;
1401
	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
	if (!list->map)
		return -ENOMEM;

	map = list->map;
	map->type = _DRM_GEM;
	map->size = obj->size;
	map->handle = obj;

	/* Get a DRM GEM mmap offset allocated... */
	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
						    obj->size / PAGE_SIZE, 0, 0);
	if (!list->file_offset_node) {
		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1415
		ret = -ENOSPC;
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426
		goto out_free_list;
	}

	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
						  obj->size / PAGE_SIZE, 0);
	if (!list->file_offset_node) {
		ret = -ENOMEM;
		goto out_free_list;
	}

	list->hash.key = list->file_offset_node->start;
1427 1428
	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
	if (ret) {
1429 1430 1431 1432 1433 1434 1435 1436 1437
		DRM_ERROR("failed to add to map hash\n");
		goto out_free_mm;
	}

	return 0;

out_free_mm:
	drm_mm_put_block(list->file_offset_node);
out_free_list:
1438
	kfree(list->map);
C
Chris Wilson 已提交
1439
	list->map = NULL;
1440 1441 1442 1443

	return ret;
}

1444 1445 1446 1447
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
1448
 * Preserve the reservation of the mmapping with the DRM core code, but
1449 1450 1451 1452 1453 1454 1455 1456 1457
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, than pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
1458
void
1459 1460 1461
i915_gem_release_mmap(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
1462
	struct drm_i915_private *dev_priv = dev->dev_private;
1463
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1464

C
Chris Wilson 已提交
1465
	if (unlikely(obj->map_list.map && dev->dev_mapping))
1466
		unmap_mapping_range(dev->dev_mapping,
C
Chris Wilson 已提交
1467 1468
				    (loff_t)obj->map_list.hash.key<<PAGE_SHIFT,
				    obj->size, 1);
1469 1470 1471 1472 1473

	if (obj_priv->fault_mappable) {
		obj_priv->fault_mappable = false;
		i915_gem_info_update_mappable(dev_priv, obj, false);
	}
1474 1475
}

1476 1477 1478 1479 1480
static void
i915_gem_free_mmap_offset(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	struct drm_gem_mm *mm = dev->mm_private;
C
Chris Wilson 已提交
1481
	struct drm_map_list *list = &obj->map_list;
1482 1483

	drm_ht_remove_item(&mm->offset_hash, &list->hash);
C
Chris Wilson 已提交
1484 1485 1486
	drm_mm_put_block(list->file_offset_node);
	kfree(list->map);
	list->map = NULL;
1487 1488
}

1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
 * potential fence register mapping if needed.
 */
static uint32_t
i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
1500
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1501 1502 1503 1504 1505 1506
	int start, i;

	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
1507
	if (INTEL_INFO(dev)->gen >= 4 || obj_priv->tiling_mode == I915_TILING_NONE)
1508 1509 1510 1511 1512 1513
		return 4096;

	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
1514
	if (INTEL_INFO(dev)->gen == 3)
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543
		start = 1024*1024;
	else
		start = 512*1024;

	for (i = start; i < obj->size; i <<= 1)
		;

	return i;
}

/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file_priv: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
1544
	struct drm_i915_private *dev_priv = dev->dev_private;
1545 1546 1547 1548 1549 1550 1551 1552
	struct drm_i915_gem_mmap_gtt *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

1553
	ret = i915_mutex_lock_interruptible(dev);
1554
	if (ret)
1555
		return ret;
1556

1557 1558 1559 1560 1561
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		ret = -ENOENT;
		goto unlock;
	}
1562
	obj_priv = to_intel_bo(obj);
1563

1564 1565 1566 1567 1568
	if (obj->size > dev_priv->mm.gtt_mappable_end) {
		ret = -E2BIG;
		goto unlock;
	}

1569 1570
	if (obj_priv->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1571 1572
		ret = -EINVAL;
		goto out;
1573 1574
	}

C
Chris Wilson 已提交
1575
	if (!obj->map_list.map) {
1576
		ret = i915_gem_create_mmap_offset(obj);
1577 1578
		if (ret)
			goto out;
1579 1580
	}

C
Chris Wilson 已提交
1581
	args->offset = (u64)obj->map_list.hash.key << PAGE_SHIFT;
1582

1583
out:
1584
	drm_gem_object_unreference(obj);
1585
unlock:
1586
	mutex_unlock(&dev->struct_mutex);
1587
	return ret;
1588 1589
}

1590
static void
1591
i915_gem_object_put_pages(struct drm_gem_object *obj)
1592
{
1593
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1594 1595 1596
	int page_count = obj->size / PAGE_SIZE;
	int i;

1597
	BUG_ON(obj_priv->pages_refcount == 0);
C
Chris Wilson 已提交
1598
	BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1599

1600 1601
	if (--obj_priv->pages_refcount != 0)
		return;
1602

1603 1604 1605
	if (obj_priv->tiling_mode != I915_TILING_NONE)
		i915_gem_object_save_bit_17_swizzle(obj);

1606
	if (obj_priv->madv == I915_MADV_DONTNEED)
1607
		obj_priv->dirty = 0;
1608 1609 1610 1611 1612 1613

	for (i = 0; i < page_count; i++) {
		if (obj_priv->dirty)
			set_page_dirty(obj_priv->pages[i]);

		if (obj_priv->madv == I915_MADV_WILLNEED)
1614
			mark_page_accessed(obj_priv->pages[i]);
1615 1616 1617

		page_cache_release(obj_priv->pages[i]);
	}
1618 1619
	obj_priv->dirty = 0;

1620
	drm_free_large(obj_priv->pages);
1621
	obj_priv->pages = NULL;
1622 1623
}

1624 1625 1626 1627 1628 1629 1630 1631 1632 1633
/*
 * Flag the ring as having an outstanding lazy request and return the
 * device's next_seqno value.
 */
static uint32_t
i915_gem_next_request_seqno(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *i915 = dev->dev_private;
	uint32_t seqno;

	ring->outstanding_lazy_request = true;
	seqno = i915->next_seqno;

	return seqno;
}

1634
static void
1635
i915_gem_object_move_to_active(struct drm_gem_object *obj,
1636
			       struct intel_ring_buffer *ring)
1637 1638
{
	struct drm_device *dev = obj->dev;
1639
	struct drm_i915_private *dev_priv = dev->dev_private;
1640
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1641
	uint32_t seqno = i915_gem_next_request_seqno(dev, ring);
1642

1643 1644
	BUG_ON(ring == NULL);
	obj_priv->ring = ring;
1645 1646 1647 1648 1649 1650

	/* Add a reference if we're newly entering the active list. */
	if (!obj_priv->active) {
		drm_gem_object_reference(obj);
		obj_priv->active = 1;
	}
1651

1652
	/* Move from whatever list we were on to the tail of execution. */
1653 1654
	list_move_tail(&obj_priv->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj_priv->ring_list, &ring->active_list);
1655
	obj_priv->last_rendering_seqno = seqno;
1656 1657
}

1658 1659 1660 1661 1662
static void
i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
1663
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1664 1665

	BUG_ON(!obj_priv->active);
1666 1667
	list_move_tail(&obj_priv->mm_list, &dev_priv->mm.flushing_list);
	list_del_init(&obj_priv->ring_list);
1668 1669
	obj_priv->last_rendering_seqno = 0;
}
1670

1671 1672 1673 1674
/* Immediately discard the backing storage */
static void
i915_gem_object_truncate(struct drm_gem_object *obj)
{
1675
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
C
Chris Wilson 已提交
1676
	struct inode *inode;
1677

1678 1679 1680 1681 1682 1683
	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
	 * backing pages, *now*. Here we mirror the actions taken
	 * when by shmem_delete_inode() to release the backing store.
	 */
C
Chris Wilson 已提交
1684
	inode = obj->filp->f_path.dentry->d_inode;
1685 1686 1687
	truncate_inode_pages(inode->i_mapping, 0);
	if (inode->i_op->truncate_range)
		inode->i_op->truncate_range(inode, 0, (loff_t)-1);
C
Chris Wilson 已提交
1688 1689

	obj_priv->madv = __I915_MADV_PURGED;
1690 1691 1692 1693 1694 1695 1696 1697
}

/* Non-zero when the object's madvise state is I915_MADV_DONTNEED. */
static inline int
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
{
	int purgeable = (obj_priv->madv == I915_MADV_DONTNEED);

	return purgeable;
}

1698 1699 1700 1701 1702
static void
i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
1703
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1704 1705

	if (obj_priv->pin_count != 0)
1706
		list_move_tail(&obj_priv->mm_list, &dev_priv->mm.pinned_list);
1707
	else
1708 1709
		list_move_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
	list_del_init(&obj_priv->ring_list);
1710

1711 1712
	BUG_ON(!list_empty(&obj_priv->gpu_write_list));

1713
	obj_priv->last_rendering_seqno = 0;
1714
	obj_priv->ring = NULL;
1715 1716 1717 1718
	if (obj_priv->active) {
		obj_priv->active = 0;
		drm_gem_object_unreference(obj);
	}
1719
	WARN_ON(i915_verify_lists(dev));
1720 1721
}

1722 1723
static void
i915_gem_process_flushing_list(struct drm_device *dev,
1724
			       uint32_t flush_domains,
1725
			       struct intel_ring_buffer *ring)
1726 1727 1728 1729 1730
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv, *next;

	list_for_each_entry_safe(obj_priv, next,
1731
				 &ring->gpu_write_list,
1732
				 gpu_write_list) {
1733
		struct drm_gem_object *obj = &obj_priv->base;
1734

1735
		if (obj->write_domain & flush_domains) {
1736 1737 1738 1739
			uint32_t old_write_domain = obj->write_domain;

			obj->write_domain = 0;
			list_del_init(&obj_priv->gpu_write_list);
1740
			i915_gem_object_move_to_active(obj, ring);
1741 1742

			/* update the fence lru list */
1743 1744 1745 1746
			if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
				struct drm_i915_fence_reg *reg =
					&dev_priv->fence_regs[obj_priv->fence_reg];
				list_move_tail(&reg->lru_list,
1747
						&dev_priv->mm.fence_list);
1748
			}
1749 1750 1751 1752 1753 1754 1755

			trace_i915_gem_object_change_domain(obj,
							    obj->read_domains,
							    old_write_domain);
		}
	}
}
1756

1757
int
1758
i915_add_request(struct drm_device *dev,
1759
		 struct drm_file *file,
C
Chris Wilson 已提交
1760
		 struct drm_i915_gem_request *request,
1761
		 struct intel_ring_buffer *ring)
1762 1763
{
	drm_i915_private_t *dev_priv = dev->dev_private;
1764
	struct drm_i915_file_private *file_priv = NULL;
1765 1766
	uint32_t seqno;
	int was_empty;
1767 1768 1769
	int ret;

	BUG_ON(request == NULL);
1770

1771 1772
	if (file != NULL)
		file_priv = file->driver_priv;
1773

1774 1775 1776
	ret = ring->add_request(ring, &seqno);
	if (ret)
	    return ret;
1777

1778
	ring->outstanding_lazy_request = false;
1779 1780

	request->seqno = seqno;
1781
	request->ring = ring;
1782
	request->emitted_jiffies = jiffies;
1783 1784 1785
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

1786
	if (file_priv) {
1787
		spin_lock(&file_priv->mm.lock);
1788
		request->file_priv = file_priv;
1789
		list_add_tail(&request->client_list,
1790
			      &file_priv->mm.request_list);
1791
		spin_unlock(&file_priv->mm.lock);
1792
	}
1793

B
Ben Gamari 已提交
1794
	if (!dev_priv->mm.suspended) {
1795 1796
		mod_timer(&dev_priv->hangcheck_timer,
			  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
B
Ben Gamari 已提交
1797
		if (was_empty)
1798 1799
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
B
Ben Gamari 已提交
1800
	}
1801
	return 0;
1802 1803 1804 1805 1806 1807 1808 1809
}

/**
 * Command execution barrier
 *
 * Ensures that all commands in the ring are finished
 * before signalling the CPU
 */
1810
static void
1811
i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1812 1813 1814 1815
{
	uint32_t flush_domains = 0;

	/* The sampler always gets flushed on i965 (sigh) */
1816
	if (INTEL_INFO(dev)->gen >= 4)
1817
		flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1818

1819
	ring->flush(ring, I915_GEM_DOMAIN_COMMAND, flush_domains);
1820 1821
}

1822 1823
static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1824
{
1825
	struct drm_i915_file_private *file_priv = request->file_priv;
1826

1827 1828
	if (!file_priv)
		return;
C
Chris Wilson 已提交
1829

1830 1831 1832 1833
	spin_lock(&file_priv->mm.lock);
	list_del(&request->client_list);
	request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
1834 1835
}

1836 1837
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
1838
{
1839 1840
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;
1841

1842 1843 1844
		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);
1845

1846
		list_del(&request->list);
1847
		i915_gem_request_remove_from_client(request);
1848 1849
		kfree(request);
	}
1850

1851
	while (!list_empty(&ring->active_list)) {
1852 1853
		struct drm_i915_gem_object *obj_priv;

1854
		obj_priv = list_first_entry(&ring->active_list,
1855
					    struct drm_i915_gem_object,
1856
					    ring_list);
1857 1858

		obj_priv->base.write_domain = 0;
1859
		list_del_init(&obj_priv->gpu_write_list);
1860
		i915_gem_object_move_to_inactive(&obj_priv->base);
1861 1862 1863
	}
}

1864
void i915_gem_reset(struct drm_device *dev)
1865
{
1866 1867
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv;
1868
	int i;
1869

1870
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->render_ring);
1871
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->bsd_ring);
1872
	i915_gem_reset_ring_lists(dev_priv, &dev_priv->blt_ring);
1873 1874 1875 1876 1877 1878 1879 1880

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
		obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
					    struct drm_i915_gem_object,
1881
					    mm_list);
1882 1883 1884 1885 1886 1887 1888 1889 1890

		obj_priv->base.write_domain = 0;
		list_del_init(&obj_priv->gpu_write_list);
		i915_gem_object_move_to_inactive(&obj_priv->base);
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
1891 1892
	list_for_each_entry(obj_priv,
			    &dev_priv->mm.inactive_list,
1893
			    mm_list)
1894 1895 1896
	{
		obj_priv->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	}
1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907

	/* The fence registers are invalidated so clear them out */
	for (i = 0; i < 16; i++) {
		struct drm_i915_fence_reg *reg;

		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			continue;

		i915_gem_clear_fence_reg(reg->obj);
	}
1908 1909 1910 1911 1912
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
1913 1914 1915
static void
i915_gem_retire_requests_ring(struct drm_device *dev,
			      struct intel_ring_buffer *ring)
1916 1917 1918 1919
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t seqno;

1920 1921
	if (!ring->status_page.page_addr ||
	    list_empty(&ring->request_list))
1922 1923
		return;

1924
	WARN_ON(i915_verify_lists(dev));
1925

1926
	seqno = ring->get_seqno(ring);
1927
	while (!list_empty(&ring->request_list)) {
1928 1929
		struct drm_i915_gem_request *request;

1930
		request = list_first_entry(&ring->request_list,
1931 1932 1933
					   struct drm_i915_gem_request,
					   list);

1934
		if (!i915_seqno_passed(seqno, request->seqno))
1935 1936 1937 1938 1939
			break;

		trace_i915_gem_request_retire(dev, request->seqno);

		list_del(&request->list);
1940
		i915_gem_request_remove_from_client(request);
1941 1942
		kfree(request);
	}
1943

1944 1945 1946 1947 1948 1949 1950 1951 1952
	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
		struct drm_gem_object *obj;
		struct drm_i915_gem_object *obj_priv;

		obj_priv = list_first_entry(&ring->active_list,
					    struct drm_i915_gem_object,
1953
					    ring_list);
1954

1955
		if (!i915_seqno_passed(seqno, obj_priv->last_rendering_seqno))
1956
			break;
1957 1958 1959 1960 1961 1962

		obj = &obj_priv->base;
		if (obj->write_domain != 0)
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
1963
	}
1964 1965 1966

	if (unlikely (dev_priv->trace_irq_seqno &&
		      i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1967
		ring->user_irq_put(ring);
1968 1969
		dev_priv->trace_irq_seqno = 0;
	}
1970 1971

	WARN_ON(i915_verify_lists(dev));
1972 1973
}

1974 1975 1976 1977 1978
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

1979 1980 1981 1982 1983 1984 1985 1986 1987 1988
	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
	    struct drm_i915_gem_object *obj_priv, *tmp;

	    /* We must be careful that during unbind() we do not
	     * accidentally infinitely recurse into retire requests.
	     * Currently:
	     *   retire -> free -> unbind -> wait -> retire_ring
	     */
	    list_for_each_entry_safe(obj_priv, tmp,
				     &dev_priv->mm.deferred_free_list,
1989
				     mm_list)
1990 1991 1992
		    i915_gem_free_object_tail(&obj_priv->base);
	}

1993
	i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1994
	i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1995
	i915_gem_retire_requests_ring(dev, &dev_priv->blt_ring);
1996 1997
}

1998
static void
1999 2000 2001 2002 2003 2004 2005 2006 2007
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

2008 2009 2010 2011 2012 2013
	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
		return;
	}

2014
	i915_gem_retire_requests(dev);
2015

2016
	if (!dev_priv->mm.suspended &&
2017
		(!list_empty(&dev_priv->render_ring.request_list) ||
2018 2019
		 !list_empty(&dev_priv->bsd_ring.request_list) ||
		 !list_empty(&dev_priv->blt_ring.request_list)))
2020
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
2021 2022 2023
	mutex_unlock(&dev->struct_mutex);
}

2024
int
2025
i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
2026
		     bool interruptible, struct intel_ring_buffer *ring)
2027 2028
{
	drm_i915_private_t *dev_priv = dev->dev_private;
2029
	u32 ier;
2030 2031 2032 2033
	int ret = 0;

	BUG_ON(seqno == 0);

2034
	if (atomic_read(&dev_priv->mm.wedged))
2035 2036
		return -EAGAIN;

2037
	if (ring->outstanding_lazy_request) {
2038 2039 2040 2041
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
2042
			return -ENOMEM;
2043 2044 2045 2046 2047 2048 2049 2050

		ret = i915_add_request(dev, NULL, request, ring);
		if (ret) {
			kfree(request);
			return ret;
		}

		seqno = request->seqno;
2051
	}
2052
	BUG_ON(seqno == dev_priv->next_seqno);
2053

2054
	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2055
		if (HAS_PCH_SPLIT(dev))
2056 2057 2058
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
2059 2060 2061 2062 2063 2064 2065
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
			i915_driver_irq_preinstall(dev);
			i915_driver_irq_postinstall(dev);
		}

C
Chris Wilson 已提交
2066 2067
		trace_i915_gem_request_wait_begin(dev, seqno);

2068
		ring->waiting_seqno = seqno;
2069
		ring->user_irq_get(ring);
2070
		if (interruptible)
2071
			ret = wait_event_interruptible(ring->irq_queue,
2072
				i915_seqno_passed(ring->get_seqno(ring), seqno)
2073
				|| atomic_read(&dev_priv->mm.wedged));
2074
		else
2075
			wait_event(ring->irq_queue,
2076
				i915_seqno_passed(ring->get_seqno(ring), seqno)
2077
				|| atomic_read(&dev_priv->mm.wedged));
2078

2079
		ring->user_irq_put(ring);
2080
		ring->waiting_seqno = 0;
C
Chris Wilson 已提交
2081 2082

		trace_i915_gem_request_wait_end(dev, seqno);
2083
	}
2084
	if (atomic_read(&dev_priv->mm.wedged))
2085
		ret = -EAGAIN;
2086 2087

	if (ret && ret != -ERESTARTSYS)
2088
		DRM_ERROR("%s returns %d (awaiting %d at %d, next %d)\n",
2089
			  __func__, ret, seqno, ring->get_seqno(ring),
2090
			  dev_priv->next_seqno);
2091 2092 2093 2094 2095 2096 2097

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
	if (ret == 0)
2098
		i915_gem_retire_requests_ring(dev, ring);
2099 2100 2101 2102

	return ret;
}

2103 2104 2105 2106 2107
/**
 * i915_wait_request - interruptibly wait for a sequence number
 * @dev: DRM device
 * @seqno: sequence number to wait for
 * @ring: ring expected to reach @seqno
 *
 * Convenience wrapper around i915_do_wait_request() that always requests
 * an interruptible wait. Returns whatever i915_do_wait_request() returns.
 */
static int
i915_wait_request(struct drm_device *dev, uint32_t seqno,
		  struct intel_ring_buffer *ring)
{
	const bool interruptible = true;

	return i915_do_wait_request(dev, seqno, interruptible, ring);
}

2114
static void
2115
i915_gem_flush_ring(struct drm_device *dev,
2116
		    struct drm_file *file_priv,
2117 2118 2119 2120
		    struct intel_ring_buffer *ring,
		    uint32_t invalidate_domains,
		    uint32_t flush_domains)
{
2121
	ring->flush(ring, invalidate_domains, flush_domains);
2122 2123 2124
	i915_gem_process_flushing_list(dev, flush_domains, ring);
}

2125 2126
static void
i915_gem_flush(struct drm_device *dev,
2127
	       struct drm_file *file_priv,
2128
	       uint32_t invalidate_domains,
2129 2130
	       uint32_t flush_domains,
	       uint32_t flush_rings)
2131 2132
{
	drm_i915_private_t *dev_priv = dev->dev_private;
2133

2134 2135
	if (flush_domains & I915_GEM_DOMAIN_CPU)
		drm_agp_chipset_flush(dev);
2136

2137 2138
	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		if (flush_rings & RING_RENDER)
2139
			i915_gem_flush_ring(dev, file_priv,
2140 2141 2142
					    &dev_priv->render_ring,
					    invalidate_domains, flush_domains);
		if (flush_rings & RING_BSD)
2143
			i915_gem_flush_ring(dev, file_priv,
2144 2145
					    &dev_priv->bsd_ring,
					    invalidate_domains, flush_domains);
2146 2147 2148 2149
		if (flush_rings & RING_BLT)
			i915_gem_flush_ring(dev, file_priv,
					    &dev_priv->blt_ring,
					    invalidate_domains, flush_domains);
2150
	}
2151 2152
}

2153 2154 2155 2156 2157
/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
static int
2158 2159
i915_gem_object_wait_rendering(struct drm_gem_object *obj,
			       bool interruptible)
2160 2161
{
	struct drm_device *dev = obj->dev;
2162
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2163 2164
	int ret;

2165 2166
	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
2167
	 */
2168
	BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
2169 2170 2171 2172 2173

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj_priv->active) {
2174 2175 2176 2177 2178
		ret = i915_do_wait_request(dev,
					   obj_priv->last_rendering_seqno,
					   interruptible,
					   obj_priv->ring);
		if (ret)
2179 2180 2181 2182 2183 2184 2185 2186 2187
			return ret;
	}

	return 0;
}

/**
 * Unbinds an object from the GTT aperture.
 */
2188
int
2189 2190 2191
i915_gem_object_unbind(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
2192
	struct drm_i915_private *dev_priv = dev->dev_private;
2193
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2194 2195 2196 2197 2198 2199 2200 2201 2202 2203
	int ret = 0;

	if (obj_priv->gtt_space == NULL)
		return 0;

	if (obj_priv->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

2204 2205 2206
	/* blow away mappings if mapped through GTT */
	i915_gem_release_mmap(obj);

2207 2208 2209 2210 2211 2212
	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it. This will
	 * also ensure that all pending GPU writes are finished
	 * before we unbind.
	 */
2213
	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
2214
	if (ret == -ERESTARTSYS)
2215
		return ret;
2216 2217 2218 2219
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */
2220 2221 2222 2223
	if (ret) {
		i915_gem_clflush_object(obj);
		obj->read_domains = obj->write_domain = I915_GEM_DOMAIN_CPU;
	}
2224

2225 2226 2227 2228
	/* release the fence reg _after_ flushing */
	if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
		i915_gem_clear_fence_reg(obj);

2229 2230
	drm_unbind_agp(obj_priv->agp_mem);
	drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
2231

2232
	i915_gem_object_put_pages(obj);
2233
	BUG_ON(obj_priv->pages_refcount);
2234

2235
	i915_gem_info_remove_gtt(dev_priv, obj);
2236
	list_del_init(&obj_priv->mm_list);
2237

2238 2239
	drm_mm_put_block(obj_priv->gtt_space);
	obj_priv->gtt_space = NULL;
2240
	obj_priv->gtt_offset = 0;
2241

2242 2243 2244
	if (i915_gem_object_is_purgeable(obj_priv))
		i915_gem_object_truncate(obj);

C
Chris Wilson 已提交
2245 2246
	trace_i915_gem_object_unbind(obj);

2247
	return ret;
2248 2249
}

2250 2251 2252
static int i915_ring_idle(struct drm_device *dev,
			  struct intel_ring_buffer *ring)
{
2253 2254 2255
	if (list_empty(&ring->gpu_write_list))
		return 0;

2256 2257 2258 2259 2260 2261 2262
	i915_gem_flush_ring(dev, NULL, ring,
			    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	return i915_wait_request(dev,
				 i915_gem_next_request_seqno(dev, ring),
				 ring);
}

2263
int
2264 2265 2266 2267
i915_gpu_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	bool lists_empty;
2268
	int ret;
2269

2270 2271
	lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
		       list_empty(&dev_priv->render_ring.active_list) &&
2272 2273
		       list_empty(&dev_priv->bsd_ring.active_list) &&
		       list_empty(&dev_priv->blt_ring.active_list));
2274 2275 2276 2277
	if (lists_empty)
		return 0;

	/* Flush everything onto the inactive list. */
2278
	ret = i915_ring_idle(dev, &dev_priv->render_ring);
2279 2280
	if (ret)
		return ret;
2281

2282 2283 2284
	ret = i915_ring_idle(dev, &dev_priv->bsd_ring);
	if (ret)
		return ret;
2285

2286 2287 2288
	ret = i915_ring_idle(dev, &dev_priv->blt_ring);
	if (ret)
		return ret;
2289

2290
	return 0;
2291 2292
}

2293
static int
2294 2295
i915_gem_object_get_pages(struct drm_gem_object *obj,
			  gfp_t gfpmask)
2296
{
2297
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2298 2299 2300 2301 2302
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

2303 2304 2305
	BUG_ON(obj_priv->pages_refcount
			== DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);

2306
	if (obj_priv->pages_refcount++ != 0)
2307 2308 2309 2310 2311 2312
		return 0;

	/* Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 */
	page_count = obj->size / PAGE_SIZE;
2313
	BUG_ON(obj_priv->pages != NULL);
2314
	obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2315 2316
	if (obj_priv->pages == NULL) {
		obj_priv->pages_refcount--;
2317 2318 2319 2320 2321 2322
		return -ENOMEM;
	}

	inode = obj->filp->f_path.dentry->d_inode;
	mapping = inode->i_mapping;
	for (i = 0; i < page_count; i++) {
2323
		page = read_cache_page_gfp(mapping, i,
2324
					   GFP_HIGHUSER |
2325
					   __GFP_COLD |
2326
					   __GFP_RECLAIMABLE |
2327
					   gfpmask);
2328 2329 2330
		if (IS_ERR(page))
			goto err_pages;

2331
		obj_priv->pages[i] = page;
2332
	}
2333 2334 2335 2336

	if (obj_priv->tiling_mode != I915_TILING_NONE)
		i915_gem_object_do_bit_17_swizzle(obj);

2337
	return 0;
2338 2339 2340 2341 2342 2343 2344 2345 2346

err_pages:
	while (i--)
		page_cache_release(obj_priv->pages[i]);

	drm_free_large(obj_priv->pages);
	obj_priv->pages = NULL;
	obj_priv->pages_refcount--;
	return PTR_ERR(page);
2347 2348
}

2349 2350 2351 2352 2353
static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
{
	struct drm_gem_object *obj = reg->obj;
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
2354
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370
	int regnum = obj_priv->fence_reg;
	uint64_t val;

	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
		    0xfffff000) << 32;
	val |= obj_priv->gtt_offset & 0xfffff000;
	val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
		SANDYBRIDGE_FENCE_PITCH_SHIFT;

	if (obj_priv->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
}

2371 2372 2373 2374 2375
static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
{
	struct drm_gem_object *obj = reg->obj;
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
2376
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395
	int regnum = obj_priv->fence_reg;
	uint64_t val;

	val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
		    0xfffff000) << 32;
	val |= obj_priv->gtt_offset & 0xfffff000;
	val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj_priv->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
}

static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
{
	struct drm_gem_object *obj = reg->obj;
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
2396
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2397
	int regnum = obj_priv->fence_reg;
2398
	int tile_width;
2399
	uint32_t fence_reg, val;
2400 2401 2402 2403
	uint32_t pitch_val;

	if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
	    (obj_priv->gtt_offset & (obj->size - 1))) {
2404
		WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2405
		     __func__, obj_priv->gtt_offset, obj->size);
2406 2407 2408
		return;
	}

2409 2410 2411
	if (obj_priv->tiling_mode == I915_TILING_Y &&
	    HAS_128_BYTE_Y_TILING(dev))
		tile_width = 128;
2412
	else
2413 2414 2415 2416 2417
		tile_width = 512;

	/* Note: pitch better be a power of two tile widths */
	pitch_val = obj_priv->stride / tile_width;
	pitch_val = ffs(pitch_val) - 1;
2418

2419 2420 2421 2422 2423 2424
	if (obj_priv->tiling_mode == I915_TILING_Y &&
	    HAS_128_BYTE_Y_TILING(dev))
		WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
	else
		WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);

2425 2426 2427 2428 2429 2430 2431
	val = obj_priv->gtt_offset;
	if (obj_priv->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I915_FENCE_SIZE_BITS(obj->size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

2432 2433 2434 2435 2436
	if (regnum < 8)
		fence_reg = FENCE_REG_830_0 + (regnum * 4);
	else
		fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
	I915_WRITE(fence_reg, val);
2437 2438 2439 2440 2441 2442 2443
}

static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
{
	struct drm_gem_object *obj = reg->obj;
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
2444
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2445 2446 2447
	int regnum = obj_priv->fence_reg;
	uint32_t val;
	uint32_t pitch_val;
2448
	uint32_t fence_size_bits;
2449

2450
	if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2451
	    (obj_priv->gtt_offset & (obj->size - 1))) {
2452
		WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2453
		     __func__, obj_priv->gtt_offset);
2454 2455 2456
		return;
	}

2457 2458 2459 2460
	pitch_val = obj_priv->stride / 128;
	pitch_val = ffs(pitch_val) - 1;
	WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);

2461 2462 2463
	val = obj_priv->gtt_offset;
	if (obj_priv->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2464 2465 2466
	fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
	WARN_ON(fence_size_bits & ~0x00000f00);
	val |= fence_size_bits;
2467 2468 2469 2470 2471 2472
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

	I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
}

2473 2474
static int i915_find_fence_reg(struct drm_device *dev,
			       bool interruptible)
2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488
{
	struct drm_i915_fence_reg *reg = NULL;
	struct drm_i915_gem_object *obj_priv = NULL;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_gem_object *obj = NULL;
	int i, avail, ret;

	/* First try to find a free reg */
	avail = 0;
	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			return i;

2489
		obj_priv = to_intel_bo(reg->obj);
2490 2491 2492 2493 2494 2495 2496 2497 2498
		if (!obj_priv->pin_count)
		    avail++;
	}

	if (avail == 0)
		return -ENOSPC;

	/* None available, try to steal one or wait for a user to finish */
	i = I915_FENCE_REG_NONE;
2499 2500 2501 2502
	list_for_each_entry(reg, &dev_priv->mm.fence_list,
			    lru_list) {
		obj = reg->obj;
		obj_priv = to_intel_bo(obj);
2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518

		if (obj_priv->pin_count)
			continue;

		/* found one! */
		i = obj_priv->fence_reg;
		break;
	}

	BUG_ON(i == I915_FENCE_REG_NONE);

	/* We only have a reference on obj from the active list. put_fence_reg
	 * might drop that one, causing a use-after-free in it. So hold a
	 * private reference to obj like the other callers of put_fence_reg
	 * (set_tiling ioctl) do. */
	drm_gem_object_reference(obj);
2519
	ret = i915_gem_object_put_fence_reg(obj, interruptible);
2520 2521 2522 2523 2524 2525 2526
	drm_gem_object_unreference(obj);
	if (ret != 0)
		return ret;

	return i;
}

2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539
/**
 * i915_gem_object_get_fence_reg - set up a fence reg for an object
 * @obj: object to map through a fence reg
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 *
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 */
2540
int
2541 2542
i915_gem_object_get_fence_reg(struct drm_gem_object *obj,
			      bool interruptible)
2543 2544
{
	struct drm_device *dev = obj->dev;
J
Jesse Barnes 已提交
2545
	struct drm_i915_private *dev_priv = dev->dev_private;
2546
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2547
	struct drm_i915_fence_reg *reg = NULL;
2548
	int ret;
2549

2550 2551
	/* Just update our place in the LRU if our fence is getting used. */
	if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2552 2553
		reg = &dev_priv->fence_regs[obj_priv->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2554 2555 2556
		return 0;
	}

2557 2558 2559 2560 2561
	switch (obj_priv->tiling_mode) {
	case I915_TILING_NONE:
		WARN(1, "allocating a fence for non-tiled object?\n");
		break;
	case I915_TILING_X:
2562 2563 2564 2565 2566
		if (!obj_priv->stride)
			return -EINVAL;
		WARN((obj_priv->stride & (512 - 1)),
		     "object 0x%08x is X tiled but has non-512B pitch\n",
		     obj_priv->gtt_offset);
2567 2568
		break;
	case I915_TILING_Y:
2569 2570 2571 2572 2573
		if (!obj_priv->stride)
			return -EINVAL;
		WARN((obj_priv->stride & (128 - 1)),
		     "object 0x%08x is Y tiled but has non-128B pitch\n",
		     obj_priv->gtt_offset);
2574 2575 2576
		break;
	}

2577
	ret = i915_find_fence_reg(dev, interruptible);
2578 2579
	if (ret < 0)
		return ret;
2580

2581 2582
	obj_priv->fence_reg = ret;
	reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2583
	list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2584

2585 2586
	reg->obj = obj;

2587 2588
	switch (INTEL_INFO(dev)->gen) {
	case 6:
2589
		sandybridge_write_fence_reg(reg);
2590 2591 2592
		break;
	case 5:
	case 4:
2593
		i965_write_fence_reg(reg);
2594 2595
		break;
	case 3:
2596
		i915_write_fence_reg(reg);
2597 2598
		break;
	case 2:
2599
		i830_write_fence_reg(reg);
2600 2601
		break;
	}
2602

2603 2604
	trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
			obj_priv->tiling_mode);
C
Chris Wilson 已提交
2605

2606
	return 0;
2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619
}

/**
 * i915_gem_clear_fence_reg - clear out fence register info
 * @obj: object to clear
 *
 * Zeroes out the fence register itself and clears out the associated
 * data structures in dev_priv and obj_priv.
 */
static void
i915_gem_clear_fence_reg(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
J
Jesse Barnes 已提交
2620
	drm_i915_private_t *dev_priv = dev->dev_private;
2621
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2622 2623
	struct drm_i915_fence_reg *reg =
		&dev_priv->fence_regs[obj_priv->fence_reg];
2624
	uint32_t fence_reg;
2625

2626 2627
	switch (INTEL_INFO(dev)->gen) {
	case 6:
2628 2629
		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
			     (obj_priv->fence_reg * 8), 0);
2630 2631 2632
		break;
	case 5:
	case 4:
2633
		I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2634 2635
		break;
	case 3:
2636
		if (obj_priv->fence_reg >= 8)
2637
			fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4;
2638
		else
2639 2640
	case 2:
			fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2641 2642

		I915_WRITE(fence_reg, 0);
2643
		break;
2644
	}
2645

2646
	reg->obj = NULL;
2647
	obj_priv->fence_reg = I915_FENCE_REG_NONE;
2648
	list_del_init(&reg->lru_list);
2649 2650
}

2651 2652 2653 2654
/**
 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
 * to the buffer to finish, and then resets the fence register.
 * @obj: tiled object holding a fence register.
2655
 * @bool: whether the wait upon the fence is interruptible
2656 2657 2658 2659 2660
 *
 * Zeroes out the fence register itself and clears out the associated
 * data structures in dev_priv and obj_priv.
 */
int
2661 2662
i915_gem_object_put_fence_reg(struct drm_gem_object *obj,
			      bool interruptible)
2663 2664
{
	struct drm_device *dev = obj->dev;
C
Chris Wilson 已提交
2665
	struct drm_i915_private *dev_priv = dev->dev_private;
2666
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
C
Chris Wilson 已提交
2667
	struct drm_i915_fence_reg *reg;
2668 2669 2670 2671

	if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
		return 0;

2672 2673 2674 2675 2676 2677
	/* If we've changed tiling, GTT-mappings of the object
	 * need to re-fault to ensure that the correct fence register
	 * setup is in place.
	 */
	i915_gem_release_mmap(obj);

2678 2679 2680 2681
	/* On the i915, GPU access to tiled buffers is via a fence,
	 * therefore we must wait for any outstanding access to complete
	 * before clearing the fence.
	 */
C
Chris Wilson 已提交
2682 2683
	reg = &dev_priv->fence_regs[obj_priv->fence_reg];
	if (reg->gpu) {
2684 2685
		int ret;

2686
		ret = i915_gem_object_flush_gpu_write_domain(obj, true);
2687
		if (ret)
2688 2689
			return ret;

2690
		ret = i915_gem_object_wait_rendering(obj, interruptible);
2691
		if (ret)
2692
			return ret;
C
Chris Wilson 已提交
2693 2694

		reg->gpu = false;
2695 2696
	}

2697
	i915_gem_object_flush_gtt_write_domain(obj);
2698
	i915_gem_clear_fence_reg(obj);
2699 2700 2701 2702

	return 0;
}

2703 2704 2705 2706
/**
 * Finds free space in the GTT aperture and binds the object there.
 */
static int
2707 2708 2709
i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
			    unsigned alignment,
			    bool mappable)
2710 2711 2712
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
2713
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2714
	struct drm_mm_node *free_space;
2715
	gfp_t gfpmask =  __GFP_NORETRY | __GFP_NOWARN;
2716
	int ret;
2717

C
Chris Wilson 已提交
2718
	if (obj_priv->madv != I915_MADV_WILLNEED) {
2719 2720 2721 2722
		DRM_ERROR("Attempting to bind a purgeable object\n");
		return -EINVAL;
	}

2723
	if (alignment == 0)
2724
		alignment = i915_gem_get_gtt_alignment(obj);
2725
	if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2726 2727 2728 2729
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
		return -EINVAL;
	}

2730 2731 2732
	/* If the object is bigger than the entire aperture, reject it early
	 * before evicting everything in a vain attempt to find space.
	 */
2733 2734
	if (obj->size >
	    (mappable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
2735 2736 2737 2738
		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
		return -E2BIG;
	}

2739
 search_free:
2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762
	if (mappable)
		free_space =
			drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
						    obj->size, alignment, 0,
						    dev_priv->mm.gtt_mappable_end,
						    0);
	else
		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
						obj->size, alignment, 0);

	if (free_space != NULL) {
		if (mappable)
			obj_priv->gtt_space =
				drm_mm_get_block_range_generic(free_space,
							       obj->size,
							       alignment, 0,
							       dev_priv->mm.gtt_mappable_end,
							       0);
		else
			obj_priv->gtt_space =
				drm_mm_get_block(free_space, obj->size,
						 alignment);
	}
2763 2764 2765 2766
	if (obj_priv->gtt_space == NULL) {
		/* If the gtt is empty and we're still having trouble
		 * fitting our object in, we're out of memory.
		 */
2767 2768
		ret = i915_gem_evict_something(dev, obj->size, alignment,
					       mappable);
2769
		if (ret)
2770
			return ret;
2771

2772 2773 2774
		goto search_free;
	}

2775
	ret = i915_gem_object_get_pages(obj, gfpmask);
2776 2777 2778
	if (ret) {
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
2779 2780 2781

		if (ret == -ENOMEM) {
			/* first try to clear up some space from the GTT */
2782
			ret = i915_gem_evict_something(dev, obj->size,
2783
						       alignment, mappable);
2784 2785
			if (ret) {
				/* now try to shrink everyone else */
2786 2787 2788
				if (gfpmask) {
					gfpmask = 0;
					goto search_free;
2789 2790 2791 2792 2793 2794 2795 2796
				}

				return ret;
			}

			goto search_free;
		}

2797 2798 2799 2800 2801 2802 2803
		return ret;
	}

	/* Create an AGP memory structure pointing at our pages, and bind it
	 * into the GTT.
	 */
	obj_priv->agp_mem = drm_agp_bind_pages(dev,
2804
					       obj_priv->pages,
2805
					       obj->size >> PAGE_SHIFT,
2806
					       obj_priv->gtt_space->start,
2807
					       obj_priv->agp_type);
2808
	if (obj_priv->agp_mem == NULL) {
2809
		i915_gem_object_put_pages(obj);
2810 2811
		drm_mm_put_block(obj_priv->gtt_space);
		obj_priv->gtt_space = NULL;
2812

2813 2814
		ret = i915_gem_evict_something(dev, obj->size, alignment,
					       mappable);
2815
		if (ret)
2816 2817 2818
			return ret;

		goto search_free;
2819 2820
	}

2821 2822
	obj_priv->gtt_offset = obj_priv->gtt_space->start;

2823
	/* keep track of bounds object by adding it to the inactive list */
2824
	list_add_tail(&obj_priv->mm_list, &dev_priv->mm.inactive_list);
2825
	i915_gem_info_add_gtt(dev_priv, obj);
2826

2827 2828 2829 2830
	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
2831 2832
	BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
	BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2833

2834
	trace_i915_gem_object_bind(obj, obj_priv->gtt_offset, mappable);
C
Chris Wilson 已提交
2835

2836 2837 2838 2839 2840 2841
	return 0;
}

void
i915_gem_clflush_object(struct drm_gem_object *obj)
{
2842
	struct drm_i915_gem_object	*obj_priv = to_intel_bo(obj);
2843 2844 2845 2846 2847

	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
2848
	if (obj_priv->pages == NULL)
2849 2850
		return;

C
Chris Wilson 已提交
2851
	trace_i915_gem_object_clflush(obj);
2852

2853
	drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2854 2855
}

2856
/** Flushes any GPU write domain for the object if it's dirty. */
2857
static int
2858 2859
i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj,
				       bool pipelined)
2860 2861
{
	struct drm_device *dev = obj->dev;
C
Chris Wilson 已提交
2862
	uint32_t old_write_domain;
2863 2864

	if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2865
		return 0;
2866 2867

	/* Queue the GPU write cache flushing we need. */
C
Chris Wilson 已提交
2868
	old_write_domain = obj->write_domain;
2869
	i915_gem_flush_ring(dev, NULL,
2870 2871
			    to_intel_bo(obj)->ring,
			    0, obj->write_domain);
2872
	BUG_ON(obj->write_domain);
C
Chris Wilson 已提交
2873 2874 2875 2876

	trace_i915_gem_object_change_domain(obj,
					    obj->read_domains,
					    old_write_domain);
2877 2878 2879 2880

	if (pipelined)
		return 0;

2881
	return i915_gem_object_wait_rendering(obj, true);
2882 2883 2884 2885 2886 2887
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
{
C
Chris Wilson 已提交
2888 2889
	uint32_t old_write_domain;

2890 2891 2892 2893 2894 2895 2896
	if (obj->write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain.   Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush.  It also doesn't land in render cache.
	 */
C
Chris Wilson 已提交
2897
	old_write_domain = obj->write_domain;
2898
	obj->write_domain = 0;
C
Chris Wilson 已提交
2899 2900 2901 2902

	trace_i915_gem_object_change_domain(obj,
					    obj->read_domains,
					    old_write_domain);
2903 2904 2905 2906 2907 2908 2909
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
C
Chris Wilson 已提交
2910
	uint32_t old_write_domain;
2911 2912 2913 2914 2915 2916

	if (obj->write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	drm_agp_chipset_flush(dev);
C
Chris Wilson 已提交
2917
	old_write_domain = obj->write_domain;
2918
	obj->write_domain = 0;
C
Chris Wilson 已提交
2919 2920 2921 2922

	trace_i915_gem_object_change_domain(obj,
					    obj->read_domains,
					    old_write_domain);
2923 2924
}

2925 2926 2927 2928 2929 2930
/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
J
Jesse Barnes 已提交
2931
int
2932 2933
i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
{
2934
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
C
Chris Wilson 已提交
2935
	uint32_t old_write_domain, old_read_domains;
2936
	int ret;
2937

2938 2939 2940 2941
	/* Not valid to be called on unbound objects. */
	if (obj_priv->gtt_space == NULL)
		return -EINVAL;

2942
	ret = i915_gem_object_flush_gpu_write_domain(obj, false);
2943 2944 2945
	if (ret != 0)
		return ret;

2946
	i915_gem_object_flush_cpu_write_domain(obj);
C
Chris Wilson 已提交
2947

2948
	if (write) {
2949
		ret = i915_gem_object_wait_rendering(obj, true);
2950 2951 2952
		if (ret)
			return ret;
	}
2953

C
Chris Wilson 已提交
2954 2955 2956
	old_write_domain = obj->write_domain;
	old_read_domains = obj->read_domains;

2957 2958 2959 2960 2961 2962
	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
2963
		obj->read_domains = I915_GEM_DOMAIN_GTT;
2964 2965
		obj->write_domain = I915_GEM_DOMAIN_GTT;
		obj_priv->dirty = 1;
2966 2967
	}

C
Chris Wilson 已提交
2968 2969 2970 2971
	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

2972 2973 2974
	return 0;
}

2975 2976 2977 2978 2979
/*
 * Prepare buffer for display plane. Use uninterruptible for possible flush
 * wait, as in modesetting process we're not supposed to be interrupted.
 */
int
2980 2981
i915_gem_object_set_to_display_plane(struct drm_gem_object *obj,
				     bool pipelined)
2982
{
2983
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2984
	uint32_t old_read_domains;
2985 2986 2987 2988 2989 2990
	int ret;

	/* Not valid to be called on unbound objects. */
	if (obj_priv->gtt_space == NULL)
		return -EINVAL;

2991
	ret = i915_gem_object_flush_gpu_write_domain(obj, true);
2992 2993
	if (ret)
		return ret;
2994

2995 2996 2997 2998
	/* Currently, we are always called from an non-interruptible context. */
	if (!pipelined) {
		ret = i915_gem_object_wait_rendering(obj, false);
		if (ret)
2999 3000 3001
			return ret;
	}

3002 3003
	i915_gem_object_flush_cpu_write_domain(obj);

3004
	old_read_domains = obj->read_domains;
3005
	obj->read_domains |= I915_GEM_DOMAIN_GTT;
3006 3007 3008

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
3009
					    obj->write_domain);
3010 3011 3012 3013

	return 0;
}

3014 3015 3016 3017 3018 3019 3020 3021 3022
/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
static int
i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
{
C
Chris Wilson 已提交
3023
	uint32_t old_write_domain, old_read_domains;
3024 3025
	int ret;

3026
	ret = i915_gem_object_flush_gpu_write_domain(obj, false);
3027 3028
	if (ret != 0)
		return ret;
3029

3030
	i915_gem_object_flush_gtt_write_domain(obj);
3031

3032 3033
	/* If we have a partially-valid cache of the object in the CPU,
	 * finish invalidating it and free the per-page flags.
3034
	 */
3035
	i915_gem_object_set_to_full_cpu_read_domain(obj);
3036

3037
	if (write) {
3038
		ret = i915_gem_object_wait_rendering(obj, true);
3039 3040 3041 3042
		if (ret)
			return ret;
	}

C
Chris Wilson 已提交
3043 3044 3045
	old_write_domain = obj->write_domain;
	old_read_domains = obj->read_domains;

3046 3047
	/* Flush the CPU cache if it's still invalid. */
	if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
3048 3049
		i915_gem_clflush_object(obj);

3050
		obj->read_domains |= I915_GEM_DOMAIN_CPU;
3051 3052 3053 3054 3055
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
3056 3057 3058 3059 3060 3061
	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
3062
		obj->read_domains = I915_GEM_DOMAIN_CPU;
3063 3064
		obj->write_domain = I915_GEM_DOMAIN_CPU;
	}
3065

C
Chris Wilson 已提交
3066 3067 3068 3069
	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

3070 3071 3072
	return 0;
}

3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183
/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU), to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped by GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
3184
static void
3185 3186
i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj,
				  struct intel_ring_buffer *ring)
3187 3188
{
	struct drm_device		*dev = obj->dev;
3189
	struct drm_i915_private		*dev_priv = dev->dev_private;
3190
	struct drm_i915_gem_object	*obj_priv = to_intel_bo(obj);
3191 3192
	uint32_t			invalidate_domains = 0;
	uint32_t			flush_domains = 0;
3193

3194 3195 3196 3197
	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
3198 3199
	if (obj->pending_write_domain == 0)
		obj->pending_read_domains |= obj->read_domains;
3200 3201 3202 3203 3204 3205 3206

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
3207 3208
	if (obj->write_domain &&
	    obj->write_domain != obj->pending_read_domains) {
3209
		flush_domains |= obj->write_domain;
3210 3211
		invalidate_domains |=
			obj->pending_read_domains & ~obj->write_domain;
3212 3213 3214 3215 3216
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
3217
	invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3218
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
3219 3220
		i915_gem_clflush_object(obj);

3221 3222 3223 3224 3225 3226 3227 3228
	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains).  So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->pending_write_domain == 0)
		obj->pending_write_domain = obj->write_domain;
3229 3230 3231

	dev->invalidate_domains |= invalidate_domains;
	dev->flush_domains |= flush_domains;
3232
	if (flush_domains & I915_GEM_GPU_DOMAINS)
3233
		dev_priv->mm.flush_rings |= obj_priv->ring->id;
3234 3235
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		dev_priv->mm.flush_rings |= ring->id;
3236 3237 3238
}

/**
3239
 * Moves the object from a partially CPU read to a full one.
3240
 *
3241 3242
 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3243
 */
3244 3245
static void
i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3246
{
3247
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3248

3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259
	if (!obj_priv->page_cpu_valid)
		return;

	/* If we're partially in the CPU read domain, finish moving it in.
	 */
	if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
		int i;

		for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
			if (obj_priv->page_cpu_valid[i])
				continue;
3260
			drm_clflush_pages(obj_priv->pages + i, 1);
3261 3262 3263 3264 3265 3266
		}
	}

	/* Free the page_cpu_valid mappings which are now stale, whether
	 * or not we've got I915_GEM_DOMAIN_CPU.
	 */
3267
	kfree(obj_priv->page_cpu_valid);
3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286
	obj_priv->page_cpu_valid = NULL;
}

/**
 * Put a byte range of the object into the CPU read domain.
 *
 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
 * not entirely valid.  The page_cpu_valid member of the object flags which
 * pages have been flushed, and will be respected by
 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
 * of the whole object.
 *
 * A range covering the whole object is handed straight to
 * i915_gem_object_set_to_cpu_domain().
 *
 * Returns 0 on success, -ENOMEM if the per-page flag array cannot be
 * allocated, or the error from flushing the GPU write domain.
 */
static int
i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
					  uint64_t offset, uint64_t size)
{
	struct drm_i915_gem_object *bo = to_intel_bo(obj);
	uint32_t prev_read_domains;
	int page, err;

	if (offset == 0 && size == obj->size)
		return i915_gem_object_set_to_cpu_domain(obj, 0);

	err = i915_gem_object_flush_gpu_write_domain(obj, false);
	if (err != 0)
		return err;
	i915_gem_object_flush_gtt_write_domain(obj);

	if (bo->page_cpu_valid == NULL) {
		/* If we're already fully in the CPU read domain, we're done. */
		if ((obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
			return 0;

		/* Otherwise create the per-page CPU read domain flags,
		 * one byte per page, all initially invalid.
		 */
		bo->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
					     GFP_KERNEL);
		if (bo->page_cpu_valid == NULL)
			return -ENOMEM;
	} else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		/* Stale flags from an earlier partial mapping: clear them as
		 * we are newly adding I915_GEM_DOMAIN_CPU.
		 */
		memset(bo->page_cpu_valid, 0, obj->size / PAGE_SIZE);
	}

	/* Flush the cache on any pages that are still invalid from the CPU's
	 * perspective.
	 */
	for (page = offset / PAGE_SIZE;
	     page <= (offset + size - 1) / PAGE_SIZE;
	     page++) {
		if (!bo->page_cpu_valid[page]) {
			drm_clflush_pages(bo->pages + page, 1);
			bo->page_cpu_valid[page] = 1;
		}
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	prev_read_domains = obj->read_domains;
	obj->read_domains = prev_read_domains | I915_GEM_DOMAIN_CPU;

	trace_i915_gem_object_change_domain(obj,
					    prev_read_domains,
					    obj->write_domain);

	return 0;
}

/**
 * Pin an object to the GTT and evaluate the relocations landing in it.
 */
static int
3347 3348 3349
i915_gem_execbuffer_relocate(struct drm_i915_gem_object *obj,
			     struct drm_file *file_priv,
			     struct drm_i915_gem_exec_object2 *entry)
3350
{
3351
	struct drm_device *dev = obj->base.dev;
3352
	drm_i915_private_t *dev_priv = dev->dev_private;
3353
	struct drm_i915_gem_relocation_entry __user *user_relocs;
3354 3355 3356
	struct drm_gem_object *target_obj = NULL;
	uint32_t target_handle = 0;
	int i, ret = 0;
3357

3358
	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
3359
	for (i = 0; i < entry->relocation_count; i++) {
3360
		struct drm_i915_gem_relocation_entry reloc;
3361
		uint32_t target_offset;
3362

3363 3364 3365 3366 3367
		if (__copy_from_user_inatomic(&reloc,
					      user_relocs+i,
					      sizeof(reloc))) {
			ret = -EFAULT;
			break;
J
Jesse Barnes 已提交
3368 3369
		}

3370 3371
		if (reloc.target_handle != target_handle) {
			drm_gem_object_unreference(target_obj);
3372

3373 3374 3375 3376 3377 3378 3379 3380
			target_obj = drm_gem_object_lookup(dev, file_priv,
							   reloc.target_handle);
			if (target_obj == NULL) {
				ret = -ENOENT;
				break;
			}

			target_handle = reloc.target_handle;
3381
		}
3382
		target_offset = to_intel_bo(target_obj)->gtt_offset;
3383

3384 3385 3386 3387 3388 3389
#if WATCH_RELOC
		DRM_INFO("%s: obj %p offset %08x target %d "
			 "read %08x write %08x gtt %08x "
			 "presumed %08x delta %08x\n",
			 __func__,
			 obj,
3390 3391 3392 3393
			 (int) reloc.offset,
			 (int) reloc.target_handle,
			 (int) reloc.read_domains,
			 (int) reloc.write_domain,
3394
			 (int) target_offset,
3395 3396
			 (int) reloc.presumed_offset,
			 reloc.delta);
3397 3398
#endif

3399 3400 3401
		/* The target buffer should have appeared before us in the
		 * exec_object list, so it should have a GTT space bound by now.
		 */
3402
		if (target_offset == 0) {
3403
			DRM_ERROR("No GTT space found for object %d\n",
3404
				  reloc.target_handle);
3405 3406
			ret = -EINVAL;
			break;
3407 3408
		}

3409
		/* Validate that the target is in a valid r/w GPU domain */
3410
		if (reloc.write_domain & (reloc.write_domain - 1)) {
3411 3412 3413
			DRM_ERROR("reloc with multiple write domains: "
				  "obj %p target %d offset %d "
				  "read %08x write %08x",
3414 3415 3416 3417
				  obj, reloc.target_handle,
				  (int) reloc.offset,
				  reloc.read_domains,
				  reloc.write_domain);
3418 3419
			ret = -EINVAL;
			break;
3420
		}
3421 3422
		if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
		    reloc.read_domains & I915_GEM_DOMAIN_CPU) {
3423 3424 3425
			DRM_ERROR("reloc with read/write CPU domains: "
				  "obj %p target %d offset %d "
				  "read %08x write %08x",
3426 3427 3428 3429
				  obj, reloc.target_handle,
				  (int) reloc.offset,
				  reloc.read_domains,
				  reloc.write_domain);
3430 3431
			ret = -EINVAL;
			break;
3432
		}
3433 3434
		if (reloc.write_domain && target_obj->pending_write_domain &&
		    reloc.write_domain != target_obj->pending_write_domain) {
3435 3436 3437
			DRM_ERROR("Write domain conflict: "
				  "obj %p target %d offset %d "
				  "new %08x old %08x\n",
3438 3439 3440
				  obj, reloc.target_handle,
				  (int) reloc.offset,
				  reloc.write_domain,
3441
				  target_obj->pending_write_domain);
3442 3443
			ret = -EINVAL;
			break;
3444 3445
		}

3446
		target_obj->pending_read_domains |= reloc.read_domains;
3447
		target_obj->pending_write_domain |= reloc.write_domain;
3448 3449 3450 3451

		/* If the relocation already has the right value in it, no
		 * more work needs to be done.
		 */
3452
		if (target_offset == reloc.presumed_offset)
3453 3454
			continue;

3455
		/* Check that the relocation address is valid... */
3456
		if (reloc.offset > obj->base.size - 4) {
3457 3458
			DRM_ERROR("Relocation beyond object bounds: "
				  "obj %p target %d offset %d size %d.\n",
3459
				  obj, reloc.target_handle,
3460 3461 3462
				  (int) reloc.offset, (int) obj->base.size);
			ret = -EINVAL;
			break;
3463
		}
3464
		if (reloc.offset & 3) {
3465 3466
			DRM_ERROR("Relocation not 4-byte aligned: "
				  "obj %p target %d offset %d.\n",
3467 3468
				  obj, reloc.target_handle,
				  (int) reloc.offset);
3469 3470
			ret = -EINVAL;
			break;
3471 3472 3473
		}

		/* and points to somewhere within the target object. */
3474
		if (reloc.delta >= target_obj->size) {
3475 3476
			DRM_ERROR("Relocation beyond target object bounds: "
				  "obj %p target %d delta %d size %d.\n",
3477 3478
				  obj, reloc.target_handle,
				  (int) reloc.delta, (int) target_obj->size);
3479 3480
			ret = -EINVAL;
			break;
3481 3482
		}

3483 3484
		reloc.delta += target_offset;
		if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
3485 3486
			uint32_t page_offset = reloc.offset & ~PAGE_MASK;
			char *vaddr;
3487

3488
			vaddr = kmap_atomic(obj->pages[reloc.offset >> PAGE_SHIFT]);
3489
			*(uint32_t *)(vaddr + page_offset) = reloc.delta;
3490
			kunmap_atomic(vaddr);
3491 3492 3493
		} else {
			uint32_t __iomem *reloc_entry;
			void __iomem *reloc_page;
3494

3495 3496 3497
			ret = i915_gem_object_set_to_gtt_domain(&obj->base, 1);
			if (ret)
				break;
3498

3499
			/* Map the page containing the relocation we're going to perform.  */
3500
			reloc.offset += obj->gtt_offset;
3501
			reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3502
							      reloc.offset & PAGE_MASK);
3503 3504 3505
			reloc_entry = (uint32_t __iomem *)
				(reloc_page + (reloc.offset & ~PAGE_MASK));
			iowrite32(reloc.delta, reloc_entry);
3506
			io_mapping_unmap_atomic(reloc_page);
3507
		}
3508

3509 3510 3511 3512 3513 3514 3515 3516
		/* and update the user's relocation entry */
		reloc.presumed_offset = target_offset;
		if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
					      &reloc.presumed_offset,
					      sizeof(reloc.presumed_offset))) {
		    ret = -EFAULT;
		    break;
		}
3517 3518
	}

3519
	drm_gem_object_unreference(target_obj);
3520 3521 3522
	return ret;
}

3523
static int
3524 3525 3526 3527 3528
i915_gem_execbuffer_pin(struct drm_device *dev,
			struct drm_file *file,
			struct drm_gem_object **object_list,
			struct drm_i915_gem_exec_object2 *exec_list,
			int count)
3529
{
3530 3531
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret, i, retry;
3532

3533 3534 3535 3536 3537
	/* attempt to pin all of the buffers into the GTT */
	for (retry = 0; retry < 2; retry++) {
		ret = 0;
		for (i = 0; i < count; i++) {
			struct drm_i915_gem_exec_object2 *entry = &exec_list[i];
3538
			struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
3539 3540 3541 3542
			bool need_fence =
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;

3543 3544 3545 3546
			/* g33/pnv can't fence buffers in the unmappable part */
			bool need_mappable =
				entry->relocation_count ? true : need_fence;

3547 3548 3549 3550 3551 3552 3553 3554
			/* Check fence reg constraints and rebind if necessary */
			if (need_fence &&
			    !i915_gem_object_fence_offset_ok(&obj->base,
							     obj->tiling_mode)) {
				ret = i915_gem_object_unbind(&obj->base);
				if (ret)
					break;
			}
3555

3556
			ret = i915_gem_object_pin(&obj->base,
3557 3558
						  entry->alignment,
						  need_mappable);
3559 3560
			if (ret)
				break;
3561

3562 3563 3564 3565 3566 3567 3568 3569 3570 3571
			/*
			 * Pre-965 chips need a fence register set up in order
			 * to properly handle blits to/from tiled surfaces.
			 */
			if (need_fence) {
				ret = i915_gem_object_get_fence_reg(&obj->base, true);
				if (ret) {
					i915_gem_object_unpin(&obj->base);
					break;
				}
3572

3573 3574
				dev_priv->fence_regs[obj->fence_reg].gpu = true;
			}
3575

3576
			entry->offset = obj->gtt_offset;
3577 3578
		}

3579 3580 3581 3582 3583
		while (i--)
			i915_gem_object_unpin(object_list[i]);

		if (ret == 0)
			break;
3584

3585 3586 3587 3588 3589 3590
		if (ret != -ENOSPC || retry)
			return ret;

		ret = i915_gem_evict_everything(dev);
		if (ret)
			return ret;
3591 3592
	}

3593
	return 0;
3594 3595
}

3596 3597 3598
/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
3599 3600 3601 3602
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
3603 3604 3605
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
3606
static int
3607
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
3608
{
3609 3610
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
3611
	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3612 3613 3614 3615
	struct drm_i915_gem_request *request;
	struct intel_ring_buffer *ring = NULL;
	u32 seqno = 0;
	int ret;
3616

3617
	spin_lock(&file_priv->mm.lock);
3618
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3619 3620
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;
3621

3622 3623
		ring = request->ring;
		seqno = request->seqno;
3624
	}
3625
	spin_unlock(&file_priv->mm.lock);
3626

3627 3628
	if (seqno == 0)
		return 0;
3629

3630
	ret = 0;
3631
	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
3632 3633 3634 3635 3636
		/* And wait for the seqno passing without holding any locks and
		 * causing extra latency for others. This is safe as the irq
		 * generation is designed to be run atomically and so is
		 * lockless.
		 */
3637
		ring->user_irq_get(ring);
3638
		ret = wait_event_interruptible(ring->irq_queue,
3639
					       i915_seqno_passed(ring->get_seqno(ring), seqno)
3640
					       || atomic_read(&dev_priv->mm.wedged));
3641
		ring->user_irq_put(ring);
3642

3643 3644
		if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
			ret = -EIO;
3645 3646
	}

3647 3648
	if (ret == 0)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3649 3650 3651 3652

	return ret;
}

3653
static int
3654 3655
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
			  uint64_t exec_offset)
3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670
{
	uint32_t exec_start, exec_len;

	exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
	exec_len = (uint32_t) exec->batch_len;

	if ((exec_start | exec_len) & 0x7)
		return -EINVAL;

	if (!exec_start)
		return -EINVAL;

	return 0;
}

3671
static int
3672 3673
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
3674
{
3675
	int i;
3676

3677 3678 3679
	for (i = 0; i < count; i++) {
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
		size_t length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry);
3680

3681 3682
		if (!access_ok(VERIFY_READ, ptr, length))
			return -EFAULT;
3683

3684 3685 3686 3687
		/* we may also need to update the presumed offsets */
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

3688 3689
		if (fault_in_pages_readable(ptr, length))
			return -EFAULT;
3690 3691
	}

3692
	return 0;
3693 3694
}

C
Chris Wilson 已提交
3695
static int
J
Jesse Barnes 已提交
3696
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3697
		       struct drm_file *file,
J
Jesse Barnes 已提交
3698 3699
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec_list)
3700 3701 3702 3703
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object **object_list = NULL;
	struct drm_gem_object *batch_obj;
3704
	struct drm_clip_rect *cliprects = NULL;
C
Chris Wilson 已提交
3705
	struct drm_i915_gem_request *request = NULL;
3706
	int ret, i, flips;
3707 3708
	uint64_t exec_offset;

3709 3710
	struct intel_ring_buffer *ring = NULL;

3711 3712 3713 3714
	ret = i915_gem_check_is_wedged(dev);
	if (ret)
		return ret;

3715 3716 3717 3718
	ret = validate_exec_list(exec_list, args->buffer_count);
	if (ret)
		return ret;

3719 3720 3721 3722
#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif
3723 3724 3725 3726 3727 3728
	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->render_ring;
		break;
	case I915_EXEC_BSD:
3729
		if (!HAS_BSD(dev)) {
3730
			DRM_ERROR("execbuf with invalid ring (BSD)\n");
3731 3732 3733
			return -EINVAL;
		}
		ring = &dev_priv->bsd_ring;
3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745
		break;
	case I915_EXEC_BLT:
		if (!HAS_BLT(dev)) {
			DRM_ERROR("execbuf with invalid ring (BLT)\n");
			return -EINVAL;
		}
		ring = &dev_priv->blt_ring;
		break;
	default:
		DRM_ERROR("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
3746 3747
	}

3748 3749 3750 3751
	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}
3752
	object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
J
Jesse Barnes 已提交
3753 3754
	if (object_list == NULL) {
		DRM_ERROR("Failed to allocate object list for %d buffers\n",
3755 3756 3757 3758 3759
			  args->buffer_count);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

3760
	if (args->num_cliprects != 0) {
3761 3762
		cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
				    GFP_KERNEL);
3763 3764
		if (cliprects == NULL) {
			ret = -ENOMEM;
3765
			goto pre_mutex_err;
3766
		}
3767 3768 3769 3770 3771 3772 3773 3774

		ret = copy_from_user(cliprects,
				     (struct drm_clip_rect __user *)
				     (uintptr_t) args->cliprects_ptr,
				     sizeof(*cliprects) * args->num_cliprects);
		if (ret != 0) {
			DRM_ERROR("copy %d cliprects failed: %d\n",
				  args->num_cliprects, ret);
3775
			ret = -EFAULT;
3776 3777 3778 3779
			goto pre_mutex_err;
		}
	}

C
Chris Wilson 已提交
3780 3781 3782
	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL) {
		ret = -ENOMEM;
3783
		goto pre_mutex_err;
C
Chris Wilson 已提交
3784
	}
3785

3786 3787
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
3788
		goto pre_mutex_err;
3789 3790 3791

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
3792 3793
		ret = -EBUSY;
		goto pre_mutex_err;
3794 3795
	}

3796
	/* Look up object handles */
3797
	for (i = 0; i < args->buffer_count; i++) {
3798 3799
		struct drm_i915_gem_object *obj_priv;

3800
		object_list[i] = drm_gem_object_lookup(dev, file,
3801 3802 3803 3804
						       exec_list[i].handle);
		if (object_list[i] == NULL) {
			DRM_ERROR("Invalid object handle %d at index %d\n",
				   exec_list[i].handle, i);
3805 3806
			/* prevent error path from reading uninitialized data */
			args->buffer_count = i + 1;
3807
			ret = -ENOENT;
3808 3809
			goto err;
		}
3810

3811
		obj_priv = to_intel_bo(object_list[i]);
3812 3813 3814
		if (obj_priv->in_execbuffer) {
			DRM_ERROR("Object %p appears more than once in object list\n",
				   object_list[i]);
3815 3816
			/* prevent error path from reading uninitialized data */
			args->buffer_count = i + 1;
3817
			ret = -EINVAL;
3818 3819 3820
			goto err;
		}
		obj_priv->in_execbuffer = true;
3821
	}
3822

3823 3824 3825 3826 3827 3828
	/* Move the objects en-masse into the GTT, evicting if necessary. */
	ret = i915_gem_execbuffer_pin(dev, file,
				      object_list, exec_list,
				      args->buffer_count);
	if (ret)
		goto err;
3829

3830 3831 3832 3833 3834 3835 3836
	/* The objects are in their final locations, apply the relocations. */
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj = to_intel_bo(object_list[i]);
		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
		ret = i915_gem_execbuffer_relocate(obj, file, &exec_list[i]);
		if (ret)
3837
			goto err;
3838 3839 3840 3841
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	batch_obj = object_list[args->buffer_count-1];
3842 3843 3844 3845 3846 3847
	if (batch_obj->pending_write_domain) {
		DRM_ERROR("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3848

3849 3850 3851
	/* Sanity check the batch buffer */
	exec_offset = to_intel_bo(batch_obj)->gtt_offset;
	ret = i915_gem_check_execbuffer(args, exec_offset);
3852 3853 3854 3855 3856
	if (ret != 0) {
		DRM_ERROR("execbuf with invalid offset/length\n");
		goto err;
	}

3857 3858 3859 3860 3861 3862
	/* Zero the global flush/invalidate flags. These
	 * will be modified as new domains are computed
	 * for each object
	 */
	dev->invalidate_domains = 0;
	dev->flush_domains = 0;
3863
	dev_priv->mm.flush_rings = 0;
3864 3865
	for (i = 0; i < args->buffer_count; i++)
		i915_gem_object_set_to_gpu_domain(object_list[i], ring);
3866

3867 3868 3869 3870 3871 3872 3873
	if (dev->invalidate_domains | dev->flush_domains) {
#if WATCH_EXEC
		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
			  __func__,
			 dev->invalidate_domains,
			 dev->flush_domains);
#endif
3874
		i915_gem_flush(dev, file,
3875
			       dev->invalidate_domains,
3876 3877
			       dev->flush_domains,
			       dev_priv->mm.flush_rings);
3878
	}
3879 3880 3881 3882 3883 3884 3885 3886 3887

#if WATCH_COHERENCY
	for (i = 0; i < args->buffer_count; i++) {
		i915_gem_object_check_coherency(object_list[i],
						exec_list[i].handle);
	}
#endif

#if WATCH_EXEC
3888
	i915_gem_dump_object(batch_obj,
3889 3890 3891 3892 3893
			      args->batch_len,
			      __func__,
			      ~0);
#endif

3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914
	/* Check for any pending flips. As we only maintain a flip queue depth
	 * of 1, we can simply insert a WAIT for the next display flip prior
	 * to executing the batch and avoid stalling the CPU.
	 */
	flips = 0;
	for (i = 0; i < args->buffer_count; i++) {
		if (object_list[i]->write_domain)
			flips |= atomic_read(&to_intel_bo(object_list[i])->pending_flip);
	}
	if (flips) {
		int plane, flip_mask;

		for (plane = 0; flips >> plane; plane++) {
			if (((flips >> plane) & 1) == 0)
				continue;

			if (plane)
				flip_mask = MI_WAIT_FOR_PLANE_B_FLIP;
			else
				flip_mask = MI_WAIT_FOR_PLANE_A_FLIP;

3915 3916 3917 3918
			ret = intel_ring_begin(ring, 2);
			if (ret)
				goto err;

3919 3920 3921
			intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask);
			intel_ring_emit(ring, MI_NOOP);
			intel_ring_advance(ring);
3922 3923 3924
		}
	}

3925
	/* Exec the batchbuffer */
3926
	ret = ring->dispatch_execbuffer(ring, args, cliprects, exec_offset);
3927 3928 3929 3930 3931 3932 3933 3934
	if (ret) {
		DRM_ERROR("dispatch failed %d\n", ret);
		goto err;
	}

	for (i = 0; i < args->buffer_count; i++) {
		struct drm_gem_object *obj = object_list[i];

3935 3936 3937
		obj->read_domains = obj->pending_read_domains;
		obj->write_domain = obj->pending_write_domain;

3938
		i915_gem_object_move_to_active(obj, ring);
3939 3940 3941 3942
		if (obj->write_domain) {
			struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
			obj_priv->dirty = 1;
			list_move_tail(&obj_priv->gpu_write_list,
3943
				       &ring->gpu_write_list);
3944 3945 3946 3947 3948 3949
			intel_mark_busy(dev, obj);
		}

		trace_i915_gem_object_change_domain(obj,
						    obj->read_domains,
						    obj->write_domain);
3950 3951
	}

3952 3953 3954 3955 3956 3957
	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires
	 */
	i915_retire_commands(dev, ring);

3958 3959 3960 3961
	if (i915_add_request(dev, file, request, ring))
		ring->outstanding_lazy_request = true;
	else
		request = NULL;
3962 3963

err:
3964
	for (i = 0; i < args->buffer_count; i++) {
3965 3966 3967 3968
		if (object_list[i] == NULL)
		    break;

		to_intel_bo(object_list[i])->in_execbuffer = false;
3969
		drm_gem_object_unreference(object_list[i]);
3970
	}
3971 3972 3973

	mutex_unlock(&dev->struct_mutex);

3974
pre_mutex_err:
3975
	drm_free_large(object_list);
3976
	kfree(cliprects);
C
Chris Wilson 已提交
3977
	kfree(request);
3978 3979 3980 3981

	return ret;
}

J
Jesse Barnes 已提交
3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033
/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 *
 * Returns 0 on success, -EINVAL if no buffers were supplied, -ENOMEM if the
 * temporary lists cannot be allocated, -EFAULT if the exec list cannot be
 * copied from or back to userspace, or whatever i915_gem_do_execbuffer()
 * returned.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		ret = -ENOMEM;
		goto out;
	}
	ret = copy_from_user(exec_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		ret = -EFAULT;
		goto out;
	}

	/* Translate each legacy entry into its exec2 equivalent. */
	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;

	ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
	if (ret)
		goto out;

	/* Copy the new buffer offsets back to the user's exec list. */
	for (i = 0; i < args->buffer_count; i++)
		exec_list[i].offset = exec2_list[i].offset;
	/* ... and back out to userspace */
	ret = copy_to_user((struct drm_i915_relocation_entry __user *)
			   (uintptr_t) args->buffers_ptr,
			   exec_list,
			   sizeof(*exec_list) * args->buffer_count);
	if (ret) {
		/* Log the residual count before it is replaced by the
		 * errno, matching the copy-in failure path above.
		 */
		DRM_ERROR("failed to copy %d exec entries "
			  "back to user (%d)\n",
			  args->buffer_count, ret);
		ret = -EFAULT;
	}

out:
	/* Both lists may still be NULL here if an allocation failed. */
	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

/*
 * execbuffer2 ioctl: copy the exec object list in from userspace, hand it to
 * i915_gem_do_execbuffer(), and on success copy the (possibly updated) list
 * back out.
 *
 * Returns 0 on success, -EINVAL if no buffers were supplied, -ENOMEM if the
 * temporary list cannot be allocated, -EFAULT if the list cannot be copied
 * from or back to userspace, or whatever i915_gem_do_execbuffer() returned.
 */
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

#if WATCH_EXEC
	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
		  (int) args->buffers_ptr, args->buffer_count, args->batch_len);
#endif

	if (args->buffer_count < 1) {
		DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec2_list == NULL) {
		DRM_ERROR("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_ERROR("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		ret = -EFAULT;
		goto out;
	}

	ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
	if (ret)
		goto out;

	/* Copy the new buffer offsets back to the user's exec list. */
	ret = copy_to_user((struct drm_i915_relocation_entry __user *)
			   (uintptr_t) args->buffers_ptr,
			   exec2_list,
			   sizeof(*exec2_list) * args->buffer_count);
	if (ret) {
		/* Log the residual count before it is replaced by the
		 * errno, matching the copy-in failure path above.
		 */
		DRM_ERROR("failed to copy %d exec entries "
			  "back to user (%d)\n",
			  args->buffer_count, ret);
		ret = -EFAULT;
	}

out:
	drm_free_large(exec2_list);
	return ret;
}

4127
int
4128 4129
i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment,
		    bool mappable)
4130 4131
{
	struct drm_device *dev = obj->dev;
C
Chris Wilson 已提交
4132
	struct drm_i915_private *dev_priv = dev->dev_private;
4133
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4134 4135
	int ret;

4136
	BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4137
	WARN_ON(i915_verify_lists(dev));
4138 4139 4140 4141

	if (obj_priv->gtt_space != NULL) {
		if (alignment == 0)
			alignment = i915_gem_get_gtt_alignment(obj);
4142 4143
		if (obj_priv->gtt_offset & (alignment - 1) ||
		    (mappable && !i915_gem_object_cpu_accessible(obj_priv))) {
4144 4145 4146 4147
			WARN(obj_priv->pin_count,
			     "bo is already pinned with incorrect alignment:"
			     " offset=%x, req.alignment=%x\n",
			     obj_priv->gtt_offset, alignment);
4148 4149 4150 4151 4152 4153
			ret = i915_gem_object_unbind(obj);
			if (ret)
				return ret;
		}
	}

4154
	if (obj_priv->gtt_space == NULL) {
4155
		ret = i915_gem_object_bind_to_gtt(obj, alignment, mappable);
4156
		if (ret)
4157
			return ret;
4158
	}
J
Jesse Barnes 已提交
4159

4160 4161 4162 4163 4164 4165
	obj_priv->pin_count++;

	/* If the object is not active and not pending a flush,
	 * remove it from the inactive list
	 */
	if (obj_priv->pin_count == 1) {
4166
		i915_gem_info_add_pin(dev_priv, obj, mappable);
C
Chris Wilson 已提交
4167
		if (!obj_priv->active)
4168
			list_move_tail(&obj_priv->mm_list,
C
Chris Wilson 已提交
4169
				       &dev_priv->mm.pinned_list);
4170
	}
4171
	BUG_ON(!obj_priv->pin_mappable && mappable);
4172

4173
	WARN_ON(i915_verify_lists(dev));
4174 4175 4176 4177 4178 4179 4180 4181
	return 0;
}

void
i915_gem_object_unpin(struct drm_gem_object *obj)
{
	struct drm_device *dev = obj->dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
4182
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4183

4184
	WARN_ON(i915_verify_lists(dev));
4185 4186 4187 4188 4189 4190 4191 4192 4193
	obj_priv->pin_count--;
	BUG_ON(obj_priv->pin_count < 0);
	BUG_ON(obj_priv->gtt_space == NULL);

	/* If the object is no longer pinned, and is
	 * neither active nor being flushed, then stick it on
	 * the inactive list
	 */
	if (obj_priv->pin_count == 0) {
C
Chris Wilson 已提交
4194
		if (!obj_priv->active)
4195
			list_move_tail(&obj_priv->mm_list,
4196
				       &dev_priv->mm.inactive_list);
4197
		i915_gem_info_remove_pin(dev_priv, obj);
4198
	}
4199
	WARN_ON(i915_verify_lists(dev));
4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210
}

int
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
		   struct drm_file *file_priv)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

4211 4212 4213
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;
4214 4215 4216

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
4217 4218
		ret = -ENOENT;
		goto unlock;
4219
	}
4220
	obj_priv = to_intel_bo(obj);
4221

C
Chris Wilson 已提交
4222 4223
	if (obj_priv->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to pin a purgeable buffer\n");
4224 4225
		ret = -EINVAL;
		goto out;
4226 4227
	}

J
Jesse Barnes 已提交
4228 4229 4230
	if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
4231 4232
		ret = -EINVAL;
		goto out;
J
Jesse Barnes 已提交
4233 4234 4235 4236 4237
	}

	obj_priv->user_pin_count++;
	obj_priv->pin_filp = file_priv;
	if (obj_priv->user_pin_count == 1) {
4238
		ret = i915_gem_object_pin(obj, args->alignment, true);
4239 4240
		if (ret)
			goto out;
4241 4242 4243 4244 4245
	}

	/* XXX - flush the CPU caches for pinned objects
	 * as the X server doesn't manage domains yet
	 */
4246
	i915_gem_object_flush_cpu_write_domain(obj);
4247
	args->offset = obj_priv->gtt_offset;
4248
out:
4249
	drm_gem_object_unreference(obj);
4250
unlock:
4251
	mutex_unlock(&dev->struct_mutex);
4252
	return ret;
4253 4254 4255 4256 4257 4258 4259 4260
}

int
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pin *args = data;
	struct drm_gem_object *obj;
J
Jesse Barnes 已提交
4261
	struct drm_i915_gem_object *obj_priv;
4262
	int ret;
4263

4264 4265 4266
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;
4267 4268 4269

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
4270 4271
		ret = -ENOENT;
		goto unlock;
4272
	}
4273
	obj_priv = to_intel_bo(obj);
4274

J
Jesse Barnes 已提交
4275 4276 4277
	if (obj_priv->pin_filp != file_priv) {
		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
4278 4279
		ret = -EINVAL;
		goto out;
J
Jesse Barnes 已提交
4280 4281 4282 4283 4284 4285
	}
	obj_priv->user_pin_count--;
	if (obj_priv->user_pin_count == 0) {
		obj_priv->pin_filp = NULL;
		i915_gem_object_unpin(obj);
	}
4286

4287
out:
4288
	drm_gem_object_unreference(obj);
4289
unlock:
4290
	mutex_unlock(&dev->struct_mutex);
4291
	return ret;
4292 4293 4294 4295 4296 4297 4298 4299 4300
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_busy *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
4301 4302
	int ret;

4303
	ret = i915_mutex_lock_interruptible(dev);
4304
	if (ret)
4305
		return ret;
4306 4307 4308

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
4309 4310
		ret = -ENOENT;
		goto unlock;
4311
	}
4312
	obj_priv = to_intel_bo(obj);
4313

4314 4315 4316 4317
	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
4318
	 */
4319 4320 4321 4322 4323 4324 4325
	args->busy = obj_priv->active;
	if (args->busy) {
		/* Unconditionally flush objects, even when the gpu still uses this
		 * object. Userspace calling this function indicates that it wants to
		 * use this buffer rather sooner than later, so issuing the required
		 * flush earlier is beneficial.
		 */
4326 4327
		if (obj->write_domain & I915_GEM_GPU_DOMAINS)
			i915_gem_flush_ring(dev, file_priv,
4328 4329
					    obj_priv->ring,
					    0, obj->write_domain);
4330 4331 4332 4333 4334 4335 4336 4337 4338 4339

		/* Update the active list for the hardware's current position.
		 * Otherwise this only updates on a delayed timer or when irqs
		 * are actually unmasked, and our working set ends up being
		 * larger than required.
		 */
		i915_gem_retire_requests_ring(dev, obj_priv->ring);

		args->busy = obj_priv->active;
	}
4340 4341

	drm_gem_object_unreference(obj);
4342
unlock:
4343
	mutex_unlock(&dev->struct_mutex);
4344
	return ret;
4345 4346 4347 4348 4349 4350 4351 4352 4353
}

/* Throttle ioctl: forwards to i915_gem_ring_throttle(); @data is unused. */
int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
	int ret = i915_gem_ring_throttle(dev, file_priv);

	return ret;
}

4354 4355 4356 4357 4358 4359 4360
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_gem_madvise *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
4361
	int ret;
4362 4363 4364 4365 4366 4367 4368 4369 4370

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
	    break;
	default:
	    return -EINVAL;
	}

4371 4372 4373 4374
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

4375 4376
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
4377 4378
		ret = -ENOENT;
		goto unlock;
4379
	}
4380
	obj_priv = to_intel_bo(obj);
4381 4382

	if (obj_priv->pin_count) {
4383 4384
		ret = -EINVAL;
		goto out;
4385 4386
	}

C
Chris Wilson 已提交
4387 4388
	if (obj_priv->madv != __I915_MADV_PURGED)
		obj_priv->madv = args->madv;
4389

4390 4391 4392 4393 4394
	/* if the object is no longer bound, discard its backing storage */
	if (i915_gem_object_is_purgeable(obj_priv) &&
	    obj_priv->gtt_space == NULL)
		i915_gem_object_truncate(obj);

C
Chris Wilson 已提交
4395 4396
	args->retained = obj_priv->madv != __I915_MADV_PURGED;

4397
out:
4398
	drm_gem_object_unreference(obj);
4399
unlock:
4400
	mutex_unlock(&dev->struct_mutex);
4401
	return ret;
4402 4403
}

4404 4405 4406
/*
 * Allocate and initialise a GEM object of @size bytes.
 *
 * The new object starts in the CPU read and write domains, with no fence
 * register and I915_MADV_WILLNEED advice. Returns the embedded base object,
 * or NULL if the allocation or drm_gem_object_init() fails.
 */
struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
					      size_t size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *bo;

	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
	if (!bo)
		return NULL;

	if (drm_gem_object_init(dev, &bo->base, size)) {
		kfree(bo);
		return NULL;
	}

	i915_gem_info_add_obj(dev_priv, size);

	/* Fields of the embedded base object. */
	bo->base.write_domain = I915_GEM_DOMAIN_CPU;
	bo->base.read_domains = I915_GEM_DOMAIN_CPU;
	bo->base.driver_private = NULL;

	/* Driver-private state. */
	bo->agp_type = AGP_USER_MEMORY;
	bo->fence_reg = I915_FENCE_REG_NONE;
	bo->madv = I915_MADV_WILLNEED;
	INIT_LIST_HEAD(&bo->mm_list);
	INIT_LIST_HEAD(&bo->ring_list);
	INIT_LIST_HEAD(&bo->gpu_write_list);

	return &bo->base;
}

/* Calling this is a bug: it unconditionally triggers BUG(). */
int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();

	/* Only reached if BUG() returns. */
	return 0;
}

4442
static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4443
{
4444
	struct drm_device *dev = obj->dev;
4445
	drm_i915_private_t *dev_priv = dev->dev_private;
4446
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4447
	int ret;
4448

4449 4450
	ret = i915_gem_object_unbind(obj);
	if (ret == -ERESTARTSYS) {
4451
		list_move(&obj_priv->mm_list,
4452 4453 4454
			  &dev_priv->mm.deferred_free_list);
		return;
	}
4455

C
Chris Wilson 已提交
4456
	if (obj->map_list.map)
4457
		i915_gem_free_mmap_offset(obj);
4458

4459
	drm_gem_object_release(obj);
4460
	i915_gem_info_remove_obj(dev_priv, obj->size);
4461

4462
	kfree(obj_priv->page_cpu_valid);
4463
	kfree(obj_priv->bit_17);
4464
	kfree(obj_priv);
4465 4466
}

4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482
/*
 * Free @obj: emit the destroy tracepoint, drop every remaining pin, detach
 * any phys object, then hand over to i915_gem_free_object_tail().
 */
void i915_gem_free_object(struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *bo = to_intel_bo(obj);
	struct drm_device *dev = obj->dev;

	trace_i915_gem_object_destroy(obj);

	/* Each unpin call decrements pin_count, so this terminates. */
	while (bo->pin_count > 0)
		i915_gem_object_unpin(obj);

	if (bo->phys_obj)
		i915_gem_detach_phys_object(dev, obj);

	i915_gem_free_object_tail(obj);
}

4483 4484 4485 4486 4487
int
i915_gem_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;
4488

4489
	mutex_lock(&dev->struct_mutex);
C
Chris Wilson 已提交
4490

4491
	if (dev_priv->mm.suspended) {
4492 4493
		mutex_unlock(&dev->struct_mutex);
		return 0;
4494 4495
	}

4496
	ret = i915_gpu_idle(dev);
4497 4498
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
4499
		return ret;
4500
	}
4501

4502 4503
	/* Under UMS, be paranoid and evict. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4504
		ret = i915_gem_evict_inactive(dev);
4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
	 * We need to replace this with a semaphore, or something.
	 * And not confound mm.suspended!
	 */
	dev_priv->mm.suspended = 1;
4516
	del_timer_sync(&dev_priv->hangcheck_timer);
4517 4518

	i915_kernel_lost_context(dev);
4519
	i915_gem_cleanup_ringbuffer(dev);
4520

4521 4522
	mutex_unlock(&dev->struct_mutex);

4523 4524 4525
	/* Cancel the retire work handler, which should be idle now. */
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);

4526 4527 4528
	return 0;
}

4529 4530 4531 4532
/*
 * 965+ support PIPE_CONTROL commands, which provide finer grained control
 * over cache flushing.
 */
4533
static int
4534 4535 4536 4537 4538 4539 4540
i915_gem_init_pipe_control(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

4541
	obj = i915_gem_alloc_object(dev, 4096);
4542 4543 4544 4545 4546 4547 4548 4549
	if (obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}
	obj_priv = to_intel_bo(obj);
	obj_priv->agp_type = AGP_USER_CACHED_MEMORY;

4550
	ret = i915_gem_object_pin(obj, 4096, true);
4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571
	if (ret)
		goto err_unref;

	dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
	dev_priv->seqno_page =  kmap(obj_priv->pages[0]);
	if (dev_priv->seqno_page == NULL)
		goto err_unpin;

	dev_priv->seqno_obj = obj;
	memset(dev_priv->seqno_page, 0, PAGE_SIZE);

	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
err_unref:
	drm_gem_object_unreference(obj);
err:
	return ret;
}

4572 4573

static void
4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587
i915_gem_cleanup_pipe_control(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;

	obj = dev_priv->seqno_obj;
	obj_priv = to_intel_bo(obj);
	kunmap(obj_priv->pages[0]);
	i915_gem_object_unpin(obj);
	drm_gem_object_unreference(obj);
	dev_priv->seqno_obj = NULL;

	dev_priv->seqno_page = NULL;
4588 4589
}

4590 4591 4592 4593 4594
int
i915_gem_init_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;
4595

4596 4597 4598 4599 4600
	if (HAS_PIPE_CONTROL(dev)) {
		ret = i915_gem_init_pipe_control(dev);
		if (ret)
			return ret;
	}
4601

4602
	ret = intel_init_render_ring_buffer(dev);
4603 4604 4605 4606
	if (ret)
		goto cleanup_pipe_control;

	if (HAS_BSD(dev)) {
4607
		ret = intel_init_bsd_ring_buffer(dev);
4608 4609
		if (ret)
			goto cleanup_render_ring;
4610
	}
4611

4612 4613 4614 4615 4616 4617
	if (HAS_BLT(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

4618 4619
	dev_priv->next_seqno = 1;

4620 4621
	return 0;

4622
cleanup_bsd_ring:
4623
	intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
4624
cleanup_render_ring:
4625
	intel_cleanup_ring_buffer(&dev_priv->render_ring);
4626 4627 4628
cleanup_pipe_control:
	if (HAS_PIPE_CONTROL(dev))
		i915_gem_cleanup_pipe_control(dev);
4629 4630 4631 4632 4633 4634 4635 4636
	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

4637 4638 4639
	intel_cleanup_ring_buffer(&dev_priv->render_ring);
	intel_cleanup_ring_buffer(&dev_priv->bsd_ring);
	intel_cleanup_ring_buffer(&dev_priv->blt_ring);
4640 4641 4642 4643
	if (HAS_PIPE_CONTROL(dev))
		i915_gem_cleanup_pipe_control(dev);
}

4644 4645 4646 4647 4648 4649 4650
int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

J
Jesse Barnes 已提交
4651 4652 4653
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

4654
	if (atomic_read(&dev_priv->mm.wedged)) {
4655
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
4656
		atomic_set(&dev_priv->mm.wedged, 0);
4657 4658 4659
	}

	mutex_lock(&dev->struct_mutex);
4660 4661 4662
	dev_priv->mm.suspended = 0;

	ret = i915_gem_init_ringbuffer(dev);
4663 4664
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
4665
		return ret;
4666
	}
4667

4668
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
4669
	BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4670
	BUG_ON(!list_empty(&dev_priv->bsd_ring.active_list));
4671
	BUG_ON(!list_empty(&dev_priv->blt_ring.active_list));
4672 4673
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4674
	BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4675
	BUG_ON(!list_empty(&dev_priv->bsd_ring.request_list));
4676
	BUG_ON(!list_empty(&dev_priv->blt_ring.request_list));
4677
	mutex_unlock(&dev->struct_mutex);
4678

4679 4680 4681
	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;
4682

4683
	return 0;
4684 4685 4686 4687 4688 4689 4690 4691

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
4692 4693 4694 4695 4696 4697
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
J
Jesse Barnes 已提交
4698 4699 4700
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

4701
	drm_irq_uninstall(dev);
4702
	return i915_gem_idle(dev);
4703 4704 4705 4706 4707 4708 4709
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

4710 4711 4712
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

4713 4714 4715
	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
4716 4717
}

4718 4719 4720 4721 4722 4723 4724 4725
/* Initialise the three list heads embedded in @ring to empty. */
static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->gpu_write_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->active_list);
}

4726 4727 4728
void
i915_gem_load(struct drm_device *dev)
{
4729
	int i;
4730 4731
	drm_i915_private_t *dev_priv = dev->dev_private;

4732
	INIT_LIST_HEAD(&dev_priv->mm.active_list);
4733 4734
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
C
Chris Wilson 已提交
4735
	INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
4736
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4737
	INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4738 4739 4740
	init_ring_lists(&dev_priv->render_ring);
	init_ring_lists(&dev_priv->bsd_ring);
	init_ring_lists(&dev_priv->blt_ring);
4741 4742
	for (i = 0; i < 16; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4743 4744
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
4745
	init_completion(&dev_priv->error_completion);
4746

4747 4748 4749 4750 4751 4752 4753 4754 4755 4756
	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		u32 tmp = I915_READ(MI_ARB_STATE);
		if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
			/* arb state is a masked write, so set bit + bit in mask */
			tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
			I915_WRITE(MI_ARB_STATE, tmp);
		}
	}

4757
	/* Old X drivers will take 0-2 for front, back, depth buffers */
4758 4759
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;
4760

4761
	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4762 4763 4764 4765
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

4766
	/* Initialize fence registers to zero */
4767 4768 4769 4770 4771 4772 4773
	switch (INTEL_INFO(dev)->gen) {
	case 6:
		for (i = 0; i < 16; i++)
			I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
		break;
	case 5:
	case 4:
4774 4775
		for (i = 0; i < 16; i++)
			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4776 4777
		break;
	case 3:
4778 4779 4780
		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
			for (i = 0; i < 8; i++)
				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4781 4782 4783 4784
	case 2:
		for (i = 0; i < 8; i++)
			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
		break;
4785
	}
4786
	i915_gem_detect_bit_6_swizzle(dev);
4787
	init_waitqueue_head(&dev_priv->pending_flip_queue);
4788 4789 4790 4791

	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
4792
}
4793 4794 4795 4796 4797

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
4798 4799
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
4800 4801 4802 4803 4804 4805 4806 4807
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

4808
	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4809 4810 4811 4812 4813
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

4814
	phys_obj->handle = drm_pci_alloc(dev, size, align);
4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
4827
	kfree(phys_obj);
4828 4829 4830
	return ret;
}

4831
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

4856
	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867
		i915_gem_free_phys_object(dev, i);
}

/*
 * Detach @obj from its phys object, copying the phys object's contents
 * back into the object's own pages first.
 *
 * If the pages cannot be obtained the copy is skipped, but the object is
 * still detached. Does nothing if @obj has no phys object.
 */
void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_gem_object *obj)
{
	struct drm_i915_gem_object *bo = to_intel_bo(obj);
	int page_count;
	int i;

	if (!bo->phys_obj)
		return;

	if (i915_gem_object_get_pages(obj, 0) == 0) {
		page_count = obj->size / PAGE_SIZE;

		/* Copy back one page at a time through a temporary mapping. */
		for (i = 0; i < page_count; i++) {
			char *dst = kmap_atomic(bo->pages[i]);
			char *src = bo->phys_obj->handle->vaddr + (i * PAGE_SIZE);

			memcpy(dst, src, PAGE_SIZE);
			kunmap_atomic(dst);
		}
		drm_clflush_pages(bo->pages, page_count);
		drm_agp_chipset_flush(dev);

		i915_gem_object_put_pages(obj);
	}

	bo->phys_obj->cur_obj = NULL;
	bo->phys_obj = NULL;
}

int
i915_gem_attach_phys_object(struct drm_device *dev,
4896 4897 4898
			    struct drm_gem_object *obj,
			    int id,
			    int align)
4899 4900 4901 4902 4903 4904 4905 4906 4907 4908
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

4909
	obj_priv = to_intel_bo(obj);
4910 4911 4912 4913 4914 4915 4916 4917 4918 4919

	if (obj_priv->phys_obj) {
		if (obj_priv->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
4920
						obj->size, align);
4921
		if (ret) {
4922
			DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
4923 4924 4925 4926 4927 4928 4929 4930
			goto out;
		}
	}

	/* bind to the object */
	obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj_priv->phys_obj->cur_obj = obj;

4931
	ret = i915_gem_object_get_pages(obj, 0);
4932 4933 4934 4935 4936 4937 4938 4939
	if (ret) {
		DRM_ERROR("failed to get page list\n");
		goto out;
	}

	page_count = obj->size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
P
Peter Zijlstra 已提交
4940
		char *src = kmap_atomic(obj_priv->pages[i]);
4941 4942 4943
		char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);

		memcpy(dst, src, PAGE_SIZE);
P
Peter Zijlstra 已提交
4944
		kunmap_atomic(src);
4945 4946
	}

4947 4948
	i915_gem_object_put_pages(obj);

4949 4950 4951 4952 4953 4954 4955 4956 4957 4958
	return 0;
out:
	return ret;
}

static int
i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
4959
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4960 4961 4962 4963 4964 4965 4966
	void *obj_addr;
	int ret;
	char __user *user_data;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;

4967
	DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
4968 4969 4970 4971 4972 4973 4974
	ret = copy_from_user(obj_addr, user_data, args->size);
	if (ret)
		return -EFAULT;

	drm_agp_chipset_flush(dev);
	return 0;
}
4975

4976
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4977
{
4978
	struct drm_i915_file_private *file_priv = file->driver_priv;
4979 4980 4981 4982 4983

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
4984
	spin_lock(&file_priv->mm.lock);
4985 4986 4987 4988 4989 4990 4991 4992 4993
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
4994
	spin_unlock(&file_priv->mm.lock);
4995
}
4996

4997 4998 4999 5000 5001 5002 5003
/*
 * Report whether the GPU still has work tracked by GEM.
 *
 * Returns non-zero if either the flushing list or the active list holds
 * at least one entry, and 0 when both are empty.
 */
static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *i915 = dev->dev_private;

	return !list_empty(&i915->mm.flushing_list) ||
	       !list_empty(&i915->mm.active_list);
}

5009
static int
5010 5011 5012
i915_gem_inactive_shrink(struct shrinker *shrinker,
			 int nr_to_scan,
			 gfp_t gfp_mask)
5013
{
5014 5015 5016 5017 5018 5019 5020 5021 5022 5023
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj, *next;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex))
		return nr_to_scan ? 0 : -1;
5024 5025 5026

	/* "fast-path" to count number of available objects */
	if (nr_to_scan == 0) {
5027 5028 5029 5030 5031 5032 5033
		cnt = 0;
		list_for_each_entry(obj,
				    &dev_priv->mm.inactive_list,
				    mm_list)
			cnt++;
		mutex_unlock(&dev->struct_mutex);
		return cnt / 100 * sysctl_vfs_cache_pressure;
5034 5035
	}

5036
rescan:
5037
	/* first scan for clean buffers */
5038
	i915_gem_retire_requests(dev);
5039

5040 5041 5042 5043 5044 5045 5046
	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (i915_gem_object_is_purgeable(obj)) {
			i915_gem_object_unbind(&obj->base);
			if (--nr_to_scan == 0)
				break;
5047 5048 5049 5050
		}
	}

	/* second pass, evict/count anything still on the inactive list */
5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062
	cnt = 0;
	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (nr_to_scan) {
			i915_gem_object_unbind(&obj->base);
			nr_to_scan--;
		} else
			cnt++;
	}

	if (nr_to_scan && i915_gpu_is_active(dev)) {
5063 5064 5065 5066 5067 5068
		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This has a dramatic impact to reduce the number of
		 * OOM-killer events whilst running the GPU aggressively.
		 */
5069
		if (i915_gpu_idle(dev) == 0)
5070 5071
			goto rescan;
	}
5072 5073
	mutex_unlock(&dev->struct_mutex);
	return cnt / 100 * sysctl_vfs_cache_pressure;
5074
}