i915_gem.c 99.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
C
Chris Wilson 已提交
32
#include "i915_trace.h"
33
#include "intel_drv.h"
34
#include <linux/shmem_fs.h>
35
#include <linux/slab.h>
36
#include <linux/swap.h>
J
Jesse Barnes 已提交
37
#include <linux/pci.h>
38

39
static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj);
40 41
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
42 43 44
static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
						    unsigned alignment,
						    bool map_and_fenceable);
45 46
static void i915_gem_clear_fence_reg(struct drm_device *dev,
				     struct drm_i915_fence_reg *reg);
47 48
static int i915_gem_phys_pwrite(struct drm_device *dev,
				struct drm_i915_gem_object *obj,
49
				struct drm_i915_gem_pwrite *args,
50 51
				struct drm_file *file);
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj);
52

53
static int i915_gem_inactive_shrink(struct shrinker *shrinker,
54
				    struct shrink_control *sc);
55
static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
56

57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
/* some bookkeeping */
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
				  size_t size)
{
	dev_priv->mm.object_count++;
	dev_priv->mm.object_memory += size;
}

static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
				     size_t size)
{
	dev_priv->mm.object_count--;
	dev_priv->mm.object_memory -= size;
}

72 73
static int
i915_gem_wait_for_error(struct drm_device *dev)
74 75 76 77 78 79 80 81 82 83 84 85 86
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct completion *x = &dev_priv->error_completion;
	unsigned long flags;
	int ret;

	if (!atomic_read(&dev_priv->mm.wedged))
		return 0;

	ret = wait_for_completion_interruptible(x);
	if (ret)
		return ret;

87 88 89 90 91 92 93 94 95 96 97
	if (atomic_read(&dev_priv->mm.wedged)) {
		/* GPU is hung, bump the completion count to account for
		 * the token we just consumed so that we never hit zero and
		 * end up waiting upon a subsequent completion event that
		 * will never happen.
		 */
		spin_lock_irqsave(&x->wait.lock, flags);
		x->done++;
		spin_unlock_irqrestore(&x->wait.lock, flags);
	}
	return 0;
98 99
}

100
int i915_mutex_lock_interruptible(struct drm_device *dev)
101 102 103
{
	int ret;

104
	ret = i915_gem_wait_for_error(dev);
105 106 107 108 109 110 111
	if (ret)
		return ret;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		return ret;

112
	WARN_ON(i915_verify_lists(dev));
113 114
	return 0;
}
115

116
static inline bool
117
i915_gem_object_is_inactive(struct drm_i915_gem_object *obj)
118
{
119
	return obj->gtt_space && !obj->active && obj->pin_count == 0;
120 121
}

J
Jesse Barnes 已提交
122 123
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
124
		    struct drm_file *file)
J
Jesse Barnes 已提交
125 126
{
	struct drm_i915_gem_init *args = data;
127 128 129 130

	if (args->gtt_start >= args->gtt_end ||
	    (args->gtt_end | args->gtt_start) & (PAGE_SIZE - 1))
		return -EINVAL;
J
Jesse Barnes 已提交
131 132

	mutex_lock(&dev->struct_mutex);
133 134
	i915_gem_init_global_gtt(dev, args->gtt_start,
				 args->gtt_end, args->gtt_end);
135 136
	mutex_unlock(&dev->struct_mutex);

137
	return 0;
138 139
}

140 141
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
142
			    struct drm_file *file)
143
{
144
	struct drm_i915_private *dev_priv = dev->dev_private;
145
	struct drm_i915_gem_get_aperture *args = data;
146 147
	struct drm_i915_gem_object *obj;
	size_t pinned;
148 149 150 151

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

152
	pinned = 0;
153
	mutex_lock(&dev->struct_mutex);
154 155
	list_for_each_entry(obj, &dev_priv->mm.pinned_list, mm_list)
		pinned += obj->gtt_space->size;
156
	mutex_unlock(&dev->struct_mutex);
157

158
	args->aper_size = dev_priv->mm.gtt_total;
159
	args->aper_available_size = args->aper_size - pinned;
160

161 162 163
	return 0;
}

164 165 166 167 168
static int
i915_gem_create(struct drm_file *file,
		struct drm_device *dev,
		uint64_t size,
		uint32_t *handle_p)
169
{
170
	struct drm_i915_gem_object *obj;
171 172
	int ret;
	u32 handle;
173

174
	size = roundup(size, PAGE_SIZE);
175 176
	if (size == 0)
		return -EINVAL;
177 178

	/* Allocate the new object */
179
	obj = i915_gem_alloc_object(dev, size);
180 181 182
	if (obj == NULL)
		return -ENOMEM;

183
	ret = drm_gem_handle_create(file, &obj->base, &handle);
184
	if (ret) {
185 186
		drm_gem_object_release(&obj->base);
		i915_gem_info_remove_obj(dev->dev_private, obj->base.size);
187
		kfree(obj);
188
		return ret;
189
	}
190

191
	/* drop reference from allocate - handle holds it now */
192
	drm_gem_object_unreference(&obj->base);
193 194
	trace_i915_gem_object_create(obj);

195
	*handle_p = handle;
196 197 198
	return 0;
}

199 200 201 202 203 204
int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
	/* have to work out size/pitch and return them */
205
	args->pitch = ALIGN(args->width * ((args->bpp + 7) / 8), 64);
206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
	args->size = args->pitch * args->height;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

int i915_gem_dumb_destroy(struct drm_file *file,
			  struct drm_device *dev,
			  uint32_t handle)
{
	return drm_gem_handle_delete(file, handle);
}

/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
	struct drm_i915_gem_create *args = data;
	return i915_gem_create(file, dev,
			       args->size, &args->handle);
}

230
static int i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
231
{
232
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
233 234

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
235
		obj->tiling_mode != I915_TILING_NONE;
236 237
}

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
static inline int
__copy_to_user_swizzled(char __user *cpu_vaddr,
			const char *gpu_vaddr, int gpu_offset,
			int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_to_user(cpu_vaddr + cpu_offset,
				     gpu_vaddr + swizzled_gpu_offset,
				     this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
static inline int
__copy_from_user_swizzled(char __user *gpu_vaddr, int gpu_offset,
			  const char *cpu_vaddr,
			  int length)
{
	int ret, cpu_offset = 0;

	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
				       cpu_vaddr + cpu_offset,
				       this_length);
		if (ret)
			return ret + length;

		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	return 0;
}

290
static int
291 292 293 294
i915_gem_shmem_pread(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args,
		     struct drm_file *file)
295
{
296
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
297
	char __user *user_data;
298
	ssize_t remain;
299
	loff_t offset;
300
	int shmem_page_offset, page_length, ret = 0;
301
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
302
	int hit_slowpath = 0;
303
	int prefaulted = 0;
304
	int needs_clflush = 0;
305
	int release_page;
306

307
	user_data = (char __user *) (uintptr_t) args->data_ptr;
308 309
	remain = args->size;

310
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
311

312 313 314 315 316 317 318 319 320 321 322 323
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
		/* If we're not in the cpu read domain, set ourself into the gtt
		 * read domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will dirty the data
		 * anyway again before the next pread happens. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush = 1;
		ret = i915_gem_object_set_to_gtt_domain(obj, false);
		if (ret)
			return ret;
	}

324
	offset = args->offset;
325 326

	while (remain > 0) {
327
		struct page *page;
328
		char *vaddr;
329

330 331 332 333 334
		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
335
		shmem_page_offset = offset_in_page(offset);
336 337 338 339
		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

340 341 342 343 344 345 346 347 348 349
		if (obj->pages) {
			page = obj->pages[offset >> PAGE_SHIFT];
			release_page = 0;
		} else {
			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
			if (IS_ERR(page)) {
				ret = PTR_ERR(page);
				goto out;
			}
			release_page = 1;
350
		}
351

352 353 354
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

355 356
		if (!page_do_bit17_swizzling) {
			vaddr = kmap_atomic(page);
357 358 359
			if (needs_clflush)
				drm_clflush_virt_range(vaddr + shmem_page_offset,
						       page_length);
360 361 362 363 364 365 366 367 368
			ret = __copy_to_user_inatomic(user_data,
						      vaddr + shmem_page_offset,
						      page_length);
			kunmap_atomic(vaddr);
			if (ret == 0) 
				goto next_page;
		}

		hit_slowpath = 1;
369
		page_cache_get(page);
370 371
		mutex_unlock(&dev->struct_mutex);

372 373 374 375 376 377 378 379 380 381
		if (!prefaulted) {
			ret = fault_in_pages_writeable(user_data, remain);
			/* Userspace is tricking us, but we've already clobbered
			 * its pages with the prefault and promised to write the
			 * data up to the first fault. Hence ignore any errors
			 * and just continue. */
			(void)ret;
			prefaulted = 1;
		}

382
		vaddr = kmap(page);
383 384 385 386
		if (needs_clflush)
			drm_clflush_virt_range(vaddr + shmem_page_offset,
					       page_length);

387 388 389 390 391 392 393 394 395
		if (page_do_bit17_swizzling)
			ret = __copy_to_user_swizzled(user_data,
						      vaddr, shmem_page_offset,
						      page_length);
		else
			ret = __copy_to_user(user_data,
					     vaddr + shmem_page_offset,
					     page_length);
		kunmap(page);
396

397
		mutex_lock(&dev->struct_mutex);
398
		page_cache_release(page);
399
next_page:
400
		mark_page_accessed(page);
401 402
		if (release_page)
			page_cache_release(page);
403

404 405 406 407 408
		if (ret) {
			ret = -EFAULT;
			goto out;
		}

409
		remain -= page_length;
410
		user_data += page_length;
411 412 413
		offset += page_length;
	}

414
out:
415 416 417 418 419
	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
	}
420 421 422 423

	return ret;
}

424 425 426 427 428 429 430
/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
431
		     struct drm_file *file)
432 433
{
	struct drm_i915_gem_pread *args = data;
434
	struct drm_i915_gem_object *obj;
435
	int ret = 0;
436

437 438 439 440 441 442 443 444
	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_WRITE,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

445
	ret = i915_mutex_lock_interruptible(dev);
446
	if (ret)
447
		return ret;
448

449
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
450
	if (&obj->base == NULL) {
451 452
		ret = -ENOENT;
		goto unlock;
453
	}
454

455
	/* Bounds check source.  */
456 457
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
C
Chris Wilson 已提交
458
		ret = -EINVAL;
459
		goto out;
C
Chris Wilson 已提交
460 461
	}

C
Chris Wilson 已提交
462 463
	trace_i915_gem_object_pread(obj, args->offset, args->size);

464
	ret = i915_gem_shmem_pread(dev, obj, args, file);
465

466
out:
467
	drm_gem_object_unreference(&obj->base);
468
unlock:
469
	mutex_unlock(&dev->struct_mutex);
470
	return ret;
471 472
}

473 474
/* This is the fast write path which cannot handle
 * page faults in the source data
475
 */
476 477 478 479 480 481

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
482 483
{
	char *vaddr_atomic;
484
	unsigned long unwritten;
485

P
Peter Zijlstra 已提交
486
	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
487 488
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
P
Peter Zijlstra 已提交
489
	io_mapping_unmap_atomic(vaddr_atomic);
490
	return unwritten;
491 492
}

493 494 495 496
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
497
static int
498 499
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
			 struct drm_i915_gem_object *obj,
500
			 struct drm_i915_gem_pwrite *args,
501
			 struct drm_file *file)
502
{
503
	drm_i915_private_t *dev_priv = dev->dev_private;
504
	ssize_t remain;
505
	loff_t offset, page_base;
506
	char __user *user_data;
D
Daniel Vetter 已提交
507 508 509 510 511 512 513 514 515 516 517 518 519
	int page_offset, page_length, ret;

	ret = i915_gem_object_pin(obj, 0, true);
	if (ret)
		goto out;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		goto out_unpin;
520 521 522 523

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

524
	offset = obj->gtt_offset + args->offset;
525 526 527 528

	while (remain > 0) {
		/* Operation in this page
		 *
529 530 531
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
532
		 */
533 534
		page_base = offset & PAGE_MASK;
		page_offset = offset_in_page(offset);
535 536 537 538 539
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		/* If we get a fault while copying data, then (presumably) our
540 541
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
542
		 */
543
		if (fast_user_write(dev_priv->mm.gtt_mapping, page_base,
D
Daniel Vetter 已提交
544 545 546 547
				    page_offset, user_data, page_length)) {
			ret = -EFAULT;
			goto out_unpin;
		}
548

549 550 551
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
552 553
	}

D
Daniel Vetter 已提交
554 555 556
out_unpin:
	i915_gem_object_unpin(obj);
out:
557 558 559
	return ret;
}

560
static int
561 562 563 564
i915_gem_shmem_pwrite(struct drm_device *dev,
		      struct drm_i915_gem_object *obj,
		      struct drm_i915_gem_pwrite *args,
		      struct drm_file *file)
565
{
566
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
567
	ssize_t remain;
568 569
	loff_t offset;
	char __user *user_data;
570
	int shmem_page_offset, page_length, ret = 0;
571
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
572
	int hit_slowpath = 0;
573 574
	int needs_clflush_after = 0;
	int needs_clflush_before = 0;
575
	int release_page;
576

577
	user_data = (char __user *) (uintptr_t) args->data_ptr;
578 579
	remain = args->size;

580
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
581

582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
		/* If we're not in the cpu write domain, set ourself into the gtt
		 * write domain and manually flush cachelines (if required). This
		 * optimizes for the case when the gpu will use the data
		 * right away and we therefore have to clflush anyway. */
		if (obj->cache_level == I915_CACHE_NONE)
			needs_clflush_after = 1;
		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;
	}
	/* Same trick applies for invalidate partially written cachelines before
	 * writing.  */
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)
	    && obj->cache_level == I915_CACHE_NONE)
		needs_clflush_before = 1;

599
	offset = args->offset;
600
	obj->dirty = 1;
601

602
	while (remain > 0) {
603
		struct page *page;
604
		char *vaddr;
605
		int partial_cacheline_write;
606

607 608 609 610 611
		/* Operation in this page
		 *
		 * shmem_page_offset = offset within page in shmem file
		 * page_length = bytes to copy for this page
		 */
612
		shmem_page_offset = offset_in_page(offset);
613 614 615 616 617

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;

618 619 620 621 622 623 624
		/* If we don't overwrite a cacheline completely we need to be
		 * careful to have up-to-date data by first clflushing. Don't
		 * overcomplicate things and flush the entire patch. */
		partial_cacheline_write = needs_clflush_before &&
			((shmem_page_offset | page_length)
				& (boot_cpu_data.x86_clflush_size - 1));

625 626 627 628 629 630 631 632 633 634
		if (obj->pages) {
			page = obj->pages[offset >> PAGE_SHIFT];
			release_page = 0;
		} else {
			page = shmem_read_mapping_page(mapping, offset >> PAGE_SHIFT);
			if (IS_ERR(page)) {
				ret = PTR_ERR(page);
				goto out;
			}
			release_page = 1;
635 636
		}

637 638 639
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
			(page_to_phys(page) & (1 << 17)) != 0;

640 641
		if (!page_do_bit17_swizzling) {
			vaddr = kmap_atomic(page);
642 643 644
			if (partial_cacheline_write)
				drm_clflush_virt_range(vaddr + shmem_page_offset,
						       page_length);
645 646 647
			ret = __copy_from_user_inatomic_nocache(vaddr + shmem_page_offset,
								user_data,
								page_length);
648 649 650
			if (needs_clflush_after)
				drm_clflush_virt_range(vaddr + shmem_page_offset,
						       page_length);
651 652 653 654 655 656 657
			kunmap_atomic(vaddr);

			if (ret == 0)
				goto next_page;
		}

		hit_slowpath = 1;
658
		page_cache_get(page);
659 660
		mutex_unlock(&dev->struct_mutex);

661
		vaddr = kmap(page);
662 663 664
		if (partial_cacheline_write)
			drm_clflush_virt_range(vaddr + shmem_page_offset,
					       page_length);
665 666 667 668 669 670 671 672
		if (page_do_bit17_swizzling)
			ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
							user_data,
							page_length);
		else
			ret = __copy_from_user(vaddr + shmem_page_offset,
					       user_data,
					       page_length);
673 674 675
		if (needs_clflush_after)
			drm_clflush_virt_range(vaddr + shmem_page_offset,
					       page_length);
676
		kunmap(page);
677

678
		mutex_lock(&dev->struct_mutex);
679
		page_cache_release(page);
680
next_page:
681 682
		set_page_dirty(page);
		mark_page_accessed(page);
683 684
		if (release_page)
			page_cache_release(page);
685

686 687 688 689 690
		if (ret) {
			ret = -EFAULT;
			goto out;
		}

691
		remain -= page_length;
692
		user_data += page_length;
693
		offset += page_length;
694 695
	}

696
out:
697 698 699 700 701 702 703 704 705 706
	if (hit_slowpath) {
		/* Fixup: Kill any reinstated backing storage pages */
		if (obj->madv == __I915_MADV_PURGED)
			i915_gem_object_truncate(obj);
		/* and flush dirty cachelines in case the object isn't in the cpu write
		 * domain anymore. */
		if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
			i915_gem_clflush_object(obj);
			intel_gtt_chipset_flush();
		}
707
	}
708

709 710 711
	if (needs_clflush_after)
		intel_gtt_chipset_flush();

712
	return ret;
713 714 715 716 717 718 719 720 721
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
722
		      struct drm_file *file)
723 724
{
	struct drm_i915_gem_pwrite *args = data;
725
	struct drm_i915_gem_object *obj;
726 727 728 729 730 731 732 733 734 735 736 737 738 739
	int ret;

	if (args->size == 0)
		return 0;

	if (!access_ok(VERIFY_READ,
		       (char __user *)(uintptr_t)args->data_ptr,
		       args->size))
		return -EFAULT;

	ret = fault_in_pages_readable((char __user *)(uintptr_t)args->data_ptr,
				      args->size);
	if (ret)
		return -EFAULT;
740

741
	ret = i915_mutex_lock_interruptible(dev);
742
	if (ret)
743
		return ret;
744

745
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
746
	if (&obj->base == NULL) {
747 748
		ret = -ENOENT;
		goto unlock;
749
	}
750

751
	/* Bounds check destination. */
752 753
	if (args->offset > obj->base.size ||
	    args->size > obj->base.size - args->offset) {
C
Chris Wilson 已提交
754
		ret = -EINVAL;
755
		goto out;
C
Chris Wilson 已提交
756 757
	}

C
Chris Wilson 已提交
758 759
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

D
Daniel Vetter 已提交
760
	ret = -EFAULT;
761 762 763 764 765 766
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
767
	if (obj->phys_obj) {
768
		ret = i915_gem_phys_pwrite(dev, obj, args, file);
769 770 771 772
		goto out;
	}

	if (obj->gtt_space &&
773
	    obj->cache_level == I915_CACHE_NONE &&
774
	    obj->map_and_fenceable &&
775
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
776
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
D
Daniel Vetter 已提交
777 778 779
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
		 * textures). Fallback to the shmem path in that case. */
780
	}
781

D
Daniel Vetter 已提交
782 783
	if (ret == -EFAULT)
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
784

785
out:
786
	drm_gem_object_unreference(&obj->base);
787
unlock:
788
	mutex_unlock(&dev->struct_mutex);
789 790 791 792
	return ret;
}

/**
793 794
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
795 796 797
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
798
			  struct drm_file *file)
799 800
{
	struct drm_i915_gem_set_domain *args = data;
801
	struct drm_i915_gem_object *obj;
802 803
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
804 805 806 807 808
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

809
	/* Only handle setting domains to types used by the CPU. */
810
	if (write_domain & I915_GEM_GPU_DOMAINS)
811 812
		return -EINVAL;

813
	if (read_domains & I915_GEM_GPU_DOMAINS)
814 815 816 817 818 819 820 821
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain.  Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

822
	ret = i915_mutex_lock_interruptible(dev);
823
	if (ret)
824
		return ret;
825

826
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
827
	if (&obj->base == NULL) {
828 829
		ret = -ENOENT;
		goto unlock;
830
	}
831

832 833
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
834 835 836 837 838 839 840

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
841
	} else {
842
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
843 844
	}

845
	drm_gem_object_unreference(&obj->base);
846
unlock:
847 848 849 850 851 852 853 854 855
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
856
			 struct drm_file *file)
857 858
{
	struct drm_i915_gem_sw_finish *args = data;
859
	struct drm_i915_gem_object *obj;
860 861 862 863 864
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

865
	ret = i915_mutex_lock_interruptible(dev);
866
	if (ret)
867
		return ret;
868

869
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
870
	if (&obj->base == NULL) {
871 872
		ret = -ENOENT;
		goto unlock;
873 874 875
	}

	/* Pinned buffers may be scanout, so flush the cache */
876
	if (obj->pin_count)
877 878
		i915_gem_object_flush_cpu_write_domain(obj);

879
	drm_gem_object_unreference(&obj->base);
880
unlock:
881 882 883 884 885 886 887 888 889 890 891 892 893
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
894
		    struct drm_file *file)
895 896 897 898 899 900 901 902
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

903
	obj = drm_gem_object_lookup(dev, file, args->handle);
904
	if (obj == NULL)
905
		return -ENOENT;
906 907 908 909 910 911

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
912
	drm_gem_object_unreference_unlocked(obj);
913 914 915 916 917 918 919 920
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}

921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938
/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace.  The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room.  So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
939 940
	struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data);
	struct drm_device *dev = obj->base.dev;
941
	drm_i915_private_t *dev_priv = dev->dev_private;
942 943 944
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
945
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
946 947 948 949 950

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

951 952 953
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto out;
954

C
Chris Wilson 已提交
955 956
	trace_i915_gem_object_fault(obj, page_offset, true, write);

957
	/* Now bind it into the GTT if needed */
958 959 960 961
	if (!obj->map_and_fenceable) {
		ret = i915_gem_object_unbind(obj);
		if (ret)
			goto unlock;
962
	}
963
	if (!obj->gtt_space) {
964
		ret = i915_gem_object_bind_to_gtt(obj, 0, true);
965 966
		if (ret)
			goto unlock;
967

968 969 970 971
		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}
972

973 974 975
	if (!obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

976 977 978
	if (obj->tiling_mode == I915_TILING_NONE)
		ret = i915_gem_object_put_fence(obj);
	else
979
		ret = i915_gem_object_get_fence(obj, NULL);
980 981
	if (ret)
		goto unlock;
982

983 984
	if (i915_gem_object_is_inactive(obj))
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);
985

986 987
	obj->fault_mappable = true;

988
	pfn = ((dev->agp->base + obj->gtt_offset) >> PAGE_SHIFT) +
989 990 991 992
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
993
unlock:
994
	mutex_unlock(&dev->struct_mutex);
995
out:
996
	switch (ret) {
997
	case -EIO:
998
	case -EAGAIN:
999 1000 1001 1002 1003 1004 1005
		/* Give the error handler a chance to run and move the
		 * objects off the GPU active list. Next time we service the
		 * fault, we should be able to transition the page into the
		 * GTT without touching the GPU (and so avoid further
		 * EIO/EGAIN). If the GPU is wedged, then there is no issue
		 * with coherency, just lost writes.
		 */
1006
		set_need_resched();
1007 1008
	case 0:
	case -ERESTARTSYS:
1009
	case -EINTR:
1010
		return VM_FAULT_NOPAGE;
1011 1012 1013
	case -ENOMEM:
		return VM_FAULT_OOM;
	default:
1014
		return VM_FAULT_SIGBUS;
1015 1016 1017
	}
}

1018 1019 1020 1021
/**
 * i915_gem_release_mmap - remove physical page mappings
 * @obj: obj in question
 *
1022
 * Preserve the reservation of the mmapping with the DRM core code, but
1023 1024 1025 1026 1027 1028 1029 1030 1031
 * relinquish ownership of the pages back to the system.
 *
 * It is vital that we remove the page mapping if we have mapped a tiled
 * object through the GTT and then lose the fence register due to
 * resource pressure. Similarly if the object has been moved out of the
 * aperture, than pages mapped into userspace must be revoked. Removing the
 * mapping will then trigger a page fault on the next user access, allowing
 * fixup by i915_gem_fault().
 */
1032
void
1033
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1034
{
1035 1036
	if (!obj->fault_mappable)
		return;
1037

1038 1039 1040 1041
	if (obj->base.dev->dev_mapping)
		unmap_mapping_range(obj->base.dev->dev_mapping,
				    (loff_t)obj->base.map_list.hash.key<<PAGE_SHIFT,
				    obj->base.size, 1);
1042

1043
	obj->fault_mappable = false;
1044 1045
}

1046
static uint32_t
1047
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1048
{
1049
	uint32_t gtt_size;
1050 1051

	if (INTEL_INFO(dev)->gen >= 4 ||
1052 1053
	    tiling_mode == I915_TILING_NONE)
		return size;
1054 1055 1056

	/* Previous chips need a power-of-two fence region when tiling */
	if (INTEL_INFO(dev)->gen == 3)
1057
		gtt_size = 1024*1024;
1058
	else
1059
		gtt_size = 512*1024;
1060

1061 1062
	while (gtt_size < size)
		gtt_size <<= 1;
1063

1064
	return gtt_size;
1065 1066
}

1067 1068 1069 1070 1071
/**
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
 * @obj: object to check
 *
 * Return the required GTT alignment for an object, taking into account
1072
 * potential fence register mapping.
1073 1074
 */
static uint32_t
1075 1076 1077
i915_gem_get_gtt_alignment(struct drm_device *dev,
			   uint32_t size,
			   int tiling_mode)
1078 1079 1080 1081 1082
{
	/*
	 * Minimum alignment is 4k (GTT page size), but might be greater
	 * if a fence register is needed for the object.
	 */
1083
	if (INTEL_INFO(dev)->gen >= 4 ||
1084
	    tiling_mode == I915_TILING_NONE)
1085 1086
		return 4096;

1087 1088 1089 1090
	/*
	 * Previous chips need to be aligned to the size of the smallest
	 * fence register that can contain the object.
	 */
1091
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1092 1093
}

1094 1095 1096
/**
 * i915_gem_get_unfenced_gtt_alignment - return required GTT alignment for an
 *					 unfenced object
1097 1098 1099
 * @dev: the device
 * @size: size of the object
 * @tiling_mode: tiling mode of the object
1100 1101 1102 1103
 *
 * Return the required GTT alignment for an object, only taking into account
 * unfenced tiled surface requirements.
 */
1104
uint32_t
1105 1106 1107
i915_gem_get_unfenced_gtt_alignment(struct drm_device *dev,
				    uint32_t size,
				    int tiling_mode)
1108 1109 1110 1111 1112
{
	/*
	 * Minimum alignment is 4k (GTT page size) for sane hw.
	 */
	if (INTEL_INFO(dev)->gen >= 4 || IS_G33(dev) ||
1113
	    tiling_mode == I915_TILING_NONE)
1114 1115
		return 4096;

1116 1117 1118
	/* Previous hardware however needs to be aligned to a power-of-two
	 * tile height. The simplest method for determining this is to reuse
	 * the power-of-tile object size.
1119
	 */
1120
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1121 1122
}

1123
int
1124 1125 1126 1127
i915_gem_mmap_gtt(struct drm_file *file,
		  struct drm_device *dev,
		  uint32_t handle,
		  uint64_t *offset)
1128
{
1129
	struct drm_i915_private *dev_priv = dev->dev_private;
1130
	struct drm_i915_gem_object *obj;
1131 1132 1133 1134 1135
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

1136
	ret = i915_mutex_lock_interruptible(dev);
1137
	if (ret)
1138
		return ret;
1139

1140
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1141
	if (&obj->base == NULL) {
1142 1143 1144
		ret = -ENOENT;
		goto unlock;
	}
1145

1146
	if (obj->base.size > dev_priv->mm.gtt_mappable_end) {
1147
		ret = -E2BIG;
1148
		goto out;
1149 1150
	}

1151
	if (obj->madv != I915_MADV_WILLNEED) {
1152
		DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1153 1154
		ret = -EINVAL;
		goto out;
1155 1156
	}

1157
	if (!obj->base.map_list.map) {
1158
		ret = drm_gem_create_mmap_offset(&obj->base);
1159 1160
		if (ret)
			goto out;
1161 1162
	}

1163
	*offset = (u64)obj->base.map_list.hash.key << PAGE_SHIFT;
1164

1165
out:
1166
	drm_gem_object_unreference(&obj->base);
1167
unlock:
1168
	mutex_unlock(&dev->struct_mutex);
1169
	return ret;
1170 1171
}

1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
/**
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
 * @dev: DRM device
 * @data: GTT mapping ioctl data
 * @file: GEM object info
 *
 * Simply returns the fake offset to userspace so it can mmap it.
 * The mmap call will end up in drm_gem_mmap(), which will set things
 * up so we can get faults in the handler above.
 *
 * The fault handler will take care of binding the object into the GTT
 * (since it may have been evicted to make room for something), allocating
 * a fence register, and mapping the appropriate aperture address into
 * userspace.
 */
int
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file)
{
	struct drm_i915_gem_mmap_gtt *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
}


1200
static int
1201
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
			      gfp_t gfpmask)
{
	int page_count, i;
	struct address_space *mapping;
	struct inode *inode;
	struct page *page;

	/* Get the list of pages out of our struct file.  They'll be pinned
	 * at this point until we release them.
	 */
1212 1213 1214 1215
	page_count = obj->base.size / PAGE_SIZE;
	BUG_ON(obj->pages != NULL);
	obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
	if (obj->pages == NULL)
1216 1217
		return -ENOMEM;

1218
	inode = obj->base.filp->f_path.dentry->d_inode;
1219
	mapping = inode->i_mapping;
1220 1221
	gfpmask |= mapping_gfp_mask(mapping);

1222
	for (i = 0; i < page_count; i++) {
1223
		page = shmem_read_mapping_page_gfp(mapping, i, gfpmask);
1224 1225 1226
		if (IS_ERR(page))
			goto err_pages;

1227
		obj->pages[i] = page;
1228 1229
	}

1230
	if (i915_gem_object_needs_bit17_swizzle(obj))
1231 1232 1233 1234 1235 1236
		i915_gem_object_do_bit_17_swizzle(obj);

	return 0;

err_pages:
	while (i--)
1237
		page_cache_release(obj->pages[i]);
1238

1239 1240
	drm_free_large(obj->pages);
	obj->pages = NULL;
1241 1242 1243
	return PTR_ERR(page);
}

1244
static void
1245
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1246
{
1247
	int page_count = obj->base.size / PAGE_SIZE;
1248 1249
	int i;

1250
	BUG_ON(obj->madv == __I915_MADV_PURGED);
1251

1252
	if (i915_gem_object_needs_bit17_swizzle(obj))
1253 1254
		i915_gem_object_save_bit_17_swizzle(obj);

1255 1256
	if (obj->madv == I915_MADV_DONTNEED)
		obj->dirty = 0;
1257 1258

	for (i = 0; i < page_count; i++) {
1259 1260
		if (obj->dirty)
			set_page_dirty(obj->pages[i]);
1261

1262 1263
		if (obj->madv == I915_MADV_WILLNEED)
			mark_page_accessed(obj->pages[i]);
1264

1265
		page_cache_release(obj->pages[i]);
1266
	}
1267
	obj->dirty = 0;
1268

1269 1270
	drm_free_large(obj->pages);
	obj->pages = NULL;
1271 1272
}

1273
void
1274
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
1275 1276
			       struct intel_ring_buffer *ring,
			       u32 seqno)
1277
{
1278
	struct drm_device *dev = obj->base.dev;
1279
	struct drm_i915_private *dev_priv = dev->dev_private;
1280

1281
	BUG_ON(ring == NULL);
1282
	obj->ring = ring;
1283 1284

	/* Add a reference if we're newly entering the active list. */
1285 1286 1287
	if (!obj->active) {
		drm_gem_object_reference(&obj->base);
		obj->active = 1;
1288
	}
1289

1290
	/* Move from whatever list we were on to the tail of execution. */
1291 1292
	list_move_tail(&obj->mm_list, &dev_priv->mm.active_list);
	list_move_tail(&obj->ring_list, &ring->active_list);
1293

1294
	obj->last_rendering_seqno = seqno;
1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
	if (obj->fenced_gpu_access) {
		struct drm_i915_fence_reg *reg;

		BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);

		obj->last_fenced_seqno = seqno;
		obj->last_fenced_ring = ring;

		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
	}
}

static void
i915_gem_object_move_off_active(struct drm_i915_gem_object *obj)
{
	list_del_init(&obj->ring_list);
	obj->last_rendering_seqno = 0;
1313 1314
}

1315
static void
1316
i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj)
1317
{
1318
	struct drm_device *dev = obj->base.dev;
1319 1320
	drm_i915_private_t *dev_priv = dev->dev_private;

1321 1322
	BUG_ON(!obj->active);
	list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list);
1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345

	i915_gem_object_move_off_active(obj);
}

static void
i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (obj->pin_count != 0)
		list_move_tail(&obj->mm_list, &dev_priv->mm.pinned_list);
	else
		list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	BUG_ON(!list_empty(&obj->gpu_write_list));
	BUG_ON(!obj->active);
	obj->ring = NULL;

	i915_gem_object_move_off_active(obj);
	obj->fenced_gpu_access = false;

	obj->active = 0;
1346
	obj->pending_gpu_write = false;
1347 1348 1349
	drm_gem_object_unreference(&obj->base);

	WARN_ON(i915_verify_lists(dev));
1350
}
1351

1352 1353
/* Immediately discard the backing storage */
static void
1354
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1355
{
C
Chris Wilson 已提交
1356
	struct inode *inode;
1357

1358 1359 1360
	/* Our goal here is to return as much of the memory as
	 * is possible back to the system as we are called from OOM.
	 * To do this we must instruct the shmfs to drop all of its
1361
	 * backing pages, *now*.
1362
	 */
1363
	inode = obj->base.filp->f_path.dentry->d_inode;
1364
	shmem_truncate_range(inode, 0, (loff_t)-1);
C
Chris Wilson 已提交
1365

1366 1367 1368
	if (obj->base.map_list.map)
		drm_gem_free_mmap_offset(&obj->base);

1369
	obj->madv = __I915_MADV_PURGED;
1370 1371 1372
}

static inline int
1373
i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj)
1374
{
1375
	return obj->madv == I915_MADV_DONTNEED;
1376 1377
}

1378
static void
C
Chris Wilson 已提交
1379 1380
i915_gem_process_flushing_list(struct intel_ring_buffer *ring,
			       uint32_t flush_domains)
1381
{
1382
	struct drm_i915_gem_object *obj, *next;
1383

1384
	list_for_each_entry_safe(obj, next,
1385
				 &ring->gpu_write_list,
1386
				 gpu_write_list) {
1387 1388
		if (obj->base.write_domain & flush_domains) {
			uint32_t old_write_domain = obj->base.write_domain;
1389

1390 1391
			obj->base.write_domain = 0;
			list_del_init(&obj->gpu_write_list);
1392
			i915_gem_object_move_to_active(obj, ring,
C
Chris Wilson 已提交
1393
						       i915_gem_next_request_seqno(ring));
1394 1395

			trace_i915_gem_object_change_domain(obj,
1396
							    obj->base.read_domains,
1397 1398 1399 1400
							    old_write_domain);
		}
	}
}
1401

1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423
static u32
i915_gem_get_seqno(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 seqno = dev_priv->next_seqno;

	/* reserve 0 for non-seqno */
	if (++dev_priv->next_seqno == 0)
		dev_priv->next_seqno = 1;

	return seqno;
}

u32
i915_gem_next_request_seqno(struct intel_ring_buffer *ring)
{
	if (ring->outstanding_lazy_request == 0)
		ring->outstanding_lazy_request = i915_gem_get_seqno(ring->dev);

	return ring->outstanding_lazy_request;
}

1424
int
C
Chris Wilson 已提交
1425
i915_add_request(struct intel_ring_buffer *ring,
1426
		 struct drm_file *file,
C
Chris Wilson 已提交
1427
		 struct drm_i915_gem_request *request)
1428
{
C
Chris Wilson 已提交
1429
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1430
	uint32_t seqno;
1431
	u32 request_ring_position;
1432
	int was_empty;
1433 1434 1435
	int ret;

	BUG_ON(request == NULL);
1436
	seqno = i915_gem_next_request_seqno(ring);
1437

1438 1439 1440 1441 1442 1443 1444
	/* Record the position of the start of the request so that
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
	request_ring_position = intel_ring_get_tail(ring);

1445 1446 1447
	ret = ring->add_request(ring, &seqno);
	if (ret)
	    return ret;
1448

C
Chris Wilson 已提交
1449
	trace_i915_gem_request_add(ring, seqno);
1450 1451

	request->seqno = seqno;
1452
	request->ring = ring;
1453
	request->tail = request_ring_position;
1454
	request->emitted_jiffies = jiffies;
1455 1456 1457
	was_empty = list_empty(&ring->request_list);
	list_add_tail(&request->list, &ring->request_list);

C
Chris Wilson 已提交
1458 1459 1460
	if (file) {
		struct drm_i915_file_private *file_priv = file->driver_priv;

1461
		spin_lock(&file_priv->mm.lock);
1462
		request->file_priv = file_priv;
1463
		list_add_tail(&request->client_list,
1464
			      &file_priv->mm.request_list);
1465
		spin_unlock(&file_priv->mm.lock);
1466
	}
1467

1468
	ring->outstanding_lazy_request = 0;
C
Chris Wilson 已提交
1469

B
Ben Gamari 已提交
1470
	if (!dev_priv->mm.suspended) {
1471 1472 1473 1474 1475
		if (i915_enable_hangcheck) {
			mod_timer(&dev_priv->hangcheck_timer,
				  jiffies +
				  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
		}
B
Ben Gamari 已提交
1476
		if (was_empty)
1477 1478
			queue_delayed_work(dev_priv->wq,
					   &dev_priv->mm.retire_work, HZ);
B
Ben Gamari 已提交
1479
	}
1480
	return 0;
1481 1482
}

1483 1484
static inline void
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1485
{
1486
	struct drm_i915_file_private *file_priv = request->file_priv;
1487

1488 1489
	if (!file_priv)
		return;
C
Chris Wilson 已提交
1490

1491
	spin_lock(&file_priv->mm.lock);
1492 1493 1494 1495
	if (request->file_priv) {
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
1496
	spin_unlock(&file_priv->mm.lock);
1497 1498
}

1499 1500
static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv,
				      struct intel_ring_buffer *ring)
1501
{
1502 1503
	while (!list_empty(&ring->request_list)) {
		struct drm_i915_gem_request *request;
1504

1505 1506 1507
		request = list_first_entry(&ring->request_list,
					   struct drm_i915_gem_request,
					   list);
1508

1509
		list_del(&request->list);
1510
		i915_gem_request_remove_from_client(request);
1511 1512
		kfree(request);
	}
1513

1514
	while (!list_empty(&ring->active_list)) {
1515
		struct drm_i915_gem_object *obj;
1516

1517 1518 1519
		obj = list_first_entry(&ring->active_list,
				       struct drm_i915_gem_object,
				       ring_list);
1520

1521 1522 1523
		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
1524 1525 1526
	}
}

1527 1528 1529 1530 1531
static void i915_gem_reset_fences(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

1532
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
1533
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
1534 1535 1536 1537 1538 1539 1540 1541
		struct drm_i915_gem_object *obj = reg->obj;

		if (!obj)
			continue;

		if (obj->tiling_mode)
			i915_gem_release_mmap(obj);

1542 1543 1544 1545 1546
		reg->obj->fence_reg = I915_FENCE_REG_NONE;
		reg->obj->fenced_gpu_access = false;
		reg->obj->last_fenced_seqno = 0;
		reg->obj->last_fenced_ring = NULL;
		i915_gem_clear_fence_reg(dev, reg);
1547 1548 1549
	}
}

1550
void i915_gem_reset(struct drm_device *dev)
1551
{
1552
	struct drm_i915_private *dev_priv = dev->dev_private;
1553
	struct drm_i915_gem_object *obj;
1554
	int i;
1555

1556 1557
	for (i = 0; i < I915_NUM_RINGS; i++)
		i915_gem_reset_ring_lists(dev_priv, &dev_priv->ring[i]);
1558 1559 1560 1561 1562 1563

	/* Remove anything from the flushing lists. The GPU cache is likely
	 * to be lost on reset along with the data, so simply move the
	 * lost bo to the inactive list.
	 */
	while (!list_empty(&dev_priv->mm.flushing_list)) {
1564
		obj = list_first_entry(&dev_priv->mm.flushing_list,
1565 1566
				      struct drm_i915_gem_object,
				      mm_list);
1567

1568 1569 1570
		obj->base.write_domain = 0;
		list_del_init(&obj->gpu_write_list);
		i915_gem_object_move_to_inactive(obj);
1571 1572 1573 1574 1575
	}

	/* Move everything out of the GPU domains to ensure we do any
	 * necessary invalidation upon reuse.
	 */
1576
	list_for_each_entry(obj,
1577
			    &dev_priv->mm.inactive_list,
1578
			    mm_list)
1579
	{
1580
		obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
1581
	}
1582 1583

	/* The fence registers are invalidated so clear them out */
1584
	i915_gem_reset_fences(dev);
1585 1586 1587 1588 1589
}

/**
 * This function clears the request list as sequence numbers are passed.
 */
1590
void
C
Chris Wilson 已提交
1591
i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
1592 1593
{
	uint32_t seqno;
1594
	int i;
1595

C
Chris Wilson 已提交
1596
	if (list_empty(&ring->request_list))
1597 1598
		return;

C
Chris Wilson 已提交
1599
	WARN_ON(i915_verify_lists(ring->dev));
1600

1601
	seqno = ring->get_seqno(ring);
1602

1603
	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++)
1604 1605 1606
		if (seqno >= ring->sync_seqno[i])
			ring->sync_seqno[i] = 0;

1607
	while (!list_empty(&ring->request_list)) {
1608 1609
		struct drm_i915_gem_request *request;

1610
		request = list_first_entry(&ring->request_list,
1611 1612 1613
					   struct drm_i915_gem_request,
					   list);

1614
		if (!i915_seqno_passed(seqno, request->seqno))
1615 1616
			break;

C
Chris Wilson 已提交
1617
		trace_i915_gem_request_retire(ring, request->seqno);
1618 1619 1620 1621 1622 1623
		/* We know the GPU must have read the request to have
		 * sent us the seqno + interrupt, so use the position
		 * of tail of the request to update the last known position
		 * of the GPU head.
		 */
		ring->last_retired_head = request->tail;
1624 1625

		list_del(&request->list);
1626
		i915_gem_request_remove_from_client(request);
1627 1628
		kfree(request);
	}
1629

1630 1631 1632 1633
	/* Move any buffers on the active list that are no longer referenced
	 * by the ringbuffer to the flushing/inactive lists as appropriate.
	 */
	while (!list_empty(&ring->active_list)) {
1634
		struct drm_i915_gem_object *obj;
1635

1636
		obj = list_first_entry(&ring->active_list,
1637 1638
				      struct drm_i915_gem_object,
				      ring_list);
1639

1640
		if (!i915_seqno_passed(seqno, obj->last_rendering_seqno))
1641
			break;
1642

1643
		if (obj->base.write_domain != 0)
1644 1645 1646
			i915_gem_object_move_to_flushing(obj);
		else
			i915_gem_object_move_to_inactive(obj);
1647
	}
1648

C
Chris Wilson 已提交
1649 1650
	if (unlikely(ring->trace_irq_seqno &&
		     i915_seqno_passed(seqno, ring->trace_irq_seqno))) {
1651
		ring->irq_put(ring);
C
Chris Wilson 已提交
1652
		ring->trace_irq_seqno = 0;
1653
	}
1654

C
Chris Wilson 已提交
1655
	WARN_ON(i915_verify_lists(ring->dev));
1656 1657
}

1658 1659 1660 1661
void
i915_gem_retire_requests(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
1662
	int i;
1663

1664
	if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1665
	    struct drm_i915_gem_object *obj, *next;
1666 1667 1668 1669 1670 1671

	    /* We must be careful that during unbind() we do not
	     * accidentally infinitely recurse into retire requests.
	     * Currently:
	     *   retire -> free -> unbind -> wait -> retire_ring
	     */
1672
	    list_for_each_entry_safe(obj, next,
1673
				     &dev_priv->mm.deferred_free_list,
1674
				     mm_list)
1675
		    i915_gem_free_object_tail(obj);
1676 1677
	}

1678
	for (i = 0; i < I915_NUM_RINGS; i++)
C
Chris Wilson 已提交
1679
		i915_gem_retire_requests_ring(&dev_priv->ring[i]);
1680 1681
}

1682
static void
1683 1684 1685 1686
i915_gem_retire_work_handler(struct work_struct *work)
{
	drm_i915_private_t *dev_priv;
	struct drm_device *dev;
1687 1688
	bool idle;
	int i;
1689 1690 1691 1692 1693

	dev_priv = container_of(work, drm_i915_private_t,
				mm.retire_work.work);
	dev = dev_priv->dev;

1694 1695 1696 1697 1698 1699
	/* Come back later if the device is busy... */
	if (!mutex_trylock(&dev->struct_mutex)) {
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
		return;
	}

1700
	i915_gem_retire_requests(dev);
1701

1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712
	/* Send a periodic flush down the ring so we don't hold onto GEM
	 * objects indefinitely.
	 */
	idle = true;
	for (i = 0; i < I915_NUM_RINGS; i++) {
		struct intel_ring_buffer *ring = &dev_priv->ring[i];

		if (!list_empty(&ring->gpu_write_list)) {
			struct drm_i915_gem_request *request;
			int ret;

C
Chris Wilson 已提交
1713 1714
			ret = i915_gem_flush_ring(ring,
						  0, I915_GEM_GPU_DOMAINS);
1715 1716
			request = kzalloc(sizeof(*request), GFP_KERNEL);
			if (ret || request == NULL ||
C
Chris Wilson 已提交
1717
			    i915_add_request(ring, NULL, request))
1718 1719 1720 1721 1722 1723 1724
			    kfree(request);
		}

		idle &= list_empty(&ring->request_list);
	}

	if (!dev_priv->mm.suspended && !idle)
1725
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1726

1727 1728 1729
	mutex_unlock(&dev->struct_mutex);
}

C
Chris Wilson 已提交
1730 1731 1732 1733
/**
 * Waits for a sequence number to be signaled, and cleans up the
 * request and object lists appropriately for that event.
 */
1734
int
C
Chris Wilson 已提交
1735
i915_wait_request(struct intel_ring_buffer *ring,
1736 1737
		  uint32_t seqno,
		  bool do_retire)
1738
{
C
Chris Wilson 已提交
1739
	drm_i915_private_t *dev_priv = ring->dev->dev_private;
1740
	u32 ier;
1741 1742 1743 1744
	int ret = 0;

	BUG_ON(seqno == 0);

1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756
	if (atomic_read(&dev_priv->mm.wedged)) {
		struct completion *x = &dev_priv->error_completion;
		bool recovery_complete;
		unsigned long flags;

		/* Give the error handler a chance to run. */
		spin_lock_irqsave(&x->wait.lock, flags);
		recovery_complete = x->done > 0;
		spin_unlock_irqrestore(&x->wait.lock, flags);

		return recovery_complete ? -EIO : -EAGAIN;
	}
1757

1758
	if (seqno == ring->outstanding_lazy_request) {
1759 1760 1761 1762
		struct drm_i915_gem_request *request;

		request = kzalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
1763
			return -ENOMEM;
1764

C
Chris Wilson 已提交
1765
		ret = i915_add_request(ring, NULL, request);
1766 1767 1768 1769 1770 1771
		if (ret) {
			kfree(request);
			return ret;
		}

		seqno = request->seqno;
1772
	}
1773

1774
	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
C
Chris Wilson 已提交
1775
		if (HAS_PCH_SPLIT(ring->dev))
1776 1777 1778
			ier = I915_READ(DEIER) | I915_READ(GTIER);
		else
			ier = I915_READ(IER);
1779 1780 1781
		if (!ier) {
			DRM_ERROR("something (likely vbetool) disabled "
				  "interrupts, re-enabling\n");
1782 1783
			ring->dev->driver->irq_preinstall(ring->dev);
			ring->dev->driver->irq_postinstall(ring->dev);
1784 1785
		}

C
Chris Wilson 已提交
1786
		trace_i915_gem_request_wait_begin(ring, seqno);
C
Chris Wilson 已提交
1787

1788
		ring->waiting_seqno = seqno;
1789
		if (ring->irq_get(ring)) {
1790
			if (dev_priv->mm.interruptible)
1791 1792 1793 1794 1795 1796 1797 1798 1799
				ret = wait_event_interruptible(ring->irq_queue,
							       i915_seqno_passed(ring->get_seqno(ring), seqno)
							       || atomic_read(&dev_priv->mm.wedged));
			else
				wait_event(ring->irq_queue,
					   i915_seqno_passed(ring->get_seqno(ring), seqno)
					   || atomic_read(&dev_priv->mm.wedged));

			ring->irq_put(ring);
1800 1801 1802
		} else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
							     seqno) ||
					   atomic_read(&dev_priv->mm.wedged), 3000))
1803
			ret = -EBUSY;
1804
		ring->waiting_seqno = 0;
C
Chris Wilson 已提交
1805

C
Chris Wilson 已提交
1806
		trace_i915_gem_request_wait_end(ring, seqno);
1807
	}
1808
	if (atomic_read(&dev_priv->mm.wedged))
1809
		ret = -EAGAIN;
1810 1811 1812 1813 1814 1815

	/* Directly dispatch request retiring.  While we have the work queue
	 * to handle this, the waiter on a request often wants an associated
	 * buffer to have made it to the inactive list, and we would need
	 * a separate wait queue to handle that.
	 */
1816
	if (ret == 0 && do_retire)
C
Chris Wilson 已提交
1817
		i915_gem_retire_requests_ring(ring);
1818 1819 1820 1821 1822 1823 1824 1825

	return ret;
}

/**
 * Ensures that all rendering to the object has completed and the object is
 * safe to unbind from the GTT or access from the CPU.
 */
int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj)
{
	int ret;

	/* This function only exists to support waiting for existing rendering,
	 * not for emitting required flushes.
	 */
	BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0);

	/* If there is rendering queued on the buffer being evicted, wait for
	 * it.
	 */
	if (obj->active) {
		ret = i915_wait_request(obj->ring, obj->last_rendering_seqno,
					true);
		if (ret)
			return ret;
	}

	return 0;
}

static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
{
	u32 old_write_domain, old_read_domains;

	/* Act as a barrier for all accesses through the GTT */
	mb();

	/* Force a pagefault for domain tracking on next user access */
	i915_gem_release_mmap(obj);

	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
		return;

	old_read_domains = obj->base.read_domains;
	old_write_domain = obj->base.write_domain;

	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);
}

1873 1874 1875
/**
 * Unbinds an object from the GTT aperture.
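 *
 * The teardown below runs in roughly the reverse order of binding: wait for
 * the GPU, flush the GTT domain, move the object to the CPU domain, release
 * any fence, unmap from the global GTT and aliasing PPGTT, and finally drop
 * the backing pages.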
 */
int
i915_gem_object_unbind(struct drm_i915_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->base.dev->dev_private;
	int ret = 0;

	if (obj->gtt_space == NULL)
		return 0;

	if (obj->pin_count != 0) {
		DRM_ERROR("Attempting to unbind pinned buffer\n");
		return -EINVAL;
	}

	ret = i915_gem_object_finish_gpu(obj);
	if (ret == -ERESTARTSYS)
		return ret;
	/* Continue on if we fail due to EIO, the GPU is hung so we
	 * should be safe and we need to cleanup or else we might
	 * cause memory corruption through use-after-free.
	 */

	i915_gem_object_finish_gtt(obj);

	/* Move the object to the CPU domain to ensure that
	 * any possible CPU writes while it's not in the GTT
	 * are flushed when we go to remap it.
	 */
	if (ret == 0)
		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret == -ERESTARTSYS)
		return ret;
	if (ret) {
		/* In the event of a disaster, abandon all caches and
		 * hope for the best.
		 */
		i915_gem_clflush_object(obj);
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	/* release the fence reg _after_ flushing */
	ret = i915_gem_object_put_fence(obj);
	if (ret == -ERESTARTSYS)
		return ret;

	trace_i915_gem_object_unbind(obj);

	if (obj->has_global_gtt_mapping)
		i915_gem_gtt_unbind_object(obj);
	if (obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_unbind_object(dev_priv->mm.aliasing_ppgtt, obj);
		obj->has_aliasing_ppgtt_mapping = 0;
	}
	i915_gem_gtt_finish_object(obj);

	i915_gem_object_put_pages_gtt(obj);

	list_del_init(&obj->gtt_list);
	list_del_init(&obj->mm_list);
	/* Avoid an unnecessary call to unbind on rebind. */
	obj->map_and_fenceable = true;

	drm_mm_put_block(obj->gtt_space);
	obj->gtt_space = NULL;
	obj->gtt_offset = 0;

	if (i915_gem_object_is_purgeable(obj))
		i915_gem_object_truncate(obj);

	return ret;
}

int
i915_gem_flush_ring(struct intel_ring_buffer *ring,
		    uint32_t invalidate_domains,
		    uint32_t flush_domains)
{
	int ret;

	if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains);

	ret = ring->flush(ring, invalidate_domains, flush_domains);
	if (ret)
		return ret;

	if (flush_domains & I915_GEM_GPU_DOMAINS)
		i915_gem_process_flushing_list(ring, flush_domains);

	return 0;
}

static int i915_ring_idle(struct intel_ring_buffer *ring, bool do_retire)
{
	int ret;

	if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list))
		return 0;

	if (!list_empty(&ring->gpu_write_list)) {
		ret = i915_gem_flush_ring(ring,
				    I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return i915_wait_request(ring, i915_gem_next_request_seqno(ring),
				 do_retire);
}

int i915_gpu_idle(struct drm_device *dev, bool do_retire)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	/* Flush everything onto the inactive list. */
	for (i = 0; i < I915_NUM_RINGS; i++) {
		ret = i915_ring_idle(&dev_priv->ring[i], do_retire);
		if (ret)
			return ret;
	}

	return 0;
}
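
/* The *_write_fence_reg() helpers below program one fence (detiling) register
 * for an object, either directly via MMIO or, when a pipelined ring is given,
 * by emitting MI_LOAD_REGISTER_IMM on that ring so the update is ordered with
 * in-flight rendering.  The register encoding differs per generation, hence
 * one helper per hardware family.
 */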

static int sandybridge_write_fence_reg(struct drm_i915_gem_object *obj,
				       struct intel_ring_buffer *pipelined)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;
	uint64_t val;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
			 0xfffff000) << 32;
	val |= obj->gtt_offset & 0xfffff000;
	val |= (uint64_t)((obj->stride / 128) - 1) <<
		SANDYBRIDGE_FENCE_PITCH_SHIFT;

	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	if (pipelined) {
		int ret = intel_ring_begin(pipelined, 6);
		if (ret)
			return ret;

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8);
		intel_ring_emit(pipelined, (u32)val);
		intel_ring_emit(pipelined, FENCE_REG_SANDYBRIDGE_0 + regnum*8 + 4);
		intel_ring_emit(pipelined, (u32)(val >> 32));
		intel_ring_advance(pipelined);
	} else
		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + regnum * 8, val);

	return 0;
}

static int i965_write_fence_reg(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;
	uint64_t val;

	val = (uint64_t)((obj->gtt_offset + size - 4096) &
		    0xfffff000) << 32;
	val |= obj->gtt_offset & 0xfffff000;
	val |= ((obj->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I965_FENCE_TILING_Y_SHIFT;
	val |= I965_FENCE_REG_VALID;

	if (pipelined) {
		int ret = intel_ring_begin(pipelined, 6);
		if (ret)
			return ret;

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(2));
		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8);
		intel_ring_emit(pipelined, (u32)val);
		intel_ring_emit(pipelined, FENCE_REG_965_0 + regnum*8 + 4);
		intel_ring_emit(pipelined, (u32)(val >> 32));
		intel_ring_advance(pipelined);
	} else
		I915_WRITE64(FENCE_REG_965_0 + regnum * 8, val);

	return 0;
}

static int i915_write_fence_reg(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	u32 fence_reg, val, pitch_val;
	int tile_width;

	if (WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
		 (size & -size) != size ||
		 (obj->gtt_offset & (size - 1)),
		 "object 0x%08x [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n",
		 obj->gtt_offset, obj->map_and_fenceable, size))
		return -EINVAL;

	if (obj->tiling_mode == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev))
		tile_width = 128;
	else
		tile_width = 512;

	/* Note: pitch better be a power of two tile widths */
	pitch_val = obj->stride / tile_width;
	pitch_val = ffs(pitch_val) - 1;

	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I915_FENCE_SIZE_BITS(size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

	fence_reg = obj->fence_reg;
	if (fence_reg < 8)
		fence_reg = FENCE_REG_830_0 + fence_reg * 4;
	else
		fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;

	if (pipelined) {
		int ret = intel_ring_begin(pipelined, 4);
		if (ret)
			return ret;

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(pipelined, fence_reg);
		intel_ring_emit(pipelined, val);
		intel_ring_advance(pipelined);
	} else
		I915_WRITE(fence_reg, val);

	return 0;
}

static int i830_write_fence_reg(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	u32 size = obj->gtt_space->size;
	int regnum = obj->fence_reg;
	uint32_t val;
	uint32_t pitch_val;

	if (WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
		 (size & -size) != size ||
		 (obj->gtt_offset & (size - 1)),
		 "object 0x%08x not 512K or pot-size 0x%08x aligned\n",
		 obj->gtt_offset, size))
		return -EINVAL;

	pitch_val = obj->stride / 128;
	pitch_val = ffs(pitch_val) - 1;

	val = obj->gtt_offset;
	if (obj->tiling_mode == I915_TILING_Y)
		val |= 1 << I830_FENCE_TILING_Y_SHIFT;
	val |= I830_FENCE_SIZE_BITS(size);
	val |= pitch_val << I830_FENCE_PITCH_SHIFT;
	val |= I830_FENCE_REG_VALID;

	if (pipelined) {
		int ret = intel_ring_begin(pipelined, 4);
		if (ret)
			return ret;

		intel_ring_emit(pipelined, MI_NOOP);
		intel_ring_emit(pipelined, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(pipelined, FENCE_REG_830_0 + regnum*4);
		intel_ring_emit(pipelined, val);
		intel_ring_advance(pipelined);
	} else
		I915_WRITE(FENCE_REG_830_0 + regnum * 4, val);

	return 0;
}
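
/* Before a fence register can be changed or released, any GPU access made
 * through it must be finished: flush the pending GPU writes and, unless the
 * update is queued on the same ring, wait for the last fenced access to
 * retire.
 */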

static bool ring_passed_seqno(struct intel_ring_buffer *ring, u32 seqno)
{
	return i915_seqno_passed(ring->get_seqno(ring), seqno);
}

static int
i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
			    struct intel_ring_buffer *pipelined)
{
	int ret;

	if (obj->fenced_gpu_access) {
		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
			ret = i915_gem_flush_ring(obj->last_fenced_ring,
						  0, obj->base.write_domain);
			if (ret)
				return ret;
		}

		obj->fenced_gpu_access = false;
	}

	if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
		if (!ring_passed_seqno(obj->last_fenced_ring,
				       obj->last_fenced_seqno)) {
			ret = i915_wait_request(obj->last_fenced_ring,
						obj->last_fenced_seqno,
						true);
			if (ret)
				return ret;
		}

		obj->last_fenced_seqno = 0;
		obj->last_fenced_ring = NULL;
	}

	/* Ensure that all CPU reads are completed before installing a fence
	 * and all writes before removing the fence.
	 */
	if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
		mb();

	return 0;
}

int
i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
{
	int ret;

	if (obj->tiling_mode)
		i915_gem_release_mmap(obj);

	ret = i915_gem_object_flush_fence(obj, NULL);
	if (ret)
		return ret;

	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

		WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count);
		i915_gem_clear_fence_reg(obj->base.dev,
					 &dev_priv->fence_regs[obj->fence_reg]);

		obj->fence_reg = I915_FENCE_REG_NONE;
	}

	return 0;
}
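
/* Fence register selection policy: prefer a completely unused register;
 * failing that, steal the least-recently-used unpinned one, preferring a
 * register whose last user ran on the given pipelined ring so that no extra
 * wait is required before reprogramming it.
 */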

static struct drm_i915_fence_reg *
i915_find_fence_reg(struct drm_device *dev,
		    struct intel_ring_buffer *pipelined)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg, *first, *avail;
	int i;

	/* First try to find a free reg */
	avail = NULL;
	for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
		reg = &dev_priv->fence_regs[i];
		if (!reg->obj)
			return reg;

		if (!reg->pin_count)
			avail = reg;
	}

	if (avail == NULL)
		return NULL;

	/* None available, try to steal one or wait for a user to finish */
	avail = first = NULL;
	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
		if (reg->pin_count)
			continue;

		if (first == NULL)
			first = reg;

		if (!pipelined ||
		    !reg->obj->last_fenced_ring ||
		    reg->obj->last_fenced_ring == pipelined) {
			avail = reg;
			break;
		}
	}

	if (avail == NULL)
		avail = first;

	return avail;
}

/**
 * i915_gem_object_get_fence - set up a fence reg for an object
 * @obj: object to map through a fence reg
 * @pipelined: ring on which to queue the change, or NULL for CPU access
 *
 * When mapping objects through the GTT, userspace wants to be able to write
 * to them without having to worry about swizzling if the object is tiled.
 *
 * This function walks the fence regs looking for a free one for @obj,
 * stealing one if it can't find any.
 *
 * It then sets up the reg based on the object's properties: address, pitch
 * and tiling format.
 */
int
i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
			  struct intel_ring_buffer *pipelined)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_fence_reg *reg;
	int ret;

	/* XXX disable pipelining. There are bugs. Shocking. */
	pipelined = NULL;

	/* Just update our place in the LRU if our fence is getting reused. */
	if (obj->fence_reg != I915_FENCE_REG_NONE) {
		reg = &dev_priv->fence_regs[obj->fence_reg];
		list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);

		if (obj->tiling_changed) {
			ret = i915_gem_object_flush_fence(obj, pipelined);
			if (ret)
				return ret;

			if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
				pipelined = NULL;

			if (pipelined) {
				reg->setup_seqno =
					i915_gem_next_request_seqno(pipelined);
				obj->last_fenced_seqno = reg->setup_seqno;
				obj->last_fenced_ring = pipelined;
			}

			goto update;
		}

		if (!pipelined) {
			if (reg->setup_seqno) {
				if (!ring_passed_seqno(obj->last_fenced_ring,
						       reg->setup_seqno)) {
					ret = i915_wait_request(obj->last_fenced_ring,
								reg->setup_seqno,
								true);
					if (ret)
						return ret;
				}

				reg->setup_seqno = 0;
			}
		} else if (obj->last_fenced_ring &&
			   obj->last_fenced_ring != pipelined) {
			ret = i915_gem_object_flush_fence(obj, pipelined);
			if (ret)
				return ret;
		}

		return 0;
	}

	reg = i915_find_fence_reg(dev, pipelined);
	if (reg == NULL)
		return -EDEADLK;

	ret = i915_gem_object_flush_fence(obj, pipelined);
	if (ret)
		return ret;

	if (reg->obj) {
		struct drm_i915_gem_object *old = reg->obj;

		drm_gem_object_reference(&old->base);

		if (old->tiling_mode)
			i915_gem_release_mmap(old);

		ret = i915_gem_object_flush_fence(old, pipelined);
		if (ret) {
			drm_gem_object_unreference(&old->base);
			return ret;
		}

		if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
			pipelined = NULL;

		old->fence_reg = I915_FENCE_REG_NONE;
		old->last_fenced_ring = pipelined;
		old->last_fenced_seqno =
			pipelined ? i915_gem_next_request_seqno(pipelined) : 0;

		drm_gem_object_unreference(&old->base);
	} else if (obj->last_fenced_seqno == 0)
		pipelined = NULL;

	reg->obj = obj;
	list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
	obj->fence_reg = reg - dev_priv->fence_regs;
	obj->last_fenced_ring = pipelined;

	reg->setup_seqno =
		pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
	obj->last_fenced_seqno = reg->setup_seqno;

update:
	obj->tiling_changed = false;
	switch (INTEL_INFO(dev)->gen) {
	case 7:
	case 6:
		ret = sandybridge_write_fence_reg(obj, pipelined);
		break;
	case 5:
	case 4:
		ret = i965_write_fence_reg(obj, pipelined);
		break;
	case 3:
		ret = i915_write_fence_reg(obj, pipelined);
		break;
	case 2:
		ret = i830_write_fence_reg(obj, pipelined);
		break;
	}

	return ret;
}

/**
 * i915_gem_clear_fence_reg - clear out fence register info
 * @dev: DRM device
 * @reg: fence register to clear
 *
 * Zeroes out the fence register itself and clears out the associated
 * data structures in dev_priv and the fence register entry.
 */
static void
i915_gem_clear_fence_reg(struct drm_device *dev,
			 struct drm_i915_fence_reg *reg)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t fence_reg = reg - dev_priv->fence_regs;

	switch (INTEL_INFO(dev)->gen) {
	case 7:
	case 6:
		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
		break;
	case 5:
	case 4:
		I915_WRITE64(FENCE_REG_965_0 + fence_reg*8, 0);
		break;
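	/* Gen3 uses a separate register block for fences 8-15; the first
	 * eight share the gen2 layout, so the else branch deliberately
	 * falls through into the gen2 case below.
	 */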
	case 3:
		if (fence_reg >= 8)
			fence_reg = FENCE_REG_945_8 + (fence_reg - 8) * 4;
		else
	case 2:
			fence_reg = FENCE_REG_830_0 + fence_reg * 4;

		I915_WRITE(fence_reg, 0);
		break;
	}

	list_del_init(&reg->lru_list);
	reg->obj = NULL;
	reg->setup_seqno = 0;
	reg->pin_count = 0;
}

/**
 * Finds free space in the GTT aperture and binds the object there.
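 *
 * When map_and_fenceable is set, the node must be allocated from the mappable
 * part of the aperture and use the fenced size/alignment so a fence register
 * can later cover it; otherwise the looser unfenced alignment is sufficient.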
 */
static int
i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
			    unsigned alignment,
			    bool map_and_fenceable)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_mm_node *free_space;
	gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
	u32 size, fence_size, fence_alignment, unfenced_alignment;
	bool mappable, fenceable;
	int ret;

	if (obj->madv != I915_MADV_WILLNEED) {
		DRM_ERROR("Attempting to bind a purgeable object\n");
		return -EINVAL;
	}

	fence_size = i915_gem_get_gtt_size(dev,
					   obj->base.size,
					   obj->tiling_mode);
	fence_alignment = i915_gem_get_gtt_alignment(dev,
						     obj->base.size,
						     obj->tiling_mode);
	unfenced_alignment =
		i915_gem_get_unfenced_gtt_alignment(dev,
						    obj->base.size,
						    obj->tiling_mode);

	if (alignment == 0)
		alignment = map_and_fenceable ? fence_alignment :
						unfenced_alignment;
	if (map_and_fenceable && alignment & (fence_alignment - 1)) {
		DRM_ERROR("Invalid object alignment requested %u\n", alignment);
		return -EINVAL;
	}

	size = map_and_fenceable ? fence_size : obj->base.size;

	/* If the object is bigger than the entire aperture, reject it early
	 * before evicting everything in a vain attempt to find space.
	 */
	if (obj->base.size >
	    (map_and_fenceable ? dev_priv->mm.gtt_mappable_end : dev_priv->mm.gtt_total)) {
		DRM_ERROR("Attempting to bind an object larger than the aperture\n");
		return -E2BIG;
	}

 search_free:
	if (map_and_fenceable)
		free_space =
			drm_mm_search_free_in_range(&dev_priv->mm.gtt_space,
						    size, alignment, 0,
						    dev_priv->mm.gtt_mappable_end,
						    0);
	else
		free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
						size, alignment, 0);

	if (free_space != NULL) {
		if (map_and_fenceable)
			obj->gtt_space =
				drm_mm_get_block_range_generic(free_space,
							       size, alignment, 0,
							       dev_priv->mm.gtt_mappable_end,
							       0);
		else
			obj->gtt_space =
				drm_mm_get_block(free_space, size, alignment);
	}
	if (obj->gtt_space == NULL) {
		/* If the gtt is empty and we're still having trouble
		 * fitting our object in, we're out of memory.
		 */
		ret = i915_gem_evict_something(dev, size, alignment,
					       map_and_fenceable);
		if (ret)
			return ret;

		goto search_free;
	}

	ret = i915_gem_object_get_pages_gtt(obj, gfpmask);
	if (ret) {
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;

		if (ret == -ENOMEM) {
			/* first try to reclaim some memory by clearing the GTT */
			ret = i915_gem_evict_everything(dev, false);
			if (ret) {
				/* now try to shrink everyone else */
				if (gfpmask) {
					gfpmask = 0;
					goto search_free;
				}

				return -ENOMEM;
			}

			goto search_free;
		}

		return ret;
	}

	ret = i915_gem_gtt_prepare_object(obj);
	if (ret) {
		i915_gem_object_put_pages_gtt(obj);
		drm_mm_put_block(obj->gtt_space);
		obj->gtt_space = NULL;

		if (i915_gem_evict_everything(dev, false))
			return ret;

		goto search_free;
	}

	if (!dev_priv->mm.aliasing_ppgtt)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

	list_add_tail(&obj->gtt_list, &dev_priv->mm.gtt_list);
	list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list);

	/* Assert that the object is not currently in any GPU domain. As it
	 * wasn't in the GTT, there shouldn't be any way it could have been in
	 * a GPU cache
	 */
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);

	obj->gtt_offset = obj->gtt_space->start;

	fenceable =
		obj->gtt_space->size == fence_size &&
		(obj->gtt_space->start & (fence_alignment - 1)) == 0;

	mappable =
		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;

	obj->map_and_fenceable = mappable && fenceable;

	trace_i915_gem_object_bind(obj, map_and_fenceable);
	return 0;
}

void
2615
i915_gem_clflush_object(struct drm_i915_gem_object *obj)
2616 2617 2618 2619 2620
{
	/* If we don't have a page list set up, then we're not pinned
	 * to GPU, and we can ignore the cache flush because it'll happen
	 * again at bind time.
	 */
2621
	if (obj->pages == NULL)
2622 2623
		return;

2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634
	/* If the GPU is snooping the contents of the CPU cache,
	 * we do not need to manually clear the CPU cache lines.  However,
	 * the caches are only snooped when the render cache is
	 * flushed/invalidated.  As we always have to emit invalidations
	 * and flushes when moving into and out of the RENDER domain, correct
	 * snooping behaviour occurs naturally as the result of our domain
	 * tracking.
	 */
	if (obj->cache_level != I915_CACHE_NONE)
		return;

C
Chris Wilson 已提交
2635
	trace_i915_gem_object_clflush(obj);
2636

2637
	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
2638 2639
}

/** Flushes any GPU write domain for the object if it's dirty. */
static int
i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj)
{
	if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	/* Queue the GPU write cache flushing we need. */
	return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
}

/** Flushes the GTT write domain for the object if it's dirty. */
static void
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
		return;

	/* No actual flushing is required for the GTT write domain.  Writes
	 * to it immediately go to main memory as far as we know, so there's
	 * no chipset flush.  It also doesn't land in render cache.
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 */
	wmb();

	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}

/** Flushes the CPU write domain for the object if it's dirty. */
static void
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
{
	uint32_t old_write_domain;

	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
		return;

	i915_gem_clflush_object(obj);
	intel_gtt_chipset_flush();
	old_write_domain = obj->base.write_domain;
	obj->base.write_domain = 0;

	trace_i915_gem_object_change_domain(obj,
					    obj->base.read_domains,
					    old_write_domain);
}

/**
 * Moves a single object to the GTT read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	/* Not valid to be called on unbound objects. */
	if (obj->gtt_space == NULL)
		return -EINVAL;

	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
		return 0;

	ret = i915_gem_object_flush_gpu_write_domain(obj);
	if (ret)
		return ret;

	if (obj->pending_gpu_write || write) {
		ret = i915_gem_object_wait_rendering(obj);
		if (ret)
			return ret;
	}

	i915_gem_object_flush_cpu_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
		obj->dirty = 1;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}

int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
				    enum i915_cache_level cache_level)
{
	struct drm_device *dev = obj->base.dev;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;

	if (obj->cache_level == cache_level)
		return 0;

	if (obj->pin_count) {
		DRM_DEBUG("can not change the cache level of pinned objects\n");
		return -EBUSY;
	}

	if (obj->gtt_space) {
		ret = i915_gem_object_finish_gpu(obj);
		if (ret)
			return ret;

		i915_gem_object_finish_gtt(obj);

		/* Before SandyBridge, you could not use tiling or fence
		 * registers with snooped memory, so relinquish any fences
		 * currently pointing to our region in the aperture.
		 */
		if (INTEL_INFO(obj->base.dev)->gen < 6) {
			ret = i915_gem_object_put_fence(obj);
			if (ret)
				return ret;
		}
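		/* With any fence gone, rewrite the existing GTT and PPGTT
		 * entries: the PTEs encode the caching attributes, so they
		 * must be reprogrammed with the new cache level.
		 */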

		if (obj->has_global_gtt_mapping)
			i915_gem_gtt_bind_object(obj, cache_level);
		if (obj->has_aliasing_ppgtt_mapping)
			i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
					       obj, cache_level);
	}

	if (cache_level == I915_CACHE_NONE) {
		u32 old_read_domains, old_write_domain;

		/* If we're coming from LLC cached, then we haven't
		 * actually been tracking whether the data is in the
		 * CPU cache or not, since we only allow one bit set
		 * in obj->write_domain and have been skipping the clflushes.
		 * Just set it to the CPU cache for now.
		 */
		WARN_ON(obj->base.write_domain & ~I915_GEM_DOMAIN_CPU);
		WARN_ON(obj->base.read_domains & ~I915_GEM_DOMAIN_CPU);

		old_read_domains = obj->base.read_domains;
		old_write_domain = obj->base.write_domain;

		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;

		trace_i915_gem_object_change_domain(obj,
						    old_read_domains,
						    old_write_domain);
	}

	obj->cache_level = cache_level;
	return 0;
}

/*
 * Prepare buffer for display plane (scanout, cursors, etc).
 * Can be called from an uninterruptible phase (modesetting) and allows
 * any flushes to be pipelined (for pageflips).
 *
 * For the display plane, we want to be in the GTT but out of any write
 * domains. So in many ways this looks like set_to_gtt_domain() apart from the
 * ability to pipeline the waits, pinning and any additional subtleties
 * that may differentiate the display plane from ordinary buffers.
 */
int
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
				     u32 alignment,
				     struct intel_ring_buffer *pipelined)
{
	u32 old_read_domains, old_write_domain;
	int ret;

	ret = i915_gem_object_flush_gpu_write_domain(obj);
	if (ret)
		return ret;

	if (pipelined != obj->ring) {
		ret = i915_gem_object_wait_rendering(obj);
		if (ret == -ERESTARTSYS)
			return ret;
	}

	/* The display engine is not coherent with the LLC cache on gen6.  As
	 * a result, we make sure that the pinning that is about to occur is
	 * done with uncached PTEs. This is lowest common denominator for all
	 * chipsets.
	 *
	 * However for gen6+, we could do better by using the GFDT bit instead
	 * of uncaching, which would allow us to flush all the LLC-cached data
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
	 */
	ret = i915_gem_object_set_cache_level(obj, I915_CACHE_NONE);
	if (ret)
		return ret;

	/* As the user may map the buffer once pinned in the display plane
	 * (e.g. libkms for the bootup splash), we have to ensure that we
	 * always use map_and_fenceable for all scanout buffers.
	 */
	ret = i915_gem_object_pin(obj, alignment, true);
	if (ret)
		return ret;

	i915_gem_object_flush_cpu_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}

int
i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj)
{
	int ret;

	if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0)
		return 0;

	if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
		ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain);
		if (ret)
			return ret;
	}

	ret = i915_gem_object_wait_rendering(obj);
	if (ret)
		return ret;

	/* Ensure that we invalidate the GPU's caches and TLBs. */
	obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
	return 0;
}

/**
 * Moves a single object to the CPU read, and possibly write domain.
 *
 * This function returns when the move is complete, including waiting on
 * flushes to occur.
 */
int
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
{
	uint32_t old_write_domain, old_read_domains;
	int ret;

	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return 0;

	ret = i915_gem_object_flush_gpu_write_domain(obj);
	if (ret)
		return ret;

	ret = i915_gem_object_wait_rendering(obj);
	if (ret)
		return ret;

	i915_gem_object_flush_gtt_write_domain(obj);

	old_write_domain = obj->base.write_domain;
	old_read_domains = obj->base.read_domains;

	/* Flush the CPU cache if it's still invalid. */
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
		i915_gem_clflush_object(obj);

		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
	}

	/* It should now be out of any other write domains, and we can update
	 * the domain values for our changes.
	 */
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);

	/* If we're writing through the CPU, then the GPU read domains will
	 * need to be invalidated at next use.
	 */
	if (write) {
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	}

	trace_i915_gem_object_change_domain(obj,
					    old_read_domains,
					    old_write_domain);

	return 0;
}

2960 2961 2962
/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
2963 2964 2965 2966
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
2967 2968 2969
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
2970
static int
2971
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2972
{
2973 2974
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_file_private *file_priv = file->driver_priv;
2975
	unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
2976 2977 2978 2979
	struct drm_i915_gem_request *request;
	struct intel_ring_buffer *ring = NULL;
	u32 seqno = 0;
	int ret;
2980

2981 2982 2983
	if (atomic_read(&dev_priv->mm.wedged))
		return -EIO;

2984
	spin_lock(&file_priv->mm.lock);
2985
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
2986 2987
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;
2988

2989 2990
		ring = request->ring;
		seqno = request->seqno;
2991
	}
2992
	spin_unlock(&file_priv->mm.lock);
2993

2994 2995
	if (seqno == 0)
		return 0;
2996

2997
	ret = 0;
2998
	if (!i915_seqno_passed(ring->get_seqno(ring), seqno)) {
2999 3000 3001 3002 3003
		/* And wait for the seqno passing without holding any locks and
		 * causing extra latency for others. This is safe as the irq
		 * generation is designed to be run atomically and so is
		 * lockless.
		 */
3004 3005 3006 3007 3008
		if (ring->irq_get(ring)) {
			ret = wait_event_interruptible(ring->irq_queue,
						       i915_seqno_passed(ring->get_seqno(ring), seqno)
						       || atomic_read(&dev_priv->mm.wedged));
			ring->irq_put(ring);
3009

3010 3011
			if (ret == 0 && atomic_read(&dev_priv->mm.wedged))
				ret = -EIO;
3012 3013
		} else if (wait_for_atomic(i915_seqno_passed(ring->get_seqno(ring),
							     seqno) ||
3014 3015
				    atomic_read(&dev_priv->mm.wedged), 3000)) {
			ret = -EBUSY;
3016
		}
3017 3018
	}

3019 3020
	if (ret == 0)
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
3021 3022 3023 3024

	return ret;
}

3025
int
3026 3027
i915_gem_object_pin(struct drm_i915_gem_object *obj,
		    uint32_t alignment,
3028
		    bool map_and_fenceable)
3029
{
3030
	struct drm_device *dev = obj->base.dev;
C
Chris Wilson 已提交
3031
	struct drm_i915_private *dev_priv = dev->dev_private;
3032 3033
	int ret;

3034
	BUG_ON(obj->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
3035
	WARN_ON(i915_verify_lists(dev));
3036

3037 3038 3039 3040
	if (obj->gtt_space != NULL) {
		if ((alignment && obj->gtt_offset & (alignment - 1)) ||
		    (map_and_fenceable && !obj->map_and_fenceable)) {
			WARN(obj->pin_count,
3041
			     "bo is already pinned with incorrect alignment:"
3042 3043
			     " offset=%x, req.alignment=%x, req.map_and_fenceable=%d,"
			     " obj->map_and_fenceable=%d\n",
3044
			     obj->gtt_offset, alignment,
3045
			     map_and_fenceable,
3046
			     obj->map_and_fenceable);
3047 3048 3049 3050 3051 3052
			ret = i915_gem_object_unbind(obj);
			if (ret)
				return ret;
		}
	}

3053
	if (obj->gtt_space == NULL) {
3054
		ret = i915_gem_object_bind_to_gtt(obj, alignment,
3055
						  map_and_fenceable);
3056
		if (ret)
3057
			return ret;
3058
	}
J
Jesse Barnes 已提交
3059

3060 3061 3062
	if (!obj->has_global_gtt_mapping && map_and_fenceable)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

3063 3064 3065
	if (obj->pin_count++ == 0) {
		if (!obj->active)
			list_move_tail(&obj->mm_list,
C
Chris Wilson 已提交
3066
				       &dev_priv->mm.pinned_list);
3067
	}
3068
	obj->pin_mappable |= map_and_fenceable;
3069

3070
	WARN_ON(i915_verify_lists(dev));
3071 3072 3073 3074
	return 0;
}

void
3075
i915_gem_object_unpin(struct drm_i915_gem_object *obj)
3076
{
3077
	struct drm_device *dev = obj->base.dev;
3078 3079
	drm_i915_private_t *dev_priv = dev->dev_private;

3080
	WARN_ON(i915_verify_lists(dev));
3081 3082
	BUG_ON(obj->pin_count == 0);
	BUG_ON(obj->gtt_space == NULL);
3083

3084 3085 3086
	if (--obj->pin_count == 0) {
		if (!obj->active)
			list_move_tail(&obj->mm_list,
3087
				       &dev_priv->mm.inactive_list);
3088
		obj->pin_mappable = false;
3089
	}
3090
	WARN_ON(i915_verify_lists(dev));
3091 3092 3093 3094
}

int
i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3095
		   struct drm_file *file)
3096 3097
{
	struct drm_i915_gem_pin *args = data;
3098
	struct drm_i915_gem_object *obj;
3099 3100
	int ret;

3101 3102 3103
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;
3104

3105
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3106
	if (&obj->base == NULL) {
3107 3108
		ret = -ENOENT;
		goto unlock;
3109 3110
	}

3111
	if (obj->madv != I915_MADV_WILLNEED) {
C
Chris Wilson 已提交
3112
		DRM_ERROR("Attempting to pin a purgeable buffer\n");
3113 3114
		ret = -EINVAL;
		goto out;
3115 3116
	}

3117
	if (obj->pin_filp != NULL && obj->pin_filp != file) {
J
Jesse Barnes 已提交
3118 3119
		DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
3120 3121
		ret = -EINVAL;
		goto out;
J
Jesse Barnes 已提交
3122 3123
	}

3124 3125 3126
	obj->user_pin_count++;
	obj->pin_filp = file;
	if (obj->user_pin_count == 1) {
3127
		ret = i915_gem_object_pin(obj, args->alignment, true);
3128 3129
		if (ret)
			goto out;
3130 3131 3132 3133 3134
	}

	/* XXX - flush the CPU caches for pinned objects
	 * as the X server doesn't manage domains yet
	 */
3135
	i915_gem_object_flush_cpu_write_domain(obj);
3136
	args->offset = obj->gtt_offset;
3137
out:
3138
	drm_gem_object_unreference(&obj->base);
3139
unlock:
3140
	mutex_unlock(&dev->struct_mutex);
3141
	return ret;
3142 3143 3144 3145
}

int
i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3146
		     struct drm_file *file)
3147 3148
{
	struct drm_i915_gem_pin *args = data;
3149
	struct drm_i915_gem_object *obj;
3150
	int ret;
3151

3152 3153 3154
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;
3155

3156
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3157
	if (&obj->base == NULL) {
3158 3159
		ret = -ENOENT;
		goto unlock;
3160
	}
3161

3162
	if (obj->pin_filp != file) {
J
Jesse Barnes 已提交
3163 3164
		DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
			  args->handle);
3165 3166
		ret = -EINVAL;
		goto out;
J
Jesse Barnes 已提交
3167
	}
3168 3169 3170
	obj->user_pin_count--;
	if (obj->user_pin_count == 0) {
		obj->pin_filp = NULL;
J
Jesse Barnes 已提交
3171 3172
		i915_gem_object_unpin(obj);
	}
3173

3174
out:
3175
	drm_gem_object_unreference(&obj->base);
3176
unlock:
3177
	mutex_unlock(&dev->struct_mutex);
3178
	return ret;
3179 3180 3181 3182
}

int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3183
		    struct drm_file *file)
3184 3185
{
	struct drm_i915_gem_busy *args = data;
3186
	struct drm_i915_gem_object *obj;
3187 3188
	int ret;

3189
	ret = i915_mutex_lock_interruptible(dev);
3190
	if (ret)
3191
		return ret;
3192

3193
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3194
	if (&obj->base == NULL) {
3195 3196
		ret = -ENOENT;
		goto unlock;
3197
	}
3198

3199 3200 3201 3202
	/* Count all active objects as busy, even if they are currently not used
	 * by the gpu. Users of this interface expect objects to eventually
	 * become non-busy without any further actions, therefore emit any
	 * necessary flushes here.
3203
	 */
3204
	args->busy = obj->active;
3205 3206 3207 3208 3209 3210
	if (args->busy) {
		/* Unconditionally flush objects, even when the gpu still uses this
		 * object. Userspace calling this function indicates that it wants to
		 * use this buffer rather sooner than later, so issuing the required
		 * flush earlier is beneficial.
		 */
3211
		if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
C
Chris Wilson 已提交
3212
			ret = i915_gem_flush_ring(obj->ring,
3213
						  0, obj->base.write_domain);
3214 3215 3216 3217
		} else if (obj->ring->outstanding_lazy_request ==
			   obj->last_rendering_seqno) {
			struct drm_i915_gem_request *request;

3218 3219 3220
			/* This ring is not being cleared by active usage,
			 * so emit a request to do so.
			 */
3221
			request = kzalloc(sizeof(*request), GFP_KERNEL);
3222
			if (request) {
3223
				ret = i915_add_request(obj->ring, NULL, request);
3224 3225 3226
				if (ret)
					kfree(request);
			} else
3227 3228
				ret = -ENOMEM;
		}
3229 3230 3231 3232 3233 3234

		/* Update the active list for the hardware's current position.
		 * Otherwise this only updates on a delayed timer or when irqs
		 * are actually unmasked, and our working set ends up being
		 * larger than required.
		 */
C
Chris Wilson 已提交
3235
		i915_gem_retire_requests_ring(obj->ring);
3236

3237
		args->busy = obj->active;
3238
	}
3239

3240
	drm_gem_object_unreference(&obj->base);
3241
unlock:
3242
	mutex_unlock(&dev->struct_mutex);
3243
	return ret;
3244 3245 3246 3247 3248 3249
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
3250
	return i915_gem_ring_throttle(dev, file_priv);
3251 3252
}

3253 3254 3255 3256 3257
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	struct drm_i915_gem_madvise *args = data;
3258
	struct drm_i915_gem_object *obj;
3259
	int ret;
3260 3261 3262 3263 3264 3265 3266 3267 3268

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
	    break;
	default:
	    return -EINVAL;
	}

3269 3270 3271 3272
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

3273
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3274
	if (&obj->base == NULL) {
3275 3276
		ret = -ENOENT;
		goto unlock;
3277 3278
	}

3279
	if (obj->pin_count) {
3280 3281
		ret = -EINVAL;
		goto out;
3282 3283
	}

3284 3285
	if (obj->madv != __I915_MADV_PURGED)
		obj->madv = args->madv;
3286

3287
	/* if the object is no longer bound, discard its backing storage */
3288 3289
	if (i915_gem_object_is_purgeable(obj) &&
	    obj->gtt_space == NULL)
3290 3291
		i915_gem_object_truncate(obj);

3292
	args->retained = obj->madv != __I915_MADV_PURGED;
C
Chris Wilson 已提交
3293

3294
out:
3295
	drm_gem_object_unreference(&obj->base);
3296
unlock:
3297
	mutex_unlock(&dev->struct_mutex);
3298
	return ret;
3299 3300
}

3301 3302
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
						  size_t size)
3303
{
3304
	struct drm_i915_private *dev_priv = dev->dev_private;
3305
	struct drm_i915_gem_object *obj;
3306
	struct address_space *mapping;
3307

3308 3309 3310
	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	if (obj == NULL)
		return NULL;
3311

3312 3313 3314 3315
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
		kfree(obj);
		return NULL;
	}
3316

3317 3318 3319
	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	mapping_set_gfp_mask(mapping, GFP_HIGHUSER | __GFP_RECLAIMABLE);

3320 3321
	i915_gem_info_add_obj(dev_priv, size);

3322 3323
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3324

3325 3326
	if (HAS_LLC(dev)) {
		/* On some devices, we can have the GPU use the LLC (the CPU
3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341
		 * cache) for about a 10% performance improvement
		 * compared to uncached.  Graphics requests other than
		 * display scanout are coherent with the CPU in
		 * accessing this cache.  This means in this mode we
		 * don't need to clflush on the CPU side, and on the
		 * GPU side we only need to flush internal caches to
		 * get data visible to the CPU.
		 *
		 * However, we maintain the display planes as UC, and so
		 * need to rebind when first used as such.
		 */
		obj->cache_level = I915_CACHE_LLC;
	} else
		obj->cache_level = I915_CACHE_NONE;

3342
	obj->base.driver_private = NULL;
3343
	obj->fence_reg = I915_FENCE_REG_NONE;
3344
	INIT_LIST_HEAD(&obj->mm_list);
D
Daniel Vetter 已提交
3345
	INIT_LIST_HEAD(&obj->gtt_list);
3346
	INIT_LIST_HEAD(&obj->ring_list);
3347
	INIT_LIST_HEAD(&obj->exec_list);
3348 3349
	INIT_LIST_HEAD(&obj->gpu_write_list);
	obj->madv = I915_MADV_WILLNEED;
3350 3351
	/* Avoid an unnecessary call to unbind on the first bind. */
	obj->map_and_fenceable = true;
3352

3353
	return obj;
3354 3355 3356 3357 3358
}

int i915_gem_init_object(struct drm_gem_object *obj)
{
	BUG();
3359

3360 3361 3362
	return 0;
}

3363
static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
3364
{
3365
	struct drm_device *dev = obj->base.dev;
3366 3367
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;
3368

3369 3370
	ret = i915_gem_object_unbind(obj);
	if (ret == -ERESTARTSYS) {
3371
		list_move(&obj->mm_list,
3372 3373 3374
			  &dev_priv->mm.deferred_free_list);
		return;
	}
3375

3376 3377
	trace_i915_gem_object_destroy(obj);

3378
	if (obj->base.map_list.map)
3379
		drm_gem_free_mmap_offset(&obj->base);
3380

3381 3382
	drm_gem_object_release(&obj->base);
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
3383

3384 3385
	kfree(obj->bit_17);
	kfree(obj);
3386 3387
}

3388
void i915_gem_free_object(struct drm_gem_object *gem_obj)
3389
{
3390 3391
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
	struct drm_device *dev = obj->base.dev;
3392

3393
	while (obj->pin_count > 0)
3394 3395
		i915_gem_object_unpin(obj);

3396
	if (obj->phys_obj)
3397 3398 3399 3400 3401
		i915_gem_detach_phys_object(dev, obj);

	i915_gem_free_object_tail(obj);
}

3402 3403 3404 3405 3406
int
i915_gem_idle(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;
3407

3408
	mutex_lock(&dev->struct_mutex);
C
Chris Wilson 已提交
3409

3410
	if (dev_priv->mm.suspended) {
3411 3412
		mutex_unlock(&dev->struct_mutex);
		return 0;
3413 3414
	}

3415
	ret = i915_gpu_idle(dev, true);
3416 3417
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
3418
		return ret;
3419
	}
3420

3421 3422
	/* Under UMS, be paranoid and evict. */
	if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
3423
		ret = i915_gem_evict_inactive(dev, false);
3424 3425 3426 3427 3428 3429
		if (ret) {
			mutex_unlock(&dev->struct_mutex);
			return ret;
		}
	}

3430 3431
	i915_gem_reset_fences(dev);

3432 3433 3434 3435 3436
	/* Hack!  Don't let anybody do execbuf while we don't control the chip.
	 * We need to replace this with a semaphore, or something.
	 * And not confound mm.suspended!
	 */
	dev_priv->mm.suspended = 1;
3437
	del_timer_sync(&dev_priv->hangcheck_timer);
3438 3439

	i915_kernel_lost_context(dev);
3440
	i915_gem_cleanup_ringbuffer(dev);
3441

3442 3443
	mutex_unlock(&dev->struct_mutex);

3444 3445 3446
	/* Cancel the retire work handler, which should be idle now. */
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);

3447 3448 3449
	return 0;
}

3450 3451 3452 3453
void i915_gem_init_swizzling(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

3454
	if (INTEL_INFO(dev)->gen < 5 ||
3455 3456 3457 3458 3459 3460
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

3461 3462 3463
	if (IS_GEN5(dev))
		return;

3464 3465 3466 3467 3468 3469
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
	if (IS_GEN6(dev))
		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_SNB));
	else
		I915_WRITE(ARB_MODE, ARB_MODE_ENABLE(ARB_MODE_SWIZZLE_IVB));
}
D
Daniel Vetter 已提交
3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506

void i915_gem_init_ppgtt(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	uint32_t pd_offset;
	struct intel_ring_buffer *ring;
	int i;

	if (!dev_priv->mm.aliasing_ppgtt)
		return;

	pd_offset = dev_priv->mm.aliasing_ppgtt->pd_offset;
	pd_offset /= 64; /* in cachelines, */
	pd_offset <<= 16;

	if (INTEL_INFO(dev)->gen == 6) {
		uint32_t ecochk = I915_READ(GAM_ECOCHK);
		I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT |
				       ECOCHK_PPGTT_CACHE64B);
		I915_WRITE(GFX_MODE, GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));
	} else if (INTEL_INFO(dev)->gen >= 7) {
		I915_WRITE(GAM_ECOCHK, ECOCHK_PPGTT_CACHE64B);
		/* GFX_MODE is per-ring on gen7+ */
	}

	for (i = 0; i < I915_NUM_RINGS; i++) {
		ring = &dev_priv->ring[i];

		if (INTEL_INFO(dev)->gen >= 7)
			I915_WRITE(RING_MODE_GEN7(ring),
				   GFX_MODE_ENABLE(GFX_PPGTT_ENABLE));

		I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
		I915_WRITE(RING_PP_DIR_BASE(ring), pd_offset);
	}
}

3507
int
3508
i915_gem_init_hw(struct drm_device *dev)
3509 3510 3511
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret;
3512

3513 3514
	i915_gem_init_swizzling(dev);

3515
	ret = intel_init_render_ring_buffer(dev);
3516
	if (ret)
3517
		return ret;
3518 3519

	if (HAS_BSD(dev)) {
3520
		ret = intel_init_bsd_ring_buffer(dev);
3521 3522
		if (ret)
			goto cleanup_render_ring;
3523
	}
3524

3525 3526 3527 3528 3529 3530
	if (HAS_BLT(dev)) {
		ret = intel_init_blt_ring_buffer(dev);
		if (ret)
			goto cleanup_bsd_ring;
	}

3531 3532
	dev_priv->next_seqno = 1;

D
Daniel Vetter 已提交
3533 3534
	i915_gem_init_ppgtt(dev);

3535 3536
	return 0;

3537
cleanup_bsd_ring:
3538
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
3539
cleanup_render_ring:
3540
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3541 3542 3543 3544 3545 3546 3547
	return ret;
}

void
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
3548
	int i;
3549

3550 3551
	for (i = 0; i < I915_NUM_RINGS; i++)
		intel_cleanup_ring_buffer(&dev_priv->ring[i]);
3552 3553
}

3554 3555 3556 3557 3558
int
i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
3559
	int ret, i;
3560

J
Jesse Barnes 已提交
3561 3562 3563
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

3564
	if (atomic_read(&dev_priv->mm.wedged)) {
3565
		DRM_ERROR("Reenabling wedged hardware, good luck\n");
3566
		atomic_set(&dev_priv->mm.wedged, 0);
3567 3568 3569
	}

	mutex_lock(&dev->struct_mutex);
3570 3571
	dev_priv->mm.suspended = 0;

3572
	ret = i915_gem_init_hw(dev);
3573 3574
	if (ret != 0) {
		mutex_unlock(&dev->struct_mutex);
3575
		return ret;
3576
	}
3577

3578
	BUG_ON(!list_empty(&dev_priv->mm.active_list));
3579 3580
	BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
	BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
3581 3582 3583 3584
	for (i = 0; i < I915_NUM_RINGS; i++) {
		BUG_ON(!list_empty(&dev_priv->ring[i].active_list));
		BUG_ON(!list_empty(&dev_priv->ring[i].request_list));
	}
3585
	mutex_unlock(&dev->struct_mutex);
3586

3587 3588 3589
	ret = drm_irq_install(dev);
	if (ret)
		goto cleanup_ringbuffer;
3590

3591
	return 0;
3592 3593 3594 3595 3596 3597 3598 3599

cleanup_ringbuffer:
	mutex_lock(&dev->struct_mutex);
	i915_gem_cleanup_ringbuffer(dev);
	dev_priv->mm.suspended = 1;
	mutex_unlock(&dev->struct_mutex);

	return ret;
3600 3601 3602 3603 3604 3605
}

int
i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
J
Jesse Barnes 已提交
3606 3607 3608
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return 0;

3609
	drm_irq_uninstall(dev);
3610
	return i915_gem_idle(dev);
3611 3612 3613 3614 3615 3616 3617
}

void
i915_gem_lastclose(struct drm_device *dev)
{
	int ret;

3618 3619 3620
	if (drm_core_check_feature(dev, DRIVER_MODESET))
		return;

3621 3622 3623
	ret = i915_gem_idle(dev);
	if (ret)
		DRM_ERROR("failed to idle hardware: %d\n", ret);
3624 3625
}

3626 3627 3628 3629 3630 3631 3632 3633
static void
init_ring_lists(struct intel_ring_buffer *ring)
{
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->gpu_write_list);
}

void
i915_gem_load(struct drm_device *dev)
{
	int i;
	drm_i915_private_t *dev_priv = dev->dev_private;

	INIT_LIST_HEAD(&dev_priv->mm.active_list);
	INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
	INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
	INIT_LIST_HEAD(&dev_priv->mm.pinned_list);
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
	INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
	INIT_LIST_HEAD(&dev_priv->mm.gtt_list);
	for (i = 0; i < I915_NUM_RINGS; i++)
		init_ring_lists(&dev_priv->ring[i]);
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
			  i915_gem_retire_work_handler);
	init_completion(&dev_priv->error_completion);

	/* On GEN3 we really need to make sure the ARB C3 LP bit is set */
	if (IS_GEN3(dev)) {
		u32 tmp = I915_READ(MI_ARB_STATE);
		if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
			/* arb state is a masked write, so set bit + bit in mask */
			tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
			I915_WRITE(MI_ARB_STATE, tmp);
		}
	}

	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;

	/* Old X drivers will take 0-2 for front, back, depth buffers */
	if (!drm_core_check_feature(dev, DRIVER_MODESET))
		dev_priv->fence_reg_start = 3;

	if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	/* Initialize fence registers to zero */
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
	}

	i915_gem_detect_bit_6_swizzle(dev);
	init_waitqueue_head(&dev_priv->pending_flip_queue);

	dev_priv->mm.interruptible = true;

	dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
	dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&dev_priv->mm.inactive_shrinker);
}

/*
 * Create a physically contiguous memory object for this object
 * e.g. for cursor + overlay regs
 */
static int i915_gem_init_phys_object(struct drm_device *dev,
				     int id, int size, int align)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;
	int ret;

	if (dev_priv->mm.phys_objs[id - 1] || !size)
		return 0;

	phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
	if (!phys_obj)
		return -ENOMEM;

	phys_obj->id = id;

	phys_obj->handle = drm_pci_alloc(dev, size, align);
	if (!phys_obj->handle) {
		ret = -ENOMEM;
		goto kfree_obj;
	}
#ifdef CONFIG_X86
	set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif

	dev_priv->mm.phys_objs[id - 1] = phys_obj;

	return 0;
kfree_obj:
	kfree(phys_obj);
	return ret;
}

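/* Tear down a single phys object, detaching any GEM object still bound to it. */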
static void i915_gem_free_phys_object(struct drm_device *dev, int id)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_phys_object *phys_obj;

	if (!dev_priv->mm.phys_objs[id - 1])
		return;

	phys_obj = dev_priv->mm.phys_objs[id - 1];
	if (phys_obj->cur_obj) {
		i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
	}

#ifdef CONFIG_X86
	set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
#endif
	drm_pci_free(dev, phys_obj->handle);
	kfree(phys_obj);
	dev_priv->mm.phys_objs[id - 1] = NULL;
}

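/* Release every allocated phys object slot. */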
void i915_gem_free_all_phys_object(struct drm_device *dev)
{
	int i;

	for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
		i915_gem_free_phys_object(dev, i);
}

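/*
 * Copy the contents of the phys object back into the GEM object's shmem
 * pages and sever the binding, returning the object to ordinary shmem-backed
 * storage.
 */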
void i915_gem_detach_phys_object(struct drm_device *dev,
				 struct drm_i915_gem_object *obj)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	char *vaddr;
	int i;
	int page_count;

	if (!obj->phys_obj)
		return;
	vaddr = obj->phys_obj->handle->vaddr;

	page_count = obj->base.size / PAGE_SIZE;
	for (i = 0; i < page_count; i++) {
		struct page *page = shmem_read_mapping_page(mapping, i);
		if (!IS_ERR(page)) {
			char *dst = kmap_atomic(page);
			memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE);
			kunmap_atomic(dst);

			drm_clflush_pages(&page, 1);

			set_page_dirty(page);
			mark_page_accessed(page);
			page_cache_release(page);
		}
	}
	intel_gtt_chipset_flush();

	obj->phys_obj->cur_obj = NULL;
	obj->phys_obj = NULL;
}

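/*
 * Bind a GEM object to a physically contiguous backing store, allocating the
 * phys object on first use and copying the current shmem contents into it.
 */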
int
i915_gem_attach_phys_object(struct drm_device *dev,
			    struct drm_i915_gem_object *obj,
			    int id,
			    int align)
{
	struct address_space *mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret = 0;
	int page_count;
	int i;

	if (id > I915_MAX_PHYS_OBJECT)
		return -EINVAL;

	if (obj->phys_obj) {
		if (obj->phys_obj->id == id)
			return 0;
		i915_gem_detach_phys_object(dev, obj);
	}

	/* create a new object */
	if (!dev_priv->mm.phys_objs[id - 1]) {
		ret = i915_gem_init_phys_object(dev, id,
						obj->base.size, align);
		if (ret) {
			DRM_ERROR("failed to init phys object %d size: %zu\n",
				  id, obj->base.size);
			return ret;
		}
	}

	/* bind to the object */
	obj->phys_obj = dev_priv->mm.phys_objs[id - 1];
	obj->phys_obj->cur_obj = obj;

	page_count = obj->base.size / PAGE_SIZE;

	for (i = 0; i < page_count; i++) {
		struct page *page;
		char *dst, *src;

		page = shmem_read_mapping_page(mapping, i);
		if (IS_ERR(page))
			return PTR_ERR(page);

		src = kmap_atomic(page);
		dst = obj->phys_obj->handle->vaddr + (i * PAGE_SIZE);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(src);

		mark_page_accessed(page);
		page_cache_release(page);
	}

	return 0;
}

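/*
 * pwrite fast path for phys objects: try a non-caching atomic copy straight
 * into the contiguous backing store, and on failure drop struct_mutex and
 * fall back to a faulting copy_from_user.
 */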
static int
i915_gem_phys_pwrite(struct drm_device *dev,
		     struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
		     struct drm_file *file_priv)
{
	void *vaddr = obj->phys_obj->handle->vaddr + args->offset;
	char __user *user_data = (char __user *) (uintptr_t) args->data_ptr;

	if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
		unsigned long unwritten;

		/* The physical object once assigned is fixed for the lifetime
		 * of the obj, so we can safely drop the lock and continue
		 * to access vaddr.
		 */
		mutex_unlock(&dev->struct_mutex);
		unwritten = copy_from_user(vaddr, user_data, args->size);
		mutex_lock(&dev->struct_mutex);
		if (unwritten)
			return -EFAULT;
	}

	intel_gtt_chipset_flush();
	return 0;
}

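/*
 * File-close hook: disassociate any requests still owned by this client so
 * that later request retirement never dereferences the freed file_priv.
 */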
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	while (!list_empty(&file_priv->mm.request_list)) {
		struct drm_i915_gem_request *request;

		request = list_first_entry(&file_priv->mm.request_list,
					   struct drm_i915_gem_request,
					   client_list);
		list_del(&request->client_list);
		request->file_priv = NULL;
	}
	spin_unlock(&file_priv->mm.lock);
}

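/* The GPU counts as active while any objects sit on the flushing or active lists. */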
static int
i915_gpu_is_active(struct drm_device *dev)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int lists_empty;

	lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
		      list_empty(&dev_priv->mm.active_list);

	return !lists_empty;
}

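/*
 * Shrinker callback registered in i915_gem_load(). With nr_to_scan == 0 it
 * merely reports how many inactive objects could be reclaimed; otherwise it
 * unbinds purgeable objects first, then anything else on the inactive list,
 * and as a last resort waits for the GPU to idle before rescanning.
 */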
static int
i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *dev_priv =
		container_of(shrinker,
			     struct drm_i915_private,
			     mm.inactive_shrinker);
	struct drm_device *dev = dev_priv->dev;
	struct drm_i915_gem_object *obj, *next;
	int nr_to_scan = sc->nr_to_scan;
	int cnt;

	if (!mutex_trylock(&dev->struct_mutex))
		return 0;

	/* "fast-path" to count number of available objects */
	if (nr_to_scan == 0) {
		cnt = 0;
		list_for_each_entry(obj,
				    &dev_priv->mm.inactive_list,
				    mm_list)
			cnt++;
		mutex_unlock(&dev->struct_mutex);
		return cnt / 100 * sysctl_vfs_cache_pressure;
	}

rescan:
	/* first scan for clean buffers */
	i915_gem_retire_requests(dev);

	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (i915_gem_object_is_purgeable(obj)) {
			if (i915_gem_object_unbind(obj) == 0 &&
			    --nr_to_scan == 0)
				break;
		}
	}

	/* second pass, evict/count anything still on the inactive list */
	cnt = 0;
	list_for_each_entry_safe(obj, next,
				 &dev_priv->mm.inactive_list,
				 mm_list) {
		if (nr_to_scan &&
		    i915_gem_object_unbind(obj) == 0)
			nr_to_scan--;
		else
			cnt++;
	}

	if (nr_to_scan && i915_gpu_is_active(dev)) {
		/*
		 * We are desperate for pages, so as a last resort, wait
		 * for the GPU to finish and discard whatever we can.
		 * This has a dramatic impact on reducing the number of
		 * OOM-killer events whilst running the GPU aggressively.
		 */
		if (i915_gpu_idle(dev, true) == 0)
			goto rescan;
	}
	mutex_unlock(&dev->struct_mutex);
	return cnt / 100 * sysctl_vfs_cache_pressure;
}