i915_gem.c 60.0 KB
Newer Older
1
/*
2
 * Copyright © 2008-2015 Intel Corporation
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

28
#include <drm/drm_vma_manager.h>
29
#include <drm/i915_drm.h>
30
#include <linux/dma-fence-array.h>
31
#include <linux/kthread.h>
32
#include <linux/reservation.h>
33
#include <linux/shmem_fs.h>
34
#include <linux/slab.h>
35
#include <linux/stop_machine.h>
36
#include <linux/swap.h>
J
Jesse Barnes 已提交
37
#include <linux/pci.h>
38
#include <linux/dma-buf.h>
39
#include <linux/mman.h>
40

41 42
#include "gem/i915_gem_clflush.h"
#include "gem/i915_gem_context.h"
43
#include "gem/i915_gem_ioctls.h"
44 45
#include "gem/i915_gem_pm.h"
#include "gem/i915_gemfs.h"
46 47
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt_pm.h"
48 49 50 51
#include "gt/intel_mocs.h"
#include "gt/intel_reset.h"
#include "gt/intel_workarounds.h"

52
#include "i915_drv.h"
53
#include "i915_scatterlist.h"
54 55 56
#include "i915_trace.h"
#include "i915_vgpu.h"

57
#include "intel_display.h"
58 59
#include "intel_drv.h"
#include "intel_frontbuffer.h"
60
#include "intel_pm.h"
61

62
static int
63
insert_mappable_node(struct i915_ggtt *ggtt,
64 65 66
                     struct drm_mm_node *node, u32 size)
{
	memset(node, 0, sizeof(*node));
67
	return drm_mm_insert_node_in_range(&ggtt->vm.mm, node,
68 69 70
					   size, 0, I915_COLOR_UNEVICTABLE,
					   0, ggtt->mappable_end,
					   DRM_MM_INSERT_LOW);
71 72 73 74 75 76 77 78
}

static void
remove_mappable_node(struct drm_mm_node *node)
{
	drm_mm_remove_node(node);
}

79 80
int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
81
			    struct drm_file *file)
82
{
83
	struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
84
	struct drm_i915_gem_get_aperture *args = data;
85
	struct i915_vma *vma;
86
	u64 pinned;
87

88 89
	mutex_lock(&ggtt->vm.mutex);

90
	pinned = ggtt->vm.reserved;
91
	list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
92
		if (i915_vma_is_pinned(vma))
93
			pinned += vma->node.size;
94 95

	mutex_unlock(&ggtt->vm.mutex);
96

97
	args->aper_size = ggtt->vm.total;
98
	args->aper_available_size = args->aper_size - pinned;
99

100 101 102
	return 0;
}

103
int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
104 105 106
{
	struct i915_vma *vma;
	LIST_HEAD(still_in_list);
107 108 109
	int ret;

	lockdep_assert_held(&obj->base.dev->struct_mutex);
110

111 112 113 114
	/* Closed vma are removed from the obj->vma_list - but they may
	 * still have an active binding on the object. To remove those we
	 * must wait for all rendering to complete to the object (as unbinding
	 * must anyway), and retire the requests.
115
	 */
116
	ret = i915_gem_object_set_to_cpu_domain(obj, false);
117 118 119
	if (ret)
		return ret;

120 121 122 123
	spin_lock(&obj->vma.lock);
	while (!ret && (vma = list_first_entry_or_null(&obj->vma.list,
						       struct i915_vma,
						       obj_link))) {
124
		list_move_tail(&vma->obj_link, &still_in_list);
125 126
		spin_unlock(&obj->vma.lock);

127
		ret = i915_vma_unbind(vma);
128 129

		spin_lock(&obj->vma.lock);
130
	}
131 132
	list_splice(&still_in_list, &obj->vma.list);
	spin_unlock(&obj->vma.lock);
133 134 135 136

	return ret;
}

137 138 139
static long
i915_gem_object_wait_fence(struct dma_fence *fence,
			   unsigned int flags,
140
			   long timeout)
141
{
142
	struct i915_request *rq;
143

144
	BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
145

146 147 148 149 150 151 152 153 154
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return timeout;

	if (!dma_fence_is_i915(fence))
		return dma_fence_wait_timeout(fence,
					      flags & I915_WAIT_INTERRUPTIBLE,
					      timeout);

	rq = to_request(fence);
155
	if (i915_request_completed(rq))
156 157
		goto out;

158
	timeout = i915_request_wait(rq, flags, timeout);
159 160

out:
161 162
	if (flags & I915_WAIT_LOCKED && i915_request_completed(rq))
		i915_request_retire_upto(rq);
163 164 165 166 167 168 169

	return timeout;
}

static long
i915_gem_object_wait_reservation(struct reservation_object *resv,
				 unsigned int flags,
170
				 long timeout)
171
{
172
	unsigned int seq = __read_seqcount_begin(&resv->seq);
173
	struct dma_fence *excl;
174
	bool prune_fences = false;
175 176 177 178

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
179 180
		int ret;

181 182
		ret = reservation_object_get_fences_rcu(resv,
							&excl, &count, &shared);
183 184 185
		if (ret)
			return ret;

186 187
		for (i = 0; i < count; i++) {
			timeout = i915_gem_object_wait_fence(shared[i],
188
							     flags, timeout);
189
			if (timeout < 0)
190
				break;
191

192 193 194 195 196 197
			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
198

199 200 201 202 203 204 205 206 207
		/*
		 * If both shared fences and an exclusive fence exist,
		 * then by construction the shared fences must be later
		 * than the exclusive fence. If we successfully wait for
		 * all the shared fences, we know that the exclusive fence
		 * must all be signaled. If all the shared fences are
		 * signaled, we can prune the array and recover the
		 * floating references on the fences/requests.
		 */
208
		prune_fences = count && timeout >= 0;
209 210
	} else {
		excl = reservation_object_get_excl_rcu(resv);
211 212
	}

213
	if (excl && timeout >= 0)
214
		timeout = i915_gem_object_wait_fence(excl, flags, timeout);
215 216 217

	dma_fence_put(excl);

218 219
	/*
	 * Opportunistically prune the fences iff we know they have *all* been
220 221 222
	 * signaled and that the reservation object has not been changed (i.e.
	 * no new fences have been added).
	 */
223
	if (prune_fences && !__read_seqcount_retry(&resv->seq, seq)) {
224 225 226 227 228
		if (reservation_object_trylock(resv)) {
			if (!__read_seqcount_retry(&resv->seq, seq))
				reservation_object_add_excl_fence(resv, NULL);
			reservation_object_unlock(resv);
		}
229 230
	}

231
	return timeout;
232 233
}

234 235
static void __fence_set_priority(struct dma_fence *fence,
				 const struct i915_sched_attr *attr)
236
{
237
	struct i915_request *rq;
238 239
	struct intel_engine_cs *engine;

240
	if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
241 242 243 244 245
		return;

	rq = to_request(fence);
	engine = rq->engine;

246 247
	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
248
	if (engine->schedule)
249
		engine->schedule(rq, attr);
250
	rcu_read_unlock();
251
	local_bh_enable(); /* kick the tasklets if queues were reprioritised */
252 253
}

254 255
static void fence_set_priority(struct dma_fence *fence,
			       const struct i915_sched_attr *attr)
256 257 258 259 260 261 262
{
	/* Recurse once into a fence-array */
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);
		int i;

		for (i = 0; i < array->num_fences; i++)
263
			__fence_set_priority(array->fences[i], attr);
264
	} else {
265
		__fence_set_priority(fence, attr);
266 267 268 269 270 271
	}
}

int
i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
			      unsigned int flags,
272
			      const struct i915_sched_attr *attr)
273 274 275 276 277 278 279 280 281 282 283 284 285 286
{
	struct dma_fence *excl;

	if (flags & I915_WAIT_ALL) {
		struct dma_fence **shared;
		unsigned int count, i;
		int ret;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
287
			fence_set_priority(shared[i], attr);
288 289 290 291 292 293 294 295 296
			dma_fence_put(shared[i]);
		}

		kfree(shared);
	} else {
		excl = reservation_object_get_excl_rcu(obj->resv);
	}

	if (excl) {
297
		fence_set_priority(excl, attr);
298 299 300 301 302
		dma_fence_put(excl);
	}
	return 0;
}

303 304 305 306 307
/**
 * Waits for rendering to the object to be completed
 * @obj: i915 gem object
 * @flags: how to wait (under a lock, for all rendering or just for writes etc)
 * @timeout: how long to wait
308
 */
309 310 311
int
i915_gem_object_wait(struct drm_i915_gem_object *obj,
		     unsigned int flags,
312
		     long timeout)
313
{
314 315
	might_sleep();
	GEM_BUG_ON(timeout < 0);
316

317
	timeout = i915_gem_object_wait_reservation(obj->resv, flags, timeout);
318
	return timeout < 0 ? timeout : 0;
319 320
}

321 322 323
static int
i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pwrite *args,
324
		     struct drm_file *file)
325 326
{
	void *vaddr = obj->phys_handle->vaddr + args->offset;
327
	char __user *user_data = u64_to_user_ptr(args->data_ptr);
328 329 330 331

	/* We manually control the domain here and pretend that it
	 * remains coherent i.e. in the GTT domain, like shmem_pwrite.
	 */
332
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
333 334
	if (copy_from_user(vaddr, user_data, args->size))
		return -EFAULT;
335

336
	drm_clflush_virt_range(vaddr, args->size);
337
	i915_gem_chipset_flush(to_i915(obj->base.dev));
338

339
	intel_fb_obj_flush(obj, ORIGIN_CPU);
340
	return 0;
341 342
}

343 344
static int
i915_gem_create(struct drm_file *file,
345
		struct drm_i915_private *dev_priv,
346
		u64 *size_p,
347
		u32 *handle_p)
348
{
349
	struct drm_i915_gem_object *obj;
350
	u32 handle;
351 352
	u64 size;
	int ret;
353

354
	size = round_up(*size_p, PAGE_SIZE);
355 356
	if (size == 0)
		return -EINVAL;
357 358

	/* Allocate the new object */
359
	obj = i915_gem_object_create_shmem(dev_priv, size);
360 361
	if (IS_ERR(obj))
		return PTR_ERR(obj);
362

363
	ret = drm_gem_handle_create(file, &obj->base, &handle);
364
	/* drop reference from allocate - handle holds it now */
C
Chris Wilson 已提交
365
	i915_gem_object_put(obj);
366 367
	if (ret)
		return ret;
368

369
	*handle_p = handle;
370
	*size_p = size;
371 372 373
	return 0;
}

374 375 376 377 378
int
i915_gem_dumb_create(struct drm_file *file,
		     struct drm_device *dev,
		     struct drm_mode_create_dumb *args)
{
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
	int cpp = DIV_ROUND_UP(args->bpp, 8);
	u32 format;

	switch (cpp) {
	case 1:
		format = DRM_FORMAT_C8;
		break;
	case 2:
		format = DRM_FORMAT_RGB565;
		break;
	case 4:
		format = DRM_FORMAT_XRGB8888;
		break;
	default:
		return -EINVAL;
	}

396
	/* have to work out size/pitch and return them */
397 398 399 400 401 402 403
	args->pitch = ALIGN(args->width * cpp, 64);

	/* align stride to page size so that we can remap */
	if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
						    DRM_FORMAT_MOD_LINEAR))
		args->pitch = ALIGN(args->pitch, 4096);

404
	args->size = args->pitch * args->height;
405
	return i915_gem_create(file, to_i915(dev),
406
			       &args->size, &args->handle);
407 408 409 410
}

/**
 * Creates a new mm object and returns a handle to it.
411 412 413
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
414 415 416 417 418
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file)
{
419
	struct drm_i915_private *dev_priv = to_i915(dev);
420
	struct drm_i915_gem_create *args = data;
421

422
	i915_gem_flush_free_objects(dev_priv);
423

424
	return i915_gem_create(file, dev_priv,
425
			       &args->size, &args->handle);
426 427
}

428
void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv)
429
{
430 431
	intel_wakeref_t wakeref;

432 433 434 435 436
	/*
	 * No actual flushing is required for the GTT write domain for reads
	 * from the GTT domain. Writes to it "immediately" go to main memory
	 * as far as we know, so there's no chipset flush. It also doesn't
	 * land in the GPU render cache.
437 438 439 440 441 442 443 444 445 446
	 *
	 * However, we do have to enforce the order so that all writes through
	 * the GTT land before any writes to the device, such as updates to
	 * the GATT itself.
	 *
	 * We also have to wait a bit for the writes to land from the GTT.
	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
	 * timing. This issue has only been observed when switching quickly
	 * between GTT writes and CPU reads from inside the kernel on recent hw,
	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
447 448
	 * system agents we cannot reproduce this behaviour, until Cannonlake
	 * that was!).
449
	 */
450

451 452 453 454 455
	wmb();

	if (INTEL_INFO(dev_priv)->has_coherent_ggtt)
		return;

456
	i915_gem_chipset_flush(dev_priv);
457

458 459
	with_intel_runtime_pm(dev_priv, wakeref) {
		spin_lock_irq(&dev_priv->uncore.lock);
460

461
		POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE));
462

463 464
		spin_unlock_irq(&dev_priv->uncore.lock);
	}
465 466
}

467
static int
468 469
shmem_pread(struct page *page, int offset, int len, char __user *user_data,
	    bool needs_clflush)
470 471 472 473 474 475
{
	char *vaddr;
	int ret;

	vaddr = kmap(page);

476 477
	if (needs_clflush)
		drm_clflush_virt_range(vaddr + offset, len);
478

479
	ret = __copy_to_user(user_data, vaddr + offset, len);
480

481
	kunmap(page);
482

483
	return ret ? -EFAULT : 0;
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
}

static int
i915_gem_shmem_pread(struct drm_i915_gem_object *obj,
		     struct drm_i915_gem_pread *args)
{
	char __user *user_data;
	u64 remain;
	unsigned int needs_clflush;
	unsigned int idx, offset;
	int ret;

	ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

500
	ret = i915_gem_object_prepare_read(obj, &needs_clflush);
501 502 503 504 505 506 507 508 509
	mutex_unlock(&obj->base.dev->struct_mutex);
	if (ret)
		return ret;

	remain = args->size;
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
510
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
511 512 513 514 515 516 517 518 519 520 521

		ret = shmem_pread(page, offset, length, user_data,
				  needs_clflush);
		if (ret)
			break;

		remain -= length;
		user_data += length;
		offset = 0;
	}

522
	i915_gem_object_finish_access(obj);
523 524 525 526 527 528 529
	return ret;
}

static inline bool
gtt_user_read(struct io_mapping *mapping,
	      loff_t base, int offset,
	      char __user *user_data, int length)
530
{
531
	void __iomem *vaddr;
532
	unsigned long unwritten;
533 534

	/* We can use the cpu mem copy function because this is X86. */
535 536 537 538
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_to_user_inatomic(user_data,
					    (void __force *)vaddr + offset,
					    length);
539 540
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
541 542 543 544
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_to_user(user_data,
					 (void __force *)vaddr + offset,
					 length);
545 546
		io_mapping_unmap(vaddr);
	}
547 548 549 550
	return unwritten;
}

static int
551 552
i915_gem_gtt_pread(struct drm_i915_gem_object *obj,
		   const struct drm_i915_gem_pread *args)
553
{
554 555
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_ggtt *ggtt = &i915->ggtt;
556
	intel_wakeref_t wakeref;
557
	struct drm_mm_node node;
558 559 560
	struct i915_vma *vma;
	void __user *user_data;
	u64 remain, offset;
561 562
	int ret;

563 564 565 566
	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;

567
	wakeref = intel_runtime_pm_get(i915);
568
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
569 570 571
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
572 573 574
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
575
		ret = i915_vma_put_fence(vma);
576 577 578 579 580
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
C
Chris Wilson 已提交
581
	if (IS_ERR(vma)) {
582
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
583
		if (ret)
584 585
			goto out_unlock;
		GEM_BUG_ON(!node.allocated);
586 587 588 589 590 591
	}

	ret = i915_gem_object_set_to_gtt_domain(obj, false);
	if (ret)
		goto out_unpin;

592
	mutex_unlock(&i915->drm.struct_mutex);
593

594 595 596
	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = args->offset;
597 598 599 600 601 602 603 604 605 606 607 608 609 610

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		u32 page_base = node.start;
		unsigned page_offset = offset_in_page(offset);
		unsigned page_length = PAGE_SIZE - page_offset;
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb();
611 612 613
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
614 615 616 617
			wmb();
		} else {
			page_base += offset & PAGE_MASK;
		}
618

619
		if (gtt_user_read(&ggtt->iomap, page_base, page_offset,
620
				  user_data, page_length)) {
621 622 623 624 625 626 627 628 629
			ret = -EFAULT;
			break;
		}

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

630
	mutex_lock(&i915->drm.struct_mutex);
631 632 633
out_unpin:
	if (node.allocated) {
		wmb();
634
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
635 636
		remove_mappable_node(&node);
	} else {
C
Chris Wilson 已提交
637
		i915_vma_unpin(vma);
638
	}
639
out_unlock:
640
	intel_runtime_pm_put(i915, wakeref);
641
	mutex_unlock(&i915->drm.struct_mutex);
642

643 644 645
	return ret;
}

646 647
/**
 * Reads data from the object referenced by handle.
648 649 650
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
651 652 653 654 655
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
656
		     struct drm_file *file)
657 658
{
	struct drm_i915_gem_pread *args = data;
659
	struct drm_i915_gem_object *obj;
660
	int ret;
661

662 663 664
	if (args->size == 0)
		return 0;

665
	if (!access_ok(u64_to_user_ptr(args->data_ptr),
666 667 668
		       args->size))
		return -EFAULT;

669
	obj = i915_gem_object_lookup(file, args->handle);
670 671
	if (!obj)
		return -ENOENT;
672

673
	/* Bounds check source.  */
674
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
C
Chris Wilson 已提交
675
		ret = -EINVAL;
676
		goto out;
C
Chris Wilson 已提交
677 678
	}

C
Chris Wilson 已提交
679 680
	trace_i915_gem_object_pread(obj, args->offset, args->size);

681 682
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE,
683
				   MAX_SCHEDULE_TIMEOUT);
684
	if (ret)
685
		goto out;
686

687
	ret = i915_gem_object_pin_pages(obj);
688
	if (ret)
689
		goto out;
690

691
	ret = i915_gem_shmem_pread(obj, args);
692
	if (ret == -EFAULT || ret == -ENODEV)
693
		ret = i915_gem_gtt_pread(obj, args);
694

695 696
	i915_gem_object_unpin_pages(obj);
out:
C
Chris Wilson 已提交
697
	i915_gem_object_put(obj);
698
	return ret;
699 700
}

701 702
/* This is the fast write path which cannot handle
 * page faults in the source data
703
 */
704

705 706 707 708
static inline bool
ggtt_write(struct io_mapping *mapping,
	   loff_t base, int offset,
	   char __user *user_data, int length)
709
{
710
	void __iomem *vaddr;
711
	unsigned long unwritten;
712

713
	/* We can use the cpu mem copy function because this is X86. */
714 715
	vaddr = io_mapping_map_atomic_wc(mapping, base);
	unwritten = __copy_from_user_inatomic_nocache((void __force *)vaddr + offset,
716
						      user_data, length);
717 718
	io_mapping_unmap_atomic(vaddr);
	if (unwritten) {
719 720 721
		vaddr = io_mapping_map_wc(mapping, base, PAGE_SIZE);
		unwritten = copy_from_user((void __force *)vaddr + offset,
					   user_data, length);
722 723
		io_mapping_unmap(vaddr);
	}
724 725 726 727

	return unwritten;
}

728 729 730
/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
731
 * @obj: i915 GEM object
732
 * @args: pwrite arguments structure
733
 */
734
static int
735 736
i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
			 const struct drm_i915_gem_pwrite *args)
737
{
738
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
739
	struct i915_ggtt *ggtt = &i915->ggtt;
740
	intel_wakeref_t wakeref;
741
	struct drm_mm_node node;
742 743 744
	struct i915_vma *vma;
	u64 remain, offset;
	void __user *user_data;
745
	int ret;
746

747 748 749
	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
	if (ret)
		return ret;
D
Daniel Vetter 已提交
750

751 752 753 754 755 756 757 758
	if (i915_gem_object_has_struct_page(obj)) {
		/*
		 * Avoid waking the device up if we can fallback, as
		 * waking/resuming is very slow (worst-case 10-100 ms
		 * depending on PCI sleeps and our own resume time).
		 * This easily dwarfs any performance advantage from
		 * using the cache bypass of indirect GGTT access.
		 */
759 760
		wakeref = intel_runtime_pm_get_if_in_use(i915);
		if (!wakeref) {
761 762 763 764 765
			ret = -EFAULT;
			goto out_unlock;
		}
	} else {
		/* No backing pages, no fallback, we must force GGTT access */
766
		wakeref = intel_runtime_pm_get(i915);
767 768
	}

C
Chris Wilson 已提交
769
	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0,
770 771 772
				       PIN_MAPPABLE |
				       PIN_NONFAULT |
				       PIN_NONBLOCK);
773 774 775
	if (!IS_ERR(vma)) {
		node.start = i915_ggtt_offset(vma);
		node.allocated = false;
776
		ret = i915_vma_put_fence(vma);
777 778 779 780 781
		if (ret) {
			i915_vma_unpin(vma);
			vma = ERR_PTR(ret);
		}
	}
C
Chris Wilson 已提交
782
	if (IS_ERR(vma)) {
783
		ret = insert_mappable_node(ggtt, &node, PAGE_SIZE);
784
		if (ret)
785
			goto out_rpm;
786
		GEM_BUG_ON(!node.allocated);
787
	}
D
Daniel Vetter 已提交
788 789 790 791 792

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		goto out_unpin;

793 794
	mutex_unlock(&i915->drm.struct_mutex);

795
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
796

797 798 799 800
	user_data = u64_to_user_ptr(args->data_ptr);
	offset = args->offset;
	remain = args->size;
	while (remain) {
801 802
		/* Operation in this page
		 *
803 804 805
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
806
		 */
807
		u32 page_base = node.start;
808 809
		unsigned int page_offset = offset_in_page(offset);
		unsigned int page_length = PAGE_SIZE - page_offset;
810 811 812
		page_length = remain < page_length ? remain : page_length;
		if (node.allocated) {
			wmb(); /* flush the write before we modify the GGTT */
813 814 815
			ggtt->vm.insert_page(&ggtt->vm,
					     i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT),
					     node.start, I915_CACHE_NONE, 0);
816 817 818 819
			wmb(); /* flush modifications to the GGTT (insert_page) */
		} else {
			page_base += offset & PAGE_MASK;
		}
820
		/* If we get a fault while copying data, then (presumably) our
821 822
		 * source page isn't available.  Return the error and we'll
		 * retry in the slow path.
823 824
		 * If the object is non-shmem backed, we retry again with the
		 * path that handles page fault.
825
		 */
826
		if (ggtt_write(&ggtt->iomap, page_base, page_offset,
827 828 829
			       user_data, page_length)) {
			ret = -EFAULT;
			break;
D
Daniel Vetter 已提交
830
		}
831

832 833 834
		remain -= page_length;
		user_data += page_length;
		offset += page_length;
835
	}
836
	intel_fb_obj_flush(obj, ORIGIN_CPU);
837 838

	mutex_lock(&i915->drm.struct_mutex);
D
Daniel Vetter 已提交
839
out_unpin:
840 841
	if (node.allocated) {
		wmb();
842
		ggtt->vm.clear_range(&ggtt->vm, node.start, node.size);
843 844
		remove_mappable_node(&node);
	} else {
C
Chris Wilson 已提交
845
		i915_vma_unpin(vma);
846
	}
847
out_rpm:
848
	intel_runtime_pm_put(i915, wakeref);
849
out_unlock:
850
	mutex_unlock(&i915->drm.struct_mutex);
851
	return ret;
852 853
}

854 855 856 857 858
/* Per-page copy function for the shmem pwrite fastpath.
 * Flushes invalid cachelines before writing to the target if
 * needs_clflush_before is set and flushes out any written cachelines after
 * writing if needs_clflush is set.
 */
859
static int
860 861 862
shmem_pwrite(struct page *page, int offset, int len, char __user *user_data,
	     bool needs_clflush_before,
	     bool needs_clflush_after)
863
{
864
	char *vaddr;
865 866
	int ret;

867
	vaddr = kmap(page);
868

869 870
	if (needs_clflush_before)
		drm_clflush_virt_range(vaddr + offset, len);
871

872 873 874
	ret = __copy_from_user(vaddr + offset, user_data, len);
	if (!ret && needs_clflush_after)
		drm_clflush_virt_range(vaddr + offset, len);
875

876 877 878
	kunmap(page);

	return ret ? -EFAULT : 0;
879 880 881 882 883 884 885 886 887 888
}

static int
i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *args)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	void __user *user_data;
	u64 remain;
	unsigned int partial_cacheline_write;
889
	unsigned int needs_clflush;
890 891
	unsigned int offset, idx;
	int ret;
892

893
	ret = mutex_lock_interruptible(&i915->drm.struct_mutex);
894 895 896
	if (ret)
		return ret;

897
	ret = i915_gem_object_prepare_write(obj, &needs_clflush);
898 899 900
	mutex_unlock(&i915->drm.struct_mutex);
	if (ret)
		return ret;
901

902 903 904 905 906 907 908
	/* If we don't overwrite a cacheline completely we need to be
	 * careful to have up-to-date data by first clflushing. Don't
	 * overcomplicate things and flush the entire patch.
	 */
	partial_cacheline_write = 0;
	if (needs_clflush & CLFLUSH_BEFORE)
		partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1;
909

910 911 912 913 914
	user_data = u64_to_user_ptr(args->data_ptr);
	remain = args->size;
	offset = offset_in_page(args->offset);
	for (idx = args->offset >> PAGE_SHIFT; remain; idx++) {
		struct page *page = i915_gem_object_get_page(obj, idx);
915
		unsigned int length = min_t(u64, remain, PAGE_SIZE - offset);
916

917 918 919
		ret = shmem_pwrite(page, offset, length, user_data,
				   (offset | length) & partial_cacheline_write,
				   needs_clflush & CLFLUSH_AFTER);
920
		if (ret)
921
			break;
922

923 924 925
		remain -= length;
		user_data += length;
		offset = 0;
926
	}
927

928
	intel_fb_obj_flush(obj, ORIGIN_CPU);
929
	i915_gem_object_finish_access(obj);
930
	return ret;
931 932 933 934
}

/**
 * Writes data to the object referenced by handle.
935 936 937
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
938 939 940 941 942
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
943
		      struct drm_file *file)
944 945
{
	struct drm_i915_gem_pwrite *args = data;
946
	struct drm_i915_gem_object *obj;
947 948 949 950 951
	int ret;

	if (args->size == 0)
		return 0;

952
	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
953 954
		return -EFAULT;

955
	obj = i915_gem_object_lookup(file, args->handle);
956 957
	if (!obj)
		return -ENOENT;
958

959
	/* Bounds check destination. */
960
	if (range_overflows_t(u64, args->offset, args->size, obj->base.size)) {
C
Chris Wilson 已提交
961
		ret = -EINVAL;
962
		goto err;
C
Chris Wilson 已提交
963 964
	}

965 966 967 968 969 970
	/* Writes not allowed into this read-only object */
	if (i915_gem_object_is_readonly(obj)) {
		ret = -EINVAL;
		goto err;
	}

C
Chris Wilson 已提交
971 972
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);

973 974 975 976 977 978
	ret = -ENODEV;
	if (obj->ops->pwrite)
		ret = obj->ops->pwrite(obj, args);
	if (ret != -ENODEV)
		goto err;

979 980 981
	ret = i915_gem_object_wait(obj,
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_ALL,
982
				   MAX_SCHEDULE_TIMEOUT);
983 984 985
	if (ret)
		goto err;

986
	ret = i915_gem_object_pin_pages(obj);
987
	if (ret)
988
		goto err;
989

D
Daniel Vetter 已提交
990
	ret = -EFAULT;
991 992 993 994 995 996
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
997
	if (!i915_gem_object_has_struct_page(obj) ||
998
	    cpu_write_needs_clflush(obj))
D
Daniel Vetter 已提交
999 1000
		/* Note that the gtt paths might fail with non-page-backed user
		 * pointers (e.g. gtt mappings when moving data between
1001 1002
		 * textures). Fallback to the shmem path in that case.
		 */
1003
		ret = i915_gem_gtt_pwrite_fast(obj, args);
1004

1005
	if (ret == -EFAULT || ret == -ENOSPC) {
1006 1007
		if (obj->phys_handle)
			ret = i915_gem_phys_pwrite(obj, args, file);
1008
		else
1009
			ret = i915_gem_shmem_pwrite(obj, args);
1010
	}
1011

1012
	i915_gem_object_unpin_pages(obj);
1013
err:
C
Chris Wilson 已提交
1014
	i915_gem_object_put(obj);
1015
	return ret;
1016 1017 1018 1019
}

/**
 * Called when user space has done writes to this buffer
1020 1021 1022
 * @dev: drm device
 * @data: ioctl data blob
 * @file: drm file
1023 1024 1025
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1026
			 struct drm_file *file)
1027 1028
{
	struct drm_i915_gem_sw_finish *args = data;
1029
	struct drm_i915_gem_object *obj;
1030

1031
	obj = i915_gem_object_lookup(file, args->handle);
1032 1033
	if (!obj)
		return -ENOENT;
1034

T
Tina Zhang 已提交
1035 1036 1037 1038 1039
	/*
	 * Proxy objects are barred from CPU access, so there is no
	 * need to ban sw_finish as it is a nop.
	 */

1040
	/* Pinned buffers may be scanout, so flush the cache */
1041
	i915_gem_object_flush_if_display(obj);
C
Chris Wilson 已提交
1042
	i915_gem_object_put(obj);
1043 1044

	return 0;
1045 1046
}

1047
void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
1048
{
1049
	struct drm_i915_gem_object *obj, *on;
1050
	int i;
1051

1052 1053 1054 1055 1056 1057
	/*
	 * Only called during RPM suspend. All users of the userfault_list
	 * must be holding an RPM wakeref to ensure that this can not
	 * run concurrently with themselves (and use the struct_mutex for
	 * protection between themselves).
	 */
1058

1059
	list_for_each_entry_safe(obj, on,
1060 1061
				 &dev_priv->mm.userfault_list, userfault_link)
		__i915_gem_object_release_mmap(obj);
1062 1063 1064 1065 1066 1067 1068 1069

	/* The fence will be lost when the device powers down. If any were
	 * in use by hardware (i.e. they are pinned), we should not be powering
	 * down! All other fences will be reacquired by the user upon waking.
	 */
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];

1070 1071 1072 1073 1074 1075 1076 1077 1078 1079
		/* Ideally we want to assert that the fence register is not
		 * live at this point (i.e. that no piece of code will be
		 * trying to write through fence + GTT, as that both violates
		 * our tracking of activity and associated locking/barriers,
		 * but also is illegal given that the hw is powered down).
		 *
		 * Previously we used reg->pin_count as a "liveness" indicator.
		 * That is not sufficient, and we need a more fine-grained
		 * tool if we want to have a sanity check here.
		 */
1080 1081 1082 1083

		if (!reg->vma)
			continue;

1084
		GEM_BUG_ON(i915_vma_has_userfault(reg->vma));
1085 1086
		reg->dirty = true;
	}
1087 1088
}

1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099
static unsigned long to_wait_timeout(s64 timeout_ns)
{
	if (timeout_ns < 0)
		return MAX_SCHEDULE_TIMEOUT;

	if (timeout_ns == 0)
		return 0;

	return nsecs_to_jiffies_timeout(timeout_ns);
}

1100 1101
/**
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
1102 1103 1104
 * @dev: drm device pointer
 * @data: ioctl data blob
 * @file: drm file pointer
1105 1106 1107 1108 1109 1110 1111
 *
 * Returns 0 if successful, else an error is returned with the remaining time in
 * the timeout parameter.
 *  -ETIME: object is still busy after timeout
 *  -ERESTARTSYS: signal interrupted the wait
 *  -ENONENT: object doesn't exist
 * Also possible, but rare:
1112
 *  -EAGAIN: incomplete, restart syscall
1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128
 *  -ENOMEM: damn
 *  -ENODEV: Internal IRQ fail
 *  -E?: The add request failed
 *
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
 * non-zero timeout parameter the wait ioctl will wait for the given number of
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
 * without holding struct_mutex the object may become re-busied before this
 * function completes. A similar but shorter * race condition exists in the busy
 * ioctl
 */
int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
	struct drm_i915_gem_wait *args = data;
	struct drm_i915_gem_object *obj;
1129 1130
	ktime_t start;
	long ret;
1131

1132 1133 1134
	if (args->flags != 0)
		return -EINVAL;

1135
	obj = i915_gem_object_lookup(file, args->bo_handle);
1136
	if (!obj)
1137 1138
		return -ENOENT;

1139 1140 1141
	start = ktime_get();

	ret = i915_gem_object_wait(obj,
1142 1143 1144
				   I915_WAIT_INTERRUPTIBLE |
				   I915_WAIT_PRIORITY |
				   I915_WAIT_ALL,
1145
				   to_wait_timeout(args->timeout_ns));
1146 1147 1148 1149 1150

	if (args->timeout_ns > 0) {
		args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
		if (args->timeout_ns < 0)
			args->timeout_ns = 0;
1151 1152 1153 1154 1155 1156 1157 1158 1159 1160

		/*
		 * Apparently ktime isn't accurate enough and occasionally has a
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
		 * things up to make the test happy. We allow up to 1 jiffy.
		 *
		 * This is a regression from the timespec->ktime conversion.
		 */
		if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
			args->timeout_ns = 0;
1161 1162 1163 1164

		/* Asked to wait beyond the jiffie/scheduler precision? */
		if (ret == -ETIME && args->timeout_ns)
			ret = -EAGAIN;
1165 1166
	}

C
Chris Wilson 已提交
1167
	i915_gem_object_put(obj);
1168
	return ret;
1169 1170
}

1171 1172
static int wait_for_engines(struct drm_i915_private *i915)
{
1173
	if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) {
1174 1175
		dev_err(i915->drm.dev,
			"Failed to idle engines, declaring wedged!\n");
1176
		GEM_TRACE_DUMP();
1177 1178
		i915_gem_set_wedged(i915);
		return -EIO;
1179 1180 1181 1182 1183
	}

	return 0;
}

1184 1185 1186 1187 1188 1189 1190 1191
static long
wait_for_timelines(struct drm_i915_private *i915,
		   unsigned int flags, long timeout)
{
	struct i915_gt_timelines *gt = &i915->gt.timelines;
	struct i915_timeline *tl;

	mutex_lock(&gt->mutex);
C
Chris Wilson 已提交
1192
	list_for_each_entry(tl, &gt->active_list, link) {
1193 1194
		struct i915_request *rq;

1195
		rq = i915_active_request_get_unlocked(&tl->last_request);
1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210
		if (!rq)
			continue;

		mutex_unlock(&gt->mutex);

		/*
		 * "Race-to-idle".
		 *
		 * Switching to the kernel context is often used a synchronous
		 * step prior to idling, e.g. in suspend for flushing all
		 * current operations to memory before sleeping. These we
		 * want to complete as quickly as possible to avoid prolonged
		 * stalls, so allow the gpu to boost to maximum clocks.
		 */
		if (flags & I915_WAIT_FOR_IDLE_BOOST)
1211
			gen6_rps_boost(rq);
1212 1213 1214 1215 1216 1217 1218 1219

		timeout = i915_request_wait(rq, flags, timeout);
		i915_request_put(rq);
		if (timeout < 0)
			return timeout;

		/* restart after reacquiring the lock */
		mutex_lock(&gt->mutex);
C
Chris Wilson 已提交
1220
		tl = list_entry(&gt->active_list, typeof(*tl), link);
1221 1222 1223 1224 1225 1226
	}
	mutex_unlock(&gt->mutex);

	return timeout;
}

1227 1228
int i915_gem_wait_for_idle(struct drm_i915_private *i915,
			   unsigned int flags, long timeout)
1229
{
1230
	GEM_TRACE("flags=%x (%s), timeout=%ld%s, awake?=%s\n",
1231
		  flags, flags & I915_WAIT_LOCKED ? "locked" : "unlocked",
1232 1233
		  timeout, timeout == MAX_SCHEDULE_TIMEOUT ? " (forever)" : "",
		  yesno(i915->gt.awake));
1234

1235 1236 1237 1238
	/* If the device is asleep, we have no requests outstanding */
	if (!READ_ONCE(i915->gt.awake))
		return 0;

1239 1240 1241 1242
	timeout = wait_for_timelines(i915, flags, timeout);
	if (timeout < 0)
		return timeout;

1243
	if (flags & I915_WAIT_LOCKED) {
1244
		int err;
1245 1246 1247

		lockdep_assert_held(&i915->drm.struct_mutex);

1248 1249 1250 1251
		err = wait_for_engines(i915);
		if (err)
			return err;

1252
		i915_retire_requests(i915);
1253
	}
1254 1255

	return 0;
1256 1257
}

1258 1259 1260
/* Throttle our rendering by waiting until the ring has completed our requests
 * emitted over 20 msec ago.
 *
1261 1262 1263 1264
 * Note that if we were to use the current jiffies each time around the loop,
 * we wouldn't escape the function with any frames outstanding if the time to
 * render a frame was over 20ms.
 *
1265 1266 1267
 * This should get us reasonable parallelism between CPU and GPU but also
 * relatively low latency when blocking on a particular request to finish.
 */
1268
static int
1269
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
1270
{
1271
	struct drm_i915_private *dev_priv = to_i915(dev);
1272
	struct drm_i915_file_private *file_priv = file->driver_priv;
1273
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
1274
	struct i915_request *request, *target = NULL;
1275
	long ret;
1276

1277
	/* ABI: return -EIO if already wedged */
1278 1279 1280
	ret = i915_terminally_wedged(dev_priv);
	if (ret)
		return ret;
1281

1282
	spin_lock(&file_priv->mm.lock);
1283
	list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
1284 1285
		if (time_after_eq(request->emitted_jiffies, recent_enough))
			break;
1286

1287 1288 1289 1290
		if (target) {
			list_del(&target->client_link);
			target->file_priv = NULL;
		}
1291

1292
		target = request;
1293
	}
1294
	if (target)
1295
		i915_request_get(target);
1296
	spin_unlock(&file_priv->mm.lock);
1297

1298
	if (target == NULL)
1299
		return 0;
1300

1301
	ret = i915_request_wait(target,
1302 1303
				I915_WAIT_INTERRUPTIBLE,
				MAX_SCHEDULE_TIMEOUT);
1304
	i915_request_put(target);
1305

1306
	return ret < 0 ? ret : 0;
1307 1308
}

C
Chris Wilson 已提交
1309
struct i915_vma *
1310 1311
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
			 const struct i915_ggtt_view *view,
1312
			 u64 size,
1313 1314
			 u64 alignment,
			 u64 flags)
1315
{
1316
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
1317
	struct i915_address_space *vm = &dev_priv->ggtt.vm;
1318 1319
	struct i915_vma *vma;
	int ret;
1320

1321 1322
	lockdep_assert_held(&obj->base.dev->struct_mutex);

1323 1324
	if (flags & PIN_MAPPABLE &&
	    (!view || view->type == I915_GGTT_VIEW_NORMAL)) {
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354
		/* If the required space is larger than the available
		 * aperture, we will not able to find a slot for the
		 * object and unbinding the object now will be in
		 * vain. Worse, doing so may cause us to ping-pong
		 * the object in and out of the Global GTT and
		 * waste a lot of cycles under the mutex.
		 */
		if (obj->base.size > dev_priv->ggtt.mappable_end)
			return ERR_PTR(-E2BIG);

		/* If NONBLOCK is set the caller is optimistically
		 * trying to cache the full object within the mappable
		 * aperture, and *must* have a fallback in place for
		 * situations where we cannot bind the object. We
		 * can be a little more lax here and use the fallback
		 * more often to avoid costly migrations of ourselves
		 * and other objects within the aperture.
		 *
		 * Half-the-aperture is used as a simple heuristic.
		 * More interesting would to do search for a free
		 * block prior to making the commitment to unbind.
		 * That caters for the self-harm case, and with a
		 * little more heuristics (e.g. NOFAULT, NOEVICT)
		 * we could try to minimise harm to others.
		 */
		if (flags & PIN_NONBLOCK &&
		    obj->base.size > dev_priv->ggtt.mappable_end / 2)
			return ERR_PTR(-ENOSPC);
	}

1355
	vma = i915_vma_instance(obj, vm, view);
1356
	if (IS_ERR(vma))
C
Chris Wilson 已提交
1357
		return vma;
1358 1359

	if (i915_vma_misplaced(vma, size, alignment, flags)) {
1360 1361 1362
		if (flags & PIN_NONBLOCK) {
			if (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))
				return ERR_PTR(-ENOSPC);
1363

1364
			if (flags & PIN_MAPPABLE &&
1365
			    vma->fence_size > dev_priv->ggtt.mappable_end / 2)
1366 1367 1368
				return ERR_PTR(-ENOSPC);
		}

1369 1370
		WARN(i915_vma_is_pinned(vma),
		     "bo is already pinned in ggtt with incorrect alignment:"
1371 1372 1373
		     " offset=%08x, req.alignment=%llx,"
		     " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n",
		     i915_ggtt_offset(vma), alignment,
1374
		     !!(flags & PIN_MAPPABLE),
1375
		     i915_vma_is_map_and_fenceable(vma));
1376 1377
		ret = i915_vma_unbind(vma);
		if (ret)
C
Chris Wilson 已提交
1378
			return ERR_PTR(ret);
1379 1380
	}

C
Chris Wilson 已提交
1381 1382 1383
	ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL);
	if (ret)
		return ERR_PTR(ret);
1384

C
Chris Wilson 已提交
1385
	return vma;
1386 1387
}

1388
static __always_inline u32 __busy_read_flag(u8 id)
1389
{
1390 1391
	if (id == (u8)I915_ENGINE_CLASS_INVALID)
		return 0xffff0000u;
1392 1393

	GEM_BUG_ON(id >= 16);
1394
	return 0x10000u << id;
1395 1396
}

1397
static __always_inline u32 __busy_write_id(u8 id)
1398
{
1399 1400
	/*
	 * The uABI guarantees an active writer is also amongst the read
1401 1402 1403 1404 1405 1406 1407
	 * engines. This would be true if we accessed the activity tracking
	 * under the lock, but as we perform the lookup of the object and
	 * its activity locklessly we can not guarantee that the last_write
	 * being active implies that we have set the same engine flag from
	 * last_read - hence we always set both read and write busy for
	 * last_write.
	 */
1408 1409
	if (id == (u8)I915_ENGINE_CLASS_INVALID)
		return 0xffffffffu;
1410 1411

	return (id + 1) | __busy_read_flag(id);
1412 1413
}

1414
static __always_inline unsigned int
1415
__busy_set_if_active(const struct dma_fence *fence, u32 (*flag)(u8 id))
1416
{
1417
	const struct i915_request *rq;
1418

1419 1420
	/*
	 * We have to check the current hw status of the fence as the uABI
1421 1422 1423
	 * guarantees forward progress. We could rely on the idle worker
	 * to eventually flush us, but to minimise latency just ask the
	 * hardware.
1424
	 *
1425
	 * Note we only report on the status of native fences.
1426
	 */
1427 1428 1429 1430
	if (!dma_fence_is_i915(fence))
		return 0;

	/* opencode to_request() in order to avoid const warnings */
1431
	rq = container_of(fence, const struct i915_request, fence);
1432
	if (i915_request_completed(rq))
1433 1434
		return 0;

1435 1436
	/* Beware type-expansion follies! */
	BUILD_BUG_ON(!typecheck(u8, rq->engine->uabi_class));
1437
	return flag(rq->engine->uabi_class);
1438 1439
}

1440
static __always_inline unsigned int
1441
busy_check_reader(const struct dma_fence *fence)
1442
{
1443
	return __busy_set_if_active(fence, __busy_read_flag);
1444 1445
}

1446
static __always_inline unsigned int
1447
busy_check_writer(const struct dma_fence *fence)
1448
{
1449 1450 1451 1452
	if (!fence)
		return 0;

	return __busy_set_if_active(fence, __busy_write_id);
1453 1454
}

1455 1456
int
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
1457
		    struct drm_file *file)
1458 1459
{
	struct drm_i915_gem_busy *args = data;
1460
	struct drm_i915_gem_object *obj;
1461 1462
	struct reservation_object_list *list;
	unsigned int seq;
1463
	int err;
1464

1465
	err = -ENOENT;
1466 1467
	rcu_read_lock();
	obj = i915_gem_object_lookup_rcu(file, args->handle);
1468
	if (!obj)
1469
		goto out;
1470

1471 1472
	/*
	 * A discrepancy here is that we do not report the status of
1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489
	 * non-i915 fences, i.e. even though we may report the object as idle,
	 * a call to set-domain may still stall waiting for foreign rendering.
	 * This also means that wait-ioctl may report an object as busy,
	 * where busy-ioctl considers it idle.
	 *
	 * We trade the ability to warn of foreign fences to report on which
	 * i915 engines are active for the object.
	 *
	 * Alternatively, we can trade that extra information on read/write
	 * activity with
	 *	args->busy =
	 *		!reservation_object_test_signaled_rcu(obj->resv, true);
	 * to report the overall busyness. This is what the wait-ioctl does.
	 *
	 */
retry:
	seq = raw_read_seqcount(&obj->resv->seq);
1490

1491 1492
	/* Translate the exclusive fence to the READ *and* WRITE engine */
	args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl));
1493

1494 1495 1496 1497
	/* Translate shared fences to READ set of engines */
	list = rcu_dereference(obj->resv->fence);
	if (list) {
		unsigned int shared_count = list->shared_count, i;
1498

1499 1500 1501 1502 1503 1504
		for (i = 0; i < shared_count; ++i) {
			struct dma_fence *fence =
				rcu_dereference(list->shared[i]);

			args->busy |= busy_check_reader(fence);
		}
1505
	}
1506

1507 1508 1509 1510
	if (args->busy && read_seqcount_retry(&obj->resv->seq, seq))
		goto retry;

	err = 0;
1511 1512 1513
out:
	rcu_read_unlock();
	return err;
1514 1515 1516 1517 1518 1519
}

int
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv)
{
1520
	return i915_gem_ring_throttle(dev, file_priv);
1521 1522
}

1523 1524 1525 1526
int
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
		       struct drm_file *file_priv)
{
1527
	struct drm_i915_private *dev_priv = to_i915(dev);
1528
	struct drm_i915_gem_madvise *args = data;
1529
	struct drm_i915_gem_object *obj;
1530
	int err;
1531 1532 1533 1534 1535 1536 1537 1538 1539

	switch (args->madv) {
	case I915_MADV_DONTNEED:
	case I915_MADV_WILLNEED:
	    break;
	default:
	    return -EINVAL;
	}

1540
	obj = i915_gem_object_lookup(file_priv, args->handle);
1541 1542 1543 1544 1545 1546
	if (!obj)
		return -ENOENT;

	err = mutex_lock_interruptible(&obj->mm.lock);
	if (err)
		goto out;
1547

1548
	if (i915_gem_object_has_pages(obj) &&
1549
	    i915_gem_object_is_tiled(obj) &&
1550
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
1551 1552
		if (obj->mm.madv == I915_MADV_WILLNEED) {
			GEM_BUG_ON(!obj->mm.quirked);
C
Chris Wilson 已提交
1553
			__i915_gem_object_unpin_pages(obj);
1554 1555 1556
			obj->mm.quirked = false;
		}
		if (args->madv == I915_MADV_WILLNEED) {
1557
			GEM_BUG_ON(obj->mm.quirked);
C
Chris Wilson 已提交
1558
			__i915_gem_object_pin_pages(obj);
1559 1560
			obj->mm.quirked = true;
		}
1561 1562
	}

C
Chris Wilson 已提交
1563 1564
	if (obj->mm.madv != __I915_MADV_PURGED)
		obj->mm.madv = args->madv;
1565

C
Chris Wilson 已提交
1566
	/* if the object is no longer attached, discard its backing storage */
1567 1568
	if (obj->mm.madv == I915_MADV_DONTNEED &&
	    !i915_gem_object_has_pages(obj))
1569
		i915_gem_object_truncate(obj);
1570

C
Chris Wilson 已提交
1571
	args->retained = obj->mm.madv != __I915_MADV_PURGED;
1572
	mutex_unlock(&obj->mm.lock);
C
Chris Wilson 已提交
1573

1574
out:
1575
	i915_gem_object_put(obj);
1576
	return err;
1577 1578
}

1579 1580
void i915_gem_sanitize(struct drm_i915_private *i915)
{
1581 1582
	intel_wakeref_t wakeref;

1583 1584
	GEM_TRACE("\n");

1585
	wakeref = intel_runtime_pm_get(i915);
1586
	intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL);
1587 1588 1589 1590 1591 1592 1593

	/*
	 * As we have just resumed the machine and woken the device up from
	 * deep PCI sleep (presumably D3_cold), assume the HW has been reset
	 * back to defaults, recovering from whatever wedged state we left it
	 * in and so worth trying to use the device once more.
	 */
1594
	if (i915_terminally_wedged(i915))
1595 1596
		i915_gem_unset_wedged(i915);

1597 1598 1599 1600 1601 1602
	/*
	 * If we inherit context state from the BIOS or earlier occupants
	 * of the GPU, the GPU may be in an inconsistent state when we
	 * try to take over. The only way to remove the earlier state
	 * is by resetting. However, resetting on earlier gen is tricky as
	 * it may impact the display and we are uncertain about the stability
1603
	 * of the reset, so this could be applied to even earlier gen.
1604
	 */
1605
	intel_gt_sanitize(i915, false);
1606

1607
	intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
1608
	intel_runtime_pm_put(i915, wakeref);
1609

1610
	mutex_lock(&i915->drm.struct_mutex);
1611 1612
	i915_gem_contexts_lost(i915);
	mutex_unlock(&i915->drm.struct_mutex);
1613 1614
}

1615
void i915_gem_init_swizzling(struct drm_i915_private *dev_priv)
1616
{
1617
	if (INTEL_GEN(dev_priv) < 5 ||
1618 1619 1620 1621 1622 1623
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
		return;

	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
				 DISP_TILE_SURFACE_SWIZZLING);

1624
	if (IS_GEN(dev_priv, 5))
1625 1626
		return;

1627
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
1628
	if (IS_GEN(dev_priv, 6))
1629
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
1630
	else if (IS_GEN(dev_priv, 7))
1631
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
1632
	else if (IS_GEN(dev_priv, 8))
B
Ben Widawsky 已提交
1633
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
1634 1635
	else
		BUG();
1636
}
D
Daniel Vetter 已提交
1637

1638
static void init_unused_ring(struct drm_i915_private *dev_priv, u32 base)
1639 1640 1641 1642 1643 1644 1645
{
	I915_WRITE(RING_CTL(base), 0);
	I915_WRITE(RING_HEAD(base), 0);
	I915_WRITE(RING_TAIL(base), 0);
	I915_WRITE(RING_START(base), 0);
}

1646
static void init_unused_rings(struct drm_i915_private *dev_priv)
1647
{
1648 1649 1650 1651 1652 1653
	if (IS_I830(dev_priv)) {
		init_unused_ring(dev_priv, PRB1_BASE);
		init_unused_ring(dev_priv, SRB0_BASE);
		init_unused_ring(dev_priv, SRB1_BASE);
		init_unused_ring(dev_priv, SRB2_BASE);
		init_unused_ring(dev_priv, SRB3_BASE);
1654
	} else if (IS_GEN(dev_priv, 2)) {
1655 1656
		init_unused_ring(dev_priv, SRB0_BASE);
		init_unused_ring(dev_priv, SRB1_BASE);
1657
	} else if (IS_GEN(dev_priv, 3)) {
1658 1659
		init_unused_ring(dev_priv, PRB1_BASE);
		init_unused_ring(dev_priv, PRB2_BASE);
1660 1661 1662
	}
}

1663 1664
int i915_gem_init_hw(struct drm_i915_private *dev_priv)
{
C
Chris Wilson 已提交
1665
	int ret;
1666

1667 1668
	dev_priv->gt.last_init_time = ktime_get();

1669
	/* Double layer security blanket, see i915_gem_init() */
1670
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);
1671

1672
	if (HAS_EDRAM(dev_priv) && INTEL_GEN(dev_priv) < 9)
1673
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
1674

1675
	if (IS_HASWELL(dev_priv))
1676
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev_priv) ?
1677
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
1678

1679
	/* Apply the GT workarounds... */
1680
	intel_gt_apply_workarounds(dev_priv);
1681 1682
	/* ...and determine whether they are sticking. */
	intel_gt_verify_workarounds(dev_priv, "init");
1683

1684
	i915_gem_init_swizzling(dev_priv);
1685

1686 1687 1688 1689 1690 1691
	/*
	 * At least 830 can leave some of the unused rings
	 * "active" (ie. head != tail) after resume which
	 * will prevent c3 entry. Makes sure all unused rings
	 * are totally idle.
	 */
1692
	init_unused_rings(dev_priv);
1693

1694
	BUG_ON(!dev_priv->kernel_context);
1695 1696
	ret = i915_terminally_wedged(dev_priv);
	if (ret)
1697
		goto out;
1698

1699
	ret = i915_ppgtt_init_hw(dev_priv);
1700
	if (ret) {
1701
		DRM_ERROR("Enabling PPGTT failed (%d)\n", ret);
1702 1703 1704
		goto out;
	}

1705 1706 1707 1708 1709 1710
	ret = intel_wopcm_init_hw(&dev_priv->wopcm);
	if (ret) {
		DRM_ERROR("Enabling WOPCM failed (%d)\n", ret);
		goto out;
	}

1711 1712
	/* We can't enable contexts until all firmware is loaded */
	ret = intel_uc_init_hw(dev_priv);
1713 1714
	if (ret) {
		DRM_ERROR("Enabling uc failed (%d)\n", ret);
1715
		goto out;
1716
	}
1717

1718
	intel_mocs_init_l3cc_table(dev_priv);
1719

1720
	/* Only when the HW is re-initialised, can we replay the requests */
1721
	ret = intel_engines_resume(dev_priv);
1722 1723
	if (ret)
		goto cleanup_uc;
1724

1725
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
1726

1727
	intel_engines_set_scheduler_caps(dev_priv);
1728
	return 0;
1729 1730 1731

cleanup_uc:
	intel_uc_fini_hw(dev_priv);
1732
out:
1733
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
1734 1735

	return ret;
1736 1737
}

static int __intel_engines_record_defaults(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	struct i915_gem_context *ctx;
	struct i915_gem_engines *e;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * As we reset the gpu during very early sanitisation, the current
	 * register state on the GPU should reflect its default values.
	 * We load a context onto the hw (with restore-inhibit), then switch
	 * over to a second context to save that default register state. We
	 * can then prime every new context with that state so they all start
	 * from the same default HW values.
	 */

	ctx = i915_gem_context_create_kernel(i915, 0);
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);

	e = i915_gem_context_lock_engines(ctx);

	for_each_engine(engine, i915, id) {
		struct intel_context *ce = e->engines[id];
		struct i915_request *rq;

		rq = intel_context_create_request(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_active;
		}

		err = 0;
		if (rq->engine->init_context)
			err = rq->engine->init_context(rq);

		i915_request_add(rq);
		if (err)
			goto err_active;
	}

	/* Flush the default context image to memory, and enable powersaving. */
	if (!i915_gem_load_power_context(i915)) {
		err = -EIO;
		goto err_active;
	}

	for_each_engine(engine, i915, id) {
		struct intel_context *ce = e->engines[id];
		struct i915_vma *state = ce->state;
		void *vaddr;

		if (!state)
			continue;

		GEM_BUG_ON(intel_context_is_pinned(ce));

		/*
		 * As we will hold a reference to the logical state, it will
		 * not be torn down with the context, and importantly the
		 * object will hold onto its vma (making it possible for a
		 * stray GTT write to corrupt our defaults). Unmap the vma
		 * from the GTT to prevent such accidents and reclaim the
		 * space.
		 */
		err = i915_vma_unbind(state);
		if (err)
			goto err_active;

		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
		if (err)
			goto err_active;

		engine->default_state = i915_gem_object_get(state->obj);
		i915_gem_object_set_cache_coherency(engine->default_state,
						    I915_CACHE_LLC);

		/* Check we can acquire the image of the context state */
		vaddr = i915_gem_object_pin_map(engine->default_state,
						I915_MAP_FORCE_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_active;
		}

		i915_gem_object_unpin_map(engine->default_state);
	}

	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
		unsigned int found = intel_engines_has_context_isolation(i915);

		/*
		 * Make sure that classes with multiple engine instances all
		 * share the same basic configuration.
		 */
		for_each_engine(engine, i915, id) {
			unsigned int bit = BIT(engine->uabi_class);
			unsigned int expected = engine->default_state ? bit : 0;

			if ((found & bit) != expected) {
				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
					  engine->uabi_class, engine->name);
			}
		}
	}

out_ctx:
	i915_gem_context_unlock_engines(ctx);
	i915_gem_context_set_closed(ctx);
	i915_gem_context_put(ctx);
	return err;

err_active:
	/*
	 * If we have to abandon now, we expect the engines to be idle
	 * and ready to be torn-down. The quickest way we can accomplish
	 * this is by declaring ourselves wedged.
	 */
	i915_gem_set_wedged(i915);
	goto out_ctx;
}

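/*
 * Allocate a single scratch buffer (preferring stolen memory, falling
 * back to an internal object) and pin it high in the global GTT for
 * the lifetime of the driver; released by i915_gem_fini_scratch().
 */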
static int
i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int ret;

	obj = i915_gem_object_create_stolen(i915, size);
	if (!obj)
		obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate scratch page\n");
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}

	ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		goto err_unref;

	i915->gt.scratch = vma;
	return 0;

err_unref:
	i915_gem_object_put(obj);
	return ret;
}

static void i915_gem_fini_scratch(struct drm_i915_private *i915)
{
	i915_vma_unpin_and_release(&i915->gt.scratch, 0);
}

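/*
 * Debug-only (CONFIG_DRM_I915_DEBUG_GEM) cross-check: verify that the
 * per-engine workarounds programmed at "load" time are still in place,
 * returning -EIO if any engine reports a mismatch.
 */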
static int intel_engines_verify_workarounds(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
		return 0;

	for_each_engine(engine, i915, id) {
		if (intel_engine_verify_workarounds(engine, "load"))
			err = -EIO;
	}

	return err;
}

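/*
 * Main one-shot GEM initialisation during driver load: set up the GGTT
 * and scratch buffer, construct engines and contexts, bring up the uc
 * firmware and hardware, then record the default context image. Any
 * failure other than -EIO unwinds completely; -EIO instead wedges the
 * GPU while keeping enough state alive for KMS to continue.
 */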
int i915_gem_init(struct drm_i915_private *dev_priv)
{
	int ret;

	/* We need to fallback to 4K pages if host doesn't support huge gtt. */
	if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_huge_gtt(dev_priv))
		mkwrite_device_info(dev_priv)->page_sizes =
			I915_GTT_PAGE_SIZE_4K;

	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

	i915_timelines_init(dev_priv);

	ret = i915_gem_init_userptr(dev_priv);
	if (ret)
		return ret;

	ret = intel_uc_init_misc(dev_priv);
	if (ret)
		return ret;

	ret = intel_wopcm_init(&dev_priv->wopcm);
	if (ret)
		goto err_uc_misc;

	/* This is just a security blanket to placate dragons.
	 * On some systems, we very sporadically observe that the first TLBs
	 * used by the CS may be stale, despite us poking the TLB reset. If
	 * we hold the forcewake during initialisation these problems
	 * just magically go away.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL);

	ret = i915_gem_init_ggtt(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_init_scratch(dev_priv,
				    IS_GEN(dev_priv, 2) ? SZ_256K : PAGE_SIZE);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_ggtt;
	}

	ret = intel_engines_setup(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_unlock;
	}

	ret = i915_gem_contexts_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_scratch;
	}

	ret = intel_engines_init(dev_priv);
	if (ret) {
		GEM_BUG_ON(ret == -EIO);
		goto err_context;
	}

	intel_init_gt_powersave(dev_priv);

	ret = intel_uc_init(dev_priv);
	if (ret)
		goto err_pm;

	ret = i915_gem_init_hw(dev_priv);
	if (ret)
		goto err_uc_init;

	/*
	 * Despite its name, intel_init_clock_gating applies display clock
	 * gating workarounds, GT mmio workarounds and the occasional GT
	 * power context workaround. Worse, sometimes it includes a context
	 * register workaround which we need to apply before we record the
	 * default HW state for all contexts.
	 *
	 * FIXME: break up the workarounds and apply them at the right time!
	 */
	intel_init_clock_gating(dev_priv);

	ret = intel_engines_verify_workarounds(dev_priv);
	if (ret)
		goto err_init_hw;

	ret = __intel_engines_record_defaults(dev_priv);
	if (ret)
		goto err_init_hw;

	if (i915_inject_load_failure()) {
		ret = -ENODEV;
		goto err_init_hw;
	}

	if (i915_inject_load_failure()) {
		ret = -EIO;
		goto err_init_hw;
	}

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return 0;

	/*
	 * Unwinding is complicated by the fact that we want to handle -EIO
	 * to mean disable GPU submission but keep KMS alive. We want to mark
	 * the HW as irreversibly wedged, but keep enough state around that
	 * the driver doesn't explode during runtime.
	 */
err_init_hw:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_gem_set_wedged(dev_priv);
	i915_gem_suspend(dev_priv);
	i915_gem_suspend_late(dev_priv);

	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
err_uc_init:
	intel_uc_fini(dev_priv);
err_pm:
	if (ret != -EIO) {
		intel_cleanup_gt_powersave(dev_priv);
		intel_engines_cleanup(dev_priv);
	}
err_context:
	if (ret != -EIO)
		i915_gem_contexts_fini(dev_priv);
err_scratch:
	i915_gem_fini_scratch(dev_priv);
err_ggtt:
err_unlock:
	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	mutex_unlock(&dev_priv->drm.struct_mutex);

err_uc_misc:
	intel_uc_fini_misc(dev_priv);

	if (ret != -EIO) {
		i915_gem_cleanup_userptr(dev_priv);
		i915_timelines_fini(dev_priv);
	}

	if (ret == -EIO) {
		mutex_lock(&dev_priv->drm.struct_mutex);

		/*
		 * Allow engine initialisation to fail by marking the GPU as
		 * wedged. But we only want to do this where the GPU is angry,
		 * for all other failure, such as an allocation failure, bail.
		 */
		if (!i915_reset_failed(dev_priv)) {
			i915_load_error(dev_priv,
					"Failed to initialize GPU, declaring it wedged!\n");
			i915_gem_set_wedged(dev_priv);
		}

		/* Minimal basic recovery for KMS */
		ret = i915_ggtt_enable_hw(dev_priv);
		i915_gem_restore_gtt_mappings(dev_priv);
		i915_gem_restore_fences(dev_priv);
		intel_init_clock_gating(dev_priv);

		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	i915_gem_drain_freed_objects(dev_priv);
	return ret;
}

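/*
 * Driver unload counterpart of i915_gem_init: suspend the GT, flush
 * outstanding work, then tear down uc firmware, engines, contexts and
 * the scratch buffer in roughly the reverse order of initialisation.
 */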
void i915_gem_fini(struct drm_i915_private *dev_priv)
{
	GEM_BUG_ON(dev_priv->gt.awake);

	intel_wakeref_auto_fini(&dev_priv->mm.userfault_wakeref);

	i915_gem_suspend_late(dev_priv);
	intel_disable_gt_powersave(dev_priv);

	/* Flush any outstanding unpin_work. */
	i915_gem_drain_workqueue(dev_priv);

	mutex_lock(&dev_priv->drm.struct_mutex);
	intel_uc_fini_hw(dev_priv);
	intel_uc_fini(dev_priv);
	intel_engines_cleanup(dev_priv);
	i915_gem_contexts_fini(dev_priv);
	i915_gem_fini_scratch(dev_priv);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	intel_wa_list_free(&dev_priv->gt_wa_list);

	intel_cleanup_gt_powersave(dev_priv);

	intel_uc_fini_misc(dev_priv);
	i915_gem_cleanup_userptr(dev_priv);
	i915_timelines_fini(dev_priv);

	i915_gem_drain_freed_objects(dev_priv);

	WARN_ON(!list_empty(&dev_priv->contexts.list));
}

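/*
 * Called once MMIO access is available; currently all that is needed
 * is to sanitize whatever GPU state we inherited.
 */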
void i915_gem_init_mmio(struct drm_i915_private *i915)
{
	i915_gem_sanitize(i915);
}

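/*
 * The number of hardware fence registers varies with the platform:
 * 32 on gen7+ (except Valleyview/Cherryview, which get 16), 16 on
 * gen4+ and the i945/G33/Pineview parts, and 8 on the remaining
 * gen2/gen3 devices. When running as a vGPU, the count advertised
 * by the host via vgtif is used instead.
 */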
void
i915_gem_load_init_fences(struct drm_i915_private *dev_priv)
{
	int i;

	if (INTEL_GEN(dev_priv) >= 7 && !IS_VALLEYVIEW(dev_priv) &&
	    !IS_CHERRYVIEW(dev_priv))
		dev_priv->num_fence_regs = 32;
	else if (INTEL_GEN(dev_priv) >= 4 ||
		 IS_I945G(dev_priv) || IS_I945GM(dev_priv) ||
		 IS_G33(dev_priv) || IS_PINEVIEW(dev_priv))
		dev_priv->num_fence_regs = 16;
	else
		dev_priv->num_fence_regs = 8;

	if (intel_vgpu_active(dev_priv))
		dev_priv->num_fence_regs =
				I915_READ(vgtif_reg(avail_rs.fence_num));

	/* Initialize fence registers to zero */
	for (i = 0; i < dev_priv->num_fence_regs; i++) {
		struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i];

		fence->i915 = dev_priv;
		fence->id = i;
		list_add_tail(&fence->link, &dev_priv->mm.fence_list);
	}
	i915_gem_restore_fences(dev_priv);

	i915_gem_detect_bit_6_swizzle(dev_priv);
}

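/*
 * Set up the GEM memory-management bookkeeping: the object and
 * free-list locks, the bound/unbound/fence lists, and the userfault
 * tracking with its auto wakeref.
 */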
static void i915_gem_init__mm(struct drm_i915_private *i915)
{
	spin_lock_init(&i915->mm.object_stat_lock);
	spin_lock_init(&i915->mm.obj_lock);
	spin_lock_init(&i915->mm.free_lock);

	init_llist_head(&i915->mm.free_list);

	INIT_LIST_HEAD(&i915->mm.unbound_list);
	INIT_LIST_HEAD(&i915->mm.bound_list);
	INIT_LIST_HEAD(&i915->mm.fence_list);

	INIT_LIST_HEAD(&i915->mm.userfault_list);
	intel_wakeref_auto_init(&i915->mm.userfault_wakeref, i915);

	i915_gem_init__objects(i915);
}

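/*
 * Software-only setup performed before any hardware access: GT power
 * management bookkeeping, the memory-management structures above,
 * error-handling waitqueues and, optionally, a private tmpfs mount
 * used for huge-page backing of objects.
 */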
int i915_gem_init_early(struct drm_i915_private *dev_priv)
{
	int err;

	intel_gt_pm_init(dev_priv);

	INIT_LIST_HEAD(&dev_priv->gt.active_rings);
	INIT_LIST_HEAD(&dev_priv->gt.closed_vma);

	i915_gem_init__mm(dev_priv);
	i915_gem_init__pm(dev_priv);

	init_waitqueue_head(&dev_priv->gpu_error.wait_queue);
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
	mutex_init(&dev_priv->gpu_error.wedge_mutex);
	init_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

	spin_lock_init(&dev_priv->fb_tracking.lock);

	err = i915_gemfs_init(dev_priv);
	if (err)
		DRM_NOTE("Unable to create a private tmpfs mount, hugepage support will be disabled(%d).\n", err);

	return 0;
}

void i915_gem_cleanup_early(struct drm_i915_private *dev_priv)
{
	i915_gem_drain_freed_objects(dev_priv);
	GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list));
	GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count));
	WARN_ON(dev_priv->mm.object_count);

	cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu);

	i915_gemfs_fini(dev_priv);
}

int i915_gem_freeze(struct drm_i915_private *dev_priv)
{
	/* Discard all purgeable objects, let userspace recover those as
	 * required after resuming.
	 */
	i915_gem_shrink_all(dev_priv);

	return 0;
}

int i915_gem_freeze_late(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct list_head *phases[] = {
		&i915->mm.unbound_list,
		&i915->mm.bound_list,
		NULL
	}, **phase;

	/*
	 * Called just before we write the hibernation image.
	 *
	 * We need to update the domain tracking to reflect that the CPU
	 * will be accessing all the pages to create and restore from the
	 * hibernation, and so upon restoration those pages will be in the
	 * CPU domain.
	 *
	 * To make sure the hibernation image contains the latest state,
	 * we update that state just before writing out the image.
	 *
	 * To try and reduce the hibernation image, we manually shrink
	 * the objects as well, see i915_gem_freeze()
	 */

	i915_gem_shrink(i915, -1UL, NULL, I915_SHRINK_UNBOUND);
	i915_gem_drain_freed_objects(i915);

	mutex_lock(&i915->drm.struct_mutex);
	for (phase = phases; *phase; phase++) {
		list_for_each_entry(obj, *phase, mm.link)
			WARN_ON(i915_gem_object_set_to_cpu_domain(obj, true));
	}
	mutex_unlock(&i915->drm.struct_mutex);

	return 0;
}

void i915_gem_release(struct drm_device *dev, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct i915_request *request;

	/* Clean up our request list when the client is going away, so that
	 * later retire_requests won't dereference our soon-to-be-gone
	 * file_priv.
	 */
	spin_lock(&file_priv->mm.lock);
	list_for_each_entry(request, &file_priv->mm.request_list, client_link)
		request->file_priv = NULL;
	spin_unlock(&file_priv->mm.lock);
}

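/*
 * Called for every new open() of the DRM device: allocate the
 * per-client state and open its default context; the allocation is
 * freed again if context setup fails.
 */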
int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
{
	struct drm_i915_file_private *file_priv;
	int ret;

	DRM_DEBUG("\n");

	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
	if (!file_priv)
		return -ENOMEM;

	file->driver_priv = file_priv;
	file_priv->dev_priv = i915;
	file_priv->file = file;

	spin_lock_init(&file_priv->mm.lock);
	INIT_LIST_HEAD(&file_priv->mm.request_list);

	file_priv->bsd_engine = -1;
	file_priv->hang_timestamp = jiffies;

	ret = i915_gem_context_open(i915, file);
	if (ret)
		kfree(file_priv);

	return ret;
}

/**
 * i915_gem_track_fb - update frontbuffer tracking
 * @old: current GEM buffer for the frontbuffer slots
 * @new: new GEM buffer for the frontbuffer slots
 * @frontbuffer_bits: bitmask of frontbuffer slots
 *
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
 * from @old and setting them in @new. Both @old and @new can be NULL.
 */
void i915_gem_track_fb(struct drm_i915_gem_object *old,
		       struct drm_i915_gem_object *new,
		       unsigned frontbuffer_bits)
{
	/* Control of individual bits within the mask is guarded by
	 * the owning plane->mutex, i.e. we can never see concurrent
	 * manipulation of individual bits. But since the bitfield as a whole
	 * is updated using RMW, we need to use atomics in order to update
	 * the bits.
	 */
	BUILD_BUG_ON(INTEL_FRONTBUFFER_BITS_PER_PIPE * I915_MAX_PIPES >
		     BITS_PER_TYPE(atomic_t));

	if (old) {
		WARN_ON(!(atomic_read(&old->frontbuffer_bits) & frontbuffer_bits));
		atomic_andnot(frontbuffer_bits, &old->frontbuffer_bits);
	}

	if (new) {
		WARN_ON(atomic_read(&new->frontbuffer_bits) & frontbuffer_bits);
		atomic_or(frontbuffer_bits, &new->frontbuffer_bits);
	}
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gem_device.c"
#include "selftests/i915_gem.c"
#endif