i915_request.c 42.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

25
#include <linux/prefetch.h>
26
#include <linux/dma-fence-array.h>
27 28
#include <linux/sched.h>
#include <linux/sched/clock.h>
29
#include <linux/sched/signal.h>
30

31 32
#include "i915_drv.h"

33
/* dma_fence_ops.get_driver_name hook: every fence here reports "i915". */
static const char *i915_fence_get_driver_name(struct dma_fence *fence)
{
	return "i915";
}

38
static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
39
{
40 41
	/*
	 * The timeline struct (as part of the ppgtt underneath a context)
42 43 44 45 46 47 48 49 50 51
	 * may be freed when the request is no longer in use by the GPU.
	 * We could extend the life of a context to beyond that of all
	 * fences, possibly keeping the hw resource around indefinitely,
	 * or we just give them a false name. Since
	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
	 * lie seems justifiable.
	 */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return "signaled";

52
	return to_request(fence)->timeline->name;
53 54
}

55
/*
 * dma_fence_ops.signaled hook: returns the result of
 * i915_request_completed() for the request embedding @fence.
 */
static bool i915_fence_signaled(struct dma_fence *fence)
{
	return i915_request_completed(to_request(fence));
}

60
static bool i915_fence_enable_signaling(struct dma_fence *fence)
61
{
62
	return intel_engine_enable_signaling(to_request(fence), true);
63 64
}

65
static signed long i915_fence_wait(struct dma_fence *fence,
66
				   bool interruptible,
67
				   signed long timeout)
68
{
69
	return i915_request_wait(to_request(fence), interruptible, timeout);
70 71
}

72
static void i915_fence_release(struct dma_fence *fence)
73
{
74
	struct i915_request *rq = to_request(fence);
75

76 77
	/*
	 * The request is put onto a RCU freelist (i.e. the address
78 79 80 81 82
	 * is immediately reused), mark the fences as being freed now.
	 * Otherwise the debugobjects for the fences are only marked as
	 * freed when the slab cache itself is freed, and so we would get
	 * caught trying to reuse dead objects.
	 */
83
	i915_sw_fence_fini(&rq->submit);
84

85
	kmem_cache_free(rq->i915->requests, rq);
86 87
}

88
const struct dma_fence_ops i915_fence_ops = {
89 90 91 92 93 94 95 96
	.get_driver_name = i915_fence_get_driver_name,
	.get_timeline_name = i915_fence_get_timeline_name,
	.enable_signaling = i915_fence_enable_signaling,
	.signaled = i915_fence_signaled,
	.wait = i915_fence_wait,
	.release = i915_fence_release,
};

97
static inline void
98
i915_request_remove_from_client(struct i915_request *request)
99
{
100
	struct drm_i915_file_private *file_priv;
101

102
	file_priv = request->file_priv;
103 104 105 106
	if (!file_priv)
		return;

	spin_lock(&file_priv->mm.lock);
107 108 109 110
	if (request->file_priv) {
		list_del(&request->client_link);
		request->file_priv = NULL;
	}
111 112 113
	spin_unlock(&file_priv->mm.lock);
}

114 115 116 117 118 119 120 121 122 123 124 125 126 127
/*
 * Allocate one dependency from the i915->dependencies slab cache using
 * GFP_KERNEL. Returns whatever kmem_cache_alloc() returns; callers in this
 * file treat NULL as allocation failure.
 */
static struct i915_dependency *
i915_dependency_alloc(struct drm_i915_private *i915)
{
	struct i915_dependency *dep;

	dep = kmem_cache_alloc(i915->dependencies, GFP_KERNEL);

	return dep;
}

/* Return @dep to the i915->dependencies slab cache. */
static void
i915_dependency_free(struct drm_i915_private *i915,
		     struct i915_dependency *dep)
{
	struct kmem_cache *slab = i915->dependencies;

	kmem_cache_free(slab, dep);
}

static void
128 129 130 131
__i915_sched_node_add_dependency(struct i915_sched_node *node,
				 struct i915_sched_node *signal,
				 struct i915_dependency *dep,
				 unsigned long flags)
132
{
133
	INIT_LIST_HEAD(&dep->dfs_link);
134
	list_add(&dep->wait_link, &signal->waiters_list);
135
	list_add(&dep->signal_link, &node->signalers_list);
136 137 138 139 140
	dep->signaler = signal;
	dep->flags = flags;
}

static int
141 142 143
i915_sched_node_add_dependency(struct drm_i915_private *i915,
			       struct i915_sched_node *node,
			       struct i915_sched_node *signal)
144 145 146 147 148 149 150
{
	struct i915_dependency *dep;

	dep = i915_dependency_alloc(i915);
	if (!dep)
		return -ENOMEM;

151 152
	__i915_sched_node_add_dependency(node, signal, dep,
					 I915_DEPENDENCY_ALLOC);
153 154 155 156
	return 0;
}

static void
157 158
i915_sched_node_fini(struct drm_i915_private *i915,
		     struct i915_sched_node *node)
159
{
160
	struct i915_dependency *dep, *tmp;
161

162
	GEM_BUG_ON(!list_empty(&node->link));
163

164 165
	/*
	 * Everyone we depended upon (the fences we wait to be signaled)
166 167 168 169
	 * should retire before us and remove themselves from our list.
	 * However, retirement is run independently on each timeline and
	 * so we may be called out-of-order.
	 */
170 171
	list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
		GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler));
172 173
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

174 175 176 177 178 179
		list_del(&dep->wait_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}

	/* Remove ourselves from everyone who depends upon us */
180 181
	list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
		GEM_BUG_ON(dep->signaler != node);
182 183
		GEM_BUG_ON(!list_empty(&dep->dfs_link));

184 185 186 187 188 189 190
		list_del(&dep->signal_link);
		if (dep->flags & I915_DEPENDENCY_ALLOC)
			i915_dependency_free(i915, dep);
	}
}

static void
191
i915_sched_node_init(struct i915_sched_node *node)
192
{
193 194 195
	INIT_LIST_HEAD(&node->signalers_list);
	INIT_LIST_HEAD(&node->waiters_list);
	INIT_LIST_HEAD(&node->link);
196
	node->attr.priority = I915_PRIORITY_INVALID;
197 198
}

199 200 201
/*
 * Wait for the GPU to idle, then set every engine's global seqno (both the
 * hw state and engine->timeline.seqno) to @seqno, clear each timeline's
 * global_sync table and set gt.request_serial to @seqno.
 *
 * Returns 0 on success, or the error from i915_gem_wait_for_idle().
 */
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
	struct intel_engine_cs *engine;
	struct i915_timeline *timeline;
	enum intel_engine_id id;
	int ret;

	/* Carefully retire all requests without writing to the rings */
	ret = i915_gem_wait_for_idle(i915,
				     I915_WAIT_INTERRUPTIBLE |
				     I915_WAIT_LOCKED);
	if (ret)
		return ret;

	GEM_BUG_ON(i915->gt.active_requests);

	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
	for_each_engine(engine, i915, id) {
		GEM_TRACE("%s seqno %d (current %d) -> %d\n",
			  engine->name,
			  engine->timeline.seqno,
			  intel_engine_get_seqno(engine),
			  seqno);

		if (!i915_seqno_passed(seqno, engine->timeline.seqno)) {
			/* Flush any waiters before we reuse the seqno */
			intel_engine_disarm_breadcrumbs(engine);
			intel_engine_init_hangcheck(engine);
			GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals));
		}

		/* Check we are idle before we fiddle with hw state! */
		GEM_BUG_ON(!intel_engine_is_idle(engine));
		GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request));

		/* Finally reset hw state */
		intel_engine_init_global_seqno(engine, seqno);
		engine->timeline.seqno = seqno;
	}

	list_for_each_entry(timeline, &i915->gt.timelines, link)
		memset(timeline->global_sync, 0, sizeof(timeline->global_sync));

	i915->gt.request_serial = seqno;

	return 0;
}

/*
 * Set the global seqno so that the next one handed out is @seqno.
 *
 * The caller must hold struct_mutex. Returns -EINVAL for @seqno == 0,
 * otherwise the result of reset_all_global_seqno().
 */
int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno)
{
	struct drm_i915_private *i915 = to_i915(dev);

	lockdep_assert_held(&i915->drm.struct_mutex);

	if (seqno == 0)
		return -EINVAL;

	/* HWS page needs to be set less than what we will inject to ring */
	return reset_all_global_seqno(i915, seqno - 1);
}

260
static int reserve_gt(struct drm_i915_private *i915)
261
{
262 263
	int ret;

264 265 266 267 268 269 270 271 272
	/*
	 * Reservation is fine until we may need to wrap around
	 *
	 * By incrementing the serial for every request, we know that no
	 * individual engine may exceed that serial (as each is reset to 0
	 * on any wrap). This protects even the most pessimistic of migrations
	 * of every request from all engines onto just one.
	 */
	while (unlikely(++i915->gt.request_serial == 0)) {
273 274
		ret = reset_all_global_seqno(i915, 0);
		if (ret) {
275
			i915->gt.request_serial--;
276 277
			return ret;
		}
278 279
	}

280
	if (!i915->gt.active_requests++)
281
		i915_gem_unpark(i915);
282

283 284 285
	return 0;
}

286
static void unreserve_gt(struct drm_i915_private *i915)
287
{
288
	GEM_BUG_ON(!i915->gt.active_requests);
289 290
	if (!--i915->gt.active_requests)
		i915_gem_park(i915);
291 292
}

293
/* Retire callback that deliberately does nothing with either argument. */
void i915_gem_retire_noop(struct i915_gem_active *active,
			  struct i915_request *request)
{
	/* Space left intentionally blank */
}

299
static void advance_ring(struct i915_request *request)
300
{
301
	struct intel_ring *ring = request->ring;
302 303
	unsigned int tail;

304 305
	/*
	 * We know the GPU must have read the request to have
306 307 308 309 310 311 312
	 * sent us the seqno + interrupt, so use the position
	 * of tail of the request to update the last known position
	 * of the GPU head.
	 *
	 * Note this requires that we are always called in request
	 * completion order.
	 */
313 314
	GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list));
	if (list_is_last(&request->ring_link, &ring->request_list)) {
315 316
		/*
		 * We may race here with execlists resubmitting this request
317 318 319 320 321 322
		 * as we retire it. The resubmission will move the ring->tail
		 * forwards (to request->wa_tail). We either read the
		 * current value that was written to hw, or the value that
		 * is just about to be. Either works, if we miss the last two
		 * noops - they are safe to be replayed on a reset.
		 */
323
		tail = READ_ONCE(request->tail);
324
		list_del(&ring->active_link);
325
	} else {
326
		tail = request->postfix;
327
	}
328
	list_del_init(&request->ring_link);
329

330
	ring->head = tail;
331 332
}

333
static void free_capture_list(struct i915_request *request)
334
{
335
	struct i915_capture_list *capture;
336 337 338

	capture = request->capture_list;
	while (capture) {
339
		struct i915_capture_list *next = capture->next;
340 341 342 343 344 345

		kfree(capture);
		capture = next;
	}
}

346 347 348 349 350 351 352 353 354 355 356 357 358
static void __retire_engine_request(struct intel_engine_cs *engine,
				    struct i915_request *rq)
{
	GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n",
		  __func__, engine->name,
		  rq->fence.context, rq->fence.seqno,
		  rq->global_seqno,
		  intel_engine_get_seqno(engine));

	GEM_BUG_ON(!i915_request_completed(rq));

	local_irq_disable();

359 360
	spin_lock(&engine->timeline.lock);
	GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests));
361
	list_del_init(&rq->link);
362
	spin_unlock(&engine->timeline.lock);
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386

	spin_lock(&rq->lock);
	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		dma_fence_signal_locked(&rq->fence);
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags))
		intel_engine_cancel_signaling(rq);
	if (rq->waitboost) {
		GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters));
		atomic_dec(&rq->i915->gt_pm.rps.num_waiters);
	}
	spin_unlock(&rq->lock);

	local_irq_enable();

	/*
	 * The backing object for the context is done after switching to the
	 * *next* context. Therefore we cannot retire the previous context until
	 * the next context has already started running. However, since we
	 * cannot take the required locks at i915_request_submit() we
	 * defer the unpinning of the active context to now, retirement of
	 * the subsequent request.
	 */
	if (engine->last_retired_context)
		intel_context_unpin(engine->last_retired_context, engine);
C
Chris Wilson 已提交
387
	engine->last_retired_context = rq->gem_context;
388 389 390 391 392 393 394 395 396 397 398
}

static void __retire_engine_upto(struct intel_engine_cs *engine,
				 struct i915_request *rq)
{
	struct i915_request *tmp;

	if (list_empty(&rq->link))
		return;

	do {
399
		tmp = list_first_entry(&engine->timeline.requests,
400 401 402 403 404 405 406
				       typeof(*tmp), link);

		GEM_BUG_ON(tmp->engine != engine);
		__retire_engine_request(engine, tmp);
	} while (tmp != rq);
}

407
static void i915_request_retire(struct i915_request *request)
408
{
409 410
	struct i915_gem_active *active, *next;

411
	GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
412
		  request->engine->name,
413
		  request->fence.context, request->fence.seqno,
414
		  request->global_seqno,
415
		  intel_engine_get_seqno(request->engine));
416

417
	lockdep_assert_held(&request->i915->drm.struct_mutex);
418
	GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit));
419
	GEM_BUG_ON(!i915_request_completed(request));
420

421
	trace_i915_request_retire(request);
C
Chris Wilson 已提交
422

423
	advance_ring(request);
424 425
	free_capture_list(request);

426 427
	/*
	 * Walk through the active list, calling retire on each. This allows
428 429 430 431 432 433 434 435 436
	 * objects to track their GPU activity and mark themselves as idle
	 * when their *last* active request is completed (updating state
	 * tracking lists for eviction, active references for GEM, etc).
	 *
	 * As the ->retire() may free the node, we decouple it first and
	 * pass along the auxiliary information (to avoid dereferencing
	 * the node after the callback).
	 */
	list_for_each_entry_safe(active, next, &request->active_list, link) {
437 438
		/*
		 * In microbenchmarks or focusing upon time inside the kernel,
439 440 441 442 443 444 445 446 447 448 449
		 * we may spend an inordinate amount of time simply handling
		 * the retirement of requests and processing their callbacks.
		 * Of which, this loop itself is particularly hot due to the
		 * cache misses when jumping around the list of i915_gem_active.
		 * So we try to keep this loop as streamlined as possible and
		 * also prefetch the next i915_gem_active to try and hide
		 * the likely cache miss.
		 */
		prefetchw(next);

		INIT_LIST_HEAD(&active->link);
450
		RCU_INIT_POINTER(active->request, NULL);
451 452 453 454

		active->retire(active, request);
	}

455
	i915_request_remove_from_client(request);
456

457
	/* Retirement decays the ban score as it is a sign of ctx progress */
C
Chris Wilson 已提交
458 459
	atomic_dec_if_positive(&request->gem_context->ban_score);
	intel_context_unpin(request->gem_context, request->engine);
460

461
	__retire_engine_upto(request->engine, request);
462

463 464
	unreserve_gt(request->i915);

465
	i915_sched_node_fini(request->i915, &request->sched);
466
	i915_request_put(request);
467 468
}

469
void i915_request_retire_upto(struct i915_request *rq)
470
{
471
	struct intel_ring *ring = rq->ring;
472
	struct i915_request *tmp;
473

474 475 476 477 478 479
	GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n",
		  rq->engine->name,
		  rq->fence.context, rq->fence.seqno,
		  rq->global_seqno,
		  intel_engine_get_seqno(rq->engine));

480 481
	lockdep_assert_held(&rq->i915->drm.struct_mutex);
	GEM_BUG_ON(!i915_request_completed(rq));
482

483
	if (list_empty(&rq->ring_link))
484
		return;
485 486

	do {
487 488
		tmp = list_first_entry(&ring->request_list,
				       typeof(*tmp), ring_link);
489

490 491
		i915_request_retire(tmp);
	} while (tmp != rq);
492 493
}

494
static u32 timeline_get_seqno(struct i915_timeline *tl)
495
{
496
	return ++tl->seqno;
497 498
}

499
static void move_to_timeline(struct i915_request *request,
500
			     struct i915_timeline *timeline)
501
{
502 503
	GEM_BUG_ON(request->timeline == &request->engine->timeline);
	lockdep_assert_held(&request->engine->timeline.lock);
504

505
	spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING);
506 507 508 509
	list_move_tail(&request->link, &timeline->requests);
	spin_unlock(&request->timeline->lock);
}

510
void __i915_request_submit(struct i915_request *request)
511
{
512
	struct intel_engine_cs *engine = request->engine;
513
	u32 seqno;
514

515
	GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n",
516
		  engine->name,
517
		  request->fence.context, request->fence.seqno,
518
		  engine->timeline.seqno + 1,
519
		  intel_engine_get_seqno(engine));
520

521
	GEM_BUG_ON(!irqs_disabled());
522
	lockdep_assert_held(&engine->timeline.lock);
523

524
	GEM_BUG_ON(request->global_seqno);
525

526
	seqno = timeline_get_seqno(&engine->timeline);
527 528 529 530 531 532 533
	GEM_BUG_ON(!seqno);
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno));

	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
	request->global_seqno = seqno;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
534
		intel_engine_enable_signaling(request, false);
535 536
	spin_unlock(&request->lock);

C
Chris Wilson 已提交
537 538
	engine->emit_breadcrumb(request,
				request->ring->vaddr + request->postfix);
539

540
	/* Transfer from per-context onto the global per-engine timeline */
541
	move_to_timeline(request, &engine->timeline);
C
Chris Wilson 已提交
542

543
	trace_i915_request_execute(request);
544

545
	wake_up_all(&request->execute);
546 547
}

548
void i915_request_submit(struct i915_request *request)
549 550 551
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;
552

553
	/* Will be called from irq-context when using foreign fences. */
554
	spin_lock_irqsave(&engine->timeline.lock, flags);
555

556
	__i915_request_submit(request);
557

558
	spin_unlock_irqrestore(&engine->timeline.lock, flags);
559 560
}

561
void __i915_request_unsubmit(struct i915_request *request)
562
{
563
	struct intel_engine_cs *engine = request->engine;
564

565
	GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n",
566
		  engine->name,
567
		  request->fence.context, request->fence.seqno,
568 569
		  request->global_seqno,
		  intel_engine_get_seqno(engine));
570

571
	GEM_BUG_ON(!irqs_disabled());
572
	lockdep_assert_held(&engine->timeline.lock);
573

574 575
	/*
	 * Only unwind in reverse order, required so that the per-context list
576 577
	 * is kept in seqno/ring order.
	 */
578
	GEM_BUG_ON(!request->global_seqno);
579
	GEM_BUG_ON(request->global_seqno != engine->timeline.seqno);
580 581
	GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine),
				     request->global_seqno));
582
	engine->timeline.seqno--;
C
Chris Wilson 已提交
583

584 585 586 587 588 589 590 591
	/* We may be recursing from the signal callback of another i915 fence */
	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
	request->global_seqno = 0;
	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
		intel_engine_cancel_signaling(request);
	spin_unlock(&request->lock);

	/* Transfer back from the global per-engine timeline to per-context */
592
	move_to_timeline(request, request->timeline);
593

594 595
	/*
	 * We don't need to wake_up any waiters on request->execute, they
596
	 * will get woken by any other event or us re-adding this request
597
	 * to the engine timeline (__i915_request_submit()). The waiters
598 599 600 601 602
	 * should be quite adapt at finding that the request now has a new
	 * global_seqno to the one they went to sleep on.
	 */
}

603
void i915_request_unsubmit(struct i915_request *request)
604 605 606 607 608
{
	struct intel_engine_cs *engine = request->engine;
	unsigned long flags;

	/* Will be called from irq-context when using foreign fences. */
609
	spin_lock_irqsave(&engine->timeline.lock, flags);
610

611
	__i915_request_unsubmit(request);
612

613
	spin_unlock_irqrestore(&engine->timeline.lock, flags);
614 615
}

616
static int __i915_sw_fence_call
617
submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
618
{
619
	struct i915_request *request =
620 621 622 623
		container_of(fence, typeof(*request), submit);

	switch (state) {
	case FENCE_COMPLETE:
624
		trace_i915_request_submit(request);
625
		/*
626 627 628 629 630 631
		 * We need to serialize use of the submit_request() callback
		 * with its hotplugging performed during an emergency
		 * i915_gem_set_wedged().  We use the RCU mechanism to mark the
		 * critical section in order to force i915_gem_set_wedged() to
		 * wait until the submit_request() is completed before
		 * proceeding.
632 633
		 */
		rcu_read_lock();
634
		request->engine->submit_request(request);
635
		rcu_read_unlock();
636 637 638
		break;

	case FENCE_FREE:
639
		i915_request_put(request);
640 641 642
		break;
	}

643 644 645
	return NOTIFY_DONE;
}

646
/**
647
 * i915_request_alloc - allocate a request structure
648 649 650 651 652 653 654
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
 */
655 656
struct i915_request *
i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx)
657
{
658 659
	struct drm_i915_private *i915 = engine->i915;
	struct i915_request *rq;
660
	struct intel_ring *ring;
661 662
	int ret;

663
	lockdep_assert_held(&i915->drm.struct_mutex);
664

665 666 667 668 669
	/*
	 * Preempt contexts are reserved for exclusive use to inject a
	 * preemption context switch. They are never to be used for any trivial
	 * request!
	 */
670
	GEM_BUG_ON(ctx == i915->preempt_context);
671

672 673
	/*
	 * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
674
	 * EIO if the GPU is already wedged.
675
	 */
676
	if (i915_terminally_wedged(&i915->gpu_error))
677
		return ERR_PTR(-EIO);
678

679 680
	/*
	 * Pinning the contexts may generate requests in order to acquire
681 682 683
	 * GGTT space, so do this first before we reserve a seqno for
	 * ourselves.
	 */
684
	ring = intel_context_pin(ctx, engine);
685 686 687
	if (IS_ERR(ring))
		return ERR_CAST(ring);
	GEM_BUG_ON(!ring);
688

689
	ret = reserve_gt(i915);
690 691 692
	if (ret)
		goto err_unpin;

693 694 695 696
	ret = intel_ring_wait_for_space(ring, MIN_SPACE_FOR_ADD_REQUEST);
	if (ret)
		goto err_unreserve;

697
	/* Move our oldest request to the slab-cache (if not in use!) */
698 699 700
	rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link);
	if (!list_is_last(&rq->ring_link, &ring->request_list) &&
	    i915_request_completed(rq))
701
		i915_request_retire(rq);
702

703 704
	/*
	 * Beware: Dragons be flying overhead.
705 706 707 708
	 *
	 * We use RCU to look up requests in flight. The lookups may
	 * race with the request being allocated from the slab freelist.
	 * That is the request we are writing to here, may be in the process
709
	 * of being read by __i915_gem_active_get_rcu(). As such,
710 711
	 * we have to be very careful when overwriting the contents. During
	 * the RCU lookup, we change chase the request->engine pointer,
712
	 * read the request->global_seqno and increment the reference count.
713 714 715 716
	 *
	 * The reference count is incremented atomically. If it is zero,
	 * the lookup knows the request is unallocated and complete. Otherwise,
	 * it is either still in use, or has been reallocated and reset
717 718
	 * with dma_fence_init(). This increment is safe for release as we
	 * check that the request we have a reference to and matches the active
719 720 721 722 723 724 725 726 727 728 729 730 731
	 * request.
	 *
	 * Before we increment the refcount, we chase the request->engine
	 * pointer. We must not call kmem_cache_zalloc() or else we set
	 * that pointer to NULL and cause a crash during the lookup. If
	 * we see the request is completed (based on the value of the
	 * old engine and seqno), the lookup is complete and reports NULL.
	 * If we decide the request is not completed (new engine or seqno),
	 * then we grab a reference and double check that it is still the
	 * active request - which it won't be and restart the lookup.
	 *
	 * Do not use kmem_cache_zalloc() here!
	 */
732 733 734
	rq = kmem_cache_alloc(i915->requests,
			      GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
	if (unlikely(!rq)) {
735
		/* Ratelimit ourselves to prevent oom from malicious clients */
736
		ret = i915_gem_wait_for_idle(i915,
737 738 739 740 741
					     I915_WAIT_LOCKED |
					     I915_WAIT_INTERRUPTIBLE);
		if (ret)
			goto err_unreserve;

742 743 744 745 746 747 748 749
		/*
		 * We've forced the client to stall and catch up with whatever
		 * backlog there might have been. As we are assuming that we
		 * caused the mempressure, now is an opportune time to
		 * recover as much memory from the request pool as is possible.
		 * Having already penalized the client to stall, we spend
		 * a little extra time to re-optimise page allocation.
		 */
750
		kmem_cache_shrink(i915->requests);
751 752
		rcu_barrier(); /* Recover the TYPESAFE_BY_RCU pages */

753 754
		rq = kmem_cache_alloc(i915->requests, GFP_KERNEL);
		if (!rq) {
755 756 757
			ret = -ENOMEM;
			goto err_unreserve;
		}
758
	}
759

760 761 762
	INIT_LIST_HEAD(&rq->active_list);
	rq->i915 = i915;
	rq->engine = engine;
C
Chris Wilson 已提交
763
	rq->gem_context = ctx;
764 765
	rq->ring = ring;
	rq->timeline = ring->timeline;
766
	GEM_BUG_ON(rq->timeline == &engine->timeline);
767

768 769
	spin_lock_init(&rq->lock);
	dma_fence_init(&rq->fence,
770
		       &i915_fence_ops,
771 772 773
		       &rq->lock,
		       rq->timeline->fence_context,
		       timeline_get_seqno(rq->timeline));
774

775
	/* We bump the ref for the fence chain */
776 777
	i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify);
	init_waitqueue_head(&rq->execute);
778

779
	i915_sched_node_init(&rq->sched);
780

781
	/* No zalloc, must clear what we need by hand */
782 783 784 785 786 787
	rq->global_seqno = 0;
	rq->signaling.wait.seqno = 0;
	rq->file_priv = NULL;
	rq->batch = NULL;
	rq->capture_list = NULL;
	rq->waitboost = false;
788

789 790 791
	/*
	 * Reserve space in the ring buffer for all the commands required to
	 * eventually emit this request. This is to guarantee that the
792
	 * i915_request_add() call can't fail. Note that the reserve may need
793 794 795
	 * to be redone if the request is not actually submitted straight
	 * away, e.g. because a GPU scheduler has deferred it.
	 */
796 797
	rq->reserved_space = MIN_SPACE_FOR_ADD_REQUEST;
	GEM_BUG_ON(rq->reserved_space < engine->emit_breadcrumb_sz);
798

799 800
	/*
	 * Record the position of the start of the request so that
801 802 803 804
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
	 * position of the head.
	 */
805
	rq->head = rq->ring->emit;
806

807
	/* Unconditionally invalidate GPU caches and TLBs. */
808
	ret = engine->emit_flush(rq, EMIT_INVALIDATE);
809
	if (ret)
810
		goto err_unwind;
811

812
	ret = engine->request_alloc(rq);
813 814
	if (ret)
		goto err_unwind;
815

816
	/* Keep a second pin for the dual retirement along engine and ring */
C
Chris Wilson 已提交
817
	__intel_context_pin(rq->gem_context, engine);
818

819
	/* Check that we didn't interrupt ourselves with a new request */
820 821
	GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno);
	return rq;
822

823
err_unwind:
824
	rq->ring->emit = rq->head;
825

826
	/* Make sure we didn't add ourselves to external state before freeing */
827
	GEM_BUG_ON(!list_empty(&rq->active_list));
828 829
	GEM_BUG_ON(!list_empty(&rq->sched.signalers_list));
	GEM_BUG_ON(!list_empty(&rq->sched.waiters_list));
830

831
	kmem_cache_free(i915->requests, rq);
832
err_unreserve:
833
	unreserve_gt(i915);
834
err_unpin:
835
	intel_context_unpin(ctx, engine);
836
	return ERR_PTR(ret);
837 838
}

839
static int
840
i915_request_await_request(struct i915_request *to, struct i915_request *from)
841
{
842
	int ret;
843 844

	GEM_BUG_ON(to == from);
845
	GEM_BUG_ON(to->timeline == from->timeline);
846

847
	if (i915_request_completed(from))
848 849
		return 0;

850
	if (to->engine->schedule) {
851 852 853
		ret = i915_sched_node_add_dependency(to->i915,
						     &to->sched,
						     &from->sched);
854 855 856 857
		if (ret < 0)
			return ret;
	}

858 859 860
	if (to->engine == from->engine) {
		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
						       &from->submit,
861
						       I915_FENCE_GFP);
862 863 864
		return ret < 0 ? ret : 0;
	}

865 866
	if (to->engine->semaphore.sync_to) {
		u32 seqno;
867

868
		GEM_BUG_ON(!from->engine->semaphore.signal);
869

870
		seqno = i915_request_global_seqno(from);
871
		if (!seqno)
872
			goto await_dma_fence;
873

874 875 876 877
		if (seqno <= to->timeline->global_sync[from->engine->id])
			return 0;

		trace_i915_gem_ring_sync_to(to, from);
878 879 880
		ret = to->engine->semaphore.sync_to(to, from);
		if (ret)
			return ret;
881 882

		to->timeline->global_sync[from->engine->id] = seqno;
883
		return 0;
884 885
	}

886 887 888
await_dma_fence:
	ret = i915_sw_fence_await_dma_fence(&to->submit,
					    &from->fence, 0,
889
					    I915_FENCE_GFP);
890
	return ret < 0 ? ret : 0;
891 892
}

893
int
894
i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
895
{
896 897
	struct dma_fence **child = &fence;
	unsigned int nchild = 1;
898 899
	int ret;

900 901
	/*
	 * Note that if the fence-array was created in signal-on-any mode,
902 903 904 905 906 907
	 * we should *not* decompose it into its individual fences. However,
	 * we don't currently store which mode the fence-array is operating
	 * in. Fortunately, the only user of signal-on-any is private to
	 * amdgpu and we should not see any incoming fence-array from
	 * sync-file being in signal-on-any mode.
	 */
908 909 910 911 912 913 914
	if (dma_fence_is_array(fence)) {
		struct dma_fence_array *array = to_dma_fence_array(fence);

		child = array->fences;
		nchild = array->num_fences;
		GEM_BUG_ON(!nchild);
	}
915

916 917 918 919
	do {
		fence = *child++;
		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
			continue;
920

921 922
		/*
		 * Requests on the same timeline are explicitly ordered, along
923
		 * with their dependencies, by i915_request_add() which ensures
924 925
		 * that requests are submitted in-order through each ring.
		 */
926
		if (fence->context == rq->fence.context)
927 928
			continue;

929
		/* Squash repeated waits to the same timelines */
930
		if (fence->context != rq->i915->mm.unordered_timeline &&
931
		    i915_timeline_sync_is_later(rq->timeline, fence))
932 933
			continue;

934
		if (dma_fence_is_i915(fence))
935
			ret = i915_request_await_request(rq, to_request(fence));
936
		else
937
			ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
938
							    I915_FENCE_TIMEOUT,
939
							    I915_FENCE_GFP);
940 941
		if (ret < 0)
			return ret;
942 943

		/* Record the latest fence used against each timeline */
944
		if (fence->context != rq->i915->mm.unordered_timeline)
945
			i915_timeline_sync_set(rq->timeline, fence);
946
	} while (--nchild);
947 948 949 950

	return 0;
}

951
/**
952
 * i915_request_await_object - set this request to (async) wait upon a bo
953 954
 * @to: request we are wishing to use
 * @obj: object which may be in use on another ring.
955
 * @write: whether the wait is on behalf of a writer
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971
 *
 * This code is meant to abstract object synchronization with the GPU.
 * Conceptually we serialise writes between engines inside the GPU.
 * We only allow one engine to write into a buffer at any time, but
 * multiple readers. To ensure each has a coherent view of memory, we must:
 *
 * - If there is an outstanding write request to the object, the new
 *   request must wait for it to complete (either CPU or in hw, requests
 *   on the same ring will be naturally ordered).
 *
 * - If we are a write request (pending_write_domain is set), the new
 *   request must wait for outstanding read requests to complete.
 *
 * Returns 0 if successful, else propagates up the lower layer error.
 */
int
972 973 974
i915_request_await_object(struct i915_request *to,
			  struct drm_i915_gem_object *obj,
			  bool write)
975
{
976 977
	struct dma_fence *excl;
	int ret = 0;
978 979

	if (write) {
980 981 982 983 984 985 986 987 988
		struct dma_fence **shared;
		unsigned int count, i;

		ret = reservation_object_get_fences_rcu(obj->resv,
							&excl, &count, &shared);
		if (ret)
			return ret;

		for (i = 0; i < count; i++) {
989
			ret = i915_request_await_dma_fence(to, shared[i]);
990 991 992 993 994 995 996 997 998
			if (ret)
				break;

			dma_fence_put(shared[i]);
		}

		for (; i < count; i++)
			dma_fence_put(shared[i]);
		kfree(shared);
999
	} else {
1000
		excl = reservation_object_get_excl_rcu(obj->resv);
1001 1002
	}

1003 1004
	if (excl) {
		if (ret == 0)
1005
			ret = i915_request_await_dma_fence(to, excl);
1006

1007
		dma_fence_put(excl);
1008 1009
	}

1010
	return ret;
1011 1012
}

1013 1014 1015 1016 1017
/*
 * NB: This function is not allowed to fail. Doing so would mean the
 * request is not being tracked for completion but the work itself is
 * going to happen on the hardware. This would be a Bad Thing(tm).
 */
1018
void __i915_request_add(struct i915_request *request, bool flush_caches)
1019
{
1020 1021
	struct intel_engine_cs *engine = request->engine;
	struct intel_ring *ring = request->ring;
1022
	struct i915_timeline *timeline = request->timeline;
1023
	struct i915_request *prev;
1024
	u32 *cs;
C
Chris Wilson 已提交
1025
	int err;
1026

1027 1028 1029
	GEM_TRACE("%s fence %llx:%d\n",
		  engine->name, request->fence.context, request->fence.seqno);

1030
	lockdep_assert_held(&request->i915->drm.struct_mutex);
1031
	trace_i915_request_add(request);
1032

1033 1034
	/*
	 * Make sure that no request gazumped us - if it was allocated after
1035
	 * our i915_request_alloc() and called __i915_request_add() before
1036 1037
	 * us, the timeline will hold its seqno which is later than ours.
	 */
1038
	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
1039

1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054
	/*
	 * To ensure that this call will not fail, space for its emissions
	 * should already have been reserved in the ring buffer. Let the ring
	 * know that it is time to use that space up.
	 */
	request->reserved_space = 0;

	/*
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
	 * after having emitted the batchbuffer command. Hence we need to fix
	 * things up similar to emitting the lazy request. The difference here
	 * is that the flush _must_ happen before the next request, no matter
	 * what.
	 */
	if (flush_caches) {
C
Chris Wilson 已提交
1055
		err = engine->emit_flush(request, EMIT_FLUSH);
1056

1057
		/* Not allowed to fail! */
C
Chris Wilson 已提交
1058
		WARN(err, "engine->emit_flush() failed: %d!\n", err);
1059 1060
	}

1061 1062
	/*
	 * Record the position of the start of the breadcrumb so that
1063 1064
	 * should we detect the updated seqno part-way through the
	 * GPU processing the request, we never over-estimate the
1065
	 * position of the ring's HEAD.
1066
	 */
1067 1068 1069
	cs = intel_ring_begin(request, engine->emit_breadcrumb_sz);
	GEM_BUG_ON(IS_ERR(cs));
	request->postfix = intel_ring_offset(request, cs);
1070

1071 1072
	/*
	 * Seal the request and mark it as pending execution. Note that
1073 1074 1075 1076
	 * we may inspect this state, without holding any locks, during
	 * hangcheck. Hence we apply the barrier to ensure that we do not
	 * see a more recent value in the hws than we are tracking.
	 */
1077

1078
	prev = i915_gem_active_raw(&timeline->last_request,
1079
				   &request->i915->drm.struct_mutex);
1080
	if (prev && !i915_request_completed(prev)) {
1081 1082
		i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
					     &request->submitq);
1083
		if (engine->schedule)
1084 1085 1086 1087
			__i915_sched_node_add_dependency(&request->sched,
							 &prev->sched,
							 &request->dep,
							 0);
1088
	}
1089

C
Chris Wilson 已提交
1090
	spin_lock_irq(&timeline->lock);
1091
	list_add_tail(&request->link, &timeline->requests);
C
Chris Wilson 已提交
1092 1093
	spin_unlock_irq(&timeline->lock);

1094
	GEM_BUG_ON(timeline->seqno != request->fence.seqno);
1095
	i915_gem_active_set(&timeline->last_request, request);
1096

1097
	list_add_tail(&request->ring_link, &ring->request_list);
1098 1099
	if (list_is_first(&request->ring_link, &ring->request_list))
		list_add(&ring->active_link, &request->i915->gt.active_rings);
1100
	request->emitted_jiffies = jiffies;
1101

1102 1103
	/*
	 * Let the backend know a new request has arrived that may need
1104 1105 1106 1107 1108 1109 1110 1111 1112
	 * to adjust the existing execution schedule due to a high priority
	 * request - i.e. we may want to preempt the current request in order
	 * to run a high priority dependency chain *before* we can execute this
	 * request.
	 *
	 * This is called before the request is ready to run so that we can
	 * decide whether to preempt the entire chain so that it is ready to
	 * run at the earliest possible convenience.
	 */
1113 1114
	local_bh_disable();
	rcu_read_lock(); /* RCU serialisation for set-wedged protection */
1115
	if (engine->schedule)
C
Chris Wilson 已提交
1116
		engine->schedule(request, &request->gem_context->sched);
1117
	rcu_read_unlock();
1118 1119
	i915_sw_fence_commit(&request->submit);
	local_bh_enable(); /* Kick the execlists tasklet if just scheduled */
1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137

	/*
	 * In typical scenarios, we do not expect the previous request on
	 * the timeline to be still tracked by timeline->last_request if it
	 * has been completed. If the completed request is still here, that
	 * implies that request retirement is a long way behind submission,
	 * suggesting that we haven't been retiring frequently enough from
	 * the combination of retire-before-alloc, waiters and the background
	 * retirement worker. So if the last request on this timeline was
	 * already completed, do a catch up pass, flushing the retirement queue
	 * up to this client. Since we have now moved the heaviest operations
	 * during retirement onto secondary workers, such as freeing objects
	 * or contexts, retiring a bunch of requests is mostly list management
	 * (and cache misses), and so we should not be overly penalizing this
	 * client by performing excess work, though we may still performing
	 * work on behalf of others -- but instead we should benefit from
	 * improved resource management. (Well, that's the theory at least.)
	 */
1138 1139
	if (prev && i915_request_completed(prev))
		i915_request_retire_upto(prev);
1140 1141 1142 1143 1144 1145
}

/*
 * Sample the local clock in approximate microseconds.
 *
 * @cpu: out parameter, set to the value returned by get_cpu() for the
 *       sample
 *
 * Returns local_clock() shifted right by 10 bits.
 */
static unsigned long local_clock_us(unsigned int *cpu)
{
	unsigned long t;

	/*
	 * Cheaply and approximately convert from nanoseconds to microseconds.
	 * The result and subsequent calculations are also defined in the same
	 * approximate microseconds units. The principal source of timing
	 * error here is from the simple truncation.
	 *
	 * Note that local_clock() is only defined wrt to the current CPU;
	 * the comparisons are no longer valid if we switch CPUs. Instead of
	 * blocking preemption for the entire busywait, we can detect the CPU
	 * switch and use that as indicator of system load and a reason to
	 * stop busywaiting, see busywait_stop().
	 */
	*cpu = get_cpu();
	t = local_clock() >> 10;
	put_cpu();

	return t;
}

/*
 * Decide whether a busywait should be abandoned.
 *
 * @timeout: deadline, in the units returned by local_clock_us()
 * @cpu: CPU recorded when the busywait started
 *
 * Returns true once the current sample is after @timeout, or when the
 * sample was taken on a different CPU than @cpu.
 */
static bool busywait_stop(unsigned long timeout, unsigned int cpu)
{
	unsigned int cur_cpu;
	unsigned long now = local_clock_us(&cur_cpu);

	return time_after(now, timeout) || cur_cpu != cpu;
}

1175
static bool __i915_spin_request(const struct i915_request *rq,
1176
				u32 seqno, int state, unsigned long timeout_us)
1177
{
1178
	struct intel_engine_cs *engine = rq->engine;
1179
	unsigned int irq, cpu;
1180

1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196
	GEM_BUG_ON(!seqno);

	/*
	 * Only wait for the request if we know it is likely to complete.
	 *
	 * We don't track the timestamps around requests, nor the average
	 * request length, so we do not have a good indicator that this
	 * request will complete within the timeout. What we do know is the
	 * order in which requests are executed by the engine and so we can
	 * tell if the request has started. If the request hasn't started yet,
	 * it is a fair assumption that it will not complete within our
	 * relatively short timeout.
	 */
	if (!i915_seqno_passed(intel_engine_get_seqno(engine), seqno - 1))
		return false;

1197 1198
	/*
	 * When waiting for high frequency requests, e.g. during synchronous
1199 1200 1201 1202 1203 1204 1205 1206 1207
	 * rendering split between the CPU and GPU, the finite amount of time
	 * required to set up the irq and wait upon it limits the response
	 * rate. By busywaiting on the request completion for a short while we
	 * can service the high frequency waits as quick as possible. However,
	 * if it is a slow request, we want to sleep as quickly as possible.
	 * The tradeoff between waiting and sleeping is roughly the time it
	 * takes to sleep on a request, on the order of a microsecond.
	 */

1208
	irq = atomic_read(&engine->irq_count);
1209 1210
	timeout_us += local_clock_us(&cpu);
	do {
1211
		if (i915_seqno_passed(intel_engine_get_seqno(engine), seqno))
1212
			return seqno == i915_request_global_seqno(rq);
1213

1214 1215
		/*
		 * Seqno are meant to be ordered *before* the interrupt. If
1216 1217 1218 1219 1220 1221 1222
		 * we see an interrupt without a corresponding seqno advance,
		 * assume we won't see one in the near future but require
		 * the engine->seqno_barrier() to fixup coherency.
		 */
		if (atomic_read(&engine->irq_count) != irq)
			break;

1223 1224 1225 1226 1227 1228
		if (signal_pending_state(state, current))
			break;

		if (busywait_stop(timeout_us, cpu))
			break;

1229
		cpu_relax();
1230 1231 1232 1233 1234
	} while (!need_resched());

	return false;
}

1235
static bool __i915_wait_request_check_and_reset(struct i915_request *request)
1236
{
1237 1238 1239
	struct i915_gpu_error *error = &request->i915->gpu_error;

	if (likely(!i915_reset_handoff(error)))
1240
		return false;
1241

1242
	__set_current_state(TASK_RUNNING);
1243
	i915_reset(request->i915, error->stalled_mask, error->reason);
1244
	return true;
1245 1246
}

1247
/**
1248
 * i915_request_wait - wait until execution of request has finished
1249
 * @rq: the request to wait upon
1250
 * @flags: how to wait
1251 1252
 * @timeout: how long to wait in jiffies
 *
1253
 * i915_request_wait() waits for the request to be completed, for a
1254 1255
 * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an
 * unbounded wait).
1256
 *
1257 1258 1259
 * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED
 * in via the flags, and vice versa if the struct_mutex is not held, the caller
 * must not specify that the wait is locked.
1260
 *
1261 1262 1263 1264
 * Returns the remaining time (in jiffies) if the request completed, which may
 * be zero or -ETIME if the request is unfinished after the timeout expires.
 * May return -EINTR is called with I915_WAIT_INTERRUPTIBLE and a signal is
 * pending before the request completes.
1265
 */
1266
long i915_request_wait(struct i915_request *rq,
1267 1268
		       unsigned int flags,
		       long timeout)
1269
{
1270 1271
	const int state = flags & I915_WAIT_INTERRUPTIBLE ?
		TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
1272
	wait_queue_head_t *errq = &rq->i915->gpu_error.wait_queue;
1273 1274
	DEFINE_WAIT_FUNC(reset, default_wake_function);
	DEFINE_WAIT_FUNC(exec, default_wake_function);
1275 1276 1277
	struct intel_wait wait;

	might_sleep();
1278
#if IS_ENABLED(CONFIG_LOCKDEP)
1279
	GEM_BUG_ON(debug_locks &&
1280
		   !!lockdep_is_held(&rq->i915->drm.struct_mutex) !=
1281 1282
		   !!(flags & I915_WAIT_LOCKED));
#endif
1283
	GEM_BUG_ON(timeout < 0);
1284

1285
	if (i915_request_completed(rq))
1286
		return timeout;
1287

1288 1289
	if (!timeout)
		return -ETIME;
1290

1291
	trace_i915_request_wait_begin(rq, flags);
1292

1293
	add_wait_queue(&rq->execute, &exec);
1294 1295 1296
	if (flags & I915_WAIT_LOCKED)
		add_wait_queue(errq, &reset);

1297
	intel_wait_init(&wait, rq);
1298

1299
restart:
1300 1301
	do {
		set_current_state(state);
1302
		if (intel_wait_update_request(&wait, rq))
1303
			break;
1304

1305
		if (flags & I915_WAIT_LOCKED &&
1306
		    __i915_wait_request_check_and_reset(rq))
1307
			continue;
1308

1309 1310
		if (signal_pending_state(state, current)) {
			timeout = -ERESTARTSYS;
1311
			goto complete;
1312
		}
1313

1314 1315 1316 1317
		if (!timeout) {
			timeout = -ETIME;
			goto complete;
		}
1318

1319 1320
		timeout = io_schedule_timeout(timeout);
	} while (1);
1321

1322
	GEM_BUG_ON(!intel_wait_has_seqno(&wait));
1323
	GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit));
1324

1325
	/* Optimistic short spin before touching IRQs */
1326
	if (__i915_spin_request(rq, wait.seqno, state, 5))
1327 1328 1329
		goto complete;

	set_current_state(state);
1330 1331 1332
	if (intel_engine_add_wait(rq->engine, &wait))
		/*
		 * In order to check that we haven't missed the interrupt
1333 1334 1335 1336 1337
		 * as we enabled it, we need to kick ourselves to do a
		 * coherent check on the seqno before we sleep.
		 */
		goto wakeup;

1338
	if (flags & I915_WAIT_LOCKED)
1339
		__i915_wait_request_check_and_reset(rq);
1340

1341 1342
	for (;;) {
		if (signal_pending_state(state, current)) {
1343
			timeout = -ERESTARTSYS;
1344 1345 1346
			break;
		}

1347 1348
		if (!timeout) {
			timeout = -ETIME;
1349 1350 1351
			break;
		}

1352 1353
		timeout = io_schedule_timeout(timeout);

1354
		if (intel_wait_complete(&wait) &&
1355
		    intel_wait_check_request(&wait, rq))
1356 1357 1358 1359 1360
			break;

		set_current_state(state);

wakeup:
1361 1362
		/*
		 * Carefully check if the request is complete, giving time
1363 1364 1365 1366
		 * for the seqno to be visible following the interrupt.
		 * We also have to check in case we are kicked by the GPU
		 * reset in order to drop the struct_mutex.
		 */
1367
		if (__i915_request_irq_complete(rq))
1368 1369
			break;

1370 1371
		/*
		 * If the GPU is hung, and we hold the lock, reset the GPU
1372 1373 1374 1375 1376 1377 1378 1379 1380 1381
		 * and then check for completion. On a full reset, the engine's
		 * HW seqno will be advanced passed us and we are complete.
		 * If we do a partial reset, we have to wait for the GPU to
		 * resume and update the breadcrumb.
		 *
		 * If we don't hold the mutex, we can just wait for the worker
		 * to come along and update the breadcrumb (either directly
		 * itself, or indirectly by recovering the GPU).
		 */
		if (flags & I915_WAIT_LOCKED &&
1382
		    __i915_wait_request_check_and_reset(rq))
1383 1384
			continue;

1385
		/* Only spin if we know the GPU is processing this request */
1386
		if (__i915_spin_request(rq, wait.seqno, state, 2))
1387
			break;
1388

1389 1390
		if (!intel_wait_check_request(&wait, rq)) {
			intel_engine_remove_wait(rq->engine, &wait);
1391 1392
			goto restart;
		}
1393 1394
	}

1395
	intel_engine_remove_wait(rq->engine, &wait);
1396
complete:
1397
	__set_current_state(TASK_RUNNING);
1398 1399
	if (flags & I915_WAIT_LOCKED)
		remove_wait_queue(errq, &reset);
1400 1401
	remove_wait_queue(&rq->execute, &exec);
	trace_i915_request_wait_end(rq);
1402

1403
	return timeout;
1404
}
1405

1406
static void ring_retire_requests(struct intel_ring *ring)
1407
{
1408
	struct i915_request *request, *next;
1409

1410
	list_for_each_entry_safe(request, next,
1411 1412
				 &ring->request_list, ring_link) {
		if (!i915_request_completed(request))
1413
			break;
1414

1415
		i915_request_retire(request);
1416
	}
1417 1418
}

1419
void i915_retire_requests(struct drm_i915_private *i915)
1420
{
1421
	struct intel_ring *ring, *tmp;
1422

1423
	lockdep_assert_held(&i915->drm.struct_mutex);
1424

1425
	if (!i915->gt.active_requests)
1426 1427
		return;

1428
	list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link)
1429
		ring_retire_requests(ring);
1430
}
1431 1432 1433

/* Selftest sources are pulled in only when CONFIG_DRM_I915_SELFTEST is enabled. */
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_request.c"
#include "selftests/i915_request.c"
#endif