/*
 * Copyright © 2008-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

25 26
#ifndef I915_REQUEST_H
#define I915_REQUEST_H
27

28
#include <linux/dma-fence.h>
29 30

#include "i915_gem.h"
31
#include "i915_scheduler.h"
32
#include "i915_sw_fence.h"
33

34 35
#include <uapi/drm/i915_drm.h>

J
Joonas Lahtinen 已提交
36 37
struct drm_file;
struct drm_i915_gem_object;
38
struct i915_request;
39
struct i915_timeline;
J
Joonas Lahtinen 已提交
40

41 42
/*
 * One node of a singly linked list of vma, as referenced by
 * i915_request.capture_list.
 */
struct i915_capture_list {
	struct i915_capture_list *next;	/* following node in the list, if any */
	struct i915_vma *vma;		/* the vma recorded by this node */
};

46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
enum {
	/*
	 * I915_FENCE_FLAG_ACTIVE - the request is presently handed over to HW.
	 *
	 * __i915_request_submit() sets this bit when passing the request to
	 * the hardware; __i915_request_unsubmit() clears it again should the
	 * request be preempted.
	 *
	 * It is also cleared, for consistency, once the request is retired
	 * and the HW is known to have finished with it.
	 *
	 * See i915_request_is_active()
	 */
	I915_FENCE_FLAG_ACTIVE = DMA_FENCE_FLAG_USER_BITS,

	/*
	 * I915_FENCE_FLAG_SIGNAL - the request currently sits on a signal_list
	 *
	 * Bookkeeping private to the breadcrumb code, recording whether the
	 * request has been placed on one of the signal_list.
	 */
	I915_FENCE_FLAG_SIGNAL,
};

69 70 71 72 73 74 75 76 77 78 79
/**
 * Request queue structure.
 *
 * The request queue allows us to note sequence numbers that have been emitted
 * and may be associated with active buffers to be retired.
 *
 * By keeping this list, we can avoid having to do questionable sequence
 * number comparisons on buffer last_read|write_seqno. It also allows an
 * emission time to be associated with the request for tracking how far ahead
 * of the GPU the submission is.
 *
80 81 82 83 84 85 86
 * When modifying this structure be very aware that we perform a lockless
 * RCU lookup of it that may race against reallocation of the struct
 * from the slab freelist. We intentionally do not zero the structure on
 * allocation so that the lookup can use the dangling pointers (and is
 * cogniscent that those pointers may be wrong). Instead, everything that
 * needs to be initialised must be done so explicitly.
 *
87
 * The requests are reference counted.
88
 */
89
struct i915_request {
90
	struct dma_fence fence;
91
	spinlock_t lock;
92 93 94 95 96 97 98 99 100 101 102

	/** On Which ring this request was generated */
	struct drm_i915_private *i915;

	/**
	 * Context and ring buffer related to this request
	 * Contexts are refcounted, so when this request is associated with a
	 * context, we must increment the context's refcount, to guarantee that
	 * it persists while any request is linked to it. Requests themselves
	 * are also refcounted, so the request will only be freed when the last
	 * reference to it is dismissed, and the code in
103
	 * i915_request_free() will then decrement the refcount on the
104 105
	 * context.
	 */
C
Chris Wilson 已提交
106
	struct i915_gem_context *gem_context;
107
	struct intel_engine_cs *engine;
108
	struct intel_context *hw_context;
109
	struct intel_ring *ring;
110
	struct i915_timeline *timeline;
111
	struct list_head signal_link;
112

113 114 115 116 117 118 119 120
	/*
	 * The rcu epoch of when this request was allocated. Used to judiciously
	 * apply backpressure on future allocations to ensure that under
	 * mempressure there is sufficient RCU ticks for us to reclaim our
	 * RCU protected slabs.
	 */
	unsigned long rcustate;

121 122
	/*
	 * Fences for the various phases in the request's lifetime.
123 124 125 126 127
	 *
	 * The submit fence is used to await upon all of the request's
	 * dependencies. When it is signaled, the request is ready to run.
	 * It is used by the driver to then queue the request for execution.
	 */
128
	struct i915_sw_fence submit;
129
	wait_queue_entry_t submitq;
130

131 132
	/*
	 * A list of everyone we wait upon, and everyone who waits upon us.
133 134 135 136 137 138 139
	 * Even though we will not be submitted to the hardware before the
	 * submit fence is signaled (it waits for all external events as well
	 * as our own requests), the scheduler still needs to know the
	 * dependency tree for the lifetime of the request (from execbuf
	 * to retirement), i.e. bidirectional dependency information for the
	 * request not tied to individual fences.
	 */
140
	struct i915_sched_node sched;
141 142
	struct i915_dependency dep;

143 144 145 146 147 148 149
	/*
	 * A convenience pointer to the current breadcrumb value stored in
	 * the HW status page (or our timeline's local equivalent). The full
	 * path would be rq->hw_context->ring->timeline->hwsp_seqno.
	 */
	const u32 *hwsp_seqno;

150 151
	/**
	 * GEM sequence number associated with this request on the
152 153 154 155
	 * global execution timeline. It is zero when the request is not
	 * on the HW queue (i.e. not on the engine timeline list).
	 * Its value is guarded by the timeline spinlock.
	 */
156 157
	u32 global_seqno;

158
	/** Position in the ring of the start of the request */
159 160
	u32 head;

161 162 163
	/** Position in the ring of the start of the user packets */
	u32 infix;

164
	/**
165 166 167
	 * Position in the ring of the start of the postfix.
	 * This is required to calculate the maximum available ring space
	 * without overwriting the postfix.
168 169 170
	 */
	u32 postfix;

171
	/** Position in the ring of the end of the whole request */
172 173
	u32 tail;

174 175 176 177
	/** Position in the ring of the end of any workarounds after the tail */
	u32 wa_tail;

	/** Preallocate space in the ring for the emitting the request */
178 179 180 181 182
	u32 reserved_space;

	/** Batch buffer related to this request if any (used for
	 * error state dump only).
	 */
C
Chris Wilson 已提交
183
	struct i915_vma *batch;
184 185
	/**
	 * Additional buffers requested by userspace to be captured upon
186 187 188 189
	 * a GPU hang. The vma/obj on this list are protected by their
	 * active reference - all objects on this list must also be
	 * on the active_list (of their final request).
	 */
190
	struct i915_capture_list *capture_list;
191
	struct list_head active_list;
192 193 194 195

	/** Time at which this request was emitted, in jiffies. */
	unsigned long emitted_jiffies;

196 197
	bool waitboost;

198 199
	/** engine->request_list entry for this request */
	struct list_head link;
200

201 202 203
	/** ring->request_list entry for this request */
	struct list_head ring_link;

204 205
	struct drm_i915_file_private *file_priv;
	/** file_priv list entry for this request */
206
	struct list_head client_link;
207 208
};

209 210
/*
 * GFP mask built from GFP_KERNEL plus __GFP_RETRY_MAYFAIL and __GFP_NOWARN.
 * NOTE(review): the users are outside this header; presumably it is the mask
 * for fence/request allocations - confirm against the callers.
 */
#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)

211
extern const struct dma_fence_ops i915_fence_ops;
212

213
static inline bool dma_fence_is_i915(const struct dma_fence *fence)
214 215 216 217
{
	return fence->ops == &i915_fence_ops;
}

218 219 220 221
struct i915_request * __must_check
i915_request_alloc(struct intel_engine_cs *engine,
		   struct i915_gem_context *ctx);
void i915_request_retire_upto(struct i915_request *rq);
222

223
static inline struct i915_request *
224
to_request(struct dma_fence *fence)
225 226
{
	/* We assume that NULL fence/request are interoperable */
227
	BUILD_BUG_ON(offsetof(struct i915_request, fence) != 0);
228
	GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
229
	return container_of(fence, struct i915_request, fence);
230 231
}

232 233
static inline struct i915_request *
i915_request_get(struct i915_request *rq)
234
{
235
	return to_request(dma_fence_get(&rq->fence));
236 237
}

238 239
static inline struct i915_request *
i915_request_get_rcu(struct i915_request *rq)
240
{
241
	return to_request(dma_fence_get_rcu(&rq->fence));
242 243
}

244
static inline void
245
i915_request_put(struct i915_request *rq)
246
{
247
	dma_fence_put(&rq->fence);
248 249
}

250
/**
251
 * i915_request_global_seqno - report the current global seqno
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
 * @request - the request
 *
 * A request is assigned a global seqno only when it is on the hardware
 * execution queue. The global seqno can be used to maintain a list of
 * requests on the same engine in retirement order, for example for
 * constructing a priority queue for waiting. Prior to its execution, or
 * if it is subsequently removed in the event of preemption, its global
 * seqno is zero. As both insertion and removal from the execution queue
 * may operate in IRQ context, it is not guarded by the usual struct_mutex
 * BKL. Instead those relying on the global seqno must be prepared for its
 * value to change between reads. Only when the request is complete can
 * the global seqno be stable (due to the memory barriers on submitting
 * the commands to the hardware to write the breadcrumb, if the HWS shows
 * that it has passed the global seqno and the global seqno is unchanged
 * after the read, it is indeed complete).
 */
268
static inline u32
269
i915_request_global_seqno(const struct i915_request *request)
270 271 272 273
{
	return READ_ONCE(request->global_seqno);
}

274
/*
 * Defined outside this header.
 * NOTE(review): presumably makes @to wait upon the fences tracked by @obj,
 * with @write selecting read vs write access - confirm against the
 * implementation.
 */
int i915_request_await_object(struct i915_request *to,
			      struct drm_i915_gem_object *obj,
			      bool write);
277 278
int i915_request_await_dma_fence(struct i915_request *rq,
				 struct dma_fence *fence);
279

280
void i915_request_add(struct i915_request *rq);
281

282 283
void __i915_request_submit(struct i915_request *request);
void i915_request_submit(struct i915_request *request);
284

285 286
void i915_request_skip(struct i915_request *request, int error);

287 288
void __i915_request_unsubmit(struct i915_request *request);
void i915_request_unsubmit(struct i915_request *request);
289

290 291 292 293
/* Note: part of the intel_breadcrumbs family */
bool i915_request_enable_breadcrumb(struct i915_request *request);
void i915_request_cancel_breadcrumb(struct i915_request *request);

294
/*
 * i915_request_wait() is defined outside this header. @rq must not be NULL
 * (see the nonnull attribute); @flags is a mask of the I915_WAIT_* bits
 * below.
 * NOTE(review): the callers in this header treat a negative return as an
 * error code; the meaning of non-negative values is not visible here.
 */
long i915_request_wait(struct i915_request *rq,
		       unsigned int flags,
		       long timeout)
	__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE	BIT(0)
#define I915_WAIT_LOCKED	BIT(1) /* struct_mutex held, handle GPU reset */
#define I915_WAIT_PRIORITY	BIT(2) /* small priority bump for the request */
#define I915_WAIT_ALL		BIT(3) /* used by i915_gem_object_wait() */
#define I915_WAIT_FOR_IDLE_BOOST BIT(4)
303

304 305
static inline bool i915_request_signaled(const struct i915_request *rq)
{
306
	/* The request may live longer than its HWSP, so check flags first! */
307 308 309
	return test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags);
}

310 311 312 313 314
/**
 * i915_request_is_active - check whether the request is flagged as on the HW
 * @rq: the request
 *
 * Returns true while I915_FENCE_FLAG_ACTIVE is set in the fence flags.
 */
static inline bool i915_request_is_active(const struct i915_request *rq)
{
	const bool on_hw = test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags);

	return on_hw;
}

315 316 317 318 319 320 321 322
/**
 * i915_seqno_passed - wraparound-tolerant ordering test of two seqno
 * @seq1: the seqno being tested
 * @seq2: the seqno to compare against
 *
 * The difference is computed in u32 and then interpreted as s32.
 *
 * Returns true if @seq1 is equal to, or later than, @seq2.
 */
static inline bool i915_seqno_passed(u32 seq1, u32 seq2)
{
	const s32 delta = (s32)(seq1 - seq2);

	return delta >= 0;
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
/*
 * Sample, with READ_ONCE(), the value that rq->hwsp_seqno points at. No RCU
 * read lock is taken here; hwsp_seqno() is the variant that takes it.
 */
static inline u32 __hwsp_seqno(const struct i915_request *rq)
{
	const u32 *slot = rq->hwsp_seqno;

	return READ_ONCE(*slot);
}

/**
 * hwsp_seqno - the current breadcrumb value in the HW status page
 * @rq: the request, used to locate the relevant HW status page
 *
 * The name stresses that the value is not a property of the request itself,
 * but a snapshot of the current HW state associated with it; it keeps
 * changing as the GPU executes more requests.
 *
 * Returns the current breadcrumb value in the associated HW status page (or
 * the local timeline's equivalent) for this request. The breadcrumb belonging
 * to the request is rq->fence.seqno: once the HW status page holds that
 * breadcrumb or a later one, the request is complete.
 */
static inline u32 hwsp_seqno(const struct i915_request *rq)
{
	u32 breadcrumb;

	/* The HWSP may be freed at runtime, so only sample it under RCU. */
	rcu_read_lock();
	breadcrumb = __hwsp_seqno(rq);
	rcu_read_unlock();

	return breadcrumb;
}

352 353 354 355 356
static inline bool __i915_request_has_started(const struct i915_request *rq)
{
	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1);
}

357 358 359 360 361 362 363 364 365 366 367
/**
 * i915_request_started - check if the request has begun being executed
 * @rq: the request
 *
 * Returns true if the request has been submitted to hardware, and the hardware
 * has advanced passed the end of the previous request and so should be either
 * currently processing the request (though it may be preempted and so
 * not necessarily the next request to complete) or have completed the request.
 */
static inline bool i915_request_started(const struct i915_request *rq)
{
368 369
	if (i915_request_signaled(rq))
		return true;
370

371
	/* Remember: started but may have since been preempted! */
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388
	return __i915_request_has_started(rq);
}

/**
 * i915_request_is_running - check if the request may actually be executing
 * @rq: the request
 *
 * Returns true if the request is currently submitted to hardware, has passed
 * its start point (i.e. the context is setup and not busywaiting). Note that
 * it may no longer be running by the time the function returns!
 */
static inline bool i915_request_is_running(const struct i915_request *rq)
{
	/* Not flagged as active: skip the HW status page lookup entirely. */
	if (!i915_request_is_active(rq))
		return false;

	return __i915_request_has_started(rq);
}

391
static inline bool i915_request_completed(const struct i915_request *rq)
392
{
393 394
	if (i915_request_signaled(rq))
		return true;
395

396 397
	return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno);
}
398

399 400 401
/**
 * i915_request_mark_complete - stop tracking the HW status page
 * @rq: the request
 *
 * Redirects rq->hwsp_seqno at the request's own fence.seqno, so later
 * breadcrumb reads come from the request itself rather than from the HWSP.
 */
static inline void i915_request_mark_complete(struct i915_request *rq)
{
	rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */
}

404 405 406 407
void i915_retire_requests(struct drm_i915_private *i915);

/*
 * We treat requests as fences. This is not to be confused with our
 * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
 * We use the fences to synchronize access from the CPU with activity on the
 * GPU, for example, we should not rewrite an object's PTE whilst the GPU
 * is reading them. We also track fences at a higher level to provide
 * implicit synchronisation around GEM objects, e.g. set-domain will wait
 * for outstanding GPU rendering before marking the object ready for CPU
 * access, or a pageflip will wait until the GPU is complete before showing
 * the frame on the scanout.
 *
 * In order to use a fence, the object must track the fence it needs to
 * serialise with. For example, GEM objects want to track both read and
 * write access so that we can perform concurrent read operations between
 * the CPU and GPU engines, as well as waiting for all rendering to
 * complete, or waiting for the last GPU user of a "fence register". The
 * object then embeds a #i915_gem_active to track the most recent (in
 * retirement order) request relevant for the desired mode of access.
 * The #i915_gem_active is updated with i915_gem_active_set() to track the
 * most recent fence request, typically this is done as part of
 * i915_vma_move_to_active().
 *
 * When the #i915_gem_active completes (is retired), it will
 * signal its completion to the owner through a callback as well as mark
 * itself as idle (i915_gem_active.request == NULL). The owner
 * can then perform any action, such as delayed freeing of an active
 * resource including itself.
 */
434 435 436
struct i915_gem_active;

/*
 * Callback stored in i915_gem_active.retire; it is invoked with the tracker
 * and the request that tracker was watching.
 */
typedef void (*i915_gem_retire_fn)(struct i915_gem_active *,
				   struct i915_request *);
438

439
struct i915_gem_active {
440
	struct i915_request __rcu *request;
441 442
	struct list_head link;
	i915_gem_retire_fn retire;
443 444
};

445
/*
 * Default retirement callback, installed in place of a NULL callback.
 * Defined outside this header.
 */
void i915_gem_retire_noop(struct i915_gem_active *active,
			  struct i915_request *request);
447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462

/**
 * init_request_active - prepares the activity tracker for use
 * @active: the active tracker
 * @retire: a callback for when the tracker is retired (becomes idle),
 *          can be NULL
 *
 * init_request_active() prepares the embedded @active struct for use as
 * an activity tracker, that is for tracking the last known active request
 * associated with it. When the last request becomes idle, when it is retired
 * after completion, the optional callback @retire is invoked. A NULL @retire
 * is replaced by i915_gem_retire_noop.
 */
static inline void
init_request_active(struct i915_gem_active *active,
		    i915_gem_retire_fn retire)
{
	RCU_INIT_POINTER(active->request, NULL);
	INIT_LIST_HEAD(&active->link);
	active->retire = retire ?: i915_gem_retire_noop;
}

468 469 470 471 472 473 474 475 476
/**
 * i915_gem_active_set - updates the tracker to watch the current request
 * @active - the active tracker
 * @request - the request to watch
 *
 * i915_gem_active_set() watches the given @request for completion. Whilst
 * that @request is busy, the @active reports busy. When that @request is
 * retired, the @active tracker is updated to report idle.
 */
477 478
static inline void
i915_gem_active_set(struct i915_gem_active *active,
479
		    struct i915_request *request)
480
{
481
	list_move(&active->link, &request->active_list);
482
	rcu_assign_pointer(active->request, request);
483 484
}

485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
/**
 * i915_gem_active_set_retire_fn - updates the retirement callback
 * @active: the active tracker
 * @fn: the routine called when the request is retired, may be NULL
 * @mutex: struct_mutex used to guard retirements
 *
 * Replaces the function pointer that is called when the final request
 * associated with the @active tracker is retired. A NULL @fn installs
 * i915_gem_retire_noop. @mutex is asserted to be held.
 */
static inline void
i915_gem_active_set_retire_fn(struct i915_gem_active *active,
			      i915_gem_retire_fn fn,
			      struct mutex *mutex)
{
	lockdep_assert_held(mutex);

	if (!fn)
		fn = i915_gem_retire_noop;
	active->retire = fn;
}

504
static inline struct i915_request *
505 506
__i915_gem_active_peek(const struct i915_gem_active *active)
{
507 508
	/*
	 * Inside the error capture (running with the driver in an unknown
509 510 511 512 513 514
	 * state), we want to bend the rules slightly (a lot).
	 *
	 * Work is in progress to make it safer, in the meantime this keeps
	 * the known issue from spamming the logs.
	 */
	return rcu_dereference_protected(active->request, 1);
515 516
}

517 518 519 520 521 522 523 524
/**
 * i915_gem_active_raw - return the active request
 * @active - the active tracker
 *
 * i915_gem_active_raw() returns the current request being tracked, or NULL.
 * It does not obtain a reference on the request for the caller, so the caller
 * must hold struct_mutex.
 */
525
static inline struct i915_request *
526 527 528 529 530 531
i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex)
{
	return rcu_dereference_protected(active->request,
					 lockdep_is_held(mutex));
}

532
/**
533
 * i915_gem_active_peek - report the active request being monitored
534 535
 * @active - the active tracker
 *
536 537 538
 * i915_gem_active_peek() returns the current request being tracked if
 * still active, or NULL. It does not obtain a reference on the request
 * for the caller, so the caller must hold struct_mutex.
539
 */
540
static inline struct i915_request *
541
i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex)
542
{
543
	struct i915_request *request;
544

545
	request = i915_gem_active_raw(active, mutex);
546
	if (!request || i915_request_completed(request))
547 548 549 550 551
		return NULL;

	return request;
}

552 553 554 555 556 557 558
/**
 * i915_gem_active_get - return a reference to the active request
 * @active: the active tracker
 * @mutex: the lock the caller holds (struct_mutex), checked via lockdep
 *
 * i915_gem_active_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold struct_mutex.
 */
static inline struct i915_request *
i915_gem_active_get(const struct i915_gem_active *active, struct mutex *mutex)
{
	/* The peek may yield NULL, which is passed on to i915_request_get(). */
	return i915_request_get(i915_gem_active_peek(active, mutex));
}

565 566 567 568 569 570 571 572
/**
 * __i915_gem_active_get_rcu - return a reference to the active request
 * @active - the active tracker
 *
 * __i915_gem_active_get() returns a reference to the active request, or NULL
 * if the active tracker is idle. The caller must hold the RCU read lock, but
 * the returned pointer is safe to use outside of RCU.
 */
573
static inline struct i915_request *
574 575
__i915_gem_active_get_rcu(const struct i915_gem_active *active)
{
576 577
	/*
	 * Performing a lockless retrieval of the active request is super
578
	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing
579 580 581 582 583 584
	 * slab of request objects will not be freed whilst we hold the
	 * RCU read lock. It does not guarantee that the request itself
	 * will not be freed and then *reused*. Viz,
	 *
	 * Thread A			Thread B
	 *
585 586 587
	 * rq = active.request
	 *				retire(rq) -> free(rq);
	 *				(rq is now first on the slab freelist)
588 589
	 *				active.request = NULL
	 *
590 591
	 *				rq = new submission on a new object
	 * ref(rq)
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616
	 *
	 * To prevent the request from being reused whilst the caller
	 * uses it, we take a reference like normal. Whilst acquiring
	 * the reference we check that it is not in a destroyed state
	 * (refcnt == 0). That prevents the request being reallocated
	 * whilst the caller holds on to it. To check that the request
	 * was not reallocated as we acquired the reference we have to
	 * check that our request remains the active request across
	 * the lookup, in the same manner as a seqlock. The visibility
	 * of the pointer versus the reference counting is controlled
	 * by using RCU barriers (rcu_dereference and rcu_assign_pointer).
	 *
	 * In the middle of all that, we inspect whether the request is
	 * complete. Retiring is lazy so the request may be completed long
	 * before the active tracker is updated. Querying whether the
	 * request is complete is far cheaper (as it involves no locked
	 * instructions setting cachelines to exclusive) than acquiring
	 * the reference, so we do it first. The RCU read lock ensures the
	 * pointer dereference is valid, but does not ensure that the
	 * seqno nor HWS is the right one! However, if the request was
	 * reallocated, that means the active tracker's request was complete.
	 * If the new request is also complete, then both are and we can
	 * just report the active tracker is idle. If the new request is
	 * incomplete, then we acquire a reference on it and check that
	 * it remained the active request.
617 618 619
	 *
	 * It is then imperative that we do not zero the request on
	 * reallocation, so that we can chase the dangling pointers!
620
	 * See i915_request_alloc().
621 622
	 */
	do {
623
		struct i915_request *request;
624 625

		request = rcu_dereference(active->request);
626
		if (!request || i915_request_completed(request))
627 628
			return NULL;

629 630 631
		/*
		 * An especially silly compiler could decide to recompute the
		 * result of i915_request_completed, more specifically
632 633 634 635 636
		 * re-emit the load for request->fence.seqno. A race would catch
		 * a later seqno value, which could flip the result from true to
		 * false. Which means part of the instructions below might not
		 * be executed, while later on instructions are executed. Due to
		 * barriers within the refcounting the inconsistency can't reach
637 638
		 * past the call to i915_request_get_rcu, but not executing
		 * that while still executing i915_request_put() creates
639 640 641 642
		 * havoc enough.  Prevent this with a compiler barrier.
		 */
		barrier();

643
		request = i915_request_get_rcu(request);
644

645 646 647
		/*
		 * What stops the following rcu_access_pointer() from occurring
		 * before the above i915_request_get_rcu()? If we were
648 649 650 651 652 653 654 655 656 657 658 659 660
		 * to read the value before pausing to get the reference to
		 * the request, we may not notice a change in the active
		 * tracker.
		 *
		 * The rcu_access_pointer() is a mere compiler barrier, which
		 * means both the CPU and compiler are free to perform the
		 * memory read without constraint. The compiler only has to
		 * ensure that any operations after the rcu_access_pointer()
		 * occur afterwards in program order. This means the read may
		 * be performed earlier by an out-of-order CPU, or adventurous
		 * compiler.
		 *
		 * The atomic operation at the heart of
661
		 * i915_request_get_rcu(), see dma_fence_get_rcu(), is
662
		 * atomic_inc_not_zero() which is only a full memory barrier
663
		 * when successful. That is, if i915_request_get_rcu()
664 665 666 667
		 * returns the request (and so with the reference counted
		 * incremented) then the following read for rcu_access_pointer()
		 * must occur after the atomic operation and so confirm
		 * that this request is the one currently being tracked.
668 669 670
		 *
		 * The corresponding write barrier is part of
		 * rcu_assign_pointer().
671 672 673 674
		 */
		if (!request || request == rcu_access_pointer(active->request))
			return rcu_pointer_handoff(request);

675
		i915_request_put(request);
676 677 678 679 680 681 682 683 684 685 686
	} while (1);
}

/**
 * i915_gem_active_get_unlocked - return a reference to the active request
 * @active: the active tracker
 *
 * i915_gem_active_get_unlocked() returns a reference to the active request,
 * or NULL if the active tracker is idle. The reference is obtained under RCU,
 * so no locking is required by the caller.
 *
 * The reference should be freed with i915_request_put().
 */
static inline struct i915_request *
i915_gem_active_get_unlocked(const struct i915_gem_active *active)
{
	struct i915_request *request;

	rcu_read_lock();
	request = __i915_gem_active_get_rcu(active);
	rcu_read_unlock();

	return request;
}

701 702 703 704 705 706 707 708 709 710 711
/**
 * i915_gem_active_isset - report whether the active tracker is assigned
 * @active - the active tracker
 *
 * i915_gem_active_isset() returns true if the active tracker is currently
 * assigned to a request. Due to the lazy retiring, that request may be idle
 * and this may report stale information.
 */
static inline bool
i915_gem_active_isset(const struct i915_gem_active *active)
{
712
	return rcu_access_pointer(active->request);
713 714 715
}

/**
716
 * i915_gem_active_wait - waits until the request is completed
717
 * @active - the active request on which to wait
718
 * @flags - how to wait
719 720 721
 * @timeout - how long to wait at most
 * @rps - userspace client to charge for a waitboost
 *
722
 * i915_gem_active_wait() waits until the request is completed before
723 724 725 726 727 728 729 730 731
 * returning, without requiring any locks to be held. Note that it does not
 * retire any requests before returning.
 *
 * This function relies on RCU in order to acquire the reference to the active
 * request without holding any locks. See __i915_gem_active_get_rcu() for the
 * glory details on how that is managed. Once the reference is acquired, we
 * can then wait upon the request, and afterwards release our reference,
 * free of any locking.
 *
732
 * This function wraps i915_request_wait(), see it for the full details on
733 734 735 736 737
 * the arguments.
 *
 * Returns 0 if successful, or a negative error code.
 */
static inline int
738
i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags)
739
{
740
	struct i915_request *request;
741
	long ret = 0;
742 743 744

	request = i915_gem_active_get_unlocked(active);
	if (request) {
745 746
		ret = i915_request_wait(request, flags, MAX_SCHEDULE_TIMEOUT);
		i915_request_put(request);
747 748
	}

749
	return ret < 0 ? ret : 0;
750 751
}

752 753 754 755 756 757 758 759 760 761
/**
 * i915_gem_active_retire - waits until the request is retired
 * @active - the active request on which to wait
 *
 * i915_gem_active_retire() waits until the request is completed,
 * and then ensures that at least the retirement handler for this
 * @active tracker is called before returning. If the @active
 * tracker is idle, the function returns immediately.
 */
static inline int __must_check
762
i915_gem_active_retire(struct i915_gem_active *active,
763
		       struct mutex *mutex)
764
{
765
	struct i915_request *request;
766
	long ret;
767

768
	request = i915_gem_active_raw(active, mutex);
769 770 771
	if (!request)
		return 0;

772
	ret = i915_request_wait(request,
773
				I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
774 775
				MAX_SCHEDULE_TIMEOUT);
	if (ret < 0)
776 777 778
		return ret;

	list_del_init(&active->link);
779 780
	RCU_INIT_POINTER(active->request, NULL);

781 782 783
	active->retire(active, request);

	return 0;
784 785
}

786 787 788
/*
 * for_each_active - visit each set bit of @mask, lowest bit first
 * @mask: lvalue bitmask; the bit just visited is cleared after every
 *        iteration, so @mask is zero once the loop runs to completion
 * @idx: lvalue that receives the zero-based index of the current bit
 *
 * Both arguments are expanded several times and must therefore be free of
 * side effects.
 */
#define for_each_active(mask, idx) \
	for (; (mask) ? (idx) = ffs(mask) - 1, 1 : 0; (mask) &= ~BIT(idx))

789
#endif /* I915_REQUEST_H */