diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e08c774a1aae98dc2049f627e46e9eca42452fc..a8d0f70c22f9a7adc7d81f408a5a69941b745ea3 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3807,49 +3807,68 @@ static __always_inline unsigned int
 __busy_set_if_active(const struct i915_gem_active *active,
 		      unsigned int (*flag)(unsigned int id))
 {
-	/* For more discussion about the barriers and locking concerns,
-	 * see __i915_gem_active_get_rcu().
-	 */
-	do {
-		struct drm_i915_gem_request *request;
-		unsigned int id;
-
-		request = rcu_dereference(active->request);
-		if (!request || i915_gem_request_completed(request))
-			return 0;
+	struct drm_i915_gem_request *request;
 
-		id = request->engine->exec_id;
+	request = rcu_dereference(active->request);
+	if (!request || i915_gem_request_completed(request))
+		return 0;
 
-		/* Check that the pointer wasn't reassigned and overwritten.
-		 *
-		 * In __i915_gem_active_get_rcu(), we enforce ordering between
-		 * the first rcu pointer dereference (imposing a
-		 * read-dependency only on access through the pointer) and
-		 * the second lockless access through the memory barrier
-		 * following a successful atomic_inc_not_zero(). Here there
-		 * is no such barrier, and so we must manually insert an
-		 * explicit read barrier to ensure that the following
-		 * access occurs after all the loads through the first
-		 * pointer.
-		 *
-		 * It is worth comparing this sequence with
-		 * raw_write_seqcount_latch() which operates very similarly.
-		 * The challenge here is the visibility of the other CPU
-		 * writes to the reallocated request vs the local CPU ordering.
-		 * Before the other CPU can overwrite the request, it will
-		 * have updated our active->request and gone through a wmb.
-		 * During the read here, we want to make sure that the values
-		 * we see have not been overwritten as we do so - and we do
-		 * that by serialising the second pointer check with the writes
-		 * on other other CPUs.
-		 *
-		 * The corresponding write barrier is part of
-		 * rcu_assign_pointer().
-		 */
-		smp_rmb();
-		if (request == rcu_access_pointer(active->request))
-			return flag(id);
-	} while (1);
+	/* This is racy. See __i915_gem_active_get_rcu() for an in-depth
+	 * discussion of how to handle the race correctly, but for reporting
+	 * the busy state we err on the side of potentially reporting the
+	 * wrong engine as being busy (but we guarantee that the result
+	 * is at least self-consistent).
+	 *
+	 * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated
+	 * whilst we are inspecting it, even under the RCU read lock as we are.
+	 * This means that there is a small window for the engine and/or the
+	 * seqno to have been overwritten. The seqno will always be in the
+	 * future compared to the intended, and so we know that if that
+	 * seqno is idle (on whatever engine) our request is idle and the
+	 * return 0 above is correct.
+	 *
+	 * The issue is that if the engine is switched, it is just as likely
+	 * to report that it is busy (but since the switch happened, we know
+	 * the request should be idle). So there is a small chance that a busy
+	 * result is actually the wrong engine.
+	 *
+	 * So why don't we care?
+	 *
+	 * For starters, the busy ioctl is a heuristic that is by definition
+	 * racy. Even with perfect serialisation in the driver, the hardware
+	 * state is constantly advancing - the state we report to the user
+	 * is stale.
+	 *
+	 * The critical information for the busy-ioctl is whether the object
+	 * is idle as userspace relies on that to detect whether its next
+	 * access will stall, or if it has missed submitting commands to
+	 * the hardware allowing the GPU to stall. We never generate a
+	 * false-positive for idleness, thus the busy-ioctl is reliable at
+	 * the most fundamental level, and we maintain the guarantee that a
+	 * busy object left to itself will eventually become idle (and stay
+	 * idle!).
+	 *
+	 * We allow ourselves the leeway of potentially misreporting the busy
+	 * state because that is an optimisation heuristic that is constantly
+	 * in flux. Being quickly able to detect the busy/idle state is much
+	 * more important than accurate logging of exactly which engines were
+	 * busy.
+	 *
+	 * For accuracy in reporting the engine, we could use
+	 *
+	 *	result = 0;
+	 *	request = __i915_gem_active_get_rcu(active);
+	 *	if (request) {
+	 *		if (!i915_gem_request_completed(request))
+	 *			result = flag(request->engine->exec_id);
+	 *		i915_gem_request_put(request);
+	 *	}
+	 *
+	 * but that still remains susceptible to both hardware and userspace
+	 * races. So we accept making the result of that race slightly worse,
+	 * given the rarity of the race and its low impact on the result.
+	 */
+	return flag(READ_ONCE(request->engine->exec_id));
 }
 
 static __always_inline unsigned int
@@ -3897,11 +3916,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 	 * retired and freed. We take a local copy of the pointer,
 	 * but before we add its engine into the busy set, the other
 	 * thread reallocates it and assigns it to a task on another
-	 * engine with a fresh and incomplete seqno.
-	 *
-	 * So after we lookup the engine's id, we double check that
-	 * the active request is the same and only then do we add it
-	 * into the busy set.
+	 * engine with a fresh and incomplete seqno. Guarding against
+	 * that requires careful serialisation and reference counting,
+	 * i.e. using __i915_gem_active_get_request_rcu(). We don't;
+	 * instead we expect that if the result is busy, which engines
+	 * are busy is not completely reliable - we only guarantee
+	 * that the object was busy.
 	 */
 
 	rcu_read_lock();
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 452629de7a577c5547c622d6098f428959bf3917..5501fe83ed92490613fbb25091b217fb0efa8997 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -855,7 +855,16 @@ struct drm_i915_gem_busy {
 	 * having flushed any pending activity), and a non-zero return that
 	 * the object is still in-flight on the GPU. (The GPU has not yet
 	 * signaled completion for all pending requests that reference the
-	 * object.)
+	 * object.) An object is guaranteed to become idle eventually (so
+	 * long as no new GPU commands are executed upon it). Due to the
+	 * asynchronous nature of the hardware, an object reported
+	 * as busy may become idle before the ioctl is completed.
+	 *
+	 * Furthermore, if the object is busy, which engine is busy is only
+	 * provided as a guide. There are race conditions which prevent the
+	 * report of which engines are busy from being always accurate.
+	 * However, the converse is not true. If the object is idle, the
+	 * result of the ioctl, that all engines are idle, is accurate.
 	 *
 	 * The returned dword is split into two fields to indicate both
 	 * the engines on which the object is being read, and the
@@ -878,6 +887,11 @@ struct drm_i915_gem_busy {
 	 * execution engines, e.g. multiple media engines, which are
 	 * mapped to the same identifier in the EXECBUFFER2 ioctl and
 	 * so are not separately reported for busyness.
+	 *
+	 * Caveat emptor:
+	 * Only the boolean result of this query is reliable; that is, whether
+	 * the object is idle or busy. The report of which engines are busy
+	 * should only be used as a heuristic.
 	 */
 	__u32 busy;
 };
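
As a side note for readers of the patch: to make the documented contract concrete - the boolean result is reliable, the per-engine report is only a guide - a minimal userspace sketch (not part of the patch) of the busy query could look like the following. It assumes an already-open DRM fd and a valid GEM handle, that the uapi header is reachable as <drm/i915_drm.h>, and that the busy dword of this era packs the write engine id in the low word and the read-engine bitmask in the high word; treat it as a sketch rather than reference code.

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>

	#include <drm/i915_drm.h>

	/* Returns 1 if the object is still busy, 0 if idle, -1 on error. */
	static int gem_object_is_busy(int fd, uint32_t handle)
	{
		struct drm_i915_gem_busy busy;

		memset(&busy, 0, sizeof(busy));
		busy.handle = handle;

		if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
			return -1;

		/* Only the boolean result is reliable: zero means the object
		 * is idle and will stay idle until new commands reference it.
		 */
		if (!busy.busy)
			return 0;

		/* Which engines are reported busy is only a heuristic; use it
		 * to guide optimisations, never for correctness decisions.
		 */
		printf("write engine id: %u, read engine mask: %#x\n",
		       busy.busy & 0xffff, busy.busy >> 16);
		return 1;
	}

A real caller would typically issue the request through libdrm's drmIoctl() so that it is restarted on signals, and would fall back to a blocking mechanism such as the wait ioctl when it actually needs the object to be idle, since an object reported busy may already have become idle by the time the result is inspected.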