diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0ebec2b77ae1ea64a03d01d6acf69077c4a6831e..691f0b694e779eaa59a92fa0647d57e000638459 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -919,6 +919,11 @@ struct i915_ctx_hang_stats { /* This context is banned to submit more work */ bool banned; + +#define CONTEXT_SCORE_GUILTY 10 +#define CONTEXT_SCORE_BAN_THRESHOLD 40 + /* Accumulated score of hangs caused by this context */ + int ban_score; }; /* This must match up with the value previously used for execbuf2.rsvd1. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f8dfd4aba61b38c9308937891468969bd8cd672..4c4aed2d2afb86faef721d1431d22fcd567fb393 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2622,33 +2622,45 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, static bool i915_context_is_banned(const struct i915_gem_context *ctx) { + const struct i915_ctx_hang_stats *hs = &ctx->hang_stats; unsigned long elapsed; - if (ctx->hang_stats.banned) + if (hs->banned) return true; - elapsed = get_seconds() - ctx->hang_stats.guilty_ts; - if (ctx->hang_stats.ban_period_seconds && - elapsed <= ctx->hang_stats.ban_period_seconds) { + if (!hs->ban_period_seconds) + return false; + + elapsed = get_seconds() - hs->guilty_ts; + if (elapsed <= hs->ban_period_seconds) { DRM_DEBUG("context hanging too fast, banning!\n"); return true; } + if (hs->ban_score >= CONTEXT_SCORE_BAN_THRESHOLD) { + DRM_DEBUG("context hanging too often, banning!\n"); + return true; + } + return false; } -static void i915_set_reset_status(struct i915_gem_context *ctx, - const bool guilty) +static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx) { struct i915_ctx_hang_stats *hs = &ctx->hang_stats; - if (guilty) { - hs->banned = i915_context_is_banned(ctx); - hs->batch_active++; - hs->guilty_ts = get_seconds(); - } else { - hs->batch_pending++; - } + hs->ban_score += CONTEXT_SCORE_GUILTY; + + hs->banned = i915_context_is_banned(ctx); + hs->batch_active++; + hs->guilty_ts = get_seconds(); +} + +static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx) +{ + struct i915_ctx_hang_stats *hs = &ctx->hang_stats; + + hs->batch_pending++; } struct drm_i915_gem_request * @@ -2713,7 +2725,11 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) ring_hung = false; } - i915_set_reset_status(request->ctx, ring_hung); + if (ring_hung) + i915_gem_context_mark_guilty(request->ctx); + else + i915_gem_context_mark_innocent(request->ctx); + if (!ring_hung) return; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 27e8f257fb3996758300305808079bfc286efa51..60e63956ea1949368ccd193e869a576415caabc3 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -263,6 +263,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) request->engine); } + /* Retirement decays the ban score as it is a sign of ctx progress */ + if (request->ctx->hang_stats.ban_score > 0) + request->ctx->hang_stats.ban_score--; + i915_gem_context_put(request->ctx); dma_fence_signal(&request->fence);