diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 948b973af067815591e31fdbe086201562deb384..99d3272d82d805a0a237ee614e24de9d8e35227b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1359,11 +1359,12 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], intel_engine_last_submit(engine)); - seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", + seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s, wedged? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings)), - yesno(engine->hangcheck.stalled)); + yesno(engine->hangcheck.stalled), + yesno(engine->hangcheck.wedged)); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 19defe73b1566addbed6c30be53463f2d76c06fb..74dd88d8563e698fd3446ae4e48a34964bf78b5a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1006,6 +1006,8 @@ struct i915_gem_mm { #define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */ #define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */ +#define I915_ENGINE_WEDGED_TIMEOUT (60 * HZ) /* Reset but no recovery? */ + enum modeset_restore { MODESET_ON_LID_OPEN, MODESET_DONE, diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index d47e346bd49e97fc45e9aa0cb15563bbdb28c2fd..2fc7a0dd0df9b2bc7a88814f75d98334bfd4d4e9 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -294,6 +294,7 @@ static void hangcheck_store_sample(struct intel_engine_cs *engine, engine->hangcheck.seqno = hc->seqno; engine->hangcheck.action = hc->action; engine->hangcheck.stalled = hc->stalled; + engine->hangcheck.wedged = hc->wedged; } static enum intel_engine_hangcheck_action @@ -368,6 +369,9 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, hc->stalled = time_after(jiffies, engine->hangcheck.action_timestamp + timeout); + hc->wedged = time_after(jiffies, + engine->hangcheck.action_timestamp + + I915_ENGINE_WEDGED_TIMEOUT); } static void hangcheck_declare_hang(struct drm_i915_private *i915, @@ -409,7 +413,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) gpu_error.hangcheck_work.work); struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned int hung = 0, stuck = 0; + unsigned int hung = 0, stuck = 0, wedged = 0; if (!i915_modparams.enable_hangcheck) return; @@ -440,6 +444,17 @@ static void i915_hangcheck_elapsed(struct work_struct *work) if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } + + if (engine->hangcheck.wedged) + wedged |= intel_engine_flag(engine); + } + + if (wedged) { + dev_err(dev_priv->drm.dev, + "GPU recovery timed out," + " cancelling all in-flight rendering.\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(dev_priv); } if (hung) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 2c35dd3525a6e40748f17e7bb73e85ce32a69d91..4003f3ebe3d1b3132903aa6e0a9165307faca376 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -122,7 +122,8 @@ struct intel_engine_hangcheck { int deadlock; struct intel_instdone instdone; struct i915_request *active_request; - bool stalled; + bool stalled:1; + bool wedged:1; }; struct intel_ring {