diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index e4f89341d17cfad2d7b6450ff0ba421266a51787..8bb444cda14fa2c21c6be9a9e0422539a63f7109 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -220,7 +220,9 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - CE_TRACE(ce, "retire\n"); + CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n", + intel_context_get_total_runtime_ns(ce), + intel_context_get_avg_runtime_ns(ce)); set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) @@ -281,6 +283,8 @@ intel_context_init(struct intel_context *ce, ce->sseu = engine->sseu; ce->ring = __intel_context_ring_size(SZ_4K); + ewma_runtime_init(&ce->runtime.avg); + ce->vm = i915_vm_get(engine->gt->vm); INIT_LIST_HEAD(&ce->signal_link); diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 604d5cfc46bab20af91b57bbe53cdd7f8838595c..18efad2551245a915917a1bcb893ba3a9d1a4099 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include #include "i915_active.h" +#include "i915_drv.h" #include "intel_context_types.h" #include "intel_engine_types.h" #include "intel_ring_types.h" @@ -227,4 +228,20 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } +static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce) +{ + const u32 period = + RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns; + + return READ_ONCE(ce->runtime.total) * period; +} + +static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) +{ + const u32 period = + RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns; + + return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period); +} + #endif /* __INTEL_CONTEXT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index ca1420fb8b5339594b2b603db05525359d80b938..11278343b9b55709e0e581e19c44baceb64804d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -7,6 +7,7 @@ #ifndef __INTEL_CONTEXT_TYPES__ #define __INTEL_CONTEXT_TYPES__ +#include #include #include #include @@ -19,6 +20,8 @@ #define CONTEXT_REDZONE POISON_INUSE +DECLARE_EWMA(runtime, 3, 8); + struct i915_gem_context; struct i915_vma; struct intel_context; @@ -68,6 +71,15 @@ struct intel_context { u64 lrc_desc; u32 tag; /* cookie passed to HW to track this context on submission */ + /* Time on GPU as tracked by the hw. */ + struct { + struct ewma_runtime avg; + u64 total; + u32 last; + I915_SELFTEST_DECLARE(u32 num_underflow); + I915_SELFTEST_DECLARE(u32 max_underflow); + } runtime; + unsigned int active_count; /* protected by timeline->mutex */ atomic_t pin_count; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index c3d7727021dba1c28c659b3450c2cd46a35052b7..78e854440949e3d5e4b0dbdcb059e58a8e2cbbe0 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1195,6 +1195,42 @@ static void reset_active(struct i915_request *rq, ce->lrc_desc |= CTX_DESC_FORCE_RESTORE; } +static u32 intel_context_get_runtime(const struct intel_context *ce) +{ + /* + * We can use either ppHWSP[16] which is recorded before the context + * switch (and so excludes the cost of context switches) or use the + * value from the context image itself, which is saved/restored earlier + * and so includes the cost of the save. + */ + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); +} + +static void intel_context_update_runtime(struct intel_context *ce) +{ + u32 old; + s32 dt; + + if (intel_context_is_barrier(ce)) + return; + + old = ce->runtime.last; + ce->runtime.last = intel_context_get_runtime(ce); + dt = ce->runtime.last - old; + + if (unlikely(dt <= 0)) { + CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", + old, ce->runtime.last, dt); + I915_SELFTEST_ONLY(ce->runtime.num_underflow += dt < 0); + I915_SELFTEST_ONLY(ce->runtime.max_underflow = + max_t(u32, ce->runtime.max_underflow, -dt)); + return; + } + + ewma_runtime_add(&ce->runtime.avg, dt); + ce->runtime.total += dt; +} + static inline struct intel_engine_cs * __execlists_schedule_in(struct i915_request *rq) { @@ -1278,6 +1314,7 @@ __execlists_schedule_out(struct i915_request *rq, i915_request_completed(rq)) intel_engine_add_retire(engine, ce->timeline); + intel_context_update_runtime(ce); intel_engine_context_out(engine); execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); intel_gt_pm_put_async(engine->gt); @@ -4607,8 +4644,13 @@ populate_lr_context(struct intel_context *ce, inhibit = false; } - /* The second page of the context object contains some fields which must - * be set up prior to the first execution. */ + /* Clear the ppHWSP (inc. per-context counters) */ + memset(vaddr, 0, PAGE_SIZE); + + /* + * The second page of the context object contains some registers which + * must be set up prior to the first execution. + */ execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE, ce, engine, ring, inhibit); diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 08a3be65f70093e250e296124616759d6483bcf0..d39b72590e40a0be5890f76ecf9ea494a3f46eec 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -17,6 +17,7 @@ #define CTX_RING_CTL (0x0a + 1) #define CTX_BB_STATE (0x10 + 1) #define CTX_BB_PER_CTX_PTR (0x18 + 1) +#define CTX_TIMESTAMP (0x22 + 1) #define CTX_PDP3_UDW (0x24 + 1) #define CTX_PDP3_LDW (0x26 + 1) #define CTX_PDP2_UDW (0x28 + 1) diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 64761e6198767abd4304ec121233e5780b295702..40c53cc1c7c0e74765505bd0f78b59d0dca5d7b5 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -4151,6 +4151,11 @@ static int live_lrc_fixed(void *arg) CTX_BB_STATE - 1, "BB_STATE" }, + { + i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)), + CTX_TIMESTAMP - 1, + "RING_CTX_TIMESTAMP" + }, { }, }, *t; u32 *hw; @@ -4450,6 +4455,96 @@ static int live_gpr_clear(void *arg) return err; } +static int __live_pphwsp_runtime(struct intel_engine_cs *engine) +{ + struct intel_context *ce; + struct i915_request *rq; + IGT_TIMEOUT(end_time); + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + ce->runtime.num_underflow = 0; + ce->runtime.max_underflow = 0; + + do { + unsigned int loop = 1024; + + while (loop) { + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_rq; + } + + if (--loop == 0) + i915_request_get(rq); + + i915_request_add(rq); + } + + if (__igt_timeout(end_time, NULL)) + break; + + i915_request_put(rq); + } while (1); + + err = i915_request_wait(rq, 0, HZ / 5); + if (err < 0) { + pr_err("%s: request not completed!\n", engine->name); + goto err_wait; + } + + igt_flush_test(engine->i915); + + pr_info("%s: pphwsp runtime %lluns, average %lluns\n", + engine->name, + intel_context_get_total_runtime_ns(ce), + intel_context_get_avg_runtime_ns(ce)); + + err = 0; + if (ce->runtime.num_underflow) { + pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n", + engine->name, + ce->runtime.num_underflow, + ce->runtime.max_underflow); + GEM_TRACE_DUMP(); + err = -EOVERFLOW; + } + +err_wait: + i915_request_put(rq); +err_rq: + intel_context_put(ce); + return err; +} + +static int live_pphwsp_runtime(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * Check that cumulative context runtime as stored in the pphwsp[16] + * is monotonic. + */ + + for_each_engine(engine, gt, id) { + err = __live_pphwsp_runtime(engine); + if (err) + break; + } + + if (igt_flush_test(gt->i915)) + err = -EIO; + + return err; +} + int intel_lrc_live_selftests(struct drm_i915_private *i915) { static const struct i915_subtest tests[] = { @@ -4457,6 +4552,7 @@ int intel_lrc_live_selftests(struct drm_i915_private *i915) SUBTEST(live_lrc_fixed), SUBTEST(live_lrc_state), SUBTEST(live_gpr_clear), + SUBTEST(live_pphwsp_runtime), }; if (!HAS_LOGICAL_RING_CONTEXTS(i915)) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index b2ed977ed971103a1282f9bf8ebca697095b70ef..3052c4eaf9f64c9f7884526d1a091cff2ceec12c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -481,9 +481,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct i915_gem_context_coredump *ctx) { - err_printf(m, "%s%s[%d] prio %d, guilty %d active %d\n", + const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns; + + err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", header, ctx->comm, ctx->pid, ctx->sched_attr.priority, - ctx->guilty, ctx->active); + ctx->guilty, ctx->active, + ctx->total_runtime * period, + mul_u32_u32(ctx->avg_runtime, period)); } static struct i915_vma_coredump * @@ -1260,6 +1264,9 @@ static bool record_context(struct i915_gem_context_coredump *e, e->guilty = atomic_read(&ctx->guilty_count); e->active = atomic_read(&ctx->active_count); + e->total_runtime = rq->context->runtime.total; + e->avg_runtime = ewma_runtime_read(&rq->context->runtime.avg); + simulated = i915_gem_context_no_error_capture(ctx); i915_gem_context_put(ctx); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index b35bc9edd7332242271842e0e45918fb03ee912d..0d1f6c8ff3556fa3d8923692741272045141358e 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -88,6 +88,10 @@ struct intel_engine_coredump { struct i915_gem_context_coredump { char comm[TASK_COMM_LEN]; + + u64 total_runtime; + u32 avg_runtime; + pid_t pid; int active; int guilty; diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index fcdacd6d4aa5531d0b0767b91501156d48e429b3..a97437fac88455caa4b918e8dc6e016dffe53f68 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -1045,6 +1045,12 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) /* Initialize command stream timestamp frequency */ runtime->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); + runtime->cs_timestamp_period_ns = + div_u64(1e6, runtime->cs_timestamp_frequency_khz); + drm_dbg(&dev_priv->drm, + "CS timestamp wraparound in %lldms\n", + div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, S32_MAX), + USEC_PER_SEC)); } void intel_driver_caps_print(const struct intel_driver_caps *caps, diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 7d4d122d2182f1ea0190bfd164473a162c4d3afb..f8bfa26388c13f38c7c4895ffba02c85312aec55 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -217,6 +217,7 @@ struct intel_runtime_info { struct sseu_dev_info sseu; u32 cs_timestamp_frequency_khz; + u32 cs_timestamp_period_ns; /* Media engine access to SFC per instance */ u8 vdbox_sfc_access;