drm/i915: Record the default hw state after reset upon load

Take a copy of the HW state after a reset upon module loading by executing a context switch from a blank context to the kernel context, thus saving the default hw state over the blank context image. We can then use the default hw state to initialise any future context, ensuring that each starts with the default view of hw state. v2: Unmap our default state from the GTT after stealing it from the context. This should stop us from accidentally overwriting it via the GTT (and frees up some precious GTT space). Testcase: igt/gem_ctx_isolation Signed-off-by: N Chris Wilson <chris@chris-wilson.co.uk> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: N Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-7-chris@chris-wilson.co.uk

drm/i915: Record the default hw state after reset upon load
Take a copy of the HW state after a reset upon module loading by executing a context switch from a blank context to the kernel context, thus saving the default hw state over the blank context image. We can then use the default hw state to initialise any future context, ensuring that each starts with the default view of hw state. v2: Unmap our default state from the GTT after stealing it from the context. This should stop us from accidentally overwriting it via the GTT (and frees up some precious GTT space). Testcase: igt/gem_ctx_isolation Signed-off-by: N Chris Wilson <chris@chris-wilson.co.uk> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Reviewed-by: N Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20171110142634.10551-7-chris@chris-wilson.co.uk
d2b4b979 · Chris Wilson · f4e15af7 · d2b4b979 · d2b4b979 · d2b4b979
11 changed file
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -723,8 +723,6 @@ int intel_vgpu_init_gvt_context(struct intel_vgpu *vgpu)
 	if (IS_ERR(vgpu->shadow_ctx))
 		return PTR_ERR(vgpu->shadow_ctx);

-	vgpu->shadow_ctx->engine[RCS].initialised = true;
-
 	bitmap_zero(vgpu->shadow_ctx_desc_updated, I915_NUM_ENGINES);

 	return 0;

--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1974,7 +1974,6 @@ static int i915_context_status(struct seq_file *m, void *unused)
 			struct intel_context *ce = &ctx->engine[engine->id];

 			seq_printf(m, "%s: ", engine->name);
-			seq_putc(m, ce->initialised ? 'I' : 'i');
 			if (ce->state)
 				describe_obj(m, ce->state->obj);
 			if (ce->ring)

--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -406,6 +406,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		 */
 		value = 1;
 		break;
+	case I915_PARAM_HAS_CONTEXT_ISOLATION:
+		value = intel_engines_has_context_isolation(dev_priv);
+		break;
 	case I915_PARAM_SLICE_MASK:
 		value = INTEL_INFO(dev_priv)->sseu.slice_mask;
 		if (!value)

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4972,6 +4972,120 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
 	return true;
 }

+static int __intel_engines_record_defaults(struct drm_i915_private *i915)
+{
+	struct i915_gem_context *ctx;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int err;
+
+	/*
+	 * As we reset the gpu during very early sanitisation, the current
+	 * register state on the GPU should reflect its defaults values.
+	 * We load a context onto the hw (with restore-inhibit), then switch
+	 * over to a second context to save that default register state. We
+	 * can then prime every new context with that state so they all start
+	 * from the same default HW values.
+	 */
+
+	ctx = i915_gem_context_create_kernel(i915, 0);
+	if (IS_ERR(ctx))
+		return PTR_ERR(ctx);
+
+	for_each_engine(engine, i915, id) {
+		struct drm_i915_gem_request *rq;
+
+		rq = i915_gem_request_alloc(engine, ctx);
+		if (IS_ERR(rq)) {
+			err = PTR_ERR(rq);
+			goto out_ctx;
+		}
+
+		err = i915_switch_context(rq);
+		if (engine->init_context)
+			err = engine->init_context(rq);
+
+		__i915_add_request(rq, true);
+		if (err)
+			goto err_active;
+	}
+
+	err = i915_gem_switch_to_kernel_context(i915);
+	if (err)
+		goto err_active;
+
+	err = i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED);
+	if (err)
+		goto err_active;
+
+	assert_kernel_context_is_current(i915);
+
+	for_each_engine(engine, i915, id) {
+		struct i915_vma *state;
+
+		state = ctx->engine[id].state;
+		if (!state)
+			continue;
+
+		/*
+		 * As we will hold a reference to the logical state, it will
+		 * not be torn down with the context, and importantly the
+		 * object will hold onto its vma (making it possible for a
+		 * stray GTT write to corrupt our defaults). Unmap the vma
+		 * from the GTT to prevent such accidents and reclaim the
+		 * space.
+		 */
+		err = i915_vma_unbind(state);
+		if (err)
+			goto err_active;
+
+		err = i915_gem_object_set_to_cpu_domain(state->obj, false);
+		if (err)
+			goto err_active;
+
+		engine->default_state = i915_gem_object_get(state->obj);
+	}
+
+	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
+		unsigned int found = intel_engines_has_context_isolation(i915);
+
+		/*
+		 * Make sure that classes with multiple engine instances all
+		 * share the same basic configuration.
+		 */
+		for_each_engine(engine, i915, id) {
+			unsigned int bit = BIT(engine->uabi_class);
+			unsigned int expected = engine->default_state ? bit : 0;
+
+			if ((found & bit) != expected) {
+				DRM_ERROR("mismatching default context state for class %d on engine %s\n",
+					  engine->uabi_class, engine->name);
+			}
+		}
+	}
+
+out_ctx:
+	i915_gem_context_set_closed(ctx);
+	i915_gem_context_put(ctx);
+	return err;
+
+err_active:
+	/*
+	 * If we have to abandon now, we expect the engines to be idle
+	 * and ready to be torn-down. First try to flush any remaining
+	 * request, ensure we are pointing at the kernel context and
+	 * then remove it.
+	 */
+	if (WARN_ON(i915_gem_switch_to_kernel_context(i915)))
+		goto out_ctx;
+
+	if (WARN_ON(i915_gem_wait_for_idle(i915, I915_WAIT_LOCKED)))
+		goto out_ctx;
+
+	i915_gem_contexts_lost(i915);
+	goto out_ctx;
+}
+
 int i915_gem_init(struct drm_i915_private *dev_priv)
 {
 	int ret;
@@ -5038,6 +5152,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
 	 */
 	intel_init_clock_gating(dev_priv);

+	ret = __intel_engines_record_defaults(dev_priv);
 out_unlock:
 	if (ret == -EIO) {
 		/* Allow engine initialisation to fail by marking the GPU as

--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -418,8 +418,8 @@ i915_gem_context_create_gvt(struct drm_device *dev)
 	return ctx;
 }

-static struct i915_gem_context *
-create_kernel_context(struct drm_i915_private *i915, int prio)
+struct i915_gem_context *
+i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
 {
 	struct i915_gem_context *ctx;

@@ -473,7 +473,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	ida_init(&dev_priv->contexts.hw_ida);

 	/* lowest priority; idle task */
-	ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
+	ctx = i915_gem_context_create_kernel(dev_priv, I915_PRIORITY_MIN);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context\n");
 		err = PTR_ERR(ctx);
@@ -487,7 +487,7 @@ int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
 	dev_priv->kernel_context = ctx;

 	/* highest priority; preempting task */
-	ctx = create_kernel_context(dev_priv, INT_MAX);
+	ctx = i915_gem_context_create_kernel(dev_priv, INT_MAX);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default preempt context\n");
 		err = PTR_ERR(ctx);
@@ -522,28 +522,6 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
 		engine->context_unpin(engine, engine->last_retired_context);
 		engine->last_retired_context = NULL;
 	}
-
-	/* Force the GPU state to be restored on enabling */
-	if (!i915_modparams.enable_execlists) {
-		struct i915_gem_context *ctx;
-
-		list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
-			if (!i915_gem_context_is_default(ctx))
-				continue;
-
-			for_each_engine(engine, dev_priv, id)
-				ctx->engine[engine->id].initialised = false;
-
-			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
-		}
-
-		for_each_engine(engine, dev_priv, id) {
-			struct intel_context *kce =
-				&dev_priv->kernel_context->engine[engine->id];
-
-			kce->initialised = true;
-		}
-	}
 }

 void i915_gem_contexts_fini(struct drm_i915_private *i915)
@@ -718,9 +696,6 @@ static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
 	if (to->remap_slice)
 		return false;

-	if (!to->engine[RCS].initialised)
-		return false;
-
 	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
 		return false;

@@ -795,11 +770,14 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 			return ret;
 	}

-	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
-		/* NB: If we inhibit the restore, the context is not allowed to
-		 * die because future work may end up depending on valid address
-		 * space. This means we must enforce that a page table load
-		 * occur when this occurs. */
+	if (i915_gem_context_is_kernel(to))
+		/*
+		 * The kernel context(s) is treated as pure scratch and is not
+		 * expected to retain any state (as we sacrifice it during
+		 * suspend and on resume it may be corrupted). This is ok,
+		 * as nothing actually executes using the kernel context; it
+		 * is purely used for flushing user contexts.
+		 */
 		hw_flags = MI_RESTORE_INHIBIT;
 	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
 		hw_flags = MI_FORCE_RESTORE;
@@ -843,15 +821,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req)
 		to->remap_slice &= ~(1<<i);
 	}

-	if (!to->engine[RCS].initialised) {
-		if (engine->init_context) {
-			ret = engine->init_context(req);
-			if (ret)
-				return ret;
-		}
-		to->engine[RCS].initialised = true;
-	}
-
 	return 0;
 }


--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -157,7 +157,6 @@ struct i915_gem_context {
 		u32 *lrc_reg_state;
 		u64 lrc_desc;
 		int pin_count;
-		bool initialised;
 	} engine[I915_NUM_ENGINES];

 	/** ring_size: size for allocating the per-engine ring buffer */
@@ -292,6 +291,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
 				       struct drm_file *file);

+struct i915_gem_context *
+i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio);
+
 static inline struct i915_gem_context *
 i915_gem_context_get(struct i915_gem_context *ctx)
 {

--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -687,6 +687,9 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
 	intel_engine_cleanup_cmd_parser(engine);
 	i915_gem_batch_pool_fini(&engine->batch_pool);

+	if (engine->default_state)
+		i915_gem_object_put(engine->default_state);
+
 	if (HAS_LOGICAL_RING_PREEMPTION(engine->i915))
 		engine->context_unpin(engine, engine->i915->preempt_context);
 	engine->context_unpin(engine, engine->i915->kernel_context);
@@ -1705,6 +1708,20 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
 	}
 }

+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	unsigned int which;
+
+	which = 0;
+	for_each_engine(engine, i915, id)
+		if (engine->default_state)
+			which |= BIT(engine->uabi_class);
+
+	return which;
+}
+
 static void print_request(struct drm_printer *m,
 			  struct drm_i915_gem_request *rq,
 			  const char *prefix)

--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1180,7 +1180,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_context *ce = &request->ctx->engine[engine->id];
 	u32 *cs;
-	int ret;

 	GEM_BUG_ON(!ce->pin_count);

@@ -1194,14 +1193,6 @@ static int execlists_request_alloc(struct drm_i915_gem_request *request)
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);

-	if (!ce->initialised) {
-		ret = engine->init_context(request);
-		if (ret)
-			return ret;
-
-		ce->initialised = true;
-	}
-
 	/* Note that after this point, we have committed to using
 	 * this request as it is being used to both track the
 	 * state of engine initialisation and liveness of the
@@ -2133,7 +2124,6 @@ static void execlists_init_reg_state(u32 *regs,

 	CTX_REG(regs, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(engine),
 		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
 				   (HAS_RESOURCE_STREAMER(dev_priv) ?
 				   CTX_CTRL_RS_CTX_ENABLE : 0)));
 	CTX_REG(regs, CTX_RING_HEAD, RING_HEAD(base), 0);
@@ -2210,6 +2200,7 @@ populate_lr_context(struct i915_gem_context *ctx,
 		    struct intel_ring *ring)
 {
 	void *vaddr;
+	u32 *regs;
 	int ret;

 	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
@@ -2226,11 +2217,31 @@ populate_lr_context(struct i915_gem_context *ctx,
 	}
 	ctx_obj->mm.dirty = true;

+	if (engine->default_state) {
+		/*
+		 * We only want to copy over the template context state;
+		 * skipping over the headers reserved for GuC communication,
+		 * leaving those as zero.
+		 */
+		const unsigned long start = LRC_HEADER_PAGES * PAGE_SIZE;
+		void *defaults;
+
+		defaults = i915_gem_object_pin_map(engine->default_state,
+						   I915_MAP_WB);
+		if (IS_ERR(defaults))
+			return PTR_ERR(defaults);
+
+		memcpy(vaddr + start, defaults + start, engine->context_size);
+		i915_gem_object_unpin_map(engine->default_state);
+	}
+
 	/* The second page of the context object contains some fields which must
 	 * be set up prior to the first execution. */
-
-	execlists_init_reg_state(vaddr + LRC_STATE_PN * PAGE_SIZE,
-				 ctx, engine, ring);
+	regs = vaddr + LRC_STATE_PN * PAGE_SIZE;
+	execlists_init_reg_state(regs, ctx, engine, ring);
+	if (!engine->default_state)
+		regs[CTX_CONTEXT_CONTROL + 1] |=
+			_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);

 	i915_gem_object_unpin_map(ctx_obj);

@@ -2283,7 +2294,6 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,

 	ce->ring = ring;
 	ce->state = vma;
-	ce->initialised |= engine->init_context == NULL;

 	return 0;


--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1384,11 +1384,34 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	struct drm_i915_private *i915 = engine->i915;
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
+	int err;

 	obj = i915_gem_object_create(i915, engine->context_size);
 	if (IS_ERR(obj))
 		return ERR_CAST(obj);

+	if (engine->default_state) {
+		void *defaults, *vaddr;
+
+		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_obj;
+		}
+
+		defaults = i915_gem_object_pin_map(engine->default_state,
+						   I915_MAP_WB);
+		if (IS_ERR(defaults)) {
+			err = PTR_ERR(defaults);
+			goto err_map;
+		}
+
+		memcpy(vaddr, defaults, engine->context_size);
+
+		i915_gem_object_unpin_map(engine->default_state);
+		i915_gem_object_unpin_map(obj);
+	}
+
 	/*
 	 * Try to make the context utilize L3 as well as LLC.
 	 *
@@ -1410,10 +1433,18 @@ alloc_context_vma(struct intel_engine_cs *engine)
 	}

 	vma = i915_vma_instance(obj, &i915->ggtt.base, NULL);
-	if (IS_ERR(vma))
-		i915_gem_object_put(obj);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto err_obj;
+	}

 	return vma;
+
+err_map:
+	i915_gem_object_unpin_map(obj);
+err_obj:
+	i915_gem_object_put(obj);
+	return ERR_PTR(err);
 }

 static struct intel_ring *
@@ -1449,16 +1480,6 @@ intel_ring_context_pin(struct intel_engine_cs *engine,
 		ce->state->obj->pin_global++;
 	}

-	/* The kernel context is only used as a placeholder for flushing the
-	 * active context. It is never used for submitting user rendering and
-	 * as such never requires the golden render context, and so we can skip
-	 * emitting it when we switch to the kernel context. This is required
-	 * as during eviction we cannot allocate and pin the renderstate in
-	 * order to initialise the context.
-	 */
-	if (i915_gem_context_is_kernel(ctx))
-		ce->initialised = true;
-
 	i915_gem_context_get(ctx);

 out:

--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -306,6 +306,7 @@ struct intel_engine_cs {
 	struct intel_ring *buffer;
 	struct intel_timeline *timeline;

+	struct drm_i915_gem_object *default_state;
 	struct intel_render_state *render_state;

 	atomic_t irq_count;
@@ -932,6 +933,7 @@ void intel_engines_park(struct drm_i915_private *i915);
 void intel_engines_unpark(struct drm_i915_private *i915);

 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);

 bool intel_engine_can_store_dword(struct intel_engine_cs *engine);


--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -466,6 +466,21 @@ typedef struct drm_i915_irq_wait {
 */
 #define I915_PARAM_HAS_EXEC_FENCE_ARRAY  49

+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen support contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*