diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 69190af87a42462ffe40598e10f55293c69e2bd3..84e2a231b03cb9cf4fc73621b235cfa92030111a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4143,6 +4143,10 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj, bound = vma ? vma->bound : 0; if (vma == NULL || !drm_mm_node_allocated(&vma->node)) { + /* In true PPGTT, bind has possibly changed PDEs, which + * means we must do a context switch before the GPU can + * accurately read some of the VMAs. + */ vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment, flags); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b6ea85dcf9e6967a907776f4b4d46e0bc80a908b..dd9ab36a039b9613959832c7588e15254e138da9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -573,8 +573,20 @@ static inline bool should_skip_switch(struct intel_engine_cs *ring, struct intel_context *from, struct intel_context *to) { - if (from == to && !to->remap_slice) - return true; + struct drm_i915_private *dev_priv = ring->dev->dev_private; + + if (to->remap_slice) + return false; + + if (to->ppgtt) { + if (from == to && !test_bit(ring->id, + &to->ppgtt->pd_dirty_rings)) + return true; + } else if (dev_priv->mm.aliasing_ppgtt) { + if (from == to && !test_bit(ring->id, + &dev_priv->mm.aliasing_ppgtt->pd_dirty_rings)) + return true; + } return false; } @@ -610,10 +622,7 @@ needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to) if (ring != &dev_priv->ring[RCS]) return false; - if (!to->legacy_hw_ctx.initialized) - return true; - - if (i915_gem_context_is_default(to)) + if (to->ppgtt->pd_dirty_rings) return true; return false; @@ -664,6 +673,9 @@ static int do_switch(struct intel_engine_cs *ring, ret = to->ppgtt->switch_mm(to->ppgtt, ring); if (ret) goto unpin_out; + + /* Doing a PD load always reloads the page dirs */ + clear_bit(ring->id, &to->ppgtt->pd_dirty_rings); } if (ring != &dev_priv->ring[RCS]) { @@ -696,6 +708,8 @@ static int do_switch(struct intel_engine_cs *ring, if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) hw_flags |= MI_RESTORE_INHIBIT; + else if (to->ppgtt && test_and_clear_bit(ring->id, &to->ppgtt->pd_dirty_rings)) + hw_flags |= MI_FORCE_RESTORE; ret = mi_set_context(ring, to, hw_flags); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 9838d11e43ee622ec770edb2e3ec719da48fe5f9..43335de0f713efd89d5e7d029cf92dd97a8452b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1251,6 +1251,13 @@ i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, if (ret) goto error; + if (ctx->ppgtt) + WARN(ctx->ppgtt->pd_dirty_rings & (1<id), + "%s didn't clear reload\n", ring->name); + else if (dev_priv->mm.aliasing_ppgtt) + WARN(dev_priv->mm.aliasing_ppgtt->pd_dirty_rings & + (1<id), "%s didn't clear reload\n", ring->name); + instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; instp_mask = I915_EXEC_CONSTANTS_MASK; switch (instp_mode) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7d206b59eb3c0d52c8e456f21d0b515dfc081a2e..645c3636c5712eacd19673385dbd42cd06e16f86 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -1162,6 +1162,16 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt) 4096, PCI_DMA_BIDIRECTIONAL); } +/* PDE TLBs are a pain invalidate pre GEN8. It requires a context reload. If we + * are switching between contexts with the same LRCA, we also must do a force + * restore. + */ +static inline void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt) +{ + /* If current vm != vm, */ + ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask; +} + static int gen6_alloc_va_range(struct i915_address_space *vm, uint64_t start, uint64_t length) { @@ -1181,6 +1191,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm, GEN6_PTES); } + mark_tlbs_dirty(ppgtt); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 7472312deff938cf20411d9827edd342fc8b747a..75e29f738a1beb98c2bdc7ab35aba42c0c470c87 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -300,6 +300,7 @@ struct i915_hw_ppgtt { struct i915_address_space base; struct kref ref; struct drm_mm_node node; + unsigned long pd_dirty_rings; unsigned num_pd_entries; unsigned num_pd_pages; /* gen8+ */ union {