提交 ca82580c 编写于 作者: Tvrtko Ursulin

drm/i915: Do not call API requiring struct_mutex where it is not available

LRC code was calling GEM API like i915_gem_obj_ggtt_offset from
places where the struct_mutex cannot be grabbed (irq handlers).

To avoid that, this patch caches some interesting bits and values
in the engine and context structures.

Some usages are also removed where they are not needed like a
few asserts which are either impossible or have been checked
already during engine initialization.

Side benefit is also that interrupt handlers and command
submission stop evaluating invariant conditionals, like what
Gen we are running on, on every interrupt and every command
submitted.

This patch deals with logical ring context id and descriptors
while subsequent patches will deal with the remaining issues.

v2:
 * Cache the VMA instead of the address. (Chris Wilson)
 * Incorporate Dave Gordon's good comments and function name.

v3:
 * Extract ctx descriptor template to a function and group
   functions dealing with ctx descriptor & co together near
   top of the file. (Dave Gordon)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Gordon <david.s.gordon@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1452870629-13830-1-git-send-email-tvrtko.ursulin@linux.intel.com
上级 e0313db0
...@@ -1994,12 +1994,13 @@ static int i915_context_status(struct seq_file *m, void *unused) ...@@ -1994,12 +1994,13 @@ static int i915_context_status(struct seq_file *m, void *unused)
} }
static void i915_dump_lrc_obj(struct seq_file *m, static void i915_dump_lrc_obj(struct seq_file *m,
struct intel_engine_cs *ring, struct intel_context *ctx,
struct drm_i915_gem_object *ctx_obj) struct intel_engine_cs *ring)
{ {
struct page *page; struct page *page;
uint32_t *reg_state; uint32_t *reg_state;
int j; int j;
struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
unsigned long ggtt_offset = 0; unsigned long ggtt_offset = 0;
if (ctx_obj == NULL) { if (ctx_obj == NULL) {
...@@ -2009,7 +2010,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, ...@@ -2009,7 +2010,7 @@ static void i915_dump_lrc_obj(struct seq_file *m,
} }
seq_printf(m, "CONTEXT: %s %u\n", ring->name, seq_printf(m, "CONTEXT: %s %u\n", ring->name,
intel_execlists_ctx_id(ctx_obj)); intel_execlists_ctx_id(ctx, ring));
if (!i915_gem_obj_ggtt_bound(ctx_obj)) if (!i915_gem_obj_ggtt_bound(ctx_obj))
seq_puts(m, "\tNot bound in GGTT\n"); seq_puts(m, "\tNot bound in GGTT\n");
...@@ -2058,8 +2059,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused) ...@@ -2058,8 +2059,7 @@ static int i915_dump_lrc(struct seq_file *m, void *unused)
list_for_each_entry(ctx, &dev_priv->context_list, link) { list_for_each_entry(ctx, &dev_priv->context_list, link) {
for_each_ring(ring, dev_priv, i) { for_each_ring(ring, dev_priv, i) {
if (ring->default_context != ctx) if (ring->default_context != ctx)
i915_dump_lrc_obj(m, ring, i915_dump_lrc_obj(m, ctx, ring);
ctx->engine[i].state);
} }
} }
...@@ -2133,11 +2133,8 @@ static int i915_execlists(struct seq_file *m, void *data) ...@@ -2133,11 +2133,8 @@ static int i915_execlists(struct seq_file *m, void *data)
seq_printf(m, "\t%d requests in queue\n", count); seq_printf(m, "\t%d requests in queue\n", count);
if (head_req) { if (head_req) {
struct drm_i915_gem_object *ctx_obj;
ctx_obj = head_req->ctx->engine[ring_id].state;
seq_printf(m, "\tHead request id: %u\n", seq_printf(m, "\tHead request id: %u\n",
intel_execlists_ctx_id(ctx_obj)); intel_execlists_ctx_id(head_req->ctx, ring));
seq_printf(m, "\tHead request tail: %u\n", seq_printf(m, "\tHead request tail: %u\n",
head_req->tail); head_req->tail);
} }
......
...@@ -888,6 +888,8 @@ struct intel_context { ...@@ -888,6 +888,8 @@ struct intel_context {
struct drm_i915_gem_object *state; struct drm_i915_gem_object *state;
struct intel_ringbuffer *ringbuf; struct intel_ringbuffer *ringbuf;
int pin_count; int pin_count;
struct i915_vma *lrc_vma;
u64 lrc_desc;
} engine[I915_NUM_RINGS]; } engine[I915_NUM_RINGS];
struct list_head link; struct list_head link;
......
...@@ -44,7 +44,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t; ...@@ -44,7 +44,6 @@ typedef uint64_t gen8_ppgtt_pml4e_t;
#define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT) #define gtt_total_entries(gtt) ((gtt).base.total >> PAGE_SHIFT)
/* gen6-hsw has bit 11-4 for physical addr bit 39-32 */ /* gen6-hsw has bit 11-4 for physical addr bit 39-32 */
#define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0))
#define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr) #define GEN6_PTE_ADDR_ENCODE(addr) GEN6_GTT_ADDR_ENCODE(addr)
......
...@@ -263,65 +263,92 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists ...@@ -263,65 +263,92 @@ int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists
return 0; return 0;
} }
static void
logical_ring_init_platform_invariants(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
ring->disable_lite_restore_wa = (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
(ring->id == VCS || ring->id == VCS2);
ring->ctx_desc_template = GEN8_CTX_VALID;
ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) <<
GEN8_CTX_ADDRESSING_MODE_SHIFT;
if (IS_GEN8(dev))
ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
/* TODO: WaDisableLiteRestore when we start using semaphore
* signalling between Command Streamers */
/* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */
/* WaEnableForceRestoreInCtxtDescForVCS:skl */
/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
if (ring->disable_lite_restore_wa)
ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
}
/** /**
* intel_execlists_ctx_id() - get the Execlists Context ID * intel_lr_context_descriptor_update() - calculate & cache the descriptor
* @ctx_obj: Logical Ring Context backing object. * descriptor for a pinned context
* *
* Do not confuse with ctx->id! Unfortunately we have a name overload * @ctx: Context to work on
* here: the old context ID we pass to userspace as a handler so that * @ring: Engine the descriptor will be used with
* they can refer to a context, and the new context ID we pass to the
* ELSP so that the GPU can inform us of the context status via
* interrupts.
* *
* Return: 20-bits globally unique context ID. * The context descriptor encodes various attributes of a context,
* including its GTT address and some flags. Because it's fairly
* expensive to calculate, we'll just do it once and cache the result,
* which remains valid until the context is unpinned.
*
* This is what a descriptor looks like, from LSB to MSB:
* bits 0-11: flags, GEN8_CTX_* (cached in ctx_desc_template)
* bits 12-31: LRCA, GTT address of (the HWSP of) this context
* bits 32-51: ctx ID, a globally unique tag (the LRCA again!)
* bits 52-63: reserved, may encode the engine ID (for GuC)
*/ */
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj) static void
intel_lr_context_descriptor_update(struct intel_context *ctx,
struct intel_engine_cs *ring)
{ {
u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) + uint64_t lrca, desc;
LRC_PPHWSP_PN * PAGE_SIZE;
/* LRCA is required to be 4K aligned so the more significant 20 bits lrca = ctx->engine[ring->id].lrc_vma->node.start +
* are globally unique */ LRC_PPHWSP_PN * PAGE_SIZE;
return lrca >> 12;
}
static bool disable_lite_restore_wa(struct intel_engine_cs *ring) desc = ring->ctx_desc_template; /* bits 0-11 */
{ desc |= lrca; /* bits 12-31 */
struct drm_device *dev = ring->dev; desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) || ctx->engine[ring->id].lrc_desc = desc;
IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
(ring->id == VCS || ring->id == VCS2);
} }
uint64_t intel_lr_context_descriptor(struct intel_context *ctx, uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
struct intel_engine_cs *ring) struct intel_engine_cs *ring)
{ {
struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state; return ctx->engine[ring->id].lrc_desc;
uint64_t desc; }
uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
LRC_PPHWSP_PN * PAGE_SIZE;
WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
desc = GEN8_CTX_VALID;
desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
if (IS_GEN8(ctx_obj->base.dev))
desc |= GEN8_CTX_L3LLC_COHERENT;
desc |= GEN8_CTX_PRIVILEGE;
desc |= lrca;
desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
/* TODO: WaDisableLiteRestore when we start using semaphore
* signalling between Command Streamers */
/* desc |= GEN8_CTX_FORCE_RESTORE; */
/* WaEnableForceRestoreInCtxtDescForVCS:skl */
/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
if (disable_lite_restore_wa(ring))
desc |= GEN8_CTX_FORCE_RESTORE;
return desc; /**
* intel_execlists_ctx_id() - get the Execlists Context ID
* @ctx: Context to get the ID for
* @ring: Engine to get the ID for
*
* Do not confuse with ctx->id! Unfortunately we have a name overload
* here: the old context ID we pass to userspace as a handler so that
* they can refer to a context, and the new context ID we pass to the
* ELSP so that the GPU can inform us of the context status via
* interrupts.
*
* The context ID is a portion of the context descriptor, so we can
* just extract the required part from the cached descriptor.
*
* Return: 20-bits globally unique context ID.
*/
u32 intel_execlists_ctx_id(struct intel_context *ctx,
struct intel_engine_cs *ring)
{
return intel_lr_context_descriptor(ctx, ring) >> GEN8_CTX_ID_SHIFT;
} }
static void execlists_elsp_write(struct drm_i915_gem_request *rq0, static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
...@@ -369,8 +396,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq) ...@@ -369,8 +396,6 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
uint32_t *reg_state; uint32_t *reg_state;
BUG_ON(!ctx_obj); BUG_ON(!ctx_obj);
WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN); page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
reg_state = kmap_atomic(page); reg_state = kmap_atomic(page);
...@@ -477,9 +502,7 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring, ...@@ -477,9 +502,7 @@ static bool execlists_check_remove_request(struct intel_engine_cs *ring,
execlist_link); execlist_link);
if (head_req != NULL) { if (head_req != NULL) {
struct drm_i915_gem_object *ctx_obj = if (intel_execlists_ctx_id(head_req->ctx, ring) == request_id) {
head_req->ctx->engine[ring->id].state;
if (intel_execlists_ctx_id(ctx_obj) == request_id) {
WARN(head_req->elsp_submitted == 0, WARN(head_req->elsp_submitted == 0,
"Never submitted head request\n"); "Never submitted head request\n");
...@@ -556,7 +579,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring) ...@@ -556,7 +579,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
} }
} }
if (disable_lite_restore_wa(ring)) { if (ring->disable_lite_restore_wa) {
/* Prevent a ctx to preempt itself */ /* Prevent a ctx to preempt itself */
if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) && if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
(submit_contexts != 0)) (submit_contexts != 0))
...@@ -1039,14 +1062,16 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req) ...@@ -1039,14 +1062,16 @@ int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
} }
static int intel_lr_context_do_pin(struct intel_engine_cs *ring, static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
struct drm_i915_gem_object *ctx_obj, struct intel_context *ctx)
struct intel_ringbuffer *ringbuf)
{ {
struct drm_device *dev = ring->dev; struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_private *dev_priv = dev->dev_private;
int ret = 0; struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
int ret;
WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex)); WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
PIN_OFFSET_BIAS | GUC_WOPCM_TOP); PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
if (ret) if (ret)
...@@ -1056,6 +1081,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring, ...@@ -1056,6 +1081,8 @@ static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
if (ret) if (ret)
goto unpin_ctx_obj; goto unpin_ctx_obj;
ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
intel_lr_context_descriptor_update(ctx, ring);
ctx_obj->dirty = true; ctx_obj->dirty = true;
/* Invalidate GuC TLB. */ /* Invalidate GuC TLB. */
...@@ -1074,11 +1101,9 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq) ...@@ -1074,11 +1101,9 @@ static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
{ {
int ret = 0; int ret = 0;
struct intel_engine_cs *ring = rq->ring; struct intel_engine_cs *ring = rq->ring;
struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
struct intel_ringbuffer *ringbuf = rq->ringbuf;
if (rq->ctx->engine[ring->id].pin_count++ == 0) { if (rq->ctx->engine[ring->id].pin_count++ == 0) {
ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf); ret = intel_lr_context_do_pin(ring, rq->ctx);
if (ret) if (ret)
goto reset_pin_count; goto reset_pin_count;
} }
...@@ -1100,6 +1125,8 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq) ...@@ -1100,6 +1125,8 @@ void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
if (--rq->ctx->engine[ring->id].pin_count == 0) { if (--rq->ctx->engine[ring->id].pin_count == 0) {
intel_unpin_ringbuffer_obj(ringbuf); intel_unpin_ringbuffer_obj(ringbuf);
i915_gem_object_ggtt_unpin(ctx_obj); i915_gem_object_ggtt_unpin(ctx_obj);
rq->ctx->engine[ring->id].lrc_vma = NULL;
rq->ctx->engine[ring->id].lrc_desc = 0;
} }
} }
} }
...@@ -1939,6 +1966,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring) ...@@ -1939,6 +1966,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
ring->status_page.obj = NULL; ring->status_page.obj = NULL;
} }
ring->disable_lite_restore_wa = false;
ring->ctx_desc_template = 0;
lrc_destroy_wa_ctx_obj(ring); lrc_destroy_wa_ctx_obj(ring);
ring->dev = NULL; ring->dev = NULL;
} }
...@@ -1989,6 +2019,8 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) ...@@ -1989,6 +2019,8 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
INIT_LIST_HEAD(&ring->execlist_retired_req_list); INIT_LIST_HEAD(&ring->execlist_retired_req_list);
spin_lock_init(&ring->execlist_lock); spin_lock_init(&ring->execlist_lock);
logical_ring_init_platform_invariants(ring);
ret = i915_cmd_parser_init_ring(ring); ret = i915_cmd_parser_init_ring(ring);
if (ret) if (ret)
goto error; goto error;
...@@ -1998,10 +2030,7 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring) ...@@ -1998,10 +2030,7 @@ logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
goto error; goto error;
/* As this is the default context, always pin it */ /* As this is the default context, always pin it */
ret = intel_lr_context_do_pin( ret = intel_lr_context_do_pin(ring, ring->default_context);
ring,
ring->default_context->engine[ring->id].state,
ring->default_context->engine[ring->id].ringbuf);
if (ret) { if (ret) {
DRM_ERROR( DRM_ERROR(
"Failed to pin and map ringbuffer %s: %d\n", "Failed to pin and map ringbuffer %s: %d\n",
......
...@@ -107,13 +107,15 @@ void intel_lr_context_reset(struct drm_device *dev, ...@@ -107,13 +107,15 @@ void intel_lr_context_reset(struct drm_device *dev,
uint64_t intel_lr_context_descriptor(struct intel_context *ctx, uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
struct intel_engine_cs *ring); struct intel_engine_cs *ring);
u32 intel_execlists_ctx_id(struct intel_context *ctx,
struct intel_engine_cs *ring);
/* Execlists */ /* Execlists */
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists); int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
struct i915_execbuffer_params; struct i915_execbuffer_params;
int intel_execlists_submission(struct i915_execbuffer_params *params, int intel_execlists_submission(struct i915_execbuffer_params *params,
struct drm_i915_gem_execbuffer2 *args, struct drm_i915_gem_execbuffer2 *args,
struct list_head *vmas); struct list_head *vmas);
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
void intel_lrc_irq_handler(struct intel_engine_cs *ring); void intel_lrc_irq_handler(struct intel_engine_cs *ring);
void intel_execlists_retire_requests(struct intel_engine_cs *ring); void intel_execlists_retire_requests(struct intel_engine_cs *ring);
......
...@@ -269,6 +269,8 @@ struct intel_engine_cs { ...@@ -269,6 +269,8 @@ struct intel_engine_cs {
struct list_head execlist_queue; struct list_head execlist_queue;
struct list_head execlist_retired_req_list; struct list_head execlist_retired_req_list;
u8 next_context_status_buffer; u8 next_context_status_buffer;
bool disable_lite_restore_wa;
u32 ctx_desc_template;
u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */ u32 irq_keep_mask; /* bitmask for interrupts that should not be masked */
int (*emit_request)(struct drm_i915_gem_request *request); int (*emit_request)(struct drm_i915_gem_request *request);
int (*emit_flush)(struct drm_i915_gem_request *request, int (*emit_flush)(struct drm_i915_gem_request *request,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册