提交 8b46ed57 编写于 作者: Joonas Lahtinen

Merge tag 'gvt-next-2020-04-22' of https://github.com/intel/gvt-linux into drm-intel-next-queued

gvt-next-2020-04-22

- remove non-upstream xen support bits (Christoph)
- guest context shadow copy optimization (Yan)
- guest context tracking for shadow skip optimization (Yan)
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200422051230.GH11247@zhen-hp.sh.intel.com
...@@ -2341,12 +2341,27 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, ...@@ -2341,12 +2341,27 @@ int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
{ {
const struct intel_gvt_device_info *info = &vgpu->gvt->device_info; const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
int ret; int ret;
struct intel_vgpu_submission *s = &vgpu->submission;
struct intel_engine_cs *engine;
int i;
if (bytes != 4 && bytes != 8) if (bytes != 4 && bytes != 8)
return -EINVAL; return -EINVAL;
off -= info->gtt_start_offset; off -= info->gtt_start_offset;
ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes); ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
/* if ggtt of last submitted context is written,
* that context has probably been unpinned.
* Set last shadowed ctx to invalid.
*/
for_each_engine(engine, vgpu->gvt->gt, i) {
if (!s->last_ctx[i].valid)
continue;
if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
s->last_ctx[i].valid = false;
}
return ret; return ret;
} }
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
*/ */
#include <linux/types.h> #include <linux/types.h>
#include <xen/xen.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include "i915_drv.h" #include "i915_drv.h"
......
...@@ -163,6 +163,11 @@ struct intel_vgpu_submission { ...@@ -163,6 +163,11 @@ struct intel_vgpu_submission {
const struct intel_vgpu_submission_ops *ops; const struct intel_vgpu_submission_ops *ops;
int virtual_submission_interface; int virtual_submission_interface;
bool active; bool active;
struct {
u32 lrca;
bool valid;
u64 ring_context_gpa;
} last_ctx[I915_NUM_ENGINES];
}; };
struct intel_vgpu { struct intel_vgpu {
......
...@@ -79,6 +79,4 @@ struct intel_gvt_mpt { ...@@ -79,6 +79,4 @@ struct intel_gvt_mpt {
bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn); bool (*is_valid_gfn)(unsigned long handle, unsigned long gfn);
}; };
extern struct intel_gvt_mpt xengt_mpt;
#endif /* _GVT_HYPERCALL_H_ */ #endif /* _GVT_HYPERCALL_H_ */
...@@ -128,16 +128,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) ...@@ -128,16 +128,24 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
{ {
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
struct intel_gvt *gvt = vgpu->gvt; struct intel_gvt *gvt = vgpu->gvt;
struct drm_i915_gem_object *ctx_obj = struct intel_context *ctx = workload->req->context;
workload->req->context->state->obj;
struct execlist_ring_context *shadow_ring_context; struct execlist_ring_context *shadow_ring_context;
struct page *page;
void *dst; void *dst;
void *context_base;
unsigned long context_gpa, context_page_num; unsigned long context_gpa, context_page_num;
unsigned long gpa_base; /* first gpa of consecutive GPAs */
unsigned long gpa_size; /* size of consecutive GPAs */
struct intel_vgpu_submission *s = &vgpu->submission;
int i; int i;
bool skip = false;
int ring_id = workload->engine->id;
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); GEM_BUG_ON(!intel_context_is_pinned(ctx));
shadow_ring_context = kmap(page);
context_base = (void *) ctx->lrc_reg_state -
(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
shadow_ring_context = (void *) ctx->lrc_reg_state;
sr_oa_regs(workload, (u32 *)shadow_ring_context, true); sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
#define COPY_REG(name) \ #define COPY_REG(name) \
...@@ -169,23 +177,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) ...@@ -169,23 +177,43 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
sr_oa_regs(workload, (u32 *)shadow_ring_context, false); sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
kunmap(page);
if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val)) gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
return 0; workload->engine->name, workload->ctx_desc.lrca,
workload->ctx_desc.context_id,
workload->ring_context_gpa);
gvt_dbg_sched("ring %s workload lrca %x", /* only need to ensure this context is not pinned/unpinned during the
workload->engine->name, * period from last submission to this submission.
workload->ctx_desc.lrca); * Upon reaching this function, the currently submitted context is not
* supposed to get unpinned. If a misbehaving guest driver ever does
* this, it would corrupt itself.
*/
if (s->last_ctx[ring_id].valid &&
(s->last_ctx[ring_id].lrca ==
workload->ctx_desc.lrca) &&
(s->last_ctx[ring_id].ring_context_gpa ==
workload->ring_context_gpa))
skip = true;
s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
return 0;
s->last_ctx[ring_id].valid = false;
context_page_num = workload->engine->context_size; context_page_num = workload->engine->context_size;
context_page_num = context_page_num >> PAGE_SHIFT; context_page_num = context_page_num >> PAGE_SHIFT;
if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0) if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
context_page_num = 19; context_page_num = 19;
i = 2; /* find consecutive GPAs from gma until the first inconsecutive GPA.
while (i < context_page_num) { * read from the continuous GPAs into dst virtual address
*/
gpa_size = 0;
for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) << (u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT)); I915_GTT_PAGE_SHIFT));
...@@ -194,13 +222,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) ...@@ -194,13 +222,26 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
return -EFAULT; return -EFAULT;
} }
page = i915_gem_object_get_page(ctx_obj, i); if (gpa_size == 0) {
dst = kmap(page); gpa_base = context_gpa;
intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst, dst = context_base + (i << I915_GTT_PAGE_SHIFT);
I915_GTT_PAGE_SIZE); } else if (context_gpa != gpa_base + gpa_size)
kunmap(page); goto read;
i++;
gpa_size += I915_GTT_PAGE_SIZE;
if (i == context_page_num - 1)
goto read;
continue;
read:
intel_gvt_hypervisor_read_gpa(vgpu, gpa_base, dst, gpa_size);
gpa_base = context_gpa;
gpa_size = I915_GTT_PAGE_SIZE;
dst = context_base + (i << I915_GTT_PAGE_SHIFT);
} }
s->last_ctx[ring_id].valid = true;
return 0; return 0;
} }
...@@ -783,11 +824,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload) ...@@ -783,11 +824,13 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
{ {
struct i915_request *rq = workload->req; struct i915_request *rq = workload->req;
struct intel_vgpu *vgpu = workload->vgpu; struct intel_vgpu *vgpu = workload->vgpu;
struct drm_i915_gem_object *ctx_obj = rq->context->state->obj;
struct execlist_ring_context *shadow_ring_context; struct execlist_ring_context *shadow_ring_context;
struct page *page; struct intel_context *ctx = workload->req->context;
void *context_base;
void *src; void *src;
unsigned long context_gpa, context_page_num; unsigned long context_gpa, context_page_num;
unsigned long gpa_base; /* first gpa of consecutive GPAs */
unsigned long gpa_size; /* size of consecutive GPAs*/
int i; int i;
u32 ring_base; u32 ring_base;
u32 head, tail; u32 head, tail;
...@@ -796,6 +839,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload) ...@@ -796,6 +839,8 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id, gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
workload->ctx_desc.lrca); workload->ctx_desc.lrca);
GEM_BUG_ON(!intel_context_is_pinned(ctx));
head = workload->rb_head; head = workload->rb_head;
tail = workload->rb_tail; tail = workload->rb_tail;
wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF; wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
...@@ -819,9 +864,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload) ...@@ -819,9 +864,14 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0) if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0)
context_page_num = 19; context_page_num = 19;
i = 2; context_base = (void *) ctx->lrc_reg_state -
(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
while (i < context_page_num) { /* find consecutive GPAs from gma until the first inconsecutive GPA.
* write to the consecutive GPAs from src virtual address
*/
gpa_size = 0;
for (i = 2; i < context_page_num; i++) {
context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
(u32)((workload->ctx_desc.lrca + i) << (u32)((workload->ctx_desc.lrca + i) <<
I915_GTT_PAGE_SHIFT)); I915_GTT_PAGE_SHIFT));
...@@ -830,19 +880,30 @@ static void update_guest_context(struct intel_vgpu_workload *workload) ...@@ -830,19 +880,30 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
return; return;
} }
page = i915_gem_object_get_page(ctx_obj, i); if (gpa_size == 0) {
src = kmap(page); gpa_base = context_gpa;
intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src, src = context_base + (i << I915_GTT_PAGE_SHIFT);
I915_GTT_PAGE_SIZE); } else if (context_gpa != gpa_base + gpa_size)
kunmap(page); goto write;
i++;
gpa_size += I915_GTT_PAGE_SIZE;
if (i == context_page_num - 1)
goto write;
continue;
write:
intel_gvt_hypervisor_write_gpa(vgpu, gpa_base, src, gpa_size);
gpa_base = context_gpa;
gpa_size = I915_GTT_PAGE_SIZE;
src = context_base + (i << I915_GTT_PAGE_SHIFT);
} }
intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4); RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); shadow_ring_context = (void *) ctx->lrc_reg_state;
shadow_ring_context = kmap(page);
#define COPY_REG(name) \ #define COPY_REG(name) \
intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \ intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
...@@ -859,8 +920,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload) ...@@ -859,8 +920,6 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
(void *)shadow_ring_context + (void *)shadow_ring_context +
sizeof(*shadow_ring_context), sizeof(*shadow_ring_context),
I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context)); I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
kunmap(page);
} }
void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu, void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
...@@ -1259,6 +1318,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) ...@@ -1259,6 +1318,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
atomic_set(&s->running_workload_num, 0); atomic_set(&s->running_workload_num, 0);
bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES); bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
memset(s->last_ctx, 0, sizeof(s->last_ctx));
i915_vm_put(&ppgtt->vm); i915_vm_put(&ppgtt->vm);
return 0; return 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册