Commit 2b2fc72a authored by Dave Airlie

Merge tag 'drm-intel-testing-2017-04-03' of git://anongit.freedesktop.org/git/drm-intel into drm-next

Last 4.12 feature pile:

GVT updates:
- Add mdev attribute group for per-vgpu info
- Time slice based vGPU scheduling QoS support (Gao Ping)
- Initial KBL support for E3 server (Han Xu)
- other misc.

i915:
- lots and lots of small fixes and improvements all over
- refactor fw_domain code (Chris Wilson)
- improve guc code (Oscar Mateo)
- refactor cursor/sprite code, precompute more for less overhead in
  the critical path (Ville)
- refactor guc/huc fw loading code a bit (Michal Wajdeczko)

* tag 'drm-intel-testing-2017-04-03' of git://anongit.freedesktop.org/git/drm-intel: (121 commits)
  drm/i915: Update DRIVER_DATE to 20170403
  drm/i915: Clear gt.active_requests before checking idle status
  drm/i915/uc: Drop use of MISSING_CASE on trivial enums
  drm/i915: make a few DDI functions static
  drm/i915: Combine reset_all_global_seqno() loops into one
  drm/i915: Remove redundant wait for each engine to idle from seqno wrap
  drm/i915: Wait for all engines to be idle as part of i915_gem_wait_for_idle()
  drm/i915: Move retire-requests into i915_gem_wait_for_idle()
  drm/i915/uc: Move fw path check to fetch_uc_fw()
  drm/i915/huc: Remove unused intel_huc_fini()
  drm/i915/uc: Add intel_uc_fw_fini()
  drm/i915/uc: Add intel_uc_fw_type_repr()
  drm/i915/uc: Move intel_uc_fw_status_repr() to intel_uc.h
  drivers: gpu: drm: i915: intel_lpe_audio: Fix kerneldoc comments
  drm/i915: Suppress busy status for engines if wedged
  drm/i915: Do request retirement before marking engines as wedged
  drm/i915: Drop verbose and archaic "ring" from our internal engine names
  drm/i915: Use a dummy timeline name for a signaled fence
  drm/i915: Ironlake do_idle_maps w/a may be called w/o struct_mutex
  drm/i915/guc: Take enable_guc_loading check out of GEM core code
  ...
......@@ -1215,7 +1215,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s,
if (!info->async_flip)
return 0;
if (IS_SKYLAKE(dev_priv)) {
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0);
tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) &
GENMASK(12, 10)) >> 10;
......@@ -1243,7 +1243,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip(
set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12),
info->surf_val << 12);
if (IS_SKYLAKE(dev_priv)) {
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0),
info->stride_val);
set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10),
......@@ -1267,7 +1267,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s,
if (IS_BROADWELL(dev_priv))
return gen8_decode_mi_display_flip(s, info);
if (IS_SKYLAKE(dev_priv))
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
return skl_decode_mi_display_flip(s, info);
return -ENODEV;
......@@ -1278,7 +1278,9 @@ static int check_mi_display_flip(struct parser_exec_state *s,
{
struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
if (IS_BROADWELL(dev_priv)
|| IS_SKYLAKE(dev_priv)
|| IS_KABYLAKE(dev_priv))
return gen8_check_mi_display_flip(s, info);
return -ENODEV;
}
......@@ -1289,7 +1291,9 @@ static int update_plane_mmio_from_mi_display_flip(
{
struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
if (IS_BROADWELL(dev_priv)
|| IS_SKYLAKE(dev_priv)
|| IS_KABYLAKE(dev_priv))
return gen8_update_plane_mmio_from_mi_display_flip(s, info);
return -ENODEV;
}
......@@ -1569,7 +1573,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
{
struct intel_gvt *gvt = s->vgpu->gvt;
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
|| IS_KABYLAKE(gvt->dev_priv)) {
/* BDW decides privilege based on address space */
if (cmd_val(s, 0) & (1 << 8))
return 0;
......@@ -2604,6 +2609,9 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail;
struct parser_exec_state s;
int ret = 0;
struct intel_vgpu_workload *workload = container_of(wa_ctx,
struct intel_vgpu_workload,
wa_ctx);
/* ring base is page aligned */
if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE)))
......@@ -2618,14 +2626,14 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
s.buf_type = RING_BUFFER_INSTRUCTION;
s.buf_addr_type = GTT_BUFFER;
s.vgpu = wa_ctx->workload->vgpu;
s.ring_id = wa_ctx->workload->ring_id;
s.vgpu = workload->vgpu;
s.ring_id = workload->ring_id;
s.ring_start = wa_ctx->indirect_ctx.guest_gma;
s.ring_size = ring_size;
s.ring_head = gma_head;
s.ring_tail = gma_tail;
s.rb_va = wa_ctx->indirect_ctx.shadow_va;
s.workload = wa_ctx->workload;
s.workload = workload;
ret = ip_gma_set(&s, gma_head);
if (ret)
......@@ -2708,12 +2716,15 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
int ctx_size = wa_ctx->indirect_ctx.size;
unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma;
struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
struct intel_vgpu_workload *workload = container_of(wa_ctx,
struct intel_vgpu_workload,
wa_ctx);
struct intel_vgpu *vgpu = workload->vgpu;
struct drm_i915_gem_object *obj;
int ret = 0;
void *map;
obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv,
obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
roundup(ctx_size + CACHELINE_BYTES,
PAGE_SIZE));
if (IS_ERR(obj))
......@@ -2733,8 +2744,8 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
goto unmap_src;
}
ret = copy_gma_to_hva(wa_ctx->workload->vgpu,
wa_ctx->workload->vgpu->gtt.ggtt_mm,
ret = copy_gma_to_hva(workload->vgpu,
workload->vgpu->gtt.ggtt_mm,
guest_gma, guest_gma + ctx_size,
map);
if (ret < 0) {
......@@ -2772,7 +2783,10 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
int ret;
struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
struct intel_vgpu_workload *workload = container_of(wa_ctx,
struct intel_vgpu_workload,
wa_ctx);
struct intel_vgpu *vgpu = workload->vgpu;
if (wa_ctx->indirect_ctx.size == 0)
return 0;
......
......@@ -161,8 +161,9 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {
#define DPCD_HEADER_SIZE 0xb
/* let the virtual display supports DP1.2 */
static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
0x11, 0x0a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
......@@ -172,9 +173,20 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
SDE_PORTC_HOTPLUG_CPT |
SDE_PORTD_HOTPLUG_CPT);
if (IS_SKYLAKE(dev_priv))
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
SDE_PORTE_HOTPLUG_SPT);
vgpu_vreg(vgpu, SKL_FUSE_STATUS) |=
SKL_FUSE_DOWNLOAD_STATUS |
SKL_FUSE_PG0_DIST_STATUS |
SKL_FUSE_PG1_DIST_STATUS |
SKL_FUSE_PG2_DIST_STATUS;
vgpu_vreg(vgpu, LCPLL1_CTL) |=
LCPLL_PLL_ENABLE |
LCPLL_PLL_LOCK;
vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
}
if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
......@@ -191,7 +203,7 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
}
if (IS_SKYLAKE(dev_priv) &&
if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT;
}
......@@ -353,7 +365,7 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu)
{
struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
if (IS_SKYLAKE(dev_priv))
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
clean_virtual_dp_monitor(vgpu, PORT_D);
else
clean_virtual_dp_monitor(vgpu, PORT_B);
......@@ -375,7 +387,7 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution)
intel_vgpu_init_i2c_edid(vgpu);
if (IS_SKYLAKE(dev_priv))
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D,
resolution);
else
......
......@@ -394,9 +394,11 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
int ring_id = wa_ctx->workload->ring_id;
struct i915_gem_context *shadow_ctx =
wa_ctx->workload->vgpu->shadow_ctx;
struct intel_vgpu_workload *workload = container_of(wa_ctx,
struct intel_vgpu_workload,
wa_ctx);
int ring_id = workload->ring_id;
struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
struct drm_i915_gem_object *ctx_obj =
shadow_ctx->engine[ring_id].state->obj;
struct execlist_ring_context *shadow_ring_context;
......@@ -680,7 +682,6 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
CACHELINE_BYTES;
workload->wa_ctx.per_ctx.guest_gma =
per_ctx & PER_CTX_ADDR_MASK;
workload->wa_ctx.workload = workload;
WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
}
......
......@@ -2220,7 +2220,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
gvt_dbg_core("init gtt\n");
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
|| IS_KABYLAKE(gvt->dev_priv)) {
gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
......
......@@ -106,7 +106,8 @@ static void init_device_info(struct intel_gvt *gvt)
struct intel_gvt_device_info *info = &gvt->device_info;
struct pci_dev *pdev = gvt->dev_priv->drm.pdev;
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
|| IS_KABYLAKE(gvt->dev_priv)) {
info->max_support_vgpus = 8;
info->cfg_space_size = 256;
info->mmio_size = 2 * 1024 * 1024;
......@@ -143,6 +144,11 @@ static int gvt_service_thread(void *data)
intel_gvt_emulate_vblank(gvt);
mutex_unlock(&gvt->lock);
}
if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
(void *)&gvt->service_request)) {
intel_gvt_schedule(gvt);
}
}
return 0;
......@@ -196,6 +202,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
idr_destroy(&gvt->vgpu_idr);
intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
kfree(dev_priv->gvt);
dev_priv->gvt = NULL;
}
......@@ -214,6 +222,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
int intel_gvt_init_device(struct drm_i915_private *dev_priv)
{
struct intel_gvt *gvt;
struct intel_vgpu *vgpu;
int ret;
/*
......@@ -286,6 +295,14 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
goto out_clean_types;
}
vgpu = intel_gvt_create_idle_vgpu(gvt);
if (IS_ERR(vgpu)) {
ret = PTR_ERR(vgpu);
gvt_err("failed to create idle vgpu\n");
goto out_clean_types;
}
gvt->idle_vgpu = vgpu;
gvt_dbg_core("gvt device initialization is done\n");
dev_priv->gvt = gvt;
return 0;
......
......@@ -138,6 +138,10 @@ struct intel_vgpu_display {
struct intel_vgpu_sbi sbi;
};
struct vgpu_sched_ctl {
int weight;
};
struct intel_vgpu {
struct intel_gvt *gvt;
int id;
......@@ -147,6 +151,7 @@ struct intel_vgpu {
bool failsafe;
bool resetting;
void *sched_data;
struct vgpu_sched_ctl sched_ctl;
struct intel_vgpu_fence fence;
struct intel_vgpu_gm gm;
......@@ -160,6 +165,7 @@ struct intel_vgpu {
struct list_head workload_q_head[I915_NUM_ENGINES];
struct kmem_cache *workloads;
atomic_t running_workload_num;
ktime_t last_ctx_submit_time;
DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
struct i915_gem_context *shadow_ctx;
......@@ -215,6 +221,7 @@ struct intel_vgpu_type {
unsigned int low_gm_size;
unsigned int high_gm_size;
unsigned int fence;
unsigned int weight;
enum intel_vgpu_edid resolution;
};
......@@ -236,6 +243,7 @@ struct intel_gvt {
DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
struct intel_vgpu_type *types;
unsigned int num_types;
struct intel_vgpu *idle_vgpu;
struct task_struct *service_thread;
wait_queue_head_t service_thread_wq;
......@@ -249,6 +257,7 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915)
enum {
INTEL_GVT_REQUEST_EMULATE_VBLANK = 0,
INTEL_GVT_REQUEST_SCHED = 1,
};
static inline void intel_gvt_request_service(struct intel_gvt *gvt,
......@@ -322,6 +331,8 @@ struct intel_vgpu_creation_params {
__u64 resolution;
__s32 primary;
__u64 vgpu_id;
__u32 weight;
};
int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
......@@ -376,6 +387,8 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu,
int intel_gvt_init_vgpu_types(struct intel_gvt *gvt);
void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);
struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt);
void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu);
struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
struct intel_vgpu_type *type);
void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
......
This diff has been collapsed.
......@@ -580,7 +580,7 @@ static void gen8_init_irq(
SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
} else if (IS_SKYLAKE(gvt->dev_priv)) {
} else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) {
SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT);
SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT);
SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT);
......@@ -690,7 +690,8 @@ int intel_gvt_init_irq(struct intel_gvt *gvt)
gvt_dbg_core("init irq framework\n");
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
|| IS_KABYLAKE(gvt->dev_priv)) {
irq->ops = &gen8_irq_ops;
irq->irq_map = gen8_irq_map;
} else {
......
......@@ -295,10 +295,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
return 0;
return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
"fence: %d\nresolution: %s\n",
"fence: %d\nresolution: %s\n"
"weight: %d\n",
BYTES_TO_MB(type->low_gm_size),
BYTES_TO_MB(type->high_gm_size),
type->fence, vgpu_edid_str(type->resolution));
type->fence, vgpu_edid_str(type->resolution),
type->weight);
}
static MDEV_TYPE_ATTR_RO(available_instances);
......@@ -1146,8 +1148,40 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
return 0;
}
static ssize_t
vgpu_id_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct mdev_device *mdev = mdev_from_dev(dev);
if (mdev) {
struct intel_vgpu *vgpu = (struct intel_vgpu *)
mdev_get_drvdata(mdev);
return sprintf(buf, "%d\n", vgpu->id);
}
return sprintf(buf, "\n");
}
static DEVICE_ATTR_RO(vgpu_id);
static struct attribute *intel_vgpu_attrs[] = {
&dev_attr_vgpu_id.attr,
NULL
};
static const struct attribute_group intel_vgpu_group = {
.name = "intel_vgpu",
.attrs = intel_vgpu_attrs,
};
static const struct attribute_group *intel_vgpu_groups[] = {
&intel_vgpu_group,
NULL,
};
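The group above backs the "Add mdev attribute group for per-vgpu info" item in the merge summary: once hooked into mdev_attr_groups below, each vGPU's mdev device should expose its id through a read-only sysfs file, presumably at <mdev device path>/intel_vgpu/vgpu_id (the exact path is an assumption inferred from the group and attribute names, not something stated in this diff).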
static const struct mdev_parent_ops intel_vgpu_ops = {
.supported_type_groups = intel_vgpu_type_groups,
.mdev_attr_groups = intel_vgpu_groups,
.create = intel_vgpu_create,
.remove = intel_vgpu_remove,
......@@ -1339,13 +1373,6 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
{
struct intel_vgpu *vgpu = info->vgpu;
if (!info) {
gvt_vgpu_err("kvmgt_guest_info invalid\n");
return false;
}
kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
kvmgt_protect_table_destroy(info);
gvt_cache_destroy(info->vgpu);
......
......@@ -44,20 +44,21 @@ struct intel_vgpu;
#define D_HSW (1 << 2)
#define D_BDW (1 << 3)
#define D_SKL (1 << 4)
#define D_KBL (1 << 5)
#define D_GEN9PLUS (D_SKL)
#define D_GEN8PLUS (D_BDW | D_SKL)
#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL)
#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL)
#define D_GEN9PLUS (D_SKL | D_KBL)
#define D_GEN8PLUS (D_BDW | D_SKL | D_KBL)
#define D_GEN75PLUS (D_HSW | D_BDW | D_SKL | D_KBL)
#define D_GEN7PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_SKL_PLUS (D_SKL)
#define D_BDW_PLUS (D_BDW | D_SKL)
#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL)
#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL)
#define D_SKL_PLUS (D_SKL | D_KBL)
#define D_BDW_PLUS (D_BDW | D_SKL | D_KBL)
#define D_HSW_PLUS (D_HSW | D_BDW | D_SKL | D_KBL)
#define D_IVB_PLUS (D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
#define D_PRE_BDW (D_SNB | D_IVB | D_HSW)
#define D_PRE_SKL (D_SNB | D_IVB | D_HSW | D_BDW)
#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL)
#define D_ALL (D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
struct intel_gvt_mmio_info {
u32 offset;
......
......@@ -126,6 +126,18 @@ static struct render_mmio gen9_render_mmio_list[] = {
{VCS2, _MMIO(0x1c028), 0xffff, false},
{VECS, _MMIO(0x1a028), 0xffff, false},
{RCS, _MMIO(0x7304), 0xffff, true},
{RCS, _MMIO(0x2248), 0x0, false},
{RCS, _MMIO(0x940c), 0x0, false},
{RCS, _MMIO(0x4ab8), 0x0, false},
{RCS, _MMIO(0x4ab0), 0x0, false},
{RCS, _MMIO(0x20d4), 0x0, false},
{RCS, _MMIO(0xb004), 0x0, false},
{RCS, _MMIO(0x20a0), 0x0, false},
{RCS, _MMIO(0x20e4), 0xffff, false},
};
static u32 gen9_render_mocs[I915_NUM_ENGINES][64];
......@@ -159,7 +171,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
*/
fw = intel_uncore_forcewake_for_reg(dev_priv, reg,
FW_REG_READ | FW_REG_WRITE);
if (ring_id == RCS && IS_SKYLAKE(dev_priv))
if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
fw |= FORCEWAKE_RENDER;
intel_uncore_forcewake_get(dev_priv, fw);
......@@ -192,7 +204,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
if (!IS_SKYLAKE(dev_priv))
if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
return;
offset.reg = regs[ring_id];
......@@ -230,7 +242,7 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
return;
if (!IS_SKYLAKE(dev_priv))
if (!(IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
return;
offset.reg = regs[ring_id];
......@@ -265,7 +277,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
u32 inhibit_mask =
_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
if (IS_SKYLAKE(vgpu->gvt->dev_priv)
|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
mmio = gen9_render_mmio_list;
array_size = ARRAY_SIZE(gen9_render_mmio_list);
load_mocs(vgpu, ring_id);
......@@ -312,7 +325,7 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
u32 v;
int i, array_size;
if (IS_SKYLAKE(dev_priv)) {
if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
mmio = gen9_render_mmio_list;
array_size = ARRAY_SIZE(gen9_render_mmio_list);
restore_mocs(vgpu, ring_id);
......
......@@ -47,11 +47,87 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
return false;
}
struct vgpu_sched_data {
struct list_head lru_list;
struct intel_vgpu *vgpu;
ktime_t sched_in_time;
ktime_t sched_out_time;
ktime_t sched_time;
ktime_t left_ts;
ktime_t allocated_ts;
struct vgpu_sched_ctl sched_ctl;
};
struct gvt_sched_data {
struct intel_gvt *gvt;
struct hrtimer timer;
unsigned long period;
struct list_head lru_runq_head;
};
static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu)
{
ktime_t delta_ts;
struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data;
delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time;
vgpu_data->sched_time += delta_ts;
vgpu_data->left_ts -= delta_ts;
}
#define GVT_TS_BALANCE_PERIOD_MS 100
#define GVT_TS_BALANCE_STAGE_NUM 10
static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
{
struct vgpu_sched_data *vgpu_data;
struct list_head *pos;
static uint64_t stage_check;
int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;
/* The timeslice accumulation reset at stage 0, which is
* allocated again without adding previous debt.
*/
if (stage == 0) {
int total_weight = 0;
ktime_t fair_timeslice;
list_for_each(pos, &sched_data->lru_runq_head) {
vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
total_weight += vgpu_data->sched_ctl.weight;
}
list_for_each(pos, &sched_data->lru_runq_head) {
vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) *
vgpu_data->sched_ctl.weight /
total_weight;
vgpu_data->allocated_ts = fair_timeslice;
vgpu_data->left_ts = vgpu_data->allocated_ts;
}
} else {
list_for_each(pos, &sched_data->lru_runq_head) {
vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
/* timeslice for next 100ms should add the left/debt
* slice of previous stages.
*/
vgpu_data->left_ts += vgpu_data->allocated_ts;
}
}
}
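The stage-0 branch above splits each 100 ms balance period proportionally to weight: fair_timeslice = 100 ms * weight / total_weight. A minimal standalone sketch of that arithmetic, using hypothetical weights rather than anything taken from the driver:

#include <stdio.h>

int main(void)
{
	/* hypothetical weights of three runnable vGPUs */
	const int weight[] = { 16, 8, 4 };
	int total = 0, i;

	for (i = 0; i < 3; i++)
		total += weight[i];

	/* fair_timeslice = 100 ms * weight / total_weight, mirroring the
	 * stage-0 branch of gvt_balance_timeslice() above */
	for (i = 0; i < 3; i++)
		printf("vGPU%d gets %.1f ms of every 100 ms\n",
		       i + 1, 100.0 * weight[i] / total);
	return 0;
}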
static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
{
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
enum intel_engine_id i;
struct intel_engine_cs *engine;
struct vgpu_sched_data *vgpu_data;
ktime_t cur_time;
/* no target to schedule */
if (!scheduler->next_vgpu)
......@@ -77,6 +153,15 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
gvt_dbg_sched("switch to next vgpu %d\n",
scheduler->next_vgpu->id);
cur_time = ktime_get();
if (scheduler->current_vgpu) {
vgpu_data = scheduler->current_vgpu->sched_data;
vgpu_data->sched_out_time = cur_time;
vgpu_update_timeslice(scheduler->current_vgpu);
}
vgpu_data = scheduler->next_vgpu->sched_data;
vgpu_data->sched_in_time = cur_time;
/* switch current vgpu */
scheduler->current_vgpu = scheduler->next_vgpu;
scheduler->next_vgpu = NULL;
......@@ -88,62 +173,61 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
wake_up(&scheduler->waitq[i]);
}
struct tbs_vgpu_data {
struct list_head list;
struct intel_vgpu *vgpu;
/* put some per-vgpu sched stats here */
};
struct tbs_sched_data {
struct intel_gvt *gvt;
struct delayed_work work;
unsigned long period;
struct list_head runq_head;
};
#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1))
static void tbs_sched_func(struct work_struct *work)
static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
{
struct tbs_sched_data *sched_data = container_of(work,
struct tbs_sched_data, work.work);
struct tbs_vgpu_data *vgpu_data;
struct intel_gvt *gvt = sched_data->gvt;
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct vgpu_sched_data *vgpu_data;
struct intel_vgpu *vgpu = NULL;
struct list_head *pos, *head;
mutex_lock(&gvt->lock);
/* no vgpu or has already had a target */
if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
goto out;
if (scheduler->current_vgpu) {
vgpu_data = scheduler->current_vgpu->sched_data;
head = &vgpu_data->list;
} else {
head = &sched_data->runq_head;
}
struct list_head *head = &sched_data->lru_runq_head;
struct list_head *pos;
/* search a vgpu with pending workload */
list_for_each(pos, head) {
if (pos == &sched_data->runq_head)
continue;
vgpu_data = container_of(pos, struct tbs_vgpu_data, list);
vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
if (!vgpu_has_pending_workload(vgpu_data->vgpu))
continue;
vgpu = vgpu_data->vgpu;
break;
/* Return the vGPU only if it has time slice left */
if (vgpu_data->left_ts > 0) {
vgpu = vgpu_data->vgpu;
break;
}
}
return vgpu;
}
/* in nanosecond */
#define GVT_DEFAULT_TIME_SLICE 1000000
static void tbs_sched_func(struct gvt_sched_data *sched_data)
{
struct intel_gvt *gvt = sched_data->gvt;
struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
struct vgpu_sched_data *vgpu_data;
struct intel_vgpu *vgpu = NULL;
static uint64_t timer_check;
if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
gvt_balance_timeslice(sched_data);
/* no active vgpu or has already had a target */
if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
goto out;
vgpu = find_busy_vgpu(sched_data);
if (vgpu) {
scheduler->next_vgpu = vgpu;
/* Move the last used vGPU to the tail of lru_list */
vgpu_data = vgpu->sched_data;
list_del_init(&vgpu_data->lru_list);
list_add_tail(&vgpu_data->lru_list,
&sched_data->lru_runq_head);
gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
} else {
scheduler->next_vgpu = gvt->idle_vgpu;
}
out:
if (scheduler->next_vgpu) {
......@@ -151,34 +235,49 @@ static void tbs_sched_func(struct work_struct *work)
scheduler->next_vgpu->id);
try_to_schedule_next_vgpu(gvt);
}
}
/*
* still have vgpu on runq
* or last schedule haven't finished due to running workload
*/
if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
schedule_delayed_work(&sched_data->work, sched_data->period);
void intel_gvt_schedule(struct intel_gvt *gvt)
{
struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
mutex_lock(&gvt->lock);
tbs_sched_func(sched_data);
mutex_unlock(&gvt->lock);
}
static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
{
struct gvt_sched_data *data;
data = container_of(timer_data, struct gvt_sched_data, timer);
intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);
hrtimer_add_expires_ns(&data->timer, data->period);
return HRTIMER_RESTART;
}
static int tbs_sched_init(struct intel_gvt *gvt)
{
struct intel_gvt_workload_scheduler *scheduler =
&gvt->scheduler;
struct tbs_sched_data *data;
struct gvt_sched_data *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
INIT_LIST_HEAD(&data->runq_head);
INIT_DELAYED_WORK(&data->work, tbs_sched_func);
INIT_LIST_HEAD(&data->lru_runq_head);
hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
data->timer.function = tbs_timer_fn;
data->period = GVT_DEFAULT_TIME_SLICE;
data->gvt = gvt;
scheduler->sched_data = data;
return 0;
}
......@@ -186,25 +285,28 @@ static void tbs_sched_clean(struct intel_gvt *gvt)
{
struct intel_gvt_workload_scheduler *scheduler =
&gvt->scheduler;
struct tbs_sched_data *data = scheduler->sched_data;
struct gvt_sched_data *data = scheduler->sched_data;
hrtimer_cancel(&data->timer);
cancel_delayed_work(&data->work);
kfree(data);
scheduler->sched_data = NULL;
}
static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
{
struct tbs_vgpu_data *data;
struct vgpu_sched_data *data;
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
data->sched_ctl.weight = vgpu->sched_ctl.weight;
data->vgpu = vgpu;
INIT_LIST_HEAD(&data->list);
INIT_LIST_HEAD(&data->lru_list);
vgpu->sched_data = data;
return 0;
}
......@@ -216,21 +318,24 @@ static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
{
struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
if (!list_empty(&vgpu_data->list))
if (!list_empty(&vgpu_data->lru_list))
return;
list_add_tail(&vgpu_data->list, &sched_data->runq_head);
schedule_delayed_work(&sched_data->work, 0);
list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head);
if (!hrtimer_active(&sched_data->timer))
hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
sched_data->period), HRTIMER_MODE_ABS);
}
static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
{
struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
list_del_init(&vgpu_data->list);
list_del_init(&vgpu_data->lru_list);
}
static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
......
......@@ -43,6 +43,8 @@ struct intel_gvt_sched_policy_ops {
void (*stop_schedule)(struct intel_vgpu *vgpu);
};
void intel_gvt_schedule(struct intel_gvt *gvt);
int intel_gvt_init_sched_policy(struct intel_gvt *gvt);
void intel_gvt_clean_sched_policy(struct intel_gvt *gvt);
......
......@@ -448,7 +448,8 @@ static int workload_thread(void *priv)
struct intel_vgpu_workload *workload = NULL;
struct intel_vgpu *vgpu = NULL;
int ret;
bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
|| IS_KABYLAKE(gvt->dev_priv);
DEFINE_WAIT_FUNC(wait, woken_wake_function);
kfree(p);
......
......@@ -67,7 +67,6 @@ struct shadow_per_ctx {
};
struct intel_shadow_wa_ctx {
struct intel_vgpu_workload *workload;
struct shadow_indirect_ctx indirect_ctx;
struct shadow_per_ctx per_ctx;
......
......@@ -64,18 +64,28 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
}
#define VGPU_MAX_WEIGHT 16
#define VGPU_WEIGHT(vgpu_num) \
(VGPU_MAX_WEIGHT / (vgpu_num))
static struct {
unsigned int low_mm;
unsigned int high_mm;
unsigned int fence;
/* A vGPU with a weight of 8 will get twice as much GPU as a vGPU
* with a weight of 4 on a contended host, different vGPU type has
* different weight set. Legal weights range from 1 to 16.
*/
unsigned int weight;
enum intel_vgpu_edid edid;
char *name;
} vgpu_types[] = {
/* Fixed vGPU type table */
{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" },
{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" },
{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" },
{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" },
{ MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" },
{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" },
{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" },
{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" },
};
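For illustration (values derived from the definitions above, not stated explicitly in the diff): with VGPU_MAX_WEIGHT at 16, VGPU_WEIGHT(8) evaluates to 2, VGPU_WEIGHT(4) to 4, VGPU_WEIGHT(2) to 8 and VGPU_WEIGHT(1) to 16, so on a contended host an exclusive "1" instance should get roughly eight times the GPU time of an "8" instance, consistent with the weight comment above.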
/**
......@@ -120,6 +130,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
gvt->types[i].fence = vgpu_types[i].fence;
if (vgpu_types[i].weight < 1 ||
vgpu_types[i].weight > VGPU_MAX_WEIGHT)
return -EINVAL;
gvt->types[i].weight = vgpu_types[i].weight;
gvt->types[i].resolution = vgpu_types[i].edid;
gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
high_avail / vgpu_types[i].high_mm);
......@@ -131,11 +147,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
sprintf(gvt->types[i].name, "GVTg_V5_%s",
vgpu_types[i].name);
gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n",
gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n",
i, gvt->types[i].name,
gvt->types[i].avail_instance,
gvt->types[i].low_gm_size,
gvt->types[i].high_gm_size, gvt->types[i].fence,
gvt->types[i].weight,
vgpu_edid_str(gvt->types[i].resolution));
}
......@@ -216,6 +233,59 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
mutex_unlock(&gvt->lock);
}
#define IDLE_VGPU_IDR 0
/**
* intel_gvt_create_idle_vgpu - create an idle virtual GPU
* @gvt: GVT device
*
* This function is called when user wants to create an idle virtual GPU.
*
* Returns:
* pointer to intel_vgpu, error pointer if failed.
*/
struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt)
{
struct intel_vgpu *vgpu;
enum intel_engine_id i;
int ret;
vgpu = vzalloc(sizeof(*vgpu));
if (!vgpu)
return ERR_PTR(-ENOMEM);
vgpu->id = IDLE_VGPU_IDR;
vgpu->gvt = gvt;
for (i = 0; i < I915_NUM_ENGINES; i++)
INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
ret = intel_vgpu_init_sched_policy(vgpu);
if (ret)
goto out_free_vgpu;
vgpu->active = false;
return vgpu;
out_free_vgpu:
vfree(vgpu);
return ERR_PTR(ret);
}
/**
* intel_gvt_destroy_vgpu - destroy an idle virtual GPU
* @vgpu: virtual GPU
*
* This function is called when user wants to destroy an idle virtual GPU.
*
*/
void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu)
{
intel_vgpu_clean_sched_policy(vgpu);
vfree(vgpu);
}
static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
struct intel_vgpu_creation_params *param)
{
......@@ -232,13 +302,15 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
mutex_lock(&gvt->lock);
ret = idr_alloc(&gvt->vgpu_idr, vgpu, 1, GVT_MAX_VGPU, GFP_KERNEL);
ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU,
GFP_KERNEL);
if (ret < 0)
goto out_free_vgpu;
vgpu->id = ret;
vgpu->handle = param->handle;
vgpu->gvt = gvt;
vgpu->sched_ctl.weight = param->weight;
bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES);
intel_vgpu_init_cfg_space(vgpu, param->primary);
......@@ -325,6 +397,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
param.low_gm_sz = type->low_gm_size;
param.high_gm_sz = type->high_gm_size;
param.fence_sz = type->fence;
param.weight = type->weight;
param.resolution = type->resolution;
/* XXX current param based on MB */
......
......@@ -1012,9 +1012,12 @@ static int gpu_state_release(struct inode *inode, struct file *file)
static int i915_gpu_info_open(struct inode *inode, struct file *file)
{
struct drm_i915_private *i915 = inode->i_private;
struct i915_gpu_state *gpu;
gpu = i915_capture_gpu_state(inode->i_private);
intel_runtime_pm_get(i915);
gpu = i915_capture_gpu_state(i915);
intel_runtime_pm_put(i915);
if (!gpu)
return -ENOMEM;
......@@ -1459,16 +1462,14 @@ static int ironlake_drpc_info(struct seq_file *m)
static int i915_forcewake_domains(struct seq_file *m, void *data)
{
struct drm_i915_private *dev_priv = node_to_i915(m->private);
struct drm_i915_private *i915 = node_to_i915(m->private);
struct intel_uncore_forcewake_domain *fw_domain;
unsigned int tmp;
spin_lock_irq(&dev_priv->uncore.lock);
for_each_fw_domain(fw_domain, dev_priv) {
for_each_fw_domain(fw_domain, i915, tmp)
seq_printf(m, "%s.wake_count = %u\n",
intel_uncore_forcewake_domain_to_str(fw_domain->id),
fw_domain->wake_count);
}
spin_unlock_irq(&dev_priv->uncore.lock);
READ_ONCE(fw_domain->wake_count));
return 0;
}
......@@ -1938,9 +1939,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
{
seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
ring->space, ring->head, ring->tail,
ring->last_retired_head);
seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)",
ring->space, ring->head, ring->tail);
}
static int i915_context_status(struct seq_file *m, void *unused)
......@@ -2474,9 +2474,9 @@ static void i915_guc_client_info(struct seq_file *m,
enum intel_engine_id id;
uint64_t tot = 0;
seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
client->priority, client->ctx_index, client->proc_desc_offset);
seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
client->priority, client->stage_id, client->proc_desc_offset);
seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
client->wq_size, client->wq_offset, client->wq_tail);
......@@ -2511,7 +2511,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
}
seq_printf(m, "Doorbell map:\n");
seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
......@@ -4129,7 +4129,9 @@ i915_wedged_get(void *data, u64 *val)
static int
i915_wedged_set(void *data, u64 val)
{
struct drm_i915_private *dev_priv = data;
struct drm_i915_private *i915 = data;
struct intel_engine_cs *engine;
unsigned int tmp;
/*
* There is no safeguard against this debugfs entry colliding
......@@ -4139,13 +4141,17 @@ i915_wedged_set(void *data, u64 val)
* while it is writing to 'i915_wedged'
*/
if (i915_reset_backoff(&dev_priv->gpu_error))
if (i915_reset_backoff(&i915->gpu_error))
return -EAGAIN;
i915_handle_error(dev_priv, val,
"Manually setting wedged to %llu", val);
for_each_engine_masked(engine, i915, val, tmp) {
engine->hangcheck.seqno = intel_engine_get_seqno(engine);
engine->hangcheck.stalled = true;
}
i915_handle_error(i915, val, "Manually setting wedged to %llu", val);
wait_on_bit(&dev_priv->gpu_error.flags,
wait_on_bit(&i915->gpu_error.flags,
I915_RESET_HANDOFF,
TASK_UNINTERRUPTIBLE);
......@@ -4173,10 +4179,6 @@ fault_irq_set(struct drm_i915_private *i915,
if (err)
goto err_unlock;
/* Retire to kick idle work */
i915_gem_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests);
*irq = val;
mutex_unlock(&i915->drm.struct_mutex);
......@@ -4280,7 +4282,7 @@ i915_drop_caches_set(void *data, u64 val)
goto unlock;
}
if (val & (DROP_RETIRE | DROP_ACTIVE))
if (val & DROP_RETIRE)
i915_gem_retire_requests(dev_priv);
lockdep_set_current_reclaim_state(GFP_KERNEL);
......
......@@ -549,6 +549,7 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
static void i915_gem_fini(struct drm_i915_private *dev_priv)
{
mutex_lock(&dev_priv->drm.struct_mutex);
intel_uc_fini_hw(dev_priv);
i915_gem_cleanup_engines(dev_priv);
i915_gem_context_fini(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
......@@ -609,7 +610,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
ret = i915_gem_init(dev_priv);
if (ret)
goto cleanup_irq;
goto cleanup_uc;
intel_modeset_gem_init(dev);
......@@ -631,9 +632,9 @@ static int i915_load_modeset_init(struct drm_device *dev)
if (i915_gem_suspend(dev_priv))
DRM_ERROR("failed to idle hardware; continuing to unload!\n");
i915_gem_fini(dev_priv);
cleanup_uc:
intel_uc_fini_fw(dev_priv);
cleanup_irq:
intel_guc_fini(dev_priv);
intel_huc_fini(dev_priv);
drm_irq_uninstall(dev);
intel_teardown_gmbus(dev_priv);
cleanup_csr:
......@@ -1351,9 +1352,8 @@ void i915_driver_unload(struct drm_device *dev)
/* Flush any outstanding unpin_work. */
drain_workqueue(dev_priv->wq);
intel_guc_fini(dev_priv);
intel_huc_fini(dev_priv);
i915_gem_fini(dev_priv);
intel_uc_fini_fw(dev_priv);
intel_fbc_cleanup_cfb(dev_priv);
intel_power_domains_fini(dev_priv);
......
......@@ -79,26 +79,8 @@
#define DRIVER_NAME "i915"
#define DRIVER_DESC "Intel Graphics"
#define DRIVER_DATE "20170320"
#define DRIVER_TIMESTAMP 1489994464
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
#if 0
#define WARN_ON(x) ({ \
bool __i915_warn_cond = (x); \
if (__builtin_constant_p(__i915_warn_cond)) \
BUILD_BUG_ON(__i915_warn_cond); \
WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
#else
#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
#endif
#undef WARN_ON_ONCE
#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
(long) (x), __func__);
#define DRIVER_DATE "20170403"
#define DRIVER_TIMESTAMP 1491198738
/* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
* WARN_ON()) for hw state sanity checks to check for unexpected conditions
......@@ -703,9 +685,9 @@ enum forcewake_domain_id {
};
enum forcewake_domains {
FORCEWAKE_RENDER = (1 << FW_DOMAIN_ID_RENDER),
FORCEWAKE_BLITTER = (1 << FW_DOMAIN_ID_BLITTER),
FORCEWAKE_MEDIA = (1 << FW_DOMAIN_ID_MEDIA),
FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
FORCEWAKE_MEDIA = BIT(FW_DOMAIN_ID_MEDIA),
FORCEWAKE_ALL = (FORCEWAKE_RENDER |
FORCEWAKE_BLITTER |
FORCEWAKE_MEDIA)
......@@ -732,21 +714,25 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
struct intel_uncore_funcs {
void (*force_wake_get)(struct drm_i915_private *dev_priv,
enum forcewake_domains domains);
enum forcewake_domains domains);
void (*force_wake_put)(struct drm_i915_private *dev_priv,
enum forcewake_domains domains);
uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r,
uint8_t val, bool trace);
void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r,
uint16_t val, bool trace);
void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r,
uint32_t val, bool trace);
enum forcewake_domains domains);
uint8_t (*mmio_readb)(struct drm_i915_private *dev_priv,
i915_reg_t r, bool trace);
uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
i915_reg_t r, bool trace);
uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
i915_reg_t r, bool trace);
uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
i915_reg_t r, bool trace);
void (*mmio_writeb)(struct drm_i915_private *dev_priv,
i915_reg_t r, uint8_t val, bool trace);
void (*mmio_writew)(struct drm_i915_private *dev_priv,
i915_reg_t r, uint16_t val, bool trace);
void (*mmio_writel)(struct drm_i915_private *dev_priv,
i915_reg_t r, uint32_t val, bool trace);
};
struct intel_forcewake_range {
......@@ -770,32 +756,35 @@ struct intel_uncore {
enum forcewake_domains fw_domains;
enum forcewake_domains fw_domains_active;
u32 fw_set;
u32 fw_clear;
u32 fw_reset;
struct intel_uncore_forcewake_domain {
struct drm_i915_private *i915;
enum forcewake_domain_id id;
enum forcewake_domains mask;
unsigned wake_count;
struct hrtimer timer;
i915_reg_t reg_set;
u32 val_set;
u32 val_clear;
i915_reg_t reg_ack;
i915_reg_t reg_post;
u32 val_reset;
} fw_domain[FW_DOMAIN_ID_COUNT];
int unclaimed_mmio_check;
};
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
__idx; \
})
/* Iterate over initialised fw domains */
#define for_each_fw_domain_masked(domain__, mask__, dev_priv__) \
for ((domain__) = &(dev_priv__)->uncore.fw_domain[0]; \
(domain__) < &(dev_priv__)->uncore.fw_domain[FW_DOMAIN_ID_COUNT]; \
(domain__)++) \
for_each_if ((mask__) & (domain__)->mask)
#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
for (tmp__ = (mask__); \
tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
#define for_each_fw_domain(domain__, dev_priv__) \
for_each_fw_domain_masked(domain__, FORCEWAKE_ALL, dev_priv__)
#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
#define CSR_VERSION(major, minor) ((major) << 16 | (minor))
#define CSR_VERSION_MAJOR(version) ((version) >> 16)
......@@ -846,6 +835,7 @@ struct intel_csr {
func(has_resource_streamer); \
func(has_runtime_pm); \
func(has_snoop); \
func(unfenced_needs_alignment); \
func(cursor_needs_physical); \
func(hws_needs_physical); \
func(overlay_needs_physical); \
......@@ -2578,12 +2568,6 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc)
(id__)++) \
for_each_if ((engine__) = (dev_priv__)->engine[(id__)])
#define __mask_next_bit(mask) ({ \
int __idx = ffs(mask) - 1; \
mask &= ~BIT(__idx); \
__idx; \
})
/* Iterator over subset of engines selected by mask */
#define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \
......@@ -3956,14 +3940,14 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
#define POSTING_READ16(reg) (void)I915_READ16_NOTRACE(reg)
#define __raw_read(x, s) \
static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \
static inline uint##x##_t __raw_i915_read##x(const struct drm_i915_private *dev_priv, \
i915_reg_t reg) \
{ \
return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \
}
#define __raw_write(x, s) \
static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \
static inline void __raw_i915_write##x(const struct drm_i915_private *dev_priv, \
i915_reg_t reg, uint##x##_t val) \
{ \
write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
......
......@@ -2321,7 +2321,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
st->nents = 0;
for (i = 0; i < page_count; i++) {
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
if (IS_ERR(page)) {
if (unlikely(IS_ERR(page))) {
i915_gem_shrink(dev_priv,
page_count,
I915_SHRINK_BOUND |
......@@ -2329,12 +2329,21 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
I915_SHRINK_PURGEABLE);
page = shmem_read_mapping_page_gfp(mapping, i, gfp);
}
if (IS_ERR(page)) {
if (unlikely(IS_ERR(page))) {
gfp_t reclaim;
/* We've tried hard to allocate the memory by reaping
* our own buffer, now let the real VM do its job and
* go down in flames if truly OOM.
*
* However, since graphics tend to be disposable,
* defer the oom here by reporting the ENOMEM back
* to userspace.
*/
page = shmem_read_mapping_page(mapping, i);
reclaim = mapping_gfp_constraint(mapping, 0);
reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto err_sg;
......@@ -2989,10 +2998,15 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
lockdep_assert_held(&dev_priv->drm.struct_mutex);
set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
/* Retire completed requests first so the list of inflight/incomplete
* requests is accurate and we don't try and mark successful requests
* as in error during __i915_gem_set_wedged_BKL().
*/
i915_gem_retire_requests(dev_priv);
stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
i915_gem_context_lost(dev_priv);
i915_gem_retire_requests(dev_priv);
mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
}
......@@ -3098,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
* Wait for last execlists context complete, but bail out in case a
* new request is submitted.
*/
wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
intel_engines_are_idle(dev_priv),
10);
wait_for(intel_engines_are_idle(dev_priv), 10);
if (READ_ONCE(dev_priv->gt.active_requests))
return;
......@@ -3259,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
return 0;
}
static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms)
{
return wait_for(intel_engine_is_idle(engine), timeout_ms);
}
static int wait_for_engines(struct drm_i915_private *i915)
{
struct intel_engine_cs *engine;
enum intel_engine_id id;
for_each_engine(engine, i915, id) {
if (GEM_WARN_ON(wait_for_engine(engine, 50))) {
i915_gem_set_wedged(i915);
return -EIO;
}
GEM_BUG_ON(intel_engine_get_seqno(engine) !=
intel_engine_last_submit(engine));
}
return 0;
}
int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
{
int ret;
......@@ -3273,13 +3308,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
if (ret)
return ret;
}
i915_gem_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests);
ret = wait_for_engines(i915);
} else {
ret = wait_for_timeline(&i915->gt.global_timeline, flags);
if (ret)
return ret;
}
return 0;
return ret;
}
/** Flushes the GTT write domain for the object if it's dirty. */
......@@ -3307,8 +3345,14 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
* system agents we cannot reproduce this behaviour).
*/
wmb();
if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
if (intel_runtime_pm_get_if_in_use(dev_priv)) {
spin_lock_irq(&dev_priv->uncore.lock);
POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
spin_unlock_irq(&dev_priv->uncore.lock);
intel_runtime_pm_put(dev_priv);
}
}
intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
......@@ -4408,9 +4452,6 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
if (ret)
goto err_unlock;
i915_gem_retire_requests(dev_priv);
GEM_BUG_ON(dev_priv->gt.active_requests);
assert_kernel_context_is_current(dev_priv);
i915_gem_context_lost(dev_priv);
mutex_unlock(&dev->struct_mutex);
......
......@@ -168,7 +168,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
i915_sw_fence_await_reservation(&clflush->wait,
obj->resv, NULL,
false, I915_FENCE_TIMEOUT,
true, I915_FENCE_TIMEOUT,
GFP_KERNEL);
reservation_object_lock(obj->resv, NULL);
......
......@@ -576,25 +576,25 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
}
static inline int
mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
mi_set_context(struct drm_i915_gem_request *req, u32 flags)
{
struct drm_i915_private *dev_priv = req->i915;
struct intel_engine_cs *engine = req->engine;
enum intel_engine_id id;
u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
const int num_rings =
/* Use an extended w/a on ivb+ if signalling from other rings */
i915.semaphores ?
/* Use an extended w/a on gen7 if signalling from other rings */
(i915.semaphores && INTEL_GEN(dev_priv) == 7) ?
INTEL_INFO(dev_priv)->num_rings - 1 :
0;
int len;
u32 *cs;
/* These flags are for resource streamer on HSW+ */
flags |= MI_MM_SPACE_GTT;
if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
else if (INTEL_GEN(dev_priv) < 8)
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
/* These flags are for resource streamer on HSW+ */
flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
else
flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
len = 4;
if (INTEL_GEN(dev_priv) >= 7)
......
......@@ -196,7 +196,6 @@ i915_gem_evict_something(struct i915_address_space *vm,
if (ret)
return ret;
i915_gem_retire_requests(dev_priv);
goto search_again;
found:
......@@ -383,7 +382,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
if (ret)
return ret;
i915_gem_retire_requests(dev_priv);
WARN_ON(!list_empty(&vm->active_list));
}
......
......@@ -890,6 +890,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
struct list_head ordered_vmas;
struct list_head pinned_vmas;
bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
int retry;
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
......@@ -910,7 +911,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
if (!has_fenced_gpu_access)
entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
need_fence =
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
needs_unfenced_map) &&
i915_gem_object_is_tiled(obj);
need_mappable = need_fence || need_reloc_mappable(vma);
......
......@@ -2364,7 +2364,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
struct i915_ggtt *ggtt = &dev_priv->ggtt;
if (unlikely(ggtt->do_idle_maps)) {
if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
if (i915_gem_wait_for_idle(dev_priv, 0)) {
DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
/* Wait a bit, in hopes it avoids the hang */
udelay(10);
......
......@@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
{
/* The timeline struct (as part of the ppgtt underneath a context)
* may be freed when the request is no longer in use by the GPU.
* We could extend the life of a context to beyond that of all
* fences, possibly keeping the hw resource around indefinitely,
* or we just give them a false name. Since
* dma_fence_ops.get_timeline_name is a debug feature, the occasional
* lie seems justifiable.
*/
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
return "signaled";
return to_request(fence)->timeline->common->name;
}
......@@ -180,7 +191,6 @@ i915_priotree_init(struct i915_priotree *pt)
static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
{
struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
struct intel_engine_cs *engine;
enum intel_engine_id id;
int ret;
......@@ -192,15 +202,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
if (ret)
return ret;
i915_gem_retire_requests(i915);
GEM_BUG_ON(i915->gt.active_requests > 1);
/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
for_each_engine(engine, i915, id) {
struct intel_timeline *tl = &timeline->engine[id];
if (wait_for(intel_engine_is_idle(engine), 50))
return -EBUSY;
struct i915_gem_timeline *timeline;
struct intel_timeline *tl = engine->timeline;
if (!i915_seqno_passed(seqno, tl->seqno)) {
/* spin until threads are complete */
......@@ -211,14 +216,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
/* Finally reset hw state */
tl->seqno = seqno;
intel_engine_init_global_seqno(engine, seqno);
}
list_for_each_entry(timeline, &i915->gt.timelines, link) {
for_each_engine(engine, i915, id) {
struct intel_timeline *tl = &timeline->engine[id];
memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
}
list_for_each_entry(timeline, &i915->gt.timelines, link)
memset(timeline->engine[id].sync_seqno, 0,
sizeof(timeline->engine[id].sync_seqno));
}
return 0;
......@@ -295,7 +296,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
* completion order.
*/
list_del(&request->ring_link);
request->ring->last_retired_head = request->postfix;
request->ring->head = request->postfix;
if (!--request->i915->gt.active_requests) {
GEM_BUG_ON(!request->i915->gt.awake);
mod_delayed_work(request->i915->wq,
......
......@@ -1742,8 +1742,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
/* Handle flush interrupt in bottom half */
queue_work(dev_priv->guc.log.flush_wq,
&dev_priv->guc.log.flush_work);
queue_work(dev_priv->guc.log.runtime.flush_wq,
&dev_priv->guc.log.runtime.flush_work);
dev_priv->guc.log.flush_interrupt_count++;
} else {
......
......@@ -61,6 +61,7 @@
.has_overlay = 1, .overlay_needs_physical = 1, \
.has_gmch_display = 1, \
.hws_needs_physical = 1, \
.unfenced_needs_alignment = 1, \
.ring_mask = RENDER_RING, \
GEN_DEFAULT_PIPEOFFSETS, \
CURSOR_OFFSETS
......@@ -102,6 +103,7 @@ static const struct intel_device_info intel_i915g_info = {
.platform = INTEL_I915G, .cursor_needs_physical = 1,
.has_overlay = 1, .overlay_needs_physical = 1,
.hws_needs_physical = 1,
.unfenced_needs_alignment = 1,
};
static const struct intel_device_info intel_i915gm_info = {
......@@ -113,6 +115,7 @@ static const struct intel_device_info intel_i915gm_info = {
.supports_tv = 1,
.has_fbc = 1,
.hws_needs_physical = 1,
.unfenced_needs_alignment = 1,
};
static const struct intel_device_info intel_i945g_info = {
......@@ -121,6 +124,7 @@ static const struct intel_device_info intel_i945g_info = {
.has_hotplug = 1, .cursor_needs_physical = 1,
.has_overlay = 1, .overlay_needs_physical = 1,
.hws_needs_physical = 1,
.unfenced_needs_alignment = 1,
};
static const struct intel_device_info intel_i945gm_info = {
......@@ -131,6 +135,7 @@ static const struct intel_device_info intel_i945gm_info = {
.supports_tv = 1,
.has_fbc = 1,
.hws_needs_physical = 1,
.unfenced_needs_alignment = 1,
};
static const struct intel_device_info intel_g33_info = {
......
......@@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
*/
if (WARN_ON(stream->sample_flags != props->sample_flags)) {
ret = -ENODEV;
goto err_alloc;
goto err_flags;
}
list_add(&stream->link, &dev_priv->perf.streams);
......@@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
err_open:
list_del(&stream->link);
err_flags:
if (stream->ops->destroy)
stream->ops->destroy(stream);
err_alloc:
......@@ -1793,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
if (ret)
return ret;
if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
DRM_DEBUG("Unknown i915 perf property ID\n");
return -EINVAL;
}
switch ((enum drm_i915_perf_property_id)id) {
case DRM_I915_PERF_PROP_CTX_HANDLE:
props->single_context = 1;
......@@ -1862,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
props->oa_periodic = true;
props->oa_period_exponent = value;
break;
default:
case DRM_I915_PERF_PROP_MAX:
MISSING_CASE(id);
DRM_DEBUG("Unknown i915 perf property ID\n");
return -EINVAL;
}
......
......@@ -7829,7 +7829,14 @@ enum {
#define TRANS_DDI_EDP_INPUT_B_ONOFF (5<<12)
#define TRANS_DDI_EDP_INPUT_C_ONOFF (6<<12)
#define TRANS_DDI_DP_VC_PAYLOAD_ALLOC (1<<8)
#define TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7)
#define TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6)
#define TRANS_DDI_BFI_ENABLE (1<<4)
#define TRANS_DDI_HIGH_TMDS_CHAR_RATE (1<<4)
#define TRANS_DDI_HDMI_SCRAMBLING (1<<0)
#define TRANS_DDI_HDMI_SCRAMBLING_MASK (TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE \
| TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ \
| TRANS_DDI_HDMI_SCRAMBLING)
/* DisplayPort Transport Control */
#define _DP_TP_CTL_A 0x64040
......
......@@ -25,6 +25,24 @@
#ifndef __I915_UTILS_H
#define __I915_UTILS_H
#undef WARN_ON
/* Many gcc seem to no see through this and fall over :( */
#if 0
#define WARN_ON(x) ({ \
bool __i915_warn_cond = (x); \
if (__builtin_constant_p(__i915_warn_cond)) \
BUILD_BUG_ON(__i915_warn_cond); \
WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
#else
#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
#endif
#undef WARN_ON_ONCE
#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
(long)(x), __func__)
#if GCC_VERSION >= 70000
#define add_overflows(A, B) \
__builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
......
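For readers who have not met these helpers, a short usage sketch under assumed names (check_range() and its parameters are hypothetical; add_overflows() and MISSING_CASE() are the macros defined above, with add_overflows() shown in its GCC 7+ form):

static int check_range(u32 start, u32 len, int mode)
{
	/* add_overflows() evaluates to true if start + len would wrap */
	if (add_overflows(start, len))
		return -EINVAL;

	switch (mode) {
	case 0:
		return 0;
	default:
		/* emits a WARN naming the unhandled value and the calling function */
		MISSING_CASE(mode);
		return -EINVAL;
	}
}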
......@@ -47,11 +47,12 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
{
struct intel_breadcrumbs *b = &engine->breadcrumbs;
unsigned long flags;
unsigned int result;
spin_lock_irq(&b->irq_lock);
spin_lock_irqsave(&b->irq_lock, flags);
result = __intel_breadcrumbs_wakeup(b);
spin_unlock_irq(&b->irq_lock);
spin_unlock_irqrestore(&b->irq_lock, flags);
return result;
}
......
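The switch from spin_lock_irq() to spin_lock_irqsave() matters because the _irq variant unconditionally re-enables interrupts on unlock, which is wrong if the caller already had them disabled; the irqsave form records and restores the previous state. The generic pattern, with some_lock standing in for any spinlock reachable from both irq and non-irq context:

	unsigned long flags;

	spin_lock_irqsave(&some_lock, flags);		/* record the current irq state, then disable */
	/* ... critical section ... */
	spin_unlock_irqrestore(&some_lock, flags);	/* put the irq state back exactly as found */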
......@@ -1442,16 +1442,33 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state,
if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
/* BSpec says "Do not use DisplayPort with CDCLK less than
* 432 MHz, audio enabled, port width x4, and link rate
* HBR2 (5.4 GHz), or else there may be audio corruption or
* screen corruption."
/* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz,
* audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else
* there may be audio corruption or screen corruption." This cdclk
* restriction for GLK is 316.8 MHz and since GLK can output two
* pixels per clock, the pixel rate becomes 2 * 316.8 MHz.
*/
if (intel_crtc_has_dp_encoder(crtc_state) &&
crtc_state->has_audio &&
crtc_state->port_clock >= 540000 &&
crtc_state->lane_count == 4)
pixel_rate = max(432000, pixel_rate);
crtc_state->lane_count == 4) {
if (IS_GEMINILAKE(dev_priv))
pixel_rate = max(2 * 316800, pixel_rate);
else
pixel_rate = max(432000, pixel_rate);
}
/* According to BSpec, "The CD clock frequency must be at least twice
* the frequency of the Azalia BCLK." and BCLK is 96 MHz by default.
* The check for GLK has to be adjusted as the platform can output
* two pixels per clock.
*/
if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) {
if (IS_GEMINILAKE(dev_priv))
pixel_rate = max(2 * 2 * 96000, pixel_rate);
else
pixel_rate = max(2 * 96000, pixel_rate);
}
return pixel_rate;
}
......
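Worked through with the constants above (pixel rates here are in kHz): the DP/HBR2-with-audio rule raises the floor to 432000 kHz (432 MHz), or 2 * 316800 = 633600 kHz (633.6 MHz) on GLK; the Azalia BCLK rule gives 2 * 96000 = 192000 kHz (192 MHz) on other gen9+ parts and 2 * 2 * 96000 = 384000 kHz (384 MHz) on GLK, the doubling in both cases reflecting that GLK outputs two pixels per CD clock.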
......@@ -49,7 +49,7 @@ MODULE_FIRMWARE(I915_CSR_SKL);
MODULE_FIRMWARE(I915_CSR_BXT);
#define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7)
#define FIRMWARE_URL "https://01.org/linuxgraphics/intel-linux-graphics-firmwares"
#define FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware"
......
......@@ -539,7 +539,7 @@ intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv,
* values in advance. This function programs the correct values for
* DP/eDP/FDI use cases.
*/
void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
u32 iboost_bit = 0;
......@@ -806,7 +806,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc,
DP_TP_CTL_ENABLE);
}
void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
{
struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
struct intel_digital_port *intel_dig_port =
......@@ -837,7 +837,8 @@ intel_ddi_get_crtc_encoder(struct intel_crtc *crtc)
return ret;
}
static struct intel_encoder *
/* Finds the only possible encoder associated with the given CRTC. */
struct intel_encoder *
intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
......@@ -1127,72 +1128,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
bxt_ddi_clock_get(encoder, pipe_config);
}
static bool
hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
struct intel_crtc_state *crtc_state,
struct intel_encoder *encoder)
{
struct intel_shared_dpll *pll;
pll = intel_get_shared_dpll(intel_crtc, crtc_state,
encoder);
if (!pll)
DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
pipe_name(intel_crtc->pipe));
return pll;
}
static bool
skl_ddi_pll_select(struct intel_crtc *intel_crtc,
struct intel_crtc_state *crtc_state,
struct intel_encoder *encoder)
{
struct intel_shared_dpll *pll;
pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder);
if (pll == NULL) {
DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
pipe_name(intel_crtc->pipe));
return false;
}
return true;
}
static bool
bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
struct intel_crtc_state *crtc_state,
struct intel_encoder *encoder)
{
return !!intel_get_shared_dpll(intel_crtc, crtc_state, encoder);
}
/*
* Tries to find a *shared* PLL for the CRTC and store it in
* intel_crtc->ddi_pll_sel.
*
* For private DPLLs, compute_config() should do the selection for us. This
* function should be folded into compute_config() eventually.
*/
bool intel_ddi_pll_select(struct intel_crtc *intel_crtc,
struct intel_crtc_state *crtc_state)
{
struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
struct intel_encoder *encoder =
intel_ddi_get_crtc_new_encoder(crtc_state);
if (IS_GEN9_BC(dev_priv))
return skl_ddi_pll_select(intel_crtc, crtc_state,
encoder);
else if (IS_GEN9_LP(dev_priv))
return bxt_ddi_pll_select(intel_crtc, crtc_state,
encoder);
else
return hsw_ddi_pll_select(intel_crtc, crtc_state,
encoder);
}
void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
{
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
......@@ -1309,6 +1244,11 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
temp |= TRANS_DDI_MODE_SELECT_HDMI;
else
temp |= TRANS_DDI_MODE_SELECT_DVI;
if (crtc_state->hdmi_scrambling)
temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK;
if (crtc_state->hdmi_high_tmds_clock_ratio)
temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE;
} else if (type == INTEL_OUTPUT_ANALOG) {
temp |= TRANS_DDI_MODE_SELECT_FDI;
temp |= (crtc_state->fdi_lanes - 1) << 1;
......@@ -1676,8 +1616,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
return DDI_BUF_TRANS_SELECT(level);
}
void intel_ddi_clk_select(struct intel_encoder *encoder,
struct intel_shared_dpll *pll)
static void intel_ddi_clk_select(struct intel_encoder *encoder,
struct intel_shared_dpll *pll)
{
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
enum port port = intel_ddi_get_encoder_port(encoder);
......@@ -1881,6 +1821,12 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder,
if (type == INTEL_OUTPUT_HDMI) {
struct intel_digital_port *intel_dig_port =
enc_to_dig_port(encoder);
bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio;
bool scrambling = pipe_config->hdmi_scrambling;
intel_hdmi_handle_sink_scrambling(intel_encoder,
conn_state->connector,
clock_ratio, scrambling);
/* In HDMI/DVI mode, the port width, and swing/emphasis values
* are ignored so nothing special needs to be done besides
......@@ -1914,6 +1860,12 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder,
if (old_crtc_state->has_audio)
intel_audio_codec_disable(intel_encoder);
if (type == INTEL_OUTPUT_HDMI) {
intel_hdmi_handle_sink_scrambling(intel_encoder,
old_conn_state->connector,
false, false);
}
if (type == INTEL_OUTPUT_EDP) {
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
......@@ -2040,6 +1992,12 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config))
pipe_config->has_infoframe = true;
if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) ==
TRANS_DDI_HDMI_SCRAMBLING_MASK)
pipe_config->hdmi_scrambling = true;
if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE)
pipe_config->hdmi_high_tmds_clock_ratio = true;
/* fall through */
case TRANS_DDI_MODE_SELECT_DVI:
pipe_config->lane_count = 4;
......
......@@ -398,6 +398,9 @@ struct intel_plane_state {
int x, y;
} aux;
/* plane control register */
u32 ctl;
/*
* scaler_id
* = -1 : not using a scaler
......@@ -729,6 +732,12 @@ struct intel_crtc_state {
/* bitmask of visible planes (enum plane_id) */
u8 active_planes;
/* HDMI scrambling status */
bool hdmi_scrambling;
/* HDMI High TMDS char rate ratio */
bool hdmi_high_tmds_clock_ratio;
};
struct intel_crtc {
......@@ -1220,12 +1229,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv);
void intel_crt_reset(struct drm_encoder *encoder);
/* intel_ddi.c */
void intel_ddi_clk_select(struct intel_encoder *encoder,
struct intel_shared_dpll *pll);
void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder,
struct intel_crtc_state *old_crtc_state,
struct drm_connector_state *old_conn_state);
void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder);
void hsw_fdi_link_train(struct intel_crtc *crtc,
const struct intel_crtc_state *crtc_state);
void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port);
......@@ -1236,8 +1242,8 @@ void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
enum transcoder cpu_transcoder);
void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
void intel_ddi_disable_pipe_clock(const struct intel_crtc_state *crtc_state);
bool intel_ddi_pll_select(struct intel_crtc *crtc,
struct intel_crtc_state *crtc_state);
struct intel_encoder *
intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state);
void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
......@@ -1246,7 +1252,6 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
void intel_ddi_get_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config);
void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder);
void intel_ddi_clock_get(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config);
void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
......@@ -1445,12 +1450,12 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state)
return i915_ggtt_offset(state->vma);
}
u32 skl_plane_ctl_format(uint32_t pixel_format);
u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
u32 skl_plane_ctl_rotation(unsigned int rotation);
u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
const struct intel_plane_state *plane_state);
u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
unsigned int rotation);
int skl_check_plane_surface(struct intel_plane_state *plane_state);
int i9xx_check_plane_surface(struct intel_plane_state *plane_state);
/* intel_csr.c */
void intel_csr_ucode_init(struct drm_i915_private *);
......@@ -1620,6 +1625,10 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
bool intel_hdmi_compute_config(struct intel_encoder *encoder,
struct intel_crtc_state *pipe_config,
struct drm_connector_state *conn_state);
void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder,
struct drm_connector *connector,
bool high_tmds_clock_ratio,
bool scrambling);
void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable);
......
......@@ -36,45 +36,45 @@ static const struct engine_info {
int (*init_execlists)(struct intel_engine_cs *engine);
} intel_engines[] = {
[RCS] = {
.name = "render ring",
.exec_id = I915_EXEC_RENDER,
.name = "rcs",
.hw_id = RCS_HW,
.exec_id = I915_EXEC_RENDER,
.mmio_base = RENDER_RING_BASE,
.irq_shift = GEN8_RCS_IRQ_SHIFT,
.init_execlists = logical_render_ring_init,
.init_legacy = intel_init_render_ring_buffer,
},
[BCS] = {
.name = "blitter ring",
.exec_id = I915_EXEC_BLT,
.name = "bcs",
.hw_id = BCS_HW,
.exec_id = I915_EXEC_BLT,
.mmio_base = BLT_RING_BASE,
.irq_shift = GEN8_BCS_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
.init_legacy = intel_init_blt_ring_buffer,
},
[VCS] = {
.name = "bsd ring",
.exec_id = I915_EXEC_BSD,
.name = "vcs",
.hw_id = VCS_HW,
.exec_id = I915_EXEC_BSD,
.mmio_base = GEN6_BSD_RING_BASE,
.irq_shift = GEN8_VCS1_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
.init_legacy = intel_init_bsd_ring_buffer,
},
[VCS2] = {
.name = "bsd2 ring",
.exec_id = I915_EXEC_BSD,
.name = "vcs2",
.hw_id = VCS2_HW,
.exec_id = I915_EXEC_BSD,
.mmio_base = GEN8_BSD2_RING_BASE,
.irq_shift = GEN8_VCS2_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
.init_legacy = intel_init_bsd2_ring_buffer,
},
[VECS] = {
.name = "video enhancement ring",
.exec_id = I915_EXEC_VEBOX,
.name = "vecs",
.hw_id = VECS_HW,
.exec_id = I915_EXEC_VEBOX,
.mmio_base = VEBOX_RING_BASE,
.irq_shift = GEN8_VECS_IRQ_SHIFT,
.init_execlists = logical_xcs_ring_init,
......@@ -242,12 +242,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
void *semaphores;
/* Semaphores are in noncoherent memory, flush to be safe */
semaphores = kmap(page);
semaphores = kmap_atomic(page);
memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
I915_NUM_ENGINES * gen8_semaphore_seqno_size);
kunmap(page);
kunmap_atomic(semaphores);
}
intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
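kmap_atomic() fits here because the mapping is short-lived and nothing sleeps while it is held; unlike kmap(), the mapping is released by passing back the returned pointer rather than the page. The generic pattern, with page being any struct page:

	void *vaddr;

	vaddr = kmap_atomic(page);	/* cheap per-CPU mapping; sleeping is not allowed */
	memset(vaddr, 0, PAGE_SIZE);	/* ... use the mapping ... */
	kunmap_atomic(vaddr);		/* unmap with the pointer, not the struct page */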
......@@ -1111,6 +1111,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
struct intel_engine_cs *engine;
enum intel_engine_id id;
if (READ_ONCE(dev_priv->gt.active_requests))
return false;
/* If the driver is wedged, HW state may be very inconsistent and
* report that it is still busy, even though we have stopped using it.
*/
if (i915_terminally_wedged(&dev_priv->gpu_error))
return true;
for_each_engine(engine, dev_priv, id) {
if (!intel_engine_is_idle(engine))
return false;
......
......@@ -26,14 +26,14 @@
#define GFXCORE_FAMILY_GEN9 12
#define GFXCORE_FAMILY_UNKNOWN 0x7fffffff
#define GUC_CTX_PRIORITY_KMD_HIGH 0
#define GUC_CTX_PRIORITY_HIGH 1
#define GUC_CTX_PRIORITY_KMD_NORMAL 2
#define GUC_CTX_PRIORITY_NORMAL 3
#define GUC_CTX_PRIORITY_NUM 4
#define GUC_CLIENT_PRIORITY_KMD_HIGH 0
#define GUC_CLIENT_PRIORITY_HIGH 1
#define GUC_CLIENT_PRIORITY_KMD_NORMAL 2
#define GUC_CLIENT_PRIORITY_NORMAL 3
#define GUC_CLIENT_PRIORITY_NUM 4
#define GUC_MAX_GPU_CONTEXTS 1024
#define GUC_INVALID_CTX_ID GUC_MAX_GPU_CONTEXTS
#define GUC_MAX_STAGE_DESCRIPTORS 1024
#define GUC_INVALID_STAGE_ID GUC_MAX_STAGE_DESCRIPTORS
#define GUC_RENDER_ENGINE 0
#define GUC_VIDEO_ENGINE 1
......@@ -68,14 +68,14 @@
#define GUC_DOORBELL_ENABLED 1
#define GUC_DOORBELL_DISABLED 0
#define GUC_CTX_DESC_ATTR_ACTIVE (1 << 0)
#define GUC_CTX_DESC_ATTR_PENDING_DB (1 << 1)
#define GUC_CTX_DESC_ATTR_KERNEL (1 << 2)
#define GUC_CTX_DESC_ATTR_PREEMPT (1 << 3)
#define GUC_CTX_DESC_ATTR_RESET (1 << 4)
#define GUC_CTX_DESC_ATTR_WQLOCKED (1 << 5)
#define GUC_CTX_DESC_ATTR_PCH (1 << 6)
#define GUC_CTX_DESC_ATTR_TERMINATED (1 << 7)
#define GUC_STAGE_DESC_ATTR_ACTIVE BIT(0)
#define GUC_STAGE_DESC_ATTR_PENDING_DB BIT(1)
#define GUC_STAGE_DESC_ATTR_KERNEL BIT(2)
#define GUC_STAGE_DESC_ATTR_PREEMPT BIT(3)
#define GUC_STAGE_DESC_ATTR_RESET BIT(4)
#define GUC_STAGE_DESC_ATTR_WQLOCKED BIT(5)
#define GUC_STAGE_DESC_ATTR_PCH BIT(6)
#define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7)
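/* Aside: the BIT() conversions above are cosmetic. BIT(n), from
 * <linux/bitops.h>, expands to (1UL << (n)), so for example
 * GUC_STAGE_DESC_ATTR_PREEMPT == BIT(3) == (1UL << 3) == 0x8.
 */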
/* The guc control data is 10 DWORDs */
#define GUC_CTL_CTXINFO 0
......@@ -241,8 +241,8 @@ union guc_doorbell_qw {
u64 value_qw;
} __packed;
#define GUC_MAX_DOORBELLS 256
#define GUC_INVALID_DOORBELL_ID (GUC_MAX_DOORBELLS)
#define GUC_NUM_DOORBELLS 256
#define GUC_DOORBELL_INVALID (GUC_NUM_DOORBELLS)
#define GUC_DB_SIZE (PAGE_SIZE)
#define GUC_WQ_SIZE (PAGE_SIZE * 2)
......@@ -251,12 +251,12 @@ union guc_doorbell_qw {
struct guc_wq_item {
u32 header;
u32 context_desc;
u32 ring_tail;
u32 submit_element_info;
u32 fence_id;
} __packed;
struct guc_process_desc {
u32 context_id;
u32 stage_id;
u64 db_base_addr;
u32 head;
u32 tail;
......@@ -278,7 +278,7 @@ struct guc_execlist_context {
u32 context_desc;
u32 context_id;
u32 ring_status;
u32 ring_lcra;
u32 ring_lrca;
u32 ring_begin;
u32 ring_end;
u32 ring_next_free_location;
......@@ -289,10 +289,18 @@ struct guc_execlist_context {
u16 engine_submit_queue_count;
} __packed;
/*Context descriptor for communicating between uKernel and Driver*/
struct guc_context_desc {
/*
* This structure describes a stage set arranged for a particular communication
* between uKernel (GuC) and Driver (KMD). Technically, this is known as a
* "GuC Context descriptor" in the specs, but we use the term "stage descriptor"
* to avoid confusion with all the other things already named "context" in the
* driver. A static pool of these descriptors is stored inside a GEM object
* (stage_desc_pool), which is held for the entire lifetime of our interaction
* with the GuC, being allocated before the GuC is loaded with its firmware.
*/
struct guc_stage_desc {
u32 sched_common_area;
u32 context_id;
u32 stage_id;
u32 pas_id;
u8 engines_used;
u64 db_trigger_cpu;
......@@ -359,7 +367,7 @@ struct guc_policy {
} __packed;
struct guc_policies {
struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
/* In micro seconds. How much time to allow before DPC processing is
* called back via interrupt (to prevent DPC queue drain starving).
......@@ -401,16 +409,17 @@ struct guc_mmio_regset {
u32 number_of_registers;
} __packed;
/* MMIO registers that are set as non privileged */
struct mmio_white_list {
u32 mmio_start;
u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
u32 count;
} __packed;
struct guc_mmio_reg_state {
struct guc_mmio_regset global_reg;
struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM];
/* MMIO registers that are set as non privileged */
struct __packed {
u32 mmio_start;
u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
u32 count;
} mmio_white_list[GUC_MAX_ENGINES_NUM];
struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM];
} __packed;
/* GuC Additional Data Struct */
......
......@@ -73,22 +73,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
/* User-friendly representation of an enum */
const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
{
switch (status) {
case INTEL_UC_FIRMWARE_FAIL:
return "FAIL";
case INTEL_UC_FIRMWARE_NONE:
return "NONE";
case INTEL_UC_FIRMWARE_PENDING:
return "PENDING";
case INTEL_UC_FIRMWARE_SUCCESS:
return "SUCCESS";
default:
return "UNKNOWN!";
}
};
static u32 get_gttype(struct drm_i915_private *dev_priv)
{
......@@ -148,16 +132,14 @@ static void guc_params_init(struct drm_i915_private *dev_priv)
} else
params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED;
if (guc->ads_vma) {
/* If GuC submission is enabled, set up additional parameters here */
if (i915.enable_guc_submission) {
u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool);
u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16;
params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
}
/* If GuC submission is enabled, set up additional parameters here */
if (i915.enable_guc_submission) {
u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma);
u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
pgs >>= PAGE_SHIFT;
params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
......@@ -430,24 +412,3 @@ int intel_guc_select_fw(struct intel_guc *guc)
return 0;
}
/**
* intel_guc_fini() - clean up all allocated resources
* @dev_priv: i915 device private
*/
void intel_guc_fini(struct drm_i915_private *dev_priv)
{
struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
struct drm_i915_gem_object *obj;
mutex_lock(&dev_priv->drm.struct_mutex);
i915_guc_submission_disable(dev_priv);
i915_guc_submission_fini(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
obj = fetch_and_zero(&guc_fw->obj);
if (obj)
i915_gem_object_put(obj);
guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
}
......@@ -45,6 +45,8 @@ static bool is_supported_device(struct drm_i915_private *dev_priv)
return true;
if (IS_SKYLAKE(dev_priv))
return true;
if (IS_KABYLAKE(dev_priv) && INTEL_DEVID(dev_priv) == 0x591D)
return true;
return false;
}
......
......@@ -34,6 +34,7 @@
#include <drm/drm_atomic_helper.h>
#include <drm/drm_crtc.h>
#include <drm/drm_edid.h>
#include <drm/drm_scdc_helper.h>
#include "intel_drv.h"
#include <drm/i915_drm.h>
#include <drm/intel_lpe_audio.h>
......@@ -1208,6 +1209,8 @@ static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv)
{
if (IS_G4X(dev_priv))
return 165000;
else if (IS_GEMINILAKE(dev_priv))
return 594000;
else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
return 300000;
else
......@@ -1334,6 +1337,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc;
int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock;
int clock_12bpc = clock_8bpc * 3 / 2;
int desired_bpp;
......@@ -1403,6 +1407,16 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
pipe_config->lane_count = 4;
if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) {
if (scdc->scrambling.low_rates)
pipe_config->hdmi_scrambling = true;
if (pipe_config->port_clock > 340000) {
pipe_config->hdmi_scrambling = true;
pipe_config->hdmi_high_tmds_clock_ratio = true;
}
}
return true;
}
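As an illustration of the thresholds in the block above: a 3840x2160@60 Hz, 8 bpc HDMI 2.0 mode has a 594 MHz TMDS character rate (port_clock = 594000), well above 340000, so both hdmi_scrambling and hdmi_high_tmds_clock_ratio get set; modes at or below 340 MHz are only scrambled if the sink explicitly asks for scrambling at low rates (scdc->scrambling.low_rates).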
......@@ -1812,6 +1826,57 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
}
/*
* intel_hdmi_handle_sink_scrambling: handle sink scrambling/clock ratio setup
* @encoder: intel_encoder
* @connector: drm_connector
* @high_tmds_clock_ratio: bool to indicate if the function needs to set
* or reset the high TMDS clock ratio for scrambling
* @scrambling: bool to indicate if the function needs to set or reset
* sink scrambling
*
* This function handles scrambling on HDMI 2.0 capable sinks.
* If the required clock rate is > 340 MHz and scrambling is supported by the
* sink, it enables scrambling. This should be called before enabling the HDMI
* 2.0 port, as the sink can choose to disable the scrambling if it doesn't
* detect a scrambled clock within 100 ms.
*/
void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
struct drm_connector *connector,
bool high_tmds_clock_ratio,
bool scrambling)
{
struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
struct drm_i915_private *dev_priv = connector->dev->dev_private;
struct drm_scrambling *sink_scrambling =
&connector->display_info.hdmi.scdc.scrambling;
struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv,
intel_hdmi->ddc_bus);
bool ret;
if (!sink_scrambling->supported)
return;
DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n",
encoder->base.name, connector->name);
/* Set TMDS bit clock ratio to 1/40 or 1/10 */
ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio);
if (!ret) {
DRM_ERROR("Set TMDS ratio failed\n");
return;
}
/* Enable/disable sink scrambling */
ret = drm_scdc_set_scrambling(adptr, scrambling);
if (!ret) {
DRM_ERROR("Set sink scrambling failed\n");
return;
}
DRM_DEBUG_KMS("sink scrambling handled\n");
}
static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
enum port port)
{
......
......@@ -250,24 +250,6 @@ int intel_huc_init_hw(struct intel_huc *huc)
return err;
}
/**
* intel_huc_fini() - clean up resources allocated for HuC
* @dev_priv: the drm_i915_private device
*
* Cleans up by releasing the huc firmware GEM obj.
*/
void intel_huc_fini(struct drm_i915_private *dev_priv)
{
struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
struct drm_i915_gem_object *obj;
obj = fetch_and_zero(&huc_fw->obj);
if (obj)
i915_gem_object_put(obj);
huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
}
/**
* intel_guc_auth_huc() - authenticate ucode
* @dev_priv: the drm_i915_device
......
......@@ -331,6 +331,7 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
* audio driver and i915
* @dev_priv: the i915 drm device private data
* @eld: ELD data
* @pipe: pipe id
* @port: port id
* @tmds_clk_speed: tmds clock frequency in Hz
*
......
......@@ -326,7 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
u32 *reg_state = ce->lrc_reg_state;
GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8));
assert_ring_tail_valid(rq->ring, rq->tail);
reg_state[CTX_RING_TAIL+1] = rq->tail;
/* True 32b PPGTT with dynamic page allocation: update PDP
......@@ -399,22 +399,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct drm_i915_gem_request *last;
struct execlist_port *port = engine->execlist_port;
unsigned long flags;
struct rb_node *rb;
bool submit = false;
/* After execlist_first is updated, the tasklet will be rescheduled.
*
* If we are currently running (inside the tasklet) and a third
* party queues a request and so updates engine->execlist_first under
* the spinlock (which we have elided), it will atomically set the
* TASKLET_SCHED flag causing the us to be re-executed and pick up
* the change in state (the update to TASKLET_SCHED incurs a memory
* barrier making this cross-cpu checking safe).
*/
if (!READ_ONCE(engine->execlist_first))
return;
last = port->request;
if (last)
/* WaIdleLiteRestore:bdw,skl
......@@ -448,7 +435,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
* and context switches) submission.
*/
spin_lock_irqsave(&engine->timeline->lock, flags);
spin_lock_irq(&engine->timeline->lock);
rb = engine->execlist_first;
while (rb) {
struct drm_i915_gem_request *cursor =
......@@ -500,7 +487,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
i915_gem_request_assign(&port->request, last);
engine->execlist_first = rb;
}
spin_unlock_irqrestore(&engine->timeline->lock, flags);
spin_unlock_irq(&engine->timeline->lock);
if (submit)
execlists_submit_ports(engine);
......@@ -530,24 +517,36 @@ static void intel_lrc_irq_handler(unsigned long data)
intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
* imposing the cost of a locked atomic transaction when submitting a
* new request (outside of the context-switch interrupt).
*/
while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
u32 __iomem *csb_mmio =
dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
u32 __iomem *buf =
dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
unsigned int csb, head, tail;
csb = readl(csb_mmio);
head = GEN8_CSB_READ_PTR(csb);
tail = GEN8_CSB_WRITE_PTR(csb);
if (head == tail)
break;
unsigned int head, tail;
/* The write will be ordered by the uncached read (itself
* a memory barrier), so we do not need another in the form
* of a locked instruction. The race between the interrupt
* handler and the split test/clear is harmless as we order
* our clear before the CSB read. If the interrupt arrived
* first between the test and the clear, we read the updated
* CSB and clear the bit. If the interrupt arrives as we read
* the CSB or later (i.e. after we had cleared the bit) the bit
* is set and we do a new loop.
*/
__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
head = readl(csb_mmio);
tail = GEN8_CSB_WRITE_PTR(head);
head = GEN8_CSB_READ_PTR(head);
while (head != tail) {
unsigned int status;
if (tail < head)
tail += GEN8_CSB_ENTRIES;
do {
unsigned int idx = ++head % GEN8_CSB_ENTRIES;
unsigned int status = readl(buf + 2 * idx);
if (++head == GEN8_CSB_ENTRIES)
head = 0;
/* We are flying near dragons again.
*
......@@ -566,11 +565,12 @@ static void intel_lrc_irq_handler(unsigned long data)
* status notifier.
*/
status = readl(buf + 2 * head);
if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
continue;
/* Check the context/desc id for this event matches */
GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) !=
GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
port[0].context_id);
GEM_BUG_ON(port[0].count == 0);
......@@ -588,10 +588,9 @@ static void intel_lrc_irq_handler(unsigned long data)
GEM_BUG_ON(port[0].count == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
} while (head < tail);
}
writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
GEN8_CSB_WRITE_PTR(csb) << 8),
writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
csb_mmio);
}
......@@ -647,15 +646,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
static struct intel_engine_cs *
pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
{
struct intel_engine_cs *engine;
struct intel_engine_cs *engine =
container_of(pt, struct drm_i915_gem_request, priotree)->engine;
GEM_BUG_ON(!locked);
engine = container_of(pt,
struct drm_i915_gem_request,
priotree)->engine;
if (engine != locked) {
if (locked)
spin_unlock_irq(&locked->timeline->lock);
spin_lock_irq(&engine->timeline->lock);
spin_unlock(&locked->timeline->lock);
spin_lock(&engine->timeline->lock);
}
return engine;
......@@ -663,7 +661,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
{
struct intel_engine_cs *engine = NULL;
struct intel_engine_cs *engine;
struct i915_dependency *dep, *p;
struct i915_dependency stack;
LIST_HEAD(dfs);
......@@ -697,26 +695,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
struct i915_priotree *pt = dep->signaler;
list_for_each_entry(p, &pt->signalers_list, signal_link)
/* Within an engine, there can be no cycle, but we may
* refer to the same dependency chain multiple times
* (redundant dependencies are not eliminated) and across
* engines.
*/
list_for_each_entry(p, &pt->signalers_list, signal_link) {
GEM_BUG_ON(p->signaler->priority < pt->priority);
if (prio > READ_ONCE(p->signaler->priority))
list_move_tail(&p->dfs_link, &dfs);
}
list_safe_reset_next(dep, p, dfs_link);
if (!RB_EMPTY_NODE(&pt->node))
continue;
engine = pt_lock_engine(pt, engine);
/* If it is not already in the rbtree, we can update the
* priority inplace and skip over it (and its dependencies)
* if it is referenced *again* as we descend the dfs.
*/
if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
pt->priority = prio;
list_del_init(&dep->dfs_link);
}
}
engine = request->engine;
spin_lock_irq(&engine->timeline->lock);
/* Fifo and depth-first replacement ensure our deps execute before us */
list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
struct i915_priotree *pt = dep->signaler;
......@@ -728,16 +723,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
if (prio <= pt->priority)
continue;
GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
pt->priority = prio;
rb_erase(&pt->node, &engine->execlist_queue);
if (insert_request(pt, &engine->execlist_queue))
engine->execlist_first = &pt->node;
if (!RB_EMPTY_NODE(&pt->node)) {
rb_erase(&pt->node, &engine->execlist_queue);
if (insert_request(pt, &engine->execlist_queue))
engine->execlist_first = &pt->node;
}
}
if (engine)
spin_unlock_irq(&engine->timeline->lock);
spin_unlock_irq(&engine->timeline->lock);
/* XXX Do we need to preempt to make room for us and our deps? */
}
......@@ -1255,7 +1249,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
request->ring->head = request->postfix;
request->ring->last_retired_head = -1;
intel_ring_update_space(request->ring);
/* Catch up with any missed context-switch interrupts */
......@@ -1268,8 +1261,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
GEM_BUG_ON(request->ctx != port[0].request->ctx);
/* Reset WaIdleLiteRestore:bdw,skl as well */
request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
request->tail =
intel_ring_wrap(request->ring,
request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
assert_ring_tail_valid(request->ring, request->tail);
}
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
......@@ -1480,7 +1475,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
request->tail = intel_ring_offset(request, cs);
GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs);
}
......@@ -1508,7 +1503,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
*cs++ = MI_USER_INTERRUPT;
*cs++ = MI_NOOP;
request->tail = intel_ring_offset(request, cs);
GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
assert_ring_tail_valid(request->ring, request->tail);
gen8_emit_wa_tail(request, cs);
}
......@@ -1575,6 +1570,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
{
engine->submit_request = execlists_submit_request;
engine->schedule = execlists_schedule;
engine->irq_tasklet.func = intel_lrc_irq_handler;
}
static void
......@@ -2041,7 +2037,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
i915_gem_object_unpin_map(ce->state->obj);
ce->ring->head = ce->ring->tail = 0;
ce->ring->last_retired_head = -1;
intel_ring_update_space(ce->ring);
}
}
......
(5 more file diffs in this commit are collapsed and not shown here.)
......@@ -235,7 +235,6 @@ static void hang_fini(struct hang *h)
i915_gem_object_put(h->hws);
i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
i915_gem_retire_requests(h->i915);
}
static int igt_hang_sanitycheck(void *arg)
......