Merge tag 'drm-intel-next-2017-05-29' of git://anongit.freedesktop.org/git/drm-intel into drm-next

More stuff for 4.13: - skl+ wm fixes from Mahesh Kumar - some refactor and tests for i915_sw_fence (Chris) - tune execlist/scheduler code (Chris) - g4x,g33 gpu reset improvements (Chris, Mika) - guc code cleanup (Michal Wajdeczko, Michał Winiarski) - dp aux backlight improvements (Puthikorn Voravootivat) - buffer based guc/host communication (Michal Wajdeczko) * tag 'drm-intel-next-2017-05-29' of git://anongit.freedesktop.org/git/drm-intel: (253 commits) drm/i915: Update DRIVER_DATE to 20170529 drm/i915: Keep the forcewake timer alive for 1ms past the most recent use drm/i915/guc: capture GuC logs if FW fails to load drm/i915/guc: Introduce buffer based cmd transport drm/i915/guc: Disable send function on fini drm: Add definition for eDP backlight frequency drm/i915: Drop AUX backlight enable check for backlight control drm/i915: Consolidate #ifdef CONFIG_INTEL_IOMMU drm/i915: Only GGTT vma may be pinned and prevent shrinking drm/i915: Serialize GTT/Aperture accesses on BXT drm/i915: Convert i915_gem_object_ops->flags values to use BIT() drm/i915/selftests: Silence compiler warning in igt_ctx_exec drm/i915/guc: Skip port assign on first iteration of GuC dequeue drm/i915: Remove misleading comment in request_alloc drm/i915/g33: Improve reset reliability Revert "drm/i915: Restore lost "Initialized i915" welcome message" drm/i915/huc: Update GLK HuC version drm/i915: Check for allocation failure drm/i915/guc: Remove action status and statistics from debugfs drm/i915/g4x: Improve gpu reset reliability ...

Merge tag 'drm-intel-next-2017-05-29' of git://anongit.freedesktop.org/git/drm-intel into drm-next
More stuff for 4.13: - skl+ wm fixes from Mahesh Kumar - some refactor and tests for i915_sw_fence (Chris) - tune execlist/scheduler code (Chris) - g4x,g33 gpu reset improvements (Chris, Mika) - guc code cleanup (Michal Wajdeczko, Michał Winiarski) - dp aux backlight improvements (Puthikorn Voravootivat) - buffer based guc/host communication (Michal Wajdeczko) * tag 'drm-intel-next-2017-05-29' of git://anongit.freedesktop.org/git/drm-intel: (253 commits) drm/i915: Update DRIVER_DATE to 20170529 drm/i915: Keep the forcewake timer alive for 1ms past the most recent use drm/i915/guc: capture GuC logs if FW fails to load drm/i915/guc: Introduce buffer based cmd transport drm/i915/guc: Disable send function on fini drm: Add definition for eDP backlight frequency drm/i915: Drop AUX backlight enable check for backlight control drm/i915: Consolidate #ifdef CONFIG_INTEL_IOMMU drm/i915: Only GGTT vma may be pinned and prevent shrinking drm/i915: Serialize GTT/Aperture accesses on BXT drm/i915: Convert i915_gem_object_ops->flags values to use BIT() drm/i915/selftests: Silence compiler warning in igt_ctx_exec drm/i915/guc: Skip port assign on first iteration of GuC dequeue drm/i915: Remove misleading comment in request_alloc drm/i915/g33: Improve reset reliability Revert "drm/i915: Restore lost "Initialized i915" welcome message" drm/i915/huc: Update GLK HuC version drm/i915: Check for allocation failure drm/i915/guc: Remove action status and statistics from debugfs drm/i915/g4x: Improve gpu reset reliability ...
a82256bc · Dave Airlie · 1afc4544 · cd9f4688 · a82256bc · a82256bc
98 changed file
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -61,6 +61,18 @@ config DRM_I915_SW_FENCE_DEBUG_OBJECTS

          If in doubt, say "N".

+config DRM_I915_SW_FENCE_CHECK_DAG
+        bool "Enable additional driver debugging for detecting dependency cycles"
+        depends on DRM_I915
+        default n
+        help
+          Choose this option to turn on extra driver debugging that may affect
+          performance but will catch some internal issues.
+
+          Recommended for driver developers only.
+
+          If in doubt, say "N".
+
 config DRM_I915_SELFTEST
 	bool "Enable selftests upon driver load"
 	depends on DRM_I915

--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -16,6 +16,7 @@ i915-y := i915_drv.o \
 	  i915_params.o \
 	  i915_pci.o \
          i915_suspend.o \
+	  i915_syncmap.o \
 	  i915_sw_fence.o \
 	  i915_sysfs.o \
 	  intel_csr.o \
@@ -57,6 +58,7 @@ i915-y += i915_cmd_parser.o \

 # general-purpose microcontroller (GuC) support
 i915-y += intel_uc.o \
+	  intel_guc_ct.o \
 	  intel_guc_log.o \
 	  intel_guc_loader.o \
 	  intel_huc.o \

--- a/drivers/gpu/drm/i915/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/dvo_ch7017.c
@@ -280,10 +280,10 @@ static void ch7017_mode_set(struct intel_dvo_device *dvo,
 			(0 << CH7017_PHASE_DETECTOR_SHIFT);
 	} else {
 		outputs_enable = CH7017_LVDS_CHANNEL_A | CH7017_CHARGE_PUMP_HIGH;
-		lvds_pll_feedback_div = CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
+		lvds_pll_feedback_div =
+			CH7017_LVDS_PLL_FEEDBACK_DEFAULT_RESERVED |
 			(2 << CH7017_LVDS_PLL_FEED_BACK_DIVIDER_SHIFT) |
 			(3 << CH7017_LVDS_PLL_FEED_FORWARD_DIVIDER_SHIFT);
-		lvds_pll_feedback_div = 35;
 		lvds_control_2 = (3 << CH7017_LOOP_FILTER_SHIFT) |
 			(0 << CH7017_PHASE_DETECTOR_SHIFT);
 		if (1) { /* XXX: dual channel panel detection.  Assume yes for now. */

--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -69,8 +69,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d workload lrca %x", ring_id,
 			workload->ctx_desc.lrca);

-	context_page_num = intel_lr_context_size(
-			gvt->dev_priv->engine[ring_id]);
+	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;

 	context_page_num = context_page_num >> PAGE_SHIFT;

@@ -181,6 +180,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
 	struct drm_i915_gem_request *rq;
 	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_ring *ring;
 	int ret;

 	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
@@ -199,8 +199,9 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	 * shadow_ctx pages invalid. So gvt need to pin itself. After update
 	 * the guest context, gvt can unpin the shadow_ctx safely.
 	 */
-	ret = engine->context_pin(engine, shadow_ctx);
-	if (ret) {
+	ring = engine->context_pin(engine, shadow_ctx);
+	if (IS_ERR(ring)) {
+		ret = PTR_ERR(ring);
 		gvt_vgpu_err("fail to pin shadow context\n");
 		workload->status = ret;
 		mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -330,8 +331,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload)
 	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
 			workload->ctx_desc.lrca);

-	context_page_num = intel_lr_context_size(
-			gvt->dev_priv->engine[ring_id]);
+	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;

 	context_page_num = context_page_num >> PAGE_SHIFT;


--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1166,8 +1166,8 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 				find_reg(engine, is_master, reg_addr);

 			if (!reg) {
-				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
-						 reg_addr, *cmd, engine->exec_id);
+				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n",
+						 reg_addr, *cmd, engine->name);
 				return false;
 			}

@@ -1222,11 +1222,11 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 				desc->bits[i].mask;

 			if (dword != desc->bits[i].expected) {
-				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (exec_id=%d)\n",
+				DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X for bitmask 0x%08X (exp=0x%08X act=0x%08X) (%s)\n",
 						 *cmd,
 						 desc->bits[i].mask,
 						 desc->bits[i].expected,
-						 dword, engine->exec_id);
+						 dword, engine->name);
 				return false;
 			}
 		}
@@ -1284,7 +1284,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,

 		if (*cmd == MI_BATCH_BUFFER_END) {
 			if (needs_clflush_after) {
-				void *ptr = ptr_mask_bits(shadow_batch_obj->mm.mapping);
+				void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
 				drm_clflush_virt_range(ptr,
 						       (void *)(cmd + 1) - ptr);
 			}

--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2482,8 +2482,6 @@ static void i915_guc_client_info(struct seq_file *m,
 		client->wq_size, client->wq_offset, client->wq_tail);

 	seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space);
-	seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail);
-	seq_printf(m, "\tLast submission result: %d\n", client->retcode);

 	for_each_engine(engine, dev_priv, id) {
 		u64 submissions = client->submissions[id];
@@ -2494,42 +2492,34 @@ static void i915_guc_client_info(struct seq_file *m,
 	seq_printf(m, "\tTotal: %llu\n", tot);
 }

-static int i915_guc_info(struct seq_file *m, void *data)
+static bool check_guc_submission(struct seq_file *m)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const struct intel_guc *guc = &dev_priv->guc;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	u64 total;

 	if (!guc->execbuf_client) {
 		seq_printf(m, "GuC submission %s\n",
 			   HAS_GUC_SCHED(dev_priv) ?
 			   "disabled" :
 			   "not supported");
-		return 0;
+		return false;
 	}

+	return true;
+}
+
+static int i915_guc_info(struct seq_file *m, void *data)
+{
+	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	const struct intel_guc *guc = &dev_priv->guc;
+
+	if (!check_guc_submission(m))
+		return 0;
+
 	seq_printf(m, "Doorbell map:\n");
 	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
 	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);

-	seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
-	seq_printf(m, "GuC action failure count: %u\n", guc->action_fail);
-	seq_printf(m, "GuC last action command: 0x%x\n", guc->action_cmd);
-	seq_printf(m, "GuC last action status: 0x%x\n", guc->action_status);
-	seq_printf(m, "GuC last action error code: %d\n", guc->action_err);
-
-	total = 0;
-	seq_printf(m, "\nGuC submissions:\n");
-	for_each_engine(engine, dev_priv, id) {
-		u64 submissions = guc->submissions[id];
-		total += submissions;
-		seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n",
-			engine->name, submissions, guc->last_seqno[id]);
-	}
-	seq_printf(m, "\t%s: %llu\n", "Total", total);
-
 	seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client);
 	i915_guc_client_info(m, dev_priv, guc->execbuf_client);

@@ -2540,36 +2530,99 @@ static int i915_guc_info(struct seq_file *m, void *data)
 	return 0;
 }

-static int i915_guc_log_dump(struct seq_file *m, void *data)
+static int i915_guc_stage_pool(struct seq_file *m, void *data)
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
-	struct drm_i915_gem_object *obj;
-	int i = 0, pg;
+	const struct intel_guc *guc = &dev_priv->guc;
+	struct guc_stage_desc *desc = guc->stage_desc_pool_vaddr;
+	struct i915_guc_client *client = guc->execbuf_client;
+	unsigned int tmp;
+	int index;

-	if (!dev_priv->guc.log.vma)
+	if (!check_guc_submission(m))
 		return 0;

-	obj = dev_priv->guc.log.vma->obj;
-	for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-		u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
+	for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) {
+		struct intel_engine_cs *engine;
+
+		if (!(desc->attribute & GUC_STAGE_DESC_ATTR_ACTIVE))
+			continue;
+
+		seq_printf(m, "GuC stage descriptor %u:\n", index);
+		seq_printf(m, "\tIndex: %u\n", desc->stage_id);
+		seq_printf(m, "\tAttribute: 0x%x\n", desc->attribute);
+		seq_printf(m, "\tPriority: %d\n", desc->priority);
+		seq_printf(m, "\tDoorbell id: %d\n", desc->db_id);
+		seq_printf(m, "\tEngines used: 0x%x\n",
+			   desc->engines_used);
+		seq_printf(m, "\tDoorbell trigger phy: 0x%llx, cpu: 0x%llx, uK: 0x%x\n",
+			   desc->db_trigger_phy,
+			   desc->db_trigger_cpu,
+			   desc->db_trigger_uk);
+		seq_printf(m, "\tProcess descriptor: 0x%x\n",
+			   desc->process_desc);
+		seq_printf(m, "\tWorkqueue address: 0x%x, size: 0x%x\n",
+			   desc->wq_addr, desc->wq_size);
+		seq_putc(m, '\n');
+
+		for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
+			u32 guc_engine_id = engine->guc_id;
+			struct guc_execlist_context *lrc =
+						&desc->lrc[guc_engine_id];
+
+			seq_printf(m, "\t%s LRC:\n", engine->name);
+			seq_printf(m, "\t\tContext desc: 0x%x\n",
+				   lrc->context_desc);
+			seq_printf(m, "\t\tContext id: 0x%x\n", lrc->context_id);
+			seq_printf(m, "\t\tLRCA: 0x%x\n", lrc->ring_lrca);
+			seq_printf(m, "\t\tRing begin: 0x%x\n", lrc->ring_begin);
+			seq_printf(m, "\t\tRing end: 0x%x\n", lrc->ring_end);
+			seq_putc(m, '\n');
+		}
+	}
+
+	return 0;
+}
+
+static int i915_guc_log_dump(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct drm_i915_private *dev_priv = node_to_i915(node);
+	bool dump_load_err = !!node->info_ent->data;
+	struct drm_i915_gem_object *obj = NULL;
+	u32 *log;
+	int i = 0;
+
+	if (dump_load_err)
+		obj = dev_priv->guc.load_err_log;
+	else if (dev_priv->guc.log.vma)
+		obj = dev_priv->guc.log.vma->obj;

-		for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-			seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-				   *(log + i), *(log + i + 1),
-				   *(log + i + 2), *(log + i + 3));
+	if (!obj)
+		return 0;

-		kunmap_atomic(log);
+	log = i915_gem_object_pin_map(obj, I915_MAP_WC);
+	if (IS_ERR(log)) {
+		DRM_DEBUG("Failed to pin object\n");
+		seq_puts(m, "(log data unaccessible)\n");
+		return PTR_ERR(log);
 	}

+	for (i = 0; i < obj->base.size / sizeof(u32); i += 4)
+		seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+			   *(log + i), *(log + i + 1),
+			   *(log + i + 2), *(log + i + 3));
+
 	seq_putc(m, '\n');

+	i915_gem_object_unpin_map(obj);
+
 	return 0;
 }

 static int i915_guc_log_control_get(void *data, u64 *val)
 {
-	struct drm_device *dev = data;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = data;

 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;
@@ -2581,14 +2634,13 @@ static int i915_guc_log_control_get(void *data, u64 *val)

 static int i915_guc_log_control_set(void *data, u64 val)
 {
-	struct drm_device *dev = data;
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = data;
 	int ret;

 	if (!dev_priv->guc.log.vma)
 		return -EINVAL;

-	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex);
 	if (ret)
 		return ret;

@@ -2596,7 +2648,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
 	ret = i915_guc_log_control(dev_priv, val);
 	intel_runtime_pm_put(dev_priv);

-	mutex_unlock(&dev->struct_mutex);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }

@@ -2855,7 +2907,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused)
 	seq_printf(m, "version: %d.%d\n", CSR_VERSION_MAJOR(csr->version),
 		   CSR_VERSION_MINOR(csr->version));

-	if (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6)) {
+	if (IS_KABYLAKE(dev_priv) ||
+	    (IS_SKYLAKE(dev_priv) && csr->version >= CSR_VERSION(1, 6))) {
 		seq_printf(m, "DC3 -> DC5 count: %d\n",
 			   I915_READ(SKL_CSR_DC3_DC5_COUNT));
 		seq_printf(m, "DC5 -> DC6 count: %d\n",
@@ -3043,36 +3096,6 @@ static void intel_connector_info(struct seq_file *m,
 		intel_seq_print_mode(m, 2, mode);
 }

-static bool cursor_active(struct drm_i915_private *dev_priv, int pipe)
-{
-	u32 state;
-
-	if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
-		state = I915_READ(CURCNTR(PIPE_A)) & CURSOR_ENABLE;
-	else
-		state = I915_READ(CURCNTR(pipe)) & CURSOR_MODE;
-
-	return state;
-}
-
-static bool cursor_position(struct drm_i915_private *dev_priv,
-			    int pipe, int *x, int *y)
-{
-	u32 pos;
-
-	pos = I915_READ(CURPOS(pipe));
-
-	*x = (pos >> CURSOR_X_SHIFT) & CURSOR_POS_MASK;
-	if (pos & (CURSOR_POS_SIGN << CURSOR_X_SHIFT))
-		*x = -*x;
-
-	*y = (pos >> CURSOR_Y_SHIFT) & CURSOR_POS_MASK;
-	if (pos & (CURSOR_POS_SIGN << CURSOR_Y_SHIFT))
-		*y = -*y;
-
-	return cursor_active(dev_priv, pipe);
-}
-
 static const char *plane_type(enum drm_plane_type type)
 {
 	switch (type) {
@@ -3194,9 +3217,7 @@ static int i915_display_info(struct seq_file *m, void *unused)
 	seq_printf(m, "CRTC info\n");
 	seq_printf(m, "---------\n");
 	for_each_intel_crtc(dev, crtc) {
-		bool active;
 		struct intel_crtc_state *pipe_config;
-		int x, y;

 		drm_modeset_lock(&crtc->base.mutex, NULL);
 		pipe_config = to_intel_crtc_state(crtc->base.state);
@@ -3208,14 +3229,18 @@ static int i915_display_info(struct seq_file *m, void *unused)
 			   yesno(pipe_config->dither), pipe_config->pipe_bpp);

 		if (pipe_config->base.active) {
+			struct intel_plane *cursor =
+				to_intel_plane(crtc->base.cursor);
+
 			intel_crtc_info(m, crtc);

-			active = cursor_position(dev_priv, crtc->pipe, &x, &y);
-			seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x, active? %s\n",
-				   yesno(crtc->cursor_base),
-				   x, y, crtc->base.cursor->state->crtc_w,
-				   crtc->base.cursor->state->crtc_h,
-				   crtc->cursor_addr, yesno(active));
+			seq_printf(m, "\tcursor visible? %s, position (%d, %d), size %dx%d, addr 0x%08x\n",
+				   yesno(cursor->base.state->visible),
+				   cursor->base.state->crtc_x,
+				   cursor->base.state->crtc_y,
+				   cursor->base.state->crtc_w,
+				   cursor->base.state->crtc_h,
+				   cursor->cursor.base);
 			intel_scaler_info(m, crtc);
 			intel_plane_info(m, crtc);
 		}
@@ -3316,7 +3341,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)

 		if (i915.enable_execlists) {
 			u32 ptr, read, write;
-			struct rb_node *rb;
+			unsigned int idx;

 			seq_printf(m, "\tExeclist status: 0x%08x %08x\n",
 				   I915_READ(RING_EXECLIST_STATUS_LO(engine)),
@@ -3334,8 +3359,7 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			if (read > write)
 				write += GEN8_CSB_ENTRIES;
 			while (read < write) {
-				unsigned int idx = ++read % GEN8_CSB_ENTRIES;
-
+				idx = ++read % GEN8_CSB_ENTRIES;
 				seq_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
 					   idx,
 					   I915_READ(RING_CONTEXT_STATUS_BUF_LO(engine, idx)),
@@ -3343,28 +3367,30 @@ static int i915_engine_info(struct seq_file *m, void *unused)
 			}

 			rcu_read_lock();
-			rq = READ_ONCE(engine->execlist_port[0].request);
-			if (rq) {
-				seq_printf(m, "\t\tELSP[0] count=%d, ",
-					   engine->execlist_port[0].count);
-				print_request(m, rq, "rq: ");
-			} else {
-				seq_printf(m, "\t\tELSP[0] idle\n");
-			}
-			rq = READ_ONCE(engine->execlist_port[1].request);
-			if (rq) {
-				seq_printf(m, "\t\tELSP[1] count=%d, ",
-					   engine->execlist_port[1].count);
-				print_request(m, rq, "rq: ");
-			} else {
-				seq_printf(m, "\t\tELSP[1] idle\n");
+			for (idx = 0; idx < ARRAY_SIZE(engine->execlist_port); idx++) {
+				unsigned int count;
+
+				rq = port_unpack(&engine->execlist_port[idx],
+						 &count);
+				if (rq) {
+					seq_printf(m, "\t\tELSP[%d] count=%d, ",
+						   idx, count);
+					print_request(m, rq, "rq: ");
+				} else {
+					seq_printf(m, "\t\tELSP[%d] idle\n",
+						   idx);
+				}
 			}
 			rcu_read_unlock();

 			spin_lock_irq(&engine->timeline->lock);
-			for (rb = engine->execlist_first; rb; rb = rb_next(rb)) {
-				rq = rb_entry(rb, typeof(*rq), priotree.node);
-				print_request(m, rq, "\t\tQ ");
+			for (rb = engine->execlist_first; rb; rb = rb_next(rb)){
+				struct i915_priolist *p =
+					rb_entry(rb, typeof(*p), node);
+
+				list_for_each_entry(rq, &p->requests,
+						    priotree.link)
+					print_request(m, rq, "\t\tQ ");
 			}
 			spin_unlock_irq(&engine->timeline->lock);
 		} else if (INTEL_GEN(dev_priv) > 6) {
@@ -3704,16 +3730,10 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 	if (len == 0)
 		return 0;

-	input_buffer = kmalloc(len + 1, GFP_KERNEL);
-	if (!input_buffer)
-		return -ENOMEM;
+	input_buffer = memdup_user_nul(ubuf, len);
+	if (IS_ERR(input_buffer))
+		return PTR_ERR(input_buffer);

-	if (copy_from_user(input_buffer, ubuf, len)) {
-		status = -EFAULT;
-		goto out;
-	}
-
-	input_buffer[len] = '\0';
 	DRM_DEBUG_DRIVER("Copied %d bytes from user\n", (unsigned int)len);

 	drm_connector_list_iter_begin(dev, &conn_iter);
@@ -3739,7 +3759,6 @@ static ssize_t i915_displayport_test_active_write(struct file *file,
 		}
 	}
 	drm_connector_list_iter_end(&conn_iter);
-out:
 	kfree(input_buffer);
 	if (status < 0)
 		return status;
@@ -3900,6 +3919,8 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
 		num_levels = 3;
 	else if (IS_VALLEYVIEW(dev_priv))
 		num_levels = 1;
+	else if (IS_G4X(dev_priv))
+		num_levels = 3;
 	else
 		num_levels = ilk_wm_max_level(dev_priv) + 1;

@@ -3912,8 +3933,10 @@ static void wm_latency_show(struct seq_file *m, const uint16_t wm[8])
 		 * - WM1+ latency values in 0.5us units
 		 * - latencies are in us on gen9/vlv/chv
 		 */
-		if (INTEL_GEN(dev_priv) >= 9 || IS_VALLEYVIEW(dev_priv) ||
-		    IS_CHERRYVIEW(dev_priv))
+		if (INTEL_GEN(dev_priv) >= 9 ||
+		    IS_VALLEYVIEW(dev_priv) ||
+		    IS_CHERRYVIEW(dev_priv) ||
+		    IS_G4X(dev_priv))
 			latency *= 10;
 		else if (level > 0)
 			latency *= 5;
@@ -3974,7 +3997,7 @@ static int pri_wm_latency_open(struct inode *inode, struct file *file)
 {
 	struct drm_i915_private *dev_priv = inode->i_private;

-	if (INTEL_GEN(dev_priv) < 5)
+	if (INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv))
 		return -ENODEV;

 	return single_open(file, pri_wm_latency_show, dev_priv);
@@ -4016,6 +4039,8 @@ static ssize_t wm_latency_write(struct file *file, const char __user *ubuf,
 		num_levels = 3;
 	else if (IS_VALLEYVIEW(dev_priv))
 		num_levels = 1;
+	else if (IS_G4X(dev_priv))
+		num_levels = 3;
 	else
 		num_levels = ilk_wm_max_level(dev_priv) + 1;

@@ -4776,6 +4801,8 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_guc_info", i915_guc_info, 0},
 	{"i915_guc_load_status", i915_guc_load_status_info, 0},
 	{"i915_guc_log_dump", i915_guc_log_dump, 0},
+	{"i915_guc_load_err_log_dump", i915_guc_log_dump, 0, (void *)1},
+	{"i915_guc_stage_pool", i915_guc_stage_pool, 0},
 	{"i915_huc_load_status", i915_huc_load_status_info, 0},
 	{"i915_frequency_info", i915_frequency_info, 0},
 	{"i915_hangcheck_info", i915_hangcheck_info, 0},

--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
 	case I915_PARAM_HAS_EXEC_FENCE:
+	case I915_PARAM_HAS_EXEC_CAPTURE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -834,10 +835,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_uc_init_early(dev_priv);
 	i915_memcpy_init_early(dev_priv);

-	ret = intel_engines_init_early(dev_priv);
-	if (ret)
-		return ret;
-
 	ret = i915_workqueues_init(dev_priv);
 	if (ret < 0)
 		goto err_engines;
@@ -855,7 +852,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_init_audio_hooks(dev_priv);
 	ret = i915_gem_load_init(dev_priv);
 	if (ret < 0)
-		goto err_workqueues;
+		goto err_irq;

 	intel_display_crc_init(dev_priv);

@@ -867,7 +864,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,

 	return 0;

-err_workqueues:
+err_irq:
+	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 err_engines:
 	i915_engines_cleanup(dev_priv);
@@ -882,6 +880,7 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 {
 	i915_perf_fini(dev_priv);
 	i915_gem_load_cleanup(dev_priv);
+	intel_irq_fini(dev_priv);
 	i915_workqueues_cleanup(dev_priv);
 	i915_engines_cleanup(dev_priv);
 }
@@ -947,14 +946,21 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv)

 	ret = i915_mmio_setup(dev_priv);
 	if (ret < 0)
-		goto put_bridge;
+		goto err_bridge;

 	intel_uncore_init(dev_priv);
+
+	ret = intel_engines_init_mmio(dev_priv);
+	if (ret)
+		goto err_uncore;
+
 	i915_gem_init_mmio(dev_priv);

 	return 0;

-put_bridge:
+err_uncore:
+	intel_uncore_fini(dev_priv);
+err_bridge:
 	pci_dev_put(dev_priv->bridge_dev);

 	return ret;
@@ -1213,9 +1219,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct drm_i915_private *dev_priv;
 	int ret;

-	/* Enable nuclear pageflip on ILK+, except vlv/chv */
-	if (!i915.nuclear_pageflip &&
-	    (match_info->gen < 5 || match_info->has_gmch_display))
+	/* Enable nuclear pageflip on ILK+ */
+	if (!i915.nuclear_pageflip && match_info->gen < 5)
 		driver.driver_features &= ~DRIVER_ATOMIC;

 	ret = -ENOMEM;
@@ -1272,10 +1277,6 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)

 	dev_priv->ipc_enabled = false;

-	/* Everything is in place, we can now relax! */
-	DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n",
-		 driver.name, driver.major, driver.minor, driver.patchlevel,
-		 driver.date, pci_name(pdev), dev_priv->drm.primary->index);
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG))
 		DRM_INFO("DRM_I915_DEBUG enabled\n");
 	if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))

--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -55,6 +55,7 @@
 #include "i915_reg.h"
 #include "i915_utils.h"

+#include "intel_uncore.h"
 #include "intel_bios.h"
 #include "intel_dpll_mgr.h"
 #include "intel_uc.h"
@@ -79,8 +80,8 @@

 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20170403"
-#define DRIVER_TIMESTAMP	1491198738
+#define DRIVER_DATE		"20170529"
+#define DRIVER_TIMESTAMP	1496041258

 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
 * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -114,6 +115,13 @@ typedef struct {
 	fp; \
 })

+static inline bool is_fixed16_zero(uint_fixed_16_16_t val)
+{
+	if (val.val == 0)
+		return true;
+	return false;
+}
+
 static inline uint_fixed_16_16_t u32_to_fixed_16_16(uint32_t val)
 {
 	uint_fixed_16_16_t fp;
@@ -152,8 +160,39 @@ static inline uint_fixed_16_16_t max_fixed_16_16(uint_fixed_16_16_t max1,
 	return max;
 }

-static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val,
-							  uint32_t d)
+static inline uint32_t div_round_up_fixed16(uint_fixed_16_16_t val,
+					    uint_fixed_16_16_t d)
+{
+	return DIV_ROUND_UP(val.val, d.val);
+}
+
+static inline uint32_t mul_round_up_u32_fixed16(uint32_t val,
+						uint_fixed_16_16_t mul)
+{
+	uint64_t intermediate_val;
+	uint32_t result;
+
+	intermediate_val = (uint64_t) val * mul.val;
+	intermediate_val = DIV_ROUND_UP_ULL(intermediate_val, 1 << 16);
+	WARN_ON(intermediate_val >> 32);
+	result = clamp_t(uint32_t, intermediate_val, 0, ~0);
+	return result;
+}
+
+static inline uint_fixed_16_16_t mul_fixed16(uint_fixed_16_16_t val,
+					     uint_fixed_16_16_t mul)
+{
+	uint64_t intermediate_val;
+	uint_fixed_16_16_t fp;
+
+	intermediate_val = (uint64_t) val.val * mul.val;
+	intermediate_val = intermediate_val >> 16;
+	WARN_ON(intermediate_val >> 32);
+	fp.val = clamp_t(uint32_t, intermediate_val, 0, ~0);
+	return fp;
+}
+
+static inline uint_fixed_16_16_t fixed_16_16_div(uint32_t val, uint32_t d)
 {
 	uint_fixed_16_16_t fp, res;

@@ -162,8 +201,7 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up(uint32_t val,
 	return res;
 }

-static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val,
-							      uint32_t d)
+static inline uint_fixed_16_16_t fixed_16_16_div_u64(uint32_t val, uint32_t d)
 {
 	uint_fixed_16_16_t res;
 	uint64_t interm_val;
@@ -176,6 +214,17 @@ static inline uint_fixed_16_16_t fixed_16_16_div_round_up_u64(uint32_t val,
 	return res;
 }

+static inline uint32_t div_round_up_u32_fixed16(uint32_t val,
+						uint_fixed_16_16_t d)
+{
+	uint64_t interm_val;
+
+	interm_val = (uint64_t)val << 16;
+	interm_val = DIV_ROUND_UP_ULL(interm_val, d.val);
+	WARN_ON(interm_val >> 32);
+	return clamp_t(uint32_t, interm_val, 0, ~0);
+}
+
 static inline uint_fixed_16_16_t mul_u32_fixed_16_16(uint32_t val,
 						     uint_fixed_16_16_t mul)
 {
@@ -676,116 +725,6 @@ struct drm_i915_display_funcs {
 	void (*load_luts)(struct drm_crtc_state *crtc_state);
 };

-enum forcewake_domain_id {
-	FW_DOMAIN_ID_RENDER = 0,
-	FW_DOMAIN_ID_BLITTER,
-	FW_DOMAIN_ID_MEDIA,
-
-	FW_DOMAIN_ID_COUNT
-};
-
-enum forcewake_domains {
-	FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
-	FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
-	FORCEWAKE_MEDIA	= BIT(FW_DOMAIN_ID_MEDIA),
-	FORCEWAKE_ALL = (FORCEWAKE_RENDER |
-			 FORCEWAKE_BLITTER |
-			 FORCEWAKE_MEDIA)
-};
-
-#define FW_REG_READ  (1)
-#define FW_REG_WRITE (2)
-
-enum decoupled_power_domain {
-	GEN9_DECOUPLED_PD_BLITTER = 0,
-	GEN9_DECOUPLED_PD_RENDER,
-	GEN9_DECOUPLED_PD_MEDIA,
-	GEN9_DECOUPLED_PD_ALL
-};
-
-enum decoupled_ops {
-	GEN9_DECOUPLED_OP_WRITE = 0,
-	GEN9_DECOUPLED_OP_READ
-};
-
-enum forcewake_domains
-intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
-			       i915_reg_t reg, unsigned int op);
-
-struct intel_uncore_funcs {
-	void (*force_wake_get)(struct drm_i915_private *dev_priv,
-			       enum forcewake_domains domains);
-	void (*force_wake_put)(struct drm_i915_private *dev_priv,
-			       enum forcewake_domains domains);
-
-	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv,
-			       i915_reg_t r, bool trace);
-	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
-			       i915_reg_t r, bool trace);
-	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
-			       i915_reg_t r, bool trace);
-	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
-			       i915_reg_t r, bool trace);
-
-	void (*mmio_writeb)(struct drm_i915_private *dev_priv,
-			    i915_reg_t r, uint8_t val, bool trace);
-	void (*mmio_writew)(struct drm_i915_private *dev_priv,
-			    i915_reg_t r, uint16_t val, bool trace);
-	void (*mmio_writel)(struct drm_i915_private *dev_priv,
-			    i915_reg_t r, uint32_t val, bool trace);
-};
-
-struct intel_forcewake_range {
-	u32 start;
-	u32 end;
-
-	enum forcewake_domains domains;
-};
-
-struct intel_uncore {
-	spinlock_t lock; /** lock is also taken in irq contexts. */
-
-	const struct intel_forcewake_range *fw_domains_table;
-	unsigned int fw_domains_table_entries;
-
-	struct notifier_block pmic_bus_access_nb;
-	struct intel_uncore_funcs funcs;
-
-	unsigned fifo_count;
-
-	enum forcewake_domains fw_domains;
-	enum forcewake_domains fw_domains_active;
-
-	u32 fw_set;
-	u32 fw_clear;
-	u32 fw_reset;
-
-	struct intel_uncore_forcewake_domain {
-		enum forcewake_domain_id id;
-		enum forcewake_domains mask;
-		unsigned wake_count;
-		struct hrtimer timer;
-		i915_reg_t reg_set;
-		i915_reg_t reg_ack;
-	} fw_domain[FW_DOMAIN_ID_COUNT];
-
-	int unclaimed_mmio_check;
-};
-
-#define __mask_next_bit(mask) ({					\
-	int __idx = ffs(mask) - 1;					\
-	mask &= ~BIT(__idx);						\
-	__idx;								\
-})
-
-/* Iterate over initialised fw domains */
-#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
-	for (tmp__ = (mask__); \
-	     tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
-
-#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
-	for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
-
 #define CSR_VERSION(major, minor)	((major) << 16 | (minor))
 #define CSR_VERSION_MAJOR(version)	((version) >> 16)
 #define CSR_VERSION_MINOR(version)	((version) & 0xffff)
@@ -821,8 +760,8 @@ struct intel_csr {
 	func(has_gmbus_irq); \
 	func(has_gmch_display); \
 	func(has_guc); \
+	func(has_guc_ct); \
 	func(has_hotplug); \
-	func(has_hw_contexts); \
 	func(has_l3_dpf); \
 	func(has_llc); \
 	func(has_logical_ring_contexts); \
@@ -1025,6 +964,9 @@ struct i915_gpu_state {
 			u32 *pages[0];
 		} *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;

+		struct drm_i915_error_object **user_bo;
+		long user_bo_count;
+
 		struct drm_i915_error_object *wa_ctx;

 		struct drm_i915_error_request {
@@ -1511,11 +1453,7 @@ struct i915_gem_mm {
 	/** LRU list of objects with fence regs on them. */
 	struct list_head fence_list;

-	/**
-	 * Are we in a non-interruptible section of code like
-	 * modesetting?
-	 */
-	bool interruptible;
+	u64 unordered_timeline;

 	/* the indicator for dispatch video commands on two BSD rings */
 	atomic_t bsd_engine_dispatch_index;
@@ -1566,7 +1504,7 @@ struct i915_gpu_error {
 	 *
 	 * This is a counter which gets incremented when reset is triggered,
 	 *
-	 * Before the reset commences, the I915_RESET_IN_PROGRESS bit is set
+	 * Before the reset commences, the I915_RESET_BACKOFF bit is set
 	 * meaning that any waiters holding onto the struct_mutex should
 	 * relinquish the lock immediately in order for the reset to start.
 	 *
@@ -1763,13 +1701,15 @@ struct ilk_wm_values {
 	enum intel_ddb_partitioning partitioning;
 };

-struct vlv_pipe_wm {
+struct g4x_pipe_wm {
 	uint16_t plane[I915_MAX_PLANES];
+	uint16_t fbc;
 };

-struct vlv_sr_wm {
+struct g4x_sr_wm {
 	uint16_t plane;
 	uint16_t cursor;
+	uint16_t fbc;
 };

 struct vlv_wm_ddl_values {
@@ -1777,13 +1717,22 @@ struct vlv_wm_ddl_values {
 };

 struct vlv_wm_values {
-	struct vlv_pipe_wm pipe[3];
-	struct vlv_sr_wm sr;
+	struct g4x_pipe_wm pipe[3];
+	struct g4x_sr_wm sr;
 	struct vlv_wm_ddl_values ddl[3];
 	uint8_t level;
 	bool cxsr;
 };

+struct g4x_wm_values {
+	struct g4x_pipe_wm pipe[2];
+	struct g4x_sr_wm sr;
+	struct g4x_sr_wm hpll;
+	bool cxsr;
+	bool hpll_en;
+	bool fbc_en;
+};
+
 struct skl_ddb_entry {
 	uint16_t start, end;	/* in number of blocks, 'end' is exclusive */
 };
@@ -2100,7 +2049,7 @@ struct i915_oa_ops {
 		    size_t *offset);

 	/**
-	 * @oa_buffer_is_empty: Check if OA buffer empty (false positives OK)
+	 * @oa_buffer_check: Check for OA buffer data + update tail
 	 *
 	 * This is either called via fops or the poll check hrtimer (atomic
 	 * ctx) without any locks taken.
@@ -2113,7 +2062,7 @@ struct i915_oa_ops {
 	 * here, which will be handled gracefully - likely resulting in an
 	 * %EAGAIN error for userspace.
 	 */
-	bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
+	bool (*oa_buffer_check)(struct drm_i915_private *dev_priv);
 };

 struct intel_cdclk_state {
@@ -2127,6 +2076,7 @@ struct drm_i915_private {
 	struct kmem_cache *vmas;
 	struct kmem_cache *requests;
 	struct kmem_cache *dependencies;
+	struct kmem_cache *priorities;

 	const struct intel_device_info info;

@@ -2362,7 +2312,6 @@ struct drm_i915_private {
 	 */
 	struct mutex av_mutex;

-	uint32_t hw_context_size;
 	struct list_head context_list;

 	u32 fdi_rx_config;
@@ -2413,6 +2362,7 @@ struct drm_i915_private {
 			struct ilk_wm_values hw;
 			struct skl_wm_values skl_hw;
 			struct vlv_wm_values vlv;
+			struct g4x_wm_values g4x;
 		};

 		uint8_t max_level;
@@ -2454,11 +2404,14 @@ struct drm_i915_private {
 			wait_queue_head_t poll_wq;
 			bool pollin;

+			/**
+			 * For rate limiting any notifications of spurious
+			 * invalid OA reports
+			 */
+			struct ratelimit_state spurious_report_rs;
+
 			bool periodic;
 			int period_exponent;
-			int timestamp_frequency;
-
-			int tail_margin;

 			int metrics_set;

@@ -2472,6 +2425,70 @@ struct drm_i915_private {
 				u8 *vaddr;
 				int format;
 				int format_size;
+
+				/**
+				 * Locks reads and writes to all head/tail state
+				 *
+				 * Consider: the head and tail pointer state
+				 * needs to be read consistently from a hrtimer
+				 * callback (atomic context) and read() fop
+				 * (user context) with tail pointer updates
+				 * happening in atomic context and head updates
+				 * in user context and the (unlikely)
+				 * possibility of read() errors needing to
+				 * reset all head/tail state.
+				 *
+				 * Note: Contention or performance aren't
+				 * currently a significant concern here
+				 * considering the relatively low frequency of
+				 * hrtimer callbacks (5ms period) and that
+				 * reads typically only happen in response to a
+				 * hrtimer event and likely complete before the
+				 * next callback.
+				 *
+				 * Note: This lock is not held *while* reading
+				 * and copying data to userspace so the value
+				 * of head observed in htrimer callbacks won't
+				 * represent any partial consumption of data.
+				 */
+				spinlock_t ptr_lock;
+
+				/**
+				 * One 'aging' tail pointer and one 'aged'
+				 * tail pointer ready to used for reading.
+				 *
+				 * Initial values of 0xffffffff are invalid
+				 * and imply that an update is required
+				 * (and should be ignored by an attempted
+				 * read)
+				 */
+				struct {
+					u32 offset;
+				} tails[2];
+
+				/**
+				 * Index for the aged tail ready to read()
+				 * data up to.
+				 */
+				unsigned int aged_tail_idx;
+
+				/**
+				 * A monotonic timestamp for when the current
+				 * aging tail pointer was read; used to
+				 * determine when it is old enough to trust.
+				 */
+				u64 aging_timestamp;
+
+				/**
+				 * Although we can always read back the head
+				 * pointer register, we prefer to avoid
+				 * trusting the HW state, just to avoid any
+				 * risk that some hardware condition could
+				 * somehow bump the head pointer unpredictably
+				 * and cause us to forward the wrong OA buffer
+				 * data to userspace.
+				 */
+				u32 head;
 			} oa_buffer;

 			u32 gen7_latched_oastatus1;
@@ -2870,7 +2887,6 @@ intel_info(const struct drm_i915_private *dev_priv)

 #define HWS_NEEDS_PHYSICAL(dev_priv)	((dev_priv)->info.hws_needs_physical)

-#define HAS_HW_CONTEXTS(dev_priv)	    ((dev_priv)->info.has_hw_contexts)
 #define HAS_LOGICAL_RING_CONTEXTS(dev_priv) \
 		((dev_priv)->info.has_logical_ring_contexts)
 #define USES_PPGTT(dev_priv)		(i915.enable_ppgtt)
@@ -2909,6 +2925,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 #define HAS_FW_BLC(dev_priv) 	(INTEL_GEN(dev_priv) > 2)
 #define HAS_PIPE_CXSR(dev_priv) ((dev_priv)->info.has_pipe_cxsr)
 #define HAS_FBC(dev_priv)	((dev_priv)->info.has_fbc)
+#define HAS_CUR_FBC(dev_priv)	(!HAS_GMCH_DISPLAY(dev_priv) && INTEL_INFO(dev_priv)->gen >= 7)

 #define HAS_IPS(dev_priv)	(IS_HSW_ULT(dev_priv) || IS_BROADWELL(dev_priv))

@@ -2931,6 +2948,7 @@ intel_info(const struct drm_i915_private *dev_priv)
 * properties, so we have separate macros to test them.
 */
 #define HAS_GUC(dev_priv)	((dev_priv)->info.has_guc)
+#define HAS_GUC_CT(dev_priv)	((dev_priv)->info.has_guc_ct)
 #define HAS_GUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_GUC_SCHED(dev_priv)	(HAS_GUC(dev_priv))
 #define HAS_HUC_UCODE(dev_priv)	(HAS_GUC(dev_priv))
@@ -2981,15 +2999,26 @@ intel_info(const struct drm_i915_private *dev_priv)

 #include "i915_trace.h"

-static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
+static inline bool intel_vtd_active(void)
 {
 #ifdef CONFIG_INTEL_IOMMU
-	if (INTEL_GEN(dev_priv) >= 6 && intel_iommu_gfx_mapped)
+	if (intel_iommu_gfx_mapped)
 		return true;
 #endif
 	return false;
 }

+static inline bool intel_scanout_needs_vtd_wa(struct drm_i915_private *dev_priv)
+{
+	return INTEL_GEN(dev_priv) >= 6 && intel_vtd_active();
+}
+
+static inline bool
+intel_ggtt_update_needs_vtd_wa(struct drm_i915_private *dev_priv)
+{
+	return IS_BROXTON(dev_priv) && intel_vtd_active();
+}
+
 int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 				int enable_ppgtt);

@@ -3026,7 +3055,7 @@ extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
 extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);

-int intel_engines_init_early(struct drm_i915_private *dev_priv);
+int intel_engines_init_mmio(struct drm_i915_private *dev_priv);
 int intel_engines_init(struct drm_i915_private *dev_priv);

 /* intel_hotplug.c */
@@ -3063,43 +3092,10 @@ void i915_handle_error(struct drm_i915_private *dev_priv,
 		       const char *fmt, ...);

 extern void intel_irq_init(struct drm_i915_private *dev_priv);
+extern void intel_irq_fini(struct drm_i915_private *dev_priv);
 int intel_irq_install(struct drm_i915_private *dev_priv);
 void intel_irq_uninstall(struct drm_i915_private *dev_priv);

-extern void intel_uncore_sanitize(struct drm_i915_private *dev_priv);
-extern void intel_uncore_init(struct drm_i915_private *dev_priv);
-extern bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv);
-extern bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv);
-extern void intel_uncore_fini(struct drm_i915_private *dev_priv);
-extern void intel_uncore_suspend(struct drm_i915_private *dev_priv);
-extern void intel_uncore_resume_early(struct drm_i915_private *dev_priv);
-const char *intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id);
-void intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
-				enum forcewake_domains domains);
-void intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
-				enum forcewake_domains domains);
-/* Like above but the caller must manage the uncore.lock itself.
- * Must be used with I915_READ_FW and friends.
- */
-void intel_uncore_forcewake_get__locked(struct drm_i915_private *dev_priv,
-					enum forcewake_domains domains);
-void intel_uncore_forcewake_put__locked(struct drm_i915_private *dev_priv,
-					enum forcewake_domains domains);
-u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv);
-
-void assert_forcewakes_inactive(struct drm_i915_private *dev_priv);
-
-int intel_wait_for_register(struct drm_i915_private *dev_priv,
-			    i915_reg_t reg,
-			    const u32 mask,
-			    const u32 value,
-			    const unsigned long timeout_ms);
-int intel_wait_for_register_fw(struct drm_i915_private *dev_priv,
-			       i915_reg_t reg,
-			       const u32 mask,
-			       const u32 value,
-			       const unsigned long timeout_ms);
-
 static inline bool intel_gvt_active(struct drm_i915_private *dev_priv)
 {
 	return dev_priv->gvt;
@@ -3447,8 +3443,9 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
 #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX

 int __must_check
-i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
-				  bool write);
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
 int __must_check
 i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
 struct i915_vma * __must_check
@@ -3711,8 +3708,8 @@ int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
 void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
-			    void *eld, int port, int pipe, int tmds_clk_speed,
-			    bool dp_output, int link_rate);
+			    enum pipe pipe, enum port port,
+			    const void *eld, int ls_clock, bool dp_output);

 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);

--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,8 +46,6 @@
 #include <linux/dma-buf.h>

 static void i915_gem_flush_free_objects(struct drm_i915_private *i915);
-static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
-static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);

 static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
 {
@@ -705,6 +703,61 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
 			       args->size, &args->handle);
 }

+static inline enum fb_op_origin
+fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain)
+{
+	return (domain == I915_GEM_DOMAIN_GTT ?
+		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+
+	if (!(obj->base.write_domain & flush_domains))
+		return;
+
+	/* No actual flushing is required for the GTT write domain.  Writes
+	 * to it "immediately" go to main memory as far as we know, so there's
+	 * no chipset flush.  It also doesn't land in render cache.
+	 *
+	 * However, we do have to enforce the order so that all writes through
+	 * the GTT land before any writes to the device, such as updates to
+	 * the GATT itself.
+	 *
+	 * We also have to wait a bit for the writes to land from the GTT.
+	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
+	 * timing. This issue has only been observed when switching quickly
+	 * between GTT writes and CPU reads from inside the kernel on recent hw,
+	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
+	 * system agents we cannot reproduce this behaviour).
+	 */
+	wmb();
+
+	switch (obj->base.write_domain) {
+	case I915_GEM_DOMAIN_GTT:
+		if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+			if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+				spin_lock_irq(&dev_priv->uncore.lock);
+				POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+				spin_unlock_irq(&dev_priv->uncore.lock);
+				intel_runtime_pm_put(dev_priv);
+			}
+		}
+
+		intel_fb_obj_flush(obj,
+				   fb_write_origin(obj, I915_GEM_DOMAIN_GTT));
+		break;
+
+	case I915_GEM_DOMAIN_CPU:
+		i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+		break;
+	}
+
+	obj->base.write_domain = 0;
+}
+
 static inline int
 __copy_to_user_swizzled(char __user *cpu_vaddr,
 			const char *gpu_vaddr, int gpu_offset,
@@ -794,7 +847,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
 			goto out;
 	}

-	i915_gem_object_flush_gtt_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

 	/* If we're not in the cpu read domain, set ourself into the gtt
 	 * read domain and manually flush cachelines (if required). This
@@ -846,7 +899,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
 			goto out;
 	}

-	i915_gem_object_flush_gtt_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

 	/* If we're not in the cpu write domain, set ourself into the
 	 * gtt write domain and manually flush cachelines (as required).
@@ -1501,13 +1554,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }

-static inline enum fb_op_origin
-write_origin(struct drm_i915_gem_object *obj, unsigned domain)
-{
-	return (domain == I915_GEM_DOMAIN_GTT ?
-		obj->frontbuffer_ggtt_origin : ORIGIN_CPU);
-}
-
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915;
@@ -1591,10 +1637,12 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	if (err)
 		goto out_unpin;

-	if (read_domains & I915_GEM_DOMAIN_GTT)
-		err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
+	if (read_domains & I915_GEM_DOMAIN_WC)
+		err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+	else if (read_domains & I915_GEM_DOMAIN_GTT)
+		err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
 	else
-		err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
+		err = i915_gem_object_set_to_cpu_domain(obj, write_domain);

 	/* And bump the LRU for this access */
 	i915_gem_object_bump_inactive_ggtt(obj);
@@ -1602,7 +1650,8 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	mutex_unlock(&dev->struct_mutex);

 	if (write_domain != 0)
-		intel_fb_obj_invalidate(obj, write_origin(obj, write_domain));
+		intel_fb_obj_invalidate(obj,
+					fb_write_origin(obj, write_domain));

 out_unpin:
 	i915_gem_object_unpin_pages(obj);
@@ -1737,6 +1786,9 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
 *     into userspace. (This view is aligned and sized appropriately for
 *     fenced access.)
 *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ *     delayed writes via GTT before performing direct access via WC.
+ *
 * Restrictions:
 *
 *  * snoopable objects cannot be accessed via the GTT. It can cause machine
@@ -1764,7 +1816,7 @@ static unsigned int tile_row_pages(struct drm_i915_gem_object *obj)
 */
 int i915_gem_mmap_gtt_version(void)
 {
-	return 1;
+	return 2;
 }

 static inline struct i915_ggtt_view
@@ -2228,7 +2280,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj,
 	if (obj->mm.mapping) {
 		void *ptr;

-		ptr = ptr_mask_bits(obj->mm.mapping);
+		ptr = page_mask_bits(obj->mm.mapping);
 		if (is_vmalloc_addr(ptr))
 			vunmap(ptr);
 		else
@@ -2560,7 +2612,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!obj->mm.pages);

-	ptr = ptr_unpack_bits(obj->mm.mapping, has_type);
+	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
 			ret = -EBUSY;
@@ -2582,7 +2634,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 			goto err_unpin;
 		}

-		obj->mm.mapping = ptr_pack_bits(ptr, type);
+		obj->mm.mapping = page_pack_bits(ptr, type);
 	}

 out_unlock:
@@ -2967,12 +3019,14 @@ static void engine_set_wedged(struct intel_engine_cs *engine)
 	 */

 	if (i915.enable_execlists) {
+		struct execlist_port *port = engine->execlist_port;
 		unsigned long flags;
+		unsigned int n;

 		spin_lock_irqsave(&engine->timeline->lock, flags);

-		i915_gem_request_put(engine->execlist_port[0].request);
-		i915_gem_request_put(engine->execlist_port[1].request);
+		for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
+			i915_gem_request_put(port_request(&port[n]));
 		memset(engine->execlist_port, 0, sizeof(engine->execlist_port));
 		engine->execlist_queue = RB_ROOT;
 		engine->execlist_first = NULL;
@@ -3101,8 +3155,6 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	struct drm_i915_private *dev_priv =
 		container_of(work, typeof(*dev_priv), gt.idle_work.work);
 	struct drm_device *dev = &dev_priv->drm;
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
 	bool rearm_hangcheck;

 	if (!READ_ONCE(dev_priv->gt.awake))
@@ -3140,10 +3192,8 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (wait_for(intel_engines_are_idle(dev_priv), 10))
 		DRM_ERROR("Timeout waiting for engines to idle\n");

-	for_each_engine(engine, dev_priv, id) {
-		intel_engine_disarm_breadcrumbs(engine);
-		i915_gem_batch_pool_fini(&engine->batch_pool);
-	}
+	intel_engines_mark_idle(dev_priv);
+	i915_gem_timelines_mark_idle(dev_priv);

 	GEM_BUG_ON(!dev_priv->gt.awake);
 	dev_priv->gt.awake = false;
@@ -3320,56 +3370,6 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 	return ret;
 }

-/** Flushes the GTT write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
-{
-	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
-
-	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
-		return;
-
-	/* No actual flushing is required for the GTT write domain.  Writes
-	 * to it "immediately" go to main memory as far as we know, so there's
-	 * no chipset flush.  It also doesn't land in render cache.
-	 *
-	 * However, we do have to enforce the order so that all writes through
-	 * the GTT land before any writes to the device, such as updates to
-	 * the GATT itself.
-	 *
-	 * We also have to wait a bit for the writes to land from the GTT.
-	 * An uncached read (i.e. mmio) seems to be ideal for the round-trip
-	 * timing. This issue has only been observed when switching quickly
-	 * between GTT writes and CPU reads from inside the kernel on recent hw,
-	 * and it appears to only affect discrete GTT blocks (i.e. on LLC
-	 * system agents we cannot reproduce this behaviour).
-	 */
-	wmb();
-	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
-		if (intel_runtime_pm_get_if_in_use(dev_priv)) {
-			spin_lock_irq(&dev_priv->uncore.lock);
-			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
-			spin_unlock_irq(&dev_priv->uncore.lock);
-			intel_runtime_pm_put(dev_priv);
-		}
-	}
-
-	intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
-
-	obj->base.write_domain = 0;
-}
-
-/** Flushes the CPU write domain for the object if it's dirty. */
-static void
-i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
-{
-	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
-		return;
-
-	i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
-	obj->base.write_domain = 0;
-}
-
 static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
 {
 	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU && !obj->cache_dirty)
@@ -3389,6 +3389,69 @@ void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
 	mutex_unlock(&obj->base.dev->struct_mutex);
 }

+/**
+ * Moves a single object to the WC read, and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+	int ret;
+
+	lockdep_assert_held(&obj->base.dev->struct_mutex);
+
+	ret = i915_gem_object_wait(obj,
+				   I915_WAIT_INTERRUPTIBLE |
+				   I915_WAIT_LOCKED |
+				   (write ? I915_WAIT_ALL : 0),
+				   MAX_SCHEDULE_TIMEOUT,
+				   NULL);
+	if (ret)
+		return ret;
+
+	if (obj->base.write_domain == I915_GEM_DOMAIN_WC)
+		return 0;
+
+	/* Flush and acquire obj->pages so that we are coherent through
+	 * direct access in memory with previous cached writes through
+	 * shmemfs and that our cache domain tracking remains valid.
+	 * For example, if the obj->filp was moved to swap without us
+	 * being notified and releasing the pages, we would mistakenly
+	 * continue to assume that the obj remained out of the CPU cached
+	 * domain.
+	 */
+	ret = i915_gem_object_pin_pages(obj);
+	if (ret)
+		return ret;
+
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+	/* Serialise direct access to this object with the barriers for
+	 * coherent writes from the GPU, by effectively invalidating the
+	 * WC domain upon first access.
+	 */
+	if ((obj->base.read_domains & I915_GEM_DOMAIN_WC) == 0)
+		mb();
+
+	/* It should now be out of any other write domains, and we can update
+	 * the domain values for our changes.
+	 */
+	GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+	obj->base.read_domains |= I915_GEM_DOMAIN_WC;
+	if (write) {
+		obj->base.read_domains = I915_GEM_DOMAIN_WC;
+		obj->base.write_domain = I915_GEM_DOMAIN_WC;
+		obj->mm.dirty = true;
+	}
+
+	i915_gem_object_unpin_pages(obj);
+	return 0;
+}
+
 /**
 * Moves a single object to the GTT read, and possibly write domain.
 * @obj: object to act on
@@ -3428,7 +3491,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 	if (ret)
 		return ret;

-	i915_gem_object_flush_cpu_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);

 	/* Serialise direct access to this object with the barriers for
 	 * coherent writes from the GPU, by effectively invalidating the
@@ -3802,7 +3865,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
 	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
 		return 0;

-	i915_gem_object_flush_gtt_write_domain(obj);
+	flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);

 	/* Flush the CPU cache if it's still invalid. */
 	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
@@ -3996,7 +4059,7 @@ __busy_set_if_active(const struct dma_fence *fence,
 	if (i915_gem_request_completed(rq))
 		return 0;

-	return flag(rq->engine->exec_id);
+	return flag(rq->engine->uabi_id);
 }

 static __always_inline unsigned int
@@ -4195,7 +4258,7 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 	 * catch if we ever need to fix it. In the meantime, if you do spot
 	 * such a local variable, please consider fixing!
 	 */
-	if (WARN_ON(size >> PAGE_SHIFT > INT_MAX))
+	if (size >> PAGE_SHIFT > INT_MAX)
 		return ERR_PTR(-E2BIG);

 	if (overflows_type(size, obj->base.size))
@@ -4302,6 +4365,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 	intel_runtime_pm_put(i915);
 	mutex_unlock(&i915->drm.struct_mutex);

+	cond_resched();
+
 	llist_for_each_entry_safe(obj, on, freed, freed) {
 		GEM_BUG_ON(obj->bind_count);
 		GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits));
@@ -4349,8 +4414,11 @@ static void __i915_gem_free_work(struct work_struct *work)
 	 * unbound now.
 	 */

-	while ((freed = llist_del_all(&i915->mm.free_list)))
+	while ((freed = llist_del_all(&i915->mm.free_list))) {
 		__i915_gem_free_objects(i915, freed);
+		if (need_resched())
+			break;
+	}
 }

 static void __i915_gem_free_object_rcu(struct rcu_head *head)
@@ -4415,10 +4483,9 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
 	 * try to take over. The only way to remove the earlier state
 	 * is by resetting. However, resetting on earlier gen is tricky as
 	 * it may impact the display and we are uncertain about the stability
-	 * of the reset, so we only reset recent machines with logical
-	 * context support (that must be reset to remove any stray contexts).
+	 * of the reset, so this could be applied to even earlier gen.
 	 */
-	if (HAS_HW_CONTEXTS(i915)) {
+	if (INTEL_GEN(i915) >= 5) {
 		int reset = intel_gpu_reset(i915, ALL_ENGINES);
 		WARN_ON(reset && reset != -ENODEV);
 	}
@@ -4661,11 +4728,9 @@ bool intel_sanitize_semaphores(struct drm_i915_private *dev_priv, int value)
 	if (value >= 0)
 		return value;

-#ifdef CONFIG_INTEL_IOMMU
 	/* Enable semaphores on SNB when IO remapping is off */
-	if (INTEL_INFO(dev_priv)->gen == 6 && intel_iommu_gfx_mapped)
+	if (IS_GEN6(dev_priv) && intel_vtd_active())
 		return false;
-#endif

 	return true;
 }
@@ -4676,7 +4741,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)

 	mutex_lock(&dev_priv->drm.struct_mutex);

-	i915_gem_clflush_init(dev_priv);
+	dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1);

 	if (!i915.enable_execlists) {
 		dev_priv->gt.resume = intel_legacy_submission_resume;
@@ -4799,12 +4864,16 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)
 	if (!dev_priv->dependencies)
 		goto err_requests;

+	dev_priv->priorities = KMEM_CACHE(i915_priolist, SLAB_HWCACHE_ALIGN);
+	if (!dev_priv->priorities)
+		goto err_dependencies;
+
 	mutex_lock(&dev_priv->drm.struct_mutex);
 	INIT_LIST_HEAD(&dev_priv->gt.timelines);
 	err = i915_gem_timeline_init__global(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (err)
-		goto err_dependencies;
+		goto err_priorities;

 	INIT_LIST_HEAD(&dev_priv->context_list);
 	INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work);
@@ -4822,14 +4891,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv)

 	init_waitqueue_head(&dev_priv->pending_flip_queue);

-	dev_priv->mm.interruptible = true;
-
 	atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0);

 	spin_lock_init(&dev_priv->fb_tracking.lock);

 	return 0;

+err_priorities:
+	kmem_cache_destroy(dev_priv->priorities);
 err_dependencies:
 	kmem_cache_destroy(dev_priv->dependencies);
 err_requests:
@@ -4853,6 +4922,7 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)
 	WARN_ON(!list_empty(&dev_priv->gt.timelines));
 	mutex_unlock(&dev_priv->drm.struct_mutex);

+	kmem_cache_destroy(dev_priv->priorities);
 	kmem_cache_destroy(dev_priv->dependencies);
 	kmem_cache_destroy(dev_priv->requests);
 	kmem_cache_destroy(dev_priv->vmas);
@@ -4864,9 +4934,10 @@ void i915_gem_load_cleanup(struct drm_i915_private *dev_priv)

 int i915_gem_freeze(struct drm_i915_private *dev_priv)
 {
-	mutex_lock(&dev_priv->drm.struct_mutex);
+	/* Discard all purgeable objects, let userspace recover those as
+	 * required after resuming.
+	 */
 	i915_gem_shrink_all(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);

 	return 0;
 }
@@ -4891,12 +4962,13 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv)
 	 * we update that state just before writing out the image.
 	 *
 	 * To try and reduce the hibernation image, we manually shrink
-	 * the objects as well.
+	 * the objects as well, see i915_gem_freeze()
 	 */

-	mutex_lock(&dev_priv->drm.struct_mutex);
 	i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND);
+	i915_gem_drain_freed_objects(dev_priv);

+	mutex_lock(&dev_priv->drm.struct_mutex);
 	for (p = phases; *p; p++) {
 		list_for_each_entry(obj, *p, global_link) {
 			obj->base.read_domains = I915_GEM_DOMAIN_CPU;

--- a/drivers/gpu/drm/i915/i915_gem.h
+++ b/drivers/gpu/drm/i915/i915_gem.h
@@ -25,6 +25,8 @@
 #ifndef __I915_GEM_H__
 #define __I915_GEM_H__

+#include <linux/bug.h>
+
 #ifdef CONFIG_DRM_I915_DEBUG_GEM
 #define GEM_BUG_ON(expr) BUG_ON(expr)
 #define GEM_WARN_ON(expr) WARN_ON(expr)

--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -27,7 +27,6 @@
 #include "i915_gem_clflush.h"

 static DEFINE_SPINLOCK(clflush_lock);
-static u64 clflush_context;

 struct clflush {
 	struct dma_fence dma; /* Must be first for dma_fence_free() */
@@ -157,7 +156,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 		dma_fence_init(&clflush->dma,
 			       &i915_clflush_ops,
 			       &clflush_lock,
-			       clflush_context,
+			       to_i915(obj->base.dev)->mm.unordered_timeline,
 			       0);
 		i915_sw_fence_init(&clflush->wait, i915_clflush_notify);

@@ -182,8 +181,3 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 		GEM_BUG_ON(obj->base.write_domain != I915_GEM_DOMAIN_CPU);
 	}
 }
-
-void i915_gem_clflush_init(struct drm_i915_private *i915)
-{
-	clflush_context = dma_fence_context_alloc(1);
-}
--- a/drivers/gpu/drm/i915/i915_gem_clflush.h
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.h
@@ -28,7 +28,6 @@
 struct drm_i915_private;
 struct drm_i915_gem_object;

-void i915_gem_clflush_init(struct drm_i915_private *i915);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 			     unsigned int flags);
 #define I915_CLFLUSH_FORCE BIT(0)

--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -92,33 +92,6 @@

 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1

-static int get_context_size(struct drm_i915_private *dev_priv)
-{
-	int ret;
-	u32 reg;
-
-	switch (INTEL_GEN(dev_priv)) {
-	case 6:
-		reg = I915_READ(CXT_SIZE);
-		ret = GEN6_CXT_TOTAL_SIZE(reg) * 64;
-		break;
-	case 7:
-		reg = I915_READ(GEN7_CXT_SIZE);
-		if (IS_HASWELL(dev_priv))
-			ret = HSW_CXT_TOTAL_SIZE;
-		else
-			ret = GEN7_CXT_TOTAL_SIZE(reg) * 64;
-		break;
-	case 8:
-		ret = GEN8_CXT_TOTAL_SIZE;
-		break;
-	default:
-		BUG();
-	}
-
-	return ret;
-}
-
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -151,45 +124,6 @@ void i915_gem_context_free(struct kref *ctx_ref)
 	kfree(ctx);
 }

-static struct drm_i915_gem_object *
-alloc_context_obj(struct drm_i915_private *dev_priv, u64 size)
-{
-	struct drm_i915_gem_object *obj;
-	int ret;
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	obj = i915_gem_object_create(dev_priv, size);
-	if (IS_ERR(obj))
-		return obj;
-
-	/*
-	 * Try to make the context utilize L3 as well as LLC.
-	 *
-	 * On VLV we don't have L3 controls in the PTEs so we
-	 * shouldn't touch the cache level, especially as that
-	 * would make the object snooped which might have a
-	 * negative performance impact.
-	 *
-	 * Snooping is required on non-llc platforms in execlist
-	 * mode, but since all GGTT accesses use PAT entry 0 we
-	 * get snooping anyway regardless of cache_level.
-	 *
-	 * This is only applicable for Ivy Bridge devices since
-	 * later platforms don't have L3 control bits in the PTE.
-	 */
-	if (IS_IVYBRIDGE(dev_priv)) {
-		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_L3_LLC);
-		/* Failure shouldn't ever happen this early */
-		if (WARN_ON(ret)) {
-			i915_gem_object_put(obj);
-			return ERR_PTR(ret);
-		}
-	}
-
-	return obj;
-}
-
 static void context_close(struct i915_gem_context *ctx)
 {
 	i915_gem_context_set_closed(ctx);
@@ -265,26 +199,7 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	kref_init(&ctx->ref);
 	list_add_tail(&ctx->link, &dev_priv->context_list);
 	ctx->i915 = dev_priv;
-
-	if (dev_priv->hw_context_size) {
-		struct drm_i915_gem_object *obj;
-		struct i915_vma *vma;
-
-		obj = alloc_context_obj(dev_priv, dev_priv->hw_context_size);
-		if (IS_ERR(obj)) {
-			ret = PTR_ERR(obj);
-			goto err_out;
-		}
-
-		vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL);
-		if (IS_ERR(vma)) {
-			i915_gem_object_put(obj);
-			ret = PTR_ERR(vma);
-			goto err_out;
-		}
-
-		ctx->engine[RCS].state = vma;
-	}
+	ctx->priority = I915_PRIORITY_NORMAL;

 	/* Default context will never have a file_priv */
 	ret = DEFAULT_CONTEXT_HANDLE;
@@ -443,21 +358,6 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
 	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
 	ida_init(&dev_priv->context_hw_ida);

-	if (i915.enable_execlists) {
-		/* NB: intentionally left blank. We will allocate our own
-		 * backing objects as we need them, thank you very much */
-		dev_priv->hw_context_size = 0;
-	} else if (HAS_HW_CONTEXTS(dev_priv)) {
-		dev_priv->hw_context_size =
-			round_up(get_context_size(dev_priv),
-				 I915_GTT_PAGE_SIZE);
-		if (dev_priv->hw_context_size > (1<<20)) {
-			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
-					 dev_priv->hw_context_size);
-			dev_priv->hw_context_size = 0;
-		}
-	}
-
 	ctx = i915_gem_create_context(dev_priv, NULL);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context (error %ld)\n",
@@ -477,8 +377,8 @@ int i915_gem_context_init(struct drm_i915_private *dev_priv)
 	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));

 	DRM_DEBUG_DRIVER("%s context support initialized\n",
-			i915.enable_execlists ? "LR" :
-			dev_priv->hw_context_size ? "HW" : "fake");
+			 dev_priv->engine[RCS]->context_size ? "logical" :
+			 "fake");
 	return 0;
 }

@@ -941,11 +841,6 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 	return 0;
 }

-static bool contexts_enabled(struct drm_device *dev)
-{
-	return i915.enable_execlists || to_i915(dev)->hw_context_size;
-}
-
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
 	return file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;
@@ -954,12 +849,13 @@ static bool client_is_banned(struct drm_i915_file_private *file_priv)
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file)
 {
+	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_context_create *args = data;
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct i915_gem_context *ctx;
 	int ret;

-	if (!contexts_enabled(dev))
+	if (!dev_priv->engine[RCS]->context_size)
 		return -ENODEV;

 	if (args->pad != 0)
@@ -977,7 +873,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		return ret;

-	ctx = i915_gem_create_context(to_i915(dev), file_priv);
+	ctx = i915_gem_create_context(dev_priv, file_priv);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);

--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -122,12 +122,36 @@ static void i915_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf, unsigned long
 }
 static void *i915_gem_dmabuf_kmap(struct dma_buf *dma_buf, unsigned long page_num)
 {
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+	struct page *page;
+
+	if (page_num >= obj->base.size >> PAGE_SHIFT)
+		return NULL;
+
+	if (!i915_gem_object_has_struct_page(obj))
+		return NULL;
+
+	if (i915_gem_object_pin_pages(obj))
+		return NULL;
+
+	/* Synchronisation is left to the caller (via .begin_cpu_access()) */
+	page = i915_gem_object_get_page(obj, page_num);
+	if (IS_ERR(page))
+		goto err_unpin;
+
+	return kmap(page);
+
+err_unpin:
+	i915_gem_object_unpin_pages(obj);
 	return NULL;
 }

 static void i915_gem_dmabuf_kunmap(struct dma_buf *dma_buf, unsigned long page_num, void *addr)
 {
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);

+	kunmap(virt_to_page(addr));
+	i915_gem_object_unpin_pages(obj);
 }

 static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)

--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
 	list_for_each_entry(vma, vmas, exec_list) {
 		struct drm_i915_gem_object *obj = vma->obj;

+		if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+			struct i915_gem_capture_list *capture;
+
+			capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+			if (unlikely(!capture))
+				return -ENOMEM;
+
+			capture->next = req->capture_list;
+			capture->vma = vma;
+			req->capture_list = capture;
+		}
+
 		if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
 			continue;


--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -168,13 +168,11 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
 	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
 		return 3;

-#ifdef CONFIG_INTEL_IOMMU
 	/* Disable ppgtt on SNB if VT-d is on. */
-	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
+	if (IS_GEN6(dev_priv) && intel_vtd_active()) {
 		DRM_INFO("Disabling PPGTT because VT-d is on\n");
 		return 0;
 	}
-#endif

 	/* Early VLV doesn't have this */
 	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
@@ -195,9 +193,12 @@ static int ppgtt_bind_vma(struct i915_vma *vma,
 	u32 pte_flags;
 	int ret;

-	ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
-	if (ret)
-		return ret;
+	if (!(vma->flags & I915_VMA_LOCAL_BIND)) {
+		ret = vma->vm->allocate_va_range(vma->vm, vma->node.start,
+						 vma->size);
+		if (ret)
+			return ret;
+	}

 	vma->pages = vma->obj->mm.pages;

@@ -1989,14 +1990,10 @@ void i915_ppgtt_release(struct kref *kref)
 */
 static bool needs_idle_maps(struct drm_i915_private *dev_priv)
 {
-#ifdef CONFIG_INTEL_IOMMU
 	/* Query intel_iommu to see if we need the workaround. Presumably that
 	 * was loaded first.
 	 */
-	if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
-		return true;
-#endif
-	return false;
+	return IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_vtd_active();
 }

 void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
@@ -2188,6 +2185,101 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
 		gen8_set_pte(&gtt_base[i], scratch_pte);
 }

+static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
+{
+	struct drm_i915_private *dev_priv = vm->i915;
+
+	/*
+	 * Make sure the internal GAM fifo has been cleared of all GTT
+	 * writes before exiting stop_machine(). This guarantees that
+	 * any aperture accesses waiting to start in another process
+	 * cannot back up behind the GTT writes causing a hang.
+	 * The register can be any arbitrary GAM register.
+	 */
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
+struct insert_page {
+	struct i915_address_space *vm;
+	dma_addr_t addr;
+	u64 offset;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
+{
+	struct insert_page *arg = _arg;
+
+	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
+					  dma_addr_t addr,
+					  u64 offset,
+					  enum i915_cache_level level,
+					  u32 unused)
+{
+	struct insert_page arg = { vm, addr, offset, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
+}
+
+struct insert_entries {
+	struct i915_address_space *vm;
+	struct sg_table *st;
+	u64 start;
+	enum i915_cache_level level;
+};
+
+static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
+{
+	struct insert_entries *arg = _arg;
+
+	gen8_ggtt_insert_entries(arg->vm, arg->st, arg->start, arg->level, 0);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
+					     struct sg_table *st,
+					     u64 start,
+					     enum i915_cache_level level,
+					     u32 unused)
+{
+	struct insert_entries arg = { vm, st, start, level };
+
+	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
+}
+
+struct clear_range {
+	struct i915_address_space *vm;
+	u64 start;
+	u64 length;
+};
+
+static int bxt_vtd_ggtt_clear_range__cb(void *_arg)
+{
+	struct clear_range *arg = _arg;
+
+	gen8_ggtt_clear_range(arg->vm, arg->start, arg->length);
+	bxt_vtd_ggtt_wa(arg->vm);
+
+	return 0;
+}
+
+static void bxt_vtd_ggtt_clear_range__BKL(struct i915_address_space *vm,
+					  u64 start,
+					  u64 length)
+{
+	struct clear_range arg = { vm, start, length };
+
+	stop_machine(bxt_vtd_ggtt_clear_range__cb, &arg, NULL);
+}
+
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  u64 start, u64 length)
 {
@@ -2306,10 +2398,11 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma,
 	if (flags & I915_VMA_LOCAL_BIND) {
 		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;

-		if (appgtt->base.allocate_va_range) {
+		if (!(vma->flags & I915_VMA_LOCAL_BIND) &&
+		    appgtt->base.allocate_va_range) {
 			ret = appgtt->base.allocate_va_range(&appgtt->base,
 							     vma->node.start,
-							     vma->node.size);
+							     vma->size);
 			if (ret)
 				goto err_pages;
 		}
@@ -2579,14 +2672,14 @@ static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
-	return snb_gmch_ctl << 25; /* 32 MB units */
+	return (size_t)snb_gmch_ctl << 25; /* 32 MB units */
 }

 static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
 {
 	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
 	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
-	return bdw_gmch_ctl << 25; /* 32 MB units */
+	return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */
 }

 static size_t chv_get_stolen_size(u16 gmch_ctrl)
@@ -2600,11 +2693,11 @@ static size_t chv_get_stolen_size(u16 gmch_ctrl)
 	 * 0x17 to 0x1d: 4MB increments start at 36MB
 	 */
 	if (gmch_ctrl < 0x11)
-		return gmch_ctrl << 25;
+		return (size_t)gmch_ctrl << 25;
 	else if (gmch_ctrl < 0x17)
-		return (gmch_ctrl - 0x11 + 2) << 22;
+		return (size_t)(gmch_ctrl - 0x11 + 2) << 22;
 	else
-		return (gmch_ctrl - 0x17 + 9) << 22;
+		return (size_t)(gmch_ctrl - 0x17 + 9) << 22;
 }

 static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
@@ -2613,10 +2706,10 @@ static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
 	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;

 	if (gen9_gmch_ctl < 0xf0)
-		return gen9_gmch_ctl << 25; /* 32 MB units */
+		return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */
 	else
 		/* 4MB increments starting at 0xf0 for 4MB */
-		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
+		return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22;
 }

 static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
@@ -2743,13 +2836,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	unsigned int size;
 	u16 snb_gmch_ctl;
+	int err;

 	/* TODO: We're not aware of mappable constraints on gen8 yet */
 	ggtt->mappable_base = pci_resource_start(pdev, 2);
 	ggtt->mappable_end = pci_resource_len(pdev, 2);

-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
-		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
+	if (err)
+		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);

 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

@@ -2781,6 +2878,14 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)

 	ggtt->base.insert_entries = gen8_ggtt_insert_entries;

+	/* Serialize GTT updates with aperture access on BXT if VT-d is on. */
+	if (intel_ggtt_update_needs_vtd_wa(dev_priv)) {
+		ggtt->base.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
+		ggtt->base.insert_page    = bxt_vtd_ggtt_insert_page__BKL;
+		if (ggtt->base.clear_range != nop_clear_range)
+			ggtt->base.clear_range = bxt_vtd_ggtt_clear_range__BKL;
+	}
+
 	ggtt->invalidate = gen6_ggtt_invalidate;

 	return ggtt_probe_common(ggtt, size);
@@ -2792,6 +2897,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	unsigned int size;
 	u16 snb_gmch_ctl;
+	int err;

 	ggtt->mappable_base = pci_resource_start(pdev, 2);
 	ggtt->mappable_end = pci_resource_len(pdev, 2);
@@ -2804,8 +2910,11 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
 		return -ENXIO;
 	}

-	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
-		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+	if (!err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+	if (err)
+		DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err);
 	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

 	ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
@@ -2924,10 +3033,8 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
 		 ggtt->base.total >> 20);
 	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
 	DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
-#ifdef CONFIG_INTEL_IOMMU
-	if (intel_iommu_gfx_mapped)
+	if (intel_vtd_active())
 		DRM_INFO("VT-d active for gfx access\n");
-#endif

 	return 0;
 }

--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -37,8 +37,8 @@

 struct drm_i915_gem_object_ops {
 	unsigned int flags;
-#define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
-#define I915_GEM_OBJECT_IS_SHRINKABLE   0x2
+#define I915_GEM_OBJECT_HAS_STRUCT_PAGE BIT(0)
+#define I915_GEM_OBJECT_IS_SHRINKABLE   BIT(1)

 	/* Interface between the GEM object and its backing storage.
 	 * get_pages() is called once prior to the use of the associated set

--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -61,7 +61,7 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence)
 	if (i915_fence_signaled(fence))
 		return false;

-	intel_engine_enable_signaling(to_request(fence));
+	intel_engine_enable_signaling(to_request(fence), true);
 	return true;
 }

@@ -159,7 +159,7 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt)
 {
 	struct i915_dependency *dep, *next;

-	GEM_BUG_ON(!RB_EMPTY_NODE(&pt->node));
+	GEM_BUG_ON(!list_empty(&pt->link));

 	/* Everyone we depended upon (the fences we wait to be signaled)
 	 * should retire before us and remove themselves from our list.
@@ -185,7 +185,7 @@ i915_priotree_init(struct i915_priotree *pt)
 {
 	INIT_LIST_HEAD(&pt->signalers_list);
 	INIT_LIST_HEAD(&pt->waiters_list);
-	RB_CLEAR_NODE(&pt->node);
+	INIT_LIST_HEAD(&pt->link);
 	pt->priority = INT_MIN;
 }

@@ -214,12 +214,12 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		}

 		/* Finally reset hw state */
-		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
+		tl->seqno = seqno;

 		list_for_each_entry(timeline, &i915->gt.timelines, link)
-			memset(timeline->engine[id].sync_seqno, 0,
-			       sizeof(timeline->engine[id].sync_seqno));
+			memset(timeline->engine[id].global_sync, 0,
+			       sizeof(timeline->engine[id].global_sync));
 	}

 	return 0;
@@ -271,6 +271,48 @@ void i915_gem_retire_noop(struct i915_gem_active *active,
 	/* Space left intentionally blank */
 }

+static void advance_ring(struct drm_i915_gem_request *request)
+{
+	unsigned int tail;
+
+	/* We know the GPU must have read the request to have
+	 * sent us the seqno + interrupt, so use the position
+	 * of tail of the request to update the last known position
+	 * of the GPU head.
+	 *
+	 * Note this requires that we are always called in request
+	 * completion order.
+	 */
+	if (list_is_last(&request->ring_link, &request->ring->request_list)) {
+		/* We may race here with execlists resubmitting this request
+		 * as we retire it. The resubmission will move the ring->tail
+		 * forwards (to request->wa_tail). We either read the
+		 * current value that was written to hw, or the value that
+		 * is just about to be. Either works, if we miss the last two
+		 * noops - they are safe to be replayed on a reset.
+		 */
+		tail = READ_ONCE(request->ring->tail);
+	} else {
+		tail = request->postfix;
+	}
+	list_del(&request->ring_link);
+
+	request->ring->head = tail;
+}
+
+static void free_capture_list(struct drm_i915_gem_request *request)
+{
+	struct i915_gem_capture_list *capture;
+
+	capture = request->capture_list;
+	while (capture) {
+		struct i915_gem_capture_list *next = capture->next;
+
+		kfree(capture);
+		capture = next;
+	}
+}
+
 static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 {
 	struct intel_engine_cs *engine = request->engine;
@@ -287,16 +329,6 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	list_del_init(&request->link);
 	spin_unlock_irq(&engine->timeline->lock);

-	/* We know the GPU must have read the request to have
-	 * sent us the seqno + interrupt, so use the position
-	 * of tail of the request to update the last known position
-	 * of the GPU head.
-	 *
-	 * Note this requires that we are always called in request
-	 * completion order.
-	 */
-	list_del(&request->ring_link);
-	request->ring->head = request->postfix;
 	if (!--request->i915->gt.active_requests) {
 		GEM_BUG_ON(!request->i915->gt.awake);
 		mod_delayed_work(request->i915->wq,
@@ -304,6 +336,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 				 msecs_to_jiffies(100));
 	}
 	unreserve_seqno(request->engine);
+	advance_ring(request);
+
+	free_capture_list(request);

 	/* Walk through the active list, calling retire on each. This allows
 	 * objects to track their GPU activity and mark themselves as idle
@@ -402,7 +437,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request)
 	spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING);
 	request->global_seqno = seqno;
 	if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags))
-		intel_engine_enable_signaling(request);
+		intel_engine_enable_signaling(request, false);
 	spin_unlock(&request->lock);

 	engine->emit_breadcrumb(request,
@@ -503,9 +538,6 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
 *
 * @engine: engine that we wish to issue the request on.
 * @ctx: context that the request will be associated with.
- *       This can be NULL if the request is not directly related to
- *       any specific user context, in which case this function will
- *       choose an appropriate context to use.
 *
 * Returns a pointer to the allocated request if successful,
 * or an error code if not.
@@ -516,6 +548,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct drm_i915_gem_request *req;
+	struct intel_ring *ring;
 	int ret;

 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
@@ -530,9 +563,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GGTT space, so do this first before we reserve a seqno for
 	 * ourselves.
 	 */
-	ret = engine->context_pin(engine, ctx);
-	if (ret)
-		return ERR_PTR(ret);
+	ring = engine->context_pin(engine, ctx);
+	if (IS_ERR(ring))
+		return ERR_CAST(ring);
+	GEM_BUG_ON(!ring);

 	ret = reserve_seqno(engine);
 	if (ret)
@@ -598,11 +632,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	req->i915 = dev_priv;
 	req->engine = engine;
 	req->ctx = ctx;
+	req->ring = ring;

 	/* No zalloc, must clear what we need by hand */
 	req->global_seqno = 0;
 	req->file_priv = NULL;
 	req->batch = NULL;
+	req->capture_list = NULL;

 	/*
 	 * Reserve space in the ring buffer for all the commands required to
@@ -623,7 +659,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	req->head = req->ring->tail;
+	req->head = req->ring->emit;

 	/* Check that we didn't interrupt ourselves with a new request */
 	GEM_BUG_ON(req->timeline->seqno != req->fence.seqno);
@@ -651,6 +687,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	int ret;

 	GEM_BUG_ON(to == from);
+	GEM_BUG_ON(to->timeline == from->timeline);

 	if (i915_gem_request_completed(from))
 		return 0;
@@ -663,9 +700,6 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 			return ret;
 	}

-	if (to->timeline == from->timeline)
-		return 0;
-
 	if (to->engine == from->engine) {
 		ret = i915_sw_fence_await_sw_fence_gfp(&to->submit,
 						       &from->submit,
@@ -674,55 +708,45 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 	}

 	seqno = i915_gem_request_global_seqno(from);
-	if (!seqno) {
-		ret = i915_sw_fence_await_dma_fence(&to->submit,
-						    &from->fence, 0,
-						    GFP_KERNEL);
-		return ret < 0 ? ret : 0;
-	}
+	if (!seqno)
+		goto await_dma_fence;

-	if (seqno <= to->timeline->sync_seqno[from->engine->id])
-		return 0;
+	if (!to->engine->semaphore.sync_to) {
+		if (!__i915_gem_request_started(from, seqno))
+			goto await_dma_fence;

-	trace_i915_gem_ring_sync_to(to, from);
-	if (!i915.semaphores) {
-		if (!i915_spin_request(from, TASK_INTERRUPTIBLE, 2)) {
-			ret = i915_sw_fence_await_dma_fence(&to->submit,
-							    &from->fence, 0,
-							    GFP_KERNEL);
-			if (ret < 0)
-				return ret;
-		}
+		if (!__i915_spin_request(from, seqno, TASK_INTERRUPTIBLE, 2))
+			goto await_dma_fence;
 	} else {
+		GEM_BUG_ON(!from->engine->semaphore.signal);
+
+		if (seqno <= to->timeline->global_sync[from->engine->id])
+			return 0;
+
+		trace_i915_gem_ring_sync_to(to, from);
 		ret = to->engine->semaphore.sync_to(to, from);
 		if (ret)
 			return ret;
+
+		to->timeline->global_sync[from->engine->id] = seqno;
 	}

-	to->timeline->sync_seqno[from->engine->id] = seqno;
 	return 0;
+
+await_dma_fence:
+	ret = i915_sw_fence_await_dma_fence(&to->submit,
+					    &from->fence, 0,
+					    GFP_KERNEL);
+	return ret < 0 ? ret : 0;
 }

 int
 i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 				 struct dma_fence *fence)
 {
-	struct dma_fence_array *array;
+	struct dma_fence **child = &fence;
+	unsigned int nchild = 1;
 	int ret;
-	int i;
-
-	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-		return 0;
-
-	if (dma_fence_is_i915(fence))
-		return i915_gem_request_await_request(req, to_request(fence));
-
-	if (!dma_fence_is_array(fence)) {
-		ret = i915_sw_fence_await_dma_fence(&req->submit,
-						    fence, I915_FENCE_TIMEOUT,
-						    GFP_KERNEL);
-		return ret < 0 ? ret : 0;
-	}

 	/* Note that if the fence-array was created in signal-on-any mode,
 	 * we should *not* decompose it into its individual fences. However,
@@ -731,21 +755,46 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
 	 * amdgpu and we should not see any incoming fence-array from
 	 * sync-file being in signal-on-any mode.
 	 */
+	if (dma_fence_is_array(fence)) {
+		struct dma_fence_array *array = to_dma_fence_array(fence);
+
+		child = array->fences;
+		nchild = array->num_fences;
+		GEM_BUG_ON(!nchild);
+	}
+
+	do {
+		fence = *child++;
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+			continue;
+
+		/*
+		 * Requests on the same timeline are explicitly ordered, along
+		 * with their dependencies, by i915_add_request() which ensures
+		 * that requests are submitted in-order through each ring.
+		 */
+		if (fence->context == req->fence.context)
+			continue;

-	array = to_dma_fence_array(fence);
-	for (i = 0; i < array->num_fences; i++) {
-		struct dma_fence *child = array->fences[i];
+		/* Squash repeated waits to the same timelines */
+		if (fence->context != req->i915->mm.unordered_timeline &&
+		    intel_timeline_sync_is_later(req->timeline, fence))
+			continue;

-		if (dma_fence_is_i915(child))
+		if (dma_fence_is_i915(fence))
 			ret = i915_gem_request_await_request(req,
-							     to_request(child));
+							     to_request(fence));
 		else
-			ret = i915_sw_fence_await_dma_fence(&req->submit,
-							    child, I915_FENCE_TIMEOUT,
+			ret = i915_sw_fence_await_dma_fence(&req->submit, fence,
+							    I915_FENCE_TIMEOUT,
 							    GFP_KERNEL);
 		if (ret < 0)
 			return ret;
-	}
+
+		/* Record the latest fence used against each timeline */
+		if (fence->context != req->i915->mm.unordered_timeline)
+			intel_timeline_sync_set(req->timeline, fence);
+	} while (--nchild);

 	return 0;
 }

--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -67,12 +67,18 @@ struct i915_dependency {
 struct i915_priotree {
 	struct list_head signalers_list; /* those before us, we depend upon */
 	struct list_head waiters_list; /* those after us, they depend upon us */
-	struct rb_node node;
+	struct list_head link;
 	int priority;
 #define I915_PRIORITY_MAX 1024
+#define I915_PRIORITY_NORMAL 0
 #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
 };

+struct i915_gem_capture_list {
+	struct i915_gem_capture_list *next;
+	struct i915_vma *vma;
+};
+
 /**
 * Request queue structure.
 *
@@ -167,6 +173,12 @@ struct drm_i915_gem_request {
 	 * error state dump only).
 	 */
 	struct i915_vma *batch;
+	/** Additional buffers requested by userspace to be captured upon
+	 * a GPU hang. The vma/obj on this list are protected by their
+	 * active reference - all objects on this list must also be
+	 * on the active_list (of their final request).
+	 */
+	struct i915_gem_capture_list *capture_list;
 	struct list_head active_list;

 	/** Time at which this request was emitted, in jiffies. */

--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -35,9 +35,9 @@
 #include "i915_drv.h"
 #include "i915_trace.h"

-static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+static bool shrinker_lock(struct drm_i915_private *dev_priv, bool *unlock)
 {
-	switch (mutex_trylock_recursive(&dev->struct_mutex)) {
+	switch (mutex_trylock_recursive(&dev_priv->drm.struct_mutex)) {
 	case MUTEX_TRYLOCK_FAILED:
 		return false;

@@ -53,24 +53,29 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	BUG();
 }

-static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+static void shrinker_unlock(struct drm_i915_private *dev_priv, bool unlock)
 {
 	if (!unlock)
 		return;

-	mutex_unlock(&dev->struct_mutex);
-
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 }

 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;

-	list_for_each_entry(vma, &obj->vma_list, obj_link)
+	list_for_each_entry(vma, &obj->vma_list, obj_link) {
+		/* Only GGTT vma may be permanently pinned, and are always
+		 * at the start of the list. We can stop hunting as soon
+		 * as we see a ppGTT vma.
+		 */
+		if (!i915_vma_is_ggtt(vma))
+			break;
+
 		if (i915_vma_is_pinned(vma))
 			return true;
+	}

 	return false;
 }
@@ -156,7 +161,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 	unsigned long count = 0;
 	bool unlock;

-	if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return 0;

 	trace_i915_gem_shrink(dev_priv, target, flags);
@@ -244,7 +249,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,

 	i915_gem_retire_requests(dev_priv);

-	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
+	shrinker_unlock(dev_priv, unlock);

 	return count;
 }
@@ -274,8 +279,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 				I915_SHRINK_ACTIVE);
 	intel_runtime_pm_put(dev_priv);

-	synchronize_rcu(); /* wait for our earlier RCU delayed slab frees */
-
 	return freed;
 }

@@ -284,12 +287,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_device *dev = &dev_priv->drm;
 	struct drm_i915_gem_object *obj;
 	unsigned long count;
 	bool unlock;

-	if (!i915_gem_shrinker_lock(dev, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return 0;

 	i915_gem_retire_requests(dev_priv);
@@ -304,7 +306,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 	}

-	i915_gem_shrinker_unlock(dev, unlock);
+	shrinker_unlock(dev_priv, unlock);

 	return count;
 }
@@ -314,11 +316,10 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(shrinker, struct drm_i915_private, mm.shrinker);
-	struct drm_device *dev = &dev_priv->drm;
 	unsigned long freed;
 	bool unlock;

-	if (!i915_gem_shrinker_lock(dev, &unlock))
+	if (!shrinker_lock(dev_priv, &unlock))
 		return SHRINK_STOP;

 	freed = i915_gem_shrink(dev_priv,
@@ -332,26 +333,20 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);

-	i915_gem_shrinker_unlock(dev, unlock);
+	shrinker_unlock(dev_priv, unlock);

 	return freed;
 }

-struct shrinker_lock_uninterruptible {
-	bool was_interruptible;
-	bool unlock;
-};
-
 static bool
-i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
-				       struct shrinker_lock_uninterruptible *slu,
-				       int timeout_ms)
+shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv, bool *unlock,
+			      int timeout_ms)
 {
 	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);

 	do {
 		if (i915_gem_wait_for_idle(dev_priv, 0) == 0 &&
-		    i915_gem_shrinker_lock(&dev_priv->drm, &slu->unlock))
+		    shrinker_lock(dev_priv, unlock))
 			break;

 		schedule_timeout_killable(1);
@@ -364,29 +359,19 @@ i915_gem_shrinker_lock_uninterruptible(struct drm_i915_private *dev_priv,
 		}
 	} while (1);

-	slu->was_interruptible = dev_priv->mm.interruptible;
-	dev_priv->mm.interruptible = false;
 	return true;
 }

-static void
-i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
-					 struct shrinker_lock_uninterruptible *slu)
-{
-	dev_priv->mm.interruptible = slu->was_interruptible;
-	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
-}
-
 static int
 i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 {
 	struct drm_i915_private *dev_priv =
 		container_of(nb, struct drm_i915_private, mm.oom_notifier);
-	struct shrinker_lock_uninterruptible slu;
 	struct drm_i915_gem_object *obj;
 	unsigned long unevictable, bound, unbound, freed_pages;
+	bool unlock;

-	if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
 		return NOTIFY_DONE;

 	freed_pages = i915_gem_shrink_all(dev_priv);
@@ -415,7 +400,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
 			bound += obj->base.size >> PAGE_SHIFT;
 	}

-	i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+	shrinker_unlock(dev_priv, unlock);

 	if (freed_pages || unbound || bound)
 		pr_info("Purging GPU memory, %lu pages freed, "
@@ -435,12 +420,12 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 {
 	struct drm_i915_private *dev_priv =
 		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
-	struct shrinker_lock_uninterruptible slu;
 	struct i915_vma *vma, *next;
 	unsigned long freed_pages = 0;
+	bool unlock;
 	int ret;

-	if (!i915_gem_shrinker_lock_uninterruptible(dev_priv, &slu, 5000))
+	if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000))
 		return NOTIFY_DONE;

 	/* Force everything onto the inactive lists */
@@ -465,7 +450,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr
 	}

 out:
-	i915_gem_shrinker_unlock_uninterruptible(dev_priv, &slu);
+	shrinker_unlock(dev_priv, unlock);

 	*(unsigned long *)ptr += freed_pages;
 	return NOTIFY_DONE;

--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -414,12 +414,10 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 		return 0;
 	}

-#ifdef CONFIG_INTEL_IOMMU
-	if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) {
+	if (intel_vtd_active() && INTEL_GEN(dev_priv) < 8) {
 		DRM_INFO("DMAR active, disabling use of stolen memory\n");
 		return 0;
 	}
-#endif

 	if (ggtt->stolen_size == 0)
 		return 0;

--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -23,6 +23,32 @@
 */

 #include "i915_drv.h"
+#include "i915_syncmap.h"
+
+static void __intel_timeline_init(struct intel_timeline *tl,
+				  struct i915_gem_timeline *parent,
+				  u64 context,
+				  struct lock_class_key *lockclass,
+				  const char *lockname)
+{
+	tl->fence_context = context;
+	tl->common = parent;
+#ifdef CONFIG_DEBUG_SPINLOCK
+	__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
+#else
+	spin_lock_init(&tl->lock);
+#endif
+	init_request_active(&tl->last_request, NULL);
+	INIT_LIST_HEAD(&tl->requests);
+	i915_syncmap_init(&tl->sync);
+}
+
+static void __intel_timeline_fini(struct intel_timeline *tl)
+{
+	GEM_BUG_ON(!list_empty(&tl->requests));
+
+	i915_syncmap_free(&tl->sync);
+}

 static int __i915_gem_timeline_init(struct drm_i915_private *i915,
 				    struct i915_gem_timeline *timeline,
@@ -35,6 +61,14 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,

 	lockdep_assert_held(&i915->drm.struct_mutex);

+	/*
+	 * Ideally we want a set of engines on a single leaf as we expect
+	 * to mostly be tracking synchronisation between engines. It is not
+	 * a huge issue if this is not the case, but we may want to mitigate
+	 * any page crossing penalties if they become an issue.
+	 */
+	BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES);
+
 	timeline->i915 = i915;
 	timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL);
 	if (!timeline->name)
@@ -44,19 +78,10 @@ static int __i915_gem_timeline_init(struct drm_i915_private *i915,

 	/* Called during early_init before we know how many engines there are */
 	fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine));
-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-		struct intel_timeline *tl = &timeline->engine[i];
-
-		tl->fence_context = fences++;
-		tl->common = timeline;
-#ifdef CONFIG_DEBUG_SPINLOCK
-		__raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
-#else
-		spin_lock_init(&tl->lock);
-#endif
-		init_request_active(&tl->last_request, NULL);
-		INIT_LIST_HEAD(&tl->requests);
-	}
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+		__intel_timeline_init(&timeline->engine[i],
+				      timeline, fences++,
+				      lockclass, lockname);

 	return 0;
 }
@@ -81,18 +106,52 @@ int i915_gem_timeline_init__global(struct drm_i915_private *i915)
 					&class, "&global_timeline->lock");
 }

+/**
+ * i915_gem_timelines_mark_idle -- called when the driver idles
+ * @i915 - the drm_i915_private device
+ *
+ * When the driver is completely idle, we know that all of our sync points
+ * have been signaled and our tracking is then entirely redundant. Any request
+ * to wait upon an older sync point will be completed instantly as we know
+ * the fence is signaled and therefore we will not even look them up in the
+ * sync point map.
+ */
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915)
+{
+	struct i915_gem_timeline *timeline;
+	int i;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	list_for_each_entry(timeline, &i915->gt.timelines, link) {
+		for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
+			struct intel_timeline *tl = &timeline->engine[i];
+
+			/*
+			 * All known fences are completed so we can scrap
+			 * the current sync point tracking and start afresh,
+			 * any attempt to wait upon a previous sync point
+			 * will be skipped as the fence was signaled.
+			 */
+			i915_syncmap_free(&tl->sync);
+		}
+	}
+}
+
 void i915_gem_timeline_fini(struct i915_gem_timeline *timeline)
 {
 	int i;

 	lockdep_assert_held(&timeline->i915->drm.struct_mutex);

-	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) {
-		struct intel_timeline *tl = &timeline->engine[i];
-
-		GEM_BUG_ON(!list_empty(&tl->requests));
-	}
+	for (i = 0; i < ARRAY_SIZE(timeline->engine); i++)
+		__intel_timeline_fini(&timeline->engine[i]);

 	list_del(&timeline->link);
 	kfree(timeline->name);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_timeline.c"
+#include "selftests/i915_gem_timeline.c"
+#endif
--- a/drivers/gpu/drm/i915/i915_gem_timeline.h
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.h
@@ -27,7 +27,9 @@

 #include <linux/list.h>

+#include "i915_utils.h"
 #include "i915_gem_request.h"
+#include "i915_syncmap.h"

 struct i915_gem_timeline;

@@ -55,7 +57,25 @@ struct intel_timeline {
 	 * struct_mutex.
 	 */
 	struct i915_gem_active last_request;
-	u32 sync_seqno[I915_NUM_ENGINES];
+
+	/**
+	 * We track the most recent seqno that we wait on in every context so
+	 * that we only have to emit a new await and dependency on a more
+	 * recent sync point. As the contexts may be executed out-of-order, we
+	 * have to track each individually and can not rely on an absolute
+	 * global_seqno. When we know that all tracked fences are completed
+	 * (i.e. when the driver is idle), we know that the syncmap is
+	 * redundant and we can discard it without loss of generality.
+	 */
+	struct i915_syncmap *sync;
+	/**
+	 * Separately to the inter-context seqno map above, we track the last
+	 * barrier (e.g. semaphore wait) to the global engine timelines. Note
+	 * that this tracks global_seqno rather than the context.seqno, and
+	 * so it is subject to the limitations of hw wraparound and that we
+	 * may need to revoke global_seqno (on pre-emption).
+	 */
+	u32 global_sync[I915_NUM_ENGINES];

 	struct i915_gem_timeline *common;
 };
@@ -73,6 +93,31 @@ int i915_gem_timeline_init(struct drm_i915_private *i915,
 			   struct i915_gem_timeline *tl,
 			   const char *name);
 int i915_gem_timeline_init__global(struct drm_i915_private *i915);
+void i915_gem_timelines_mark_idle(struct drm_i915_private *i915);
 void i915_gem_timeline_fini(struct i915_gem_timeline *tl);

+static inline int __intel_timeline_sync_set(struct intel_timeline *tl,
+					    u64 context, u32 seqno)
+{
+	return i915_syncmap_set(&tl->sync, context, seqno);
+}
+
+static inline int intel_timeline_sync_set(struct intel_timeline *tl,
+					  const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_set(tl, fence->context, fence->seqno);
+}
+
+static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl,
+						  u64 context, u32 seqno)
+{
+	return i915_syncmap_is_later(&tl->sync, context, seqno);
+}
+
+static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl,
+						const struct dma_fence *fence)
+{
+	return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno);
+}
+
 #endif
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 			print_error_obj(m, dev_priv->engine[i], NULL, obj);
 		}

+		for (j = 0; j < ee->user_bo_count; j++)
+			print_error_obj(m, dev_priv->engine[i],
+					"user", ee->user_bo[j]);
+
 		if (ee->num_requests) {
 			err_printf(m, "%s --- %d requests\n",
 				   dev_priv->engine[i]->name,
@@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
 {
 	struct i915_gpu_state *error =
 		container_of(error_ref, typeof(*error), ref);
-	int i;
+	long i, j;

 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		struct drm_i915_error_engine *ee = &error->engine[i];

+		for (j = 0; j < ee->user_bo_count; j++)
+			i915_error_object_free(ee->user_bo[j]);
+		kfree(ee->user_bo);
+
 		i915_error_object_free(ee->batchbuffer);
 		i915_error_object_free(ee->wa_batchbuffer);
 		i915_error_object_free(ee->ringbuffer);
@@ -1316,12 +1324,17 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 static void error_record_engine_execlists(struct intel_engine_cs *engine,
 					  struct drm_i915_error_engine *ee)
 {
+	const struct execlist_port *port = engine->execlist_port;
 	unsigned int n;

-	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++)
-		if (engine->execlist_port[n].request)
-			record_request(engine->execlist_port[n].request,
-				       &ee->execlist[n]);
+	for (n = 0; n < ARRAY_SIZE(engine->execlist_port); n++) {
+		struct drm_i915_gem_request *rq = port_request(&port[n]);
+
+		if (!rq)
+			break;
+
+		record_request(rq, &ee->execlist[n]);
+	}
 }

 static void record_context(struct drm_i915_error_context *e,
@@ -1346,6 +1359,35 @@ static void record_context(struct drm_i915_error_context *e,
 	e->active = ctx->active_count;
 }

+static void request_record_user_bo(struct drm_i915_gem_request *request,
+				   struct drm_i915_error_engine *ee)
+{
+	struct i915_gem_capture_list *c;
+	struct drm_i915_error_object **bo;
+	long count;
+
+	count = 0;
+	for (c = request->capture_list; c; c = c->next)
+		count++;
+
+	bo = NULL;
+	if (count)
+		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
+	if (!bo)
+		return;
+
+	count = 0;
+	for (c = request->capture_list; c; c = c->next) {
+		bo[count] = i915_error_object_create(request->i915, c->vma);
+		if (!bo[count])
+			break;
+		count++;
+	}
+
+	ee->user_bo = bo;
+	ee->user_bo_count = count;
+}
+
 static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 				  struct i915_gpu_state *error)
 {
@@ -1392,6 +1434,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
 				ee->wa_batchbuffer =
 					i915_error_object_create(dev_priv,
 								 engine->scratch);
+			request_record_user_bo(request, ee);

 			ee->ctx =
 				i915_error_object_create(dev_priv,
@@ -1560,6 +1603,9 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 		error->done_reg = I915_READ(DONE_REG);
 	}

+	if (INTEL_GEN(dev_priv) >= 5)
+		error->ccid = I915_READ(CCID);
+
 	/* 3: Feature specific registers */
 	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
 		error->gam_ecochk = I915_READ(GAM_ECOCHK);
@@ -1567,9 +1613,6 @@ static void i915_capture_reg_state(struct drm_i915_private *dev_priv,
 	}

 	/* 4: Everything else */
-	if (HAS_HW_CONTEXTS(dev_priv))
-		error->ccid = I915_READ(CCID);
-
 	if (INTEL_GEN(dev_priv) >= 8) {
 		error->ier = I915_READ(GEN8_DE_MISC_IER);
 		for (i = 0; i < 4; i++)

--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -480,9 +480,7 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	GEM_BUG_ON(freespace < wqi_size);

 	/* The GuC firmware wants the tail index in QWords, not bytes */
-	tail = rq->tail;
-	assert_ring_tail_valid(rq->ring, rq->tail);
-	tail >>= 3;
+	tail = intel_ring_set_tail(rq->ring, rq->tail) >> 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);

 	/* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we
@@ -616,12 +614,6 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 	b_ret = guc_ring_doorbell(client);

 	client->submissions[engine_id] += 1;
-	client->retcode = b_ret;
-	if (b_ret)
-		client->b_fail += 1;
-
-	guc->submissions[engine_id] += 1;
-	guc->last_seqno[engine_id] = rq->global_seqno;

 	spin_unlock_irqrestore(&client->wq_lock, flags);
 }
@@ -651,47 +643,68 @@ static void nested_enable_signaling(struct drm_i915_gem_request *rq)
 	trace_dma_fence_enable_signal(&rq->fence);

 	spin_lock_nested(&rq->lock, SINGLE_DEPTH_NESTING);
-	intel_engine_enable_signaling(rq);
+	intel_engine_enable_signaling(rq, true);
 	spin_unlock(&rq->lock);
 }

+static void port_assign(struct execlist_port *port,
+			struct drm_i915_gem_request *rq)
+{
+	GEM_BUG_ON(rq == port_request(port));
+
+	if (port_isset(port))
+		i915_gem_request_put(port_request(port));
+
+	port_set(port, i915_gem_request_get(rq));
+	nested_enable_signaling(rq);
+}
+
 static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlist_port;
-	struct drm_i915_gem_request *last = port[0].request;
+	struct drm_i915_gem_request *last = port_request(port);
 	struct rb_node *rb;
 	bool submit = false;

 	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
+	GEM_BUG_ON(rb_first(&engine->execlist_queue) != rb);
 	while (rb) {
-		struct drm_i915_gem_request *rq =
-			rb_entry(rb, typeof(*rq), priotree.node);
-
-		if (last && rq->ctx != last->ctx) {
-			if (port != engine->execlist_port)
-				break;
-
-			i915_gem_request_assign(&port->request, last);
-			nested_enable_signaling(last);
-			port++;
+		struct i915_priolist *p = rb_entry(rb, typeof(*p), node);
+		struct drm_i915_gem_request *rq, *rn;
+
+		list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) {
+			if (last && rq->ctx != last->ctx) {
+				if (port != engine->execlist_port) {
+					__list_del_many(&p->requests,
+							&rq->priotree.link);
+					goto done;
+				}
+
+				if (submit)
+					port_assign(port, last);
+				port++;
+			}
+
+			INIT_LIST_HEAD(&rq->priotree.link);
+			rq->priotree.priority = INT_MAX;
+
+			i915_guc_submit(rq);
+			trace_i915_gem_request_in(rq, port_index(port, engine));
+			last = rq;
+			submit = true;
 		}

 		rb = rb_next(rb);
-		rb_erase(&rq->priotree.node, &engine->execlist_queue);
-		RB_CLEAR_NODE(&rq->priotree.node);
-		rq->priotree.priority = INT_MAX;
-
-		i915_guc_submit(rq);
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
-		last = rq;
-		submit = true;
-	}
-	if (submit) {
-		i915_gem_request_assign(&port->request, last);
-		nested_enable_signaling(last);
-		engine->execlist_first = rb;
+		rb_erase(&p->node, &engine->execlist_queue);
+		INIT_LIST_HEAD(&p->requests);
+		if (p->priority != I915_PRIORITY_NORMAL)
+			kmem_cache_free(engine->i915->priorities, p);
 	}
+done:
+	engine->execlist_first = rb;
+	if (submit)
+		port_assign(port, last);
 	spin_unlock_irq(&engine->timeline->lock);

 	return submit;
@@ -705,17 +718,19 @@ static void i915_guc_irq_handler(unsigned long data)
 	bool submit;

 	do {
-		rq = port[0].request;
+		rq = port_request(&port[0]);
 		while (rq && i915_gem_request_completed(rq)) {
 			trace_i915_gem_request_out(rq);
 			i915_gem_request_put(rq);
-			port[0].request = port[1].request;
-			port[1].request = NULL;
-			rq = port[0].request;
+
+			port[0] = port[1];
+			memset(&port[1], 0, sizeof(port[1]));
+
+			rq = port_request(&port[0]);
 		}

 		submit = false;
-		if (!port[1].request)
+		if (!port_count(&port[1]))
 			submit = i915_guc_dequeue(engine);
 	} while (submit);
 }
@@ -1053,8 +1068,7 @@ static int guc_ads_create(struct intel_guc *guc)
 		dev_priv->engine[RCS]->status_page.ggtt_offset;

 	for_each_engine(engine, dev_priv, id)
-		blob->ads.eng_state_size[engine->guc_id] =
-			intel_lr_context_size(engine);
+		blob->ads.eng_state_size[engine->guc_id] = engine->context_size;

 	base = guc_ggtt_offset(vma);
 	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);

--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1200,7 +1200,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 static void ivybridge_parity_work(struct work_struct *work)
 {
 	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private, l3_parity.error_work);
+		container_of(work, typeof(*dev_priv), l3_parity.error_work);
 	u32 error_status, row, bank, subbank;
 	char *parity_event[6];
 	uint32_t misccpctl;
@@ -1317,14 +1317,16 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv,
 		ivybridge_parity_error_irq_handler(dev_priv, gt_iir);
 }

-static __always_inline void
+static void
 gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift)
 {
 	bool tasklet = false;

 	if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) {
-		set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
-		tasklet = true;
+		if (port_count(&engine->execlist_port[0])) {
+			__set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+			tasklet = true;
+		}
 	}

 	if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) {
@@ -2917,7 +2919,6 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 	u32 pipestat_mask;
 	u32 enable_mask;
 	enum pipe pipe;
-	u32 val;

 	pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
 			PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2928,18 +2929,16 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)

 	enable_mask = I915_DISPLAY_PORT_INTERRUPT |
 		I915_DISPLAY_PIPE_A_EVENT_INTERRUPT |
-		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;
+		I915_DISPLAY_PIPE_B_EVENT_INTERRUPT |
+		I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT;
+
 	if (IS_CHERRYVIEW(dev_priv))
-		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT;
+		enable_mask |= I915_DISPLAY_PIPE_C_EVENT_INTERRUPT |
+			I915_LPE_PIPE_C_INTERRUPT;

 	WARN_ON(dev_priv->irq_mask != ~0);

-	val = (I915_LPE_PIPE_A_INTERRUPT |
-		I915_LPE_PIPE_B_INTERRUPT |
-		I915_LPE_PIPE_C_INTERRUPT);
-
-	enable_mask |= val;
-
 	dev_priv->irq_mask = ~enable_mask;

 	GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
@@ -4197,11 +4196,15 @@ static void i965_irq_uninstall(struct drm_device * dev)
 void intel_irq_init(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = &dev_priv->drm;
+	int i;

 	intel_hpd_init_work(dev_priv);

 	INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work);
+
 	INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work);
+	for (i = 0; i < MAX_L3_SLICES; ++i)
+		dev_priv->l3_parity.remap_info[i] = NULL;

 	if (HAS_GUC_SCHED(dev_priv))
 		dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT;
@@ -4326,6 +4329,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	}
 }

+/**
+ * intel_irq_fini - deinitializes IRQ support
+ * @i915: i915 device instance
+ *
+ * This function deinitializes all the IRQ support.
+ */
+void intel_irq_fini(struct drm_i915_private *i915)
+{
+	int i;
+
+	for (i = 0; i < MAX_L3_SLICES; ++i)
+		kfree(i915->l3_parity.remap_info[i]);
+}
+
 /**
 * intel_irq_install - enables the hardware interrupt
 * @dev_priv: i915 device instance

--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -220,7 +220,6 @@ static const struct intel_device_info intel_ironlake_m_info = {
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_gmbus_irq = 1, \
-	.has_hw_contexts = 1, \
 	.has_aliasing_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	CURSOR_OFFSETS
@@ -245,7 +244,6 @@ static const struct intel_device_info intel_sandybridge_m_info = {
 	.has_rc6 = 1, \
 	.has_rc6p = 1, \
 	.has_gmbus_irq = 1, \
-	.has_hw_contexts = 1, \
 	.has_aliasing_ppgtt = 1, \
 	.has_full_ppgtt = 1, \
 	GEN_DEFAULT_PIPEOFFSETS, \
@@ -280,7 +278,6 @@ static const struct intel_device_info intel_valleyview_info = {
 	.has_runtime_pm = 1,
 	.has_rc6 = 1,
 	.has_gmbus_irq = 1,
-	.has_hw_contexts = 1,
 	.has_gmch_display = 1,
 	.has_hotplug = 1,
 	.has_aliasing_ppgtt = 1,
@@ -340,7 +337,6 @@ static const struct intel_device_info intel_cherryview_info = {
 	.has_resource_streamer = 1,
 	.has_rc6 = 1,
 	.has_gmbus_irq = 1,
-	.has_hw_contexts = 1,
 	.has_logical_ring_contexts = 1,
 	.has_gmch_display = 1,
 	.has_aliasing_ppgtt = 1,
@@ -387,7 +383,6 @@ static const struct intel_device_info intel_skylake_gt3_info = {
 	.has_rc6 = 1, \
 	.has_dp_mst = 1, \
 	.has_gmbus_irq = 1, \
-	.has_hw_contexts = 1, \
 	.has_logical_ring_contexts = 1, \
 	.has_guc = 1, \
 	.has_decoupled_mmio = 1, \

--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -85,6 +85,14 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 #define VECS_HW		3
 #define VCS2_HW		4

+/* Engine class */
+
+#define RENDER_CLASS		0
+#define VIDEO_DECODE_CLASS	1
+#define VIDEO_ENHANCEMENT_CLASS	2
+#define COPY_ENGINE_CLASS	3
+#define OTHER_CLASS		4
+
 /* PCI config space */

 #define MCHBAR_I915 0x44
@@ -3051,10 +3059,14 @@ enum skl_disp_power_wells {
 #define CLKCFG_FSB_667					(3 << 0)	/* hrawclk 166 */
 #define CLKCFG_FSB_800					(2 << 0)	/* hrawclk 200 */
 #define CLKCFG_FSB_1067					(6 << 0)	/* hrawclk 266 */
+#define CLKCFG_FSB_1067_ALT				(0 << 0)	/* hrawclk 266 */
 #define CLKCFG_FSB_1333					(7 << 0)	/* hrawclk 333 */
-/* Note, below two are guess */
-#define CLKCFG_FSB_1600					(4 << 0)	/* hrawclk 400 */
-#define CLKCFG_FSB_1600_ALT				(0 << 0)	/* hrawclk 400 */
+/*
+ * Note that on at least on ELK the below value is reported for both
+ * 333 and 400 MHz BIOS FSB setting, but given that the gmch datasheet
+ * lists only 200/266/333 MHz FSB as supported let's decode it as 333 MHz.
+ */
+#define CLKCFG_FSB_1333_ALT				(4 << 0)	/* hrawclk 333 */
 #define CLKCFG_FSB_MASK					(7 << 0)
 #define CLKCFG_MEM_533					(1 << 4)
 #define CLKCFG_MEM_667					(2 << 4)
@@ -3362,16 +3374,6 @@ enum skl_disp_power_wells {
 #define GEN7_CXT_VFSTATE_SIZE(ctx_reg)	(((ctx_reg) >> 0) & 0x3f)
 #define GEN7_CXT_TOTAL_SIZE(ctx_reg)	(GEN7_CXT_EXTENDED_SIZE(ctx_reg) + \
 					 GEN7_CXT_VFSTATE_SIZE(ctx_reg))
-/* Haswell does have the CXT_SIZE register however it does not appear to be
- * valid. Now, docs explain in dwords what is in the context object. The full
- * size is 70720 bytes, however, the power context and execlist context will
- * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size, including the extra state required for the
- * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
- */
-#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)
-/* Same as Haswell, but 72064 bytes now. */
-#define GEN8_CXT_TOTAL_SIZE		(18 * PAGE_SIZE)

 enum {
 	INTEL_ADVANCED_CONTEXT = 0,
@@ -5437,9 +5439,7 @@ enum {
 #define   CURSOR_MODE_128_ARGB_AX ((1 << 5) | CURSOR_MODE_128_32B_AX)
 #define   CURSOR_MODE_256_ARGB_AX ((1 << 5) | CURSOR_MODE_256_32B_AX)
 #define   CURSOR_MODE_64_ARGB_AX ((1 << 5) | CURSOR_MODE_64_32B_AX)
-#define   MCURSOR_PIPE_SELECT	(1 << 28)
-#define   MCURSOR_PIPE_A	0x00
-#define   MCURSOR_PIPE_B	(1 << 28)
+#define   MCURSOR_PIPE_SELECT(pipe)	((pipe) << 28)
 #define   MCURSOR_GAMMA_ENABLE  (1 << 26)
 #define   CURSOR_ROTATE_180	(1<<15)
 #define   CURSOR_TRICKLE_FEED_DISABLE	(1 << 14)
@@ -5449,7 +5449,9 @@ enum {
 #define   CURSOR_POS_SIGN       0x8000
 #define   CURSOR_X_SHIFT        0
 #define   CURSOR_Y_SHIFT        16
-#define CURSIZE			_MMIO(0x700a0)
+#define CURSIZE			_MMIO(0x700a0) /* 845/865 */
+#define _CUR_FBC_CTL_A		0x700a0 /* ivb+ */
+#define   CUR_FBC_CTL_EN	(1 << 31)
 #define _CURBCNTR		0x700c0
 #define _CURBBASE		0x700c4
 #define _CURBPOS		0x700c8
@@ -5465,6 +5467,7 @@ enum {
 #define CURCNTR(pipe) _CURSOR2(pipe, _CURACNTR)
 #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE)
 #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS)
+#define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A)

 #define CURSOR_A_OFFSET 0x70080
 #define CURSOR_B_OFFSET 0x700c0
@@ -5497,8 +5500,7 @@ enum {
 #define   DISPPLANE_PIPE_CSC_ENABLE		(1<<24)
 #define   DISPPLANE_SEL_PIPE_SHIFT		24
 #define   DISPPLANE_SEL_PIPE_MASK		(3<<DISPPLANE_SEL_PIPE_SHIFT)
-#define   DISPPLANE_SEL_PIPE_A			0
-#define   DISPPLANE_SEL_PIPE_B			(1<<DISPPLANE_SEL_PIPE_SHIFT)
+#define   DISPPLANE_SEL_PIPE(pipe)		((pipe)<<DISPPLANE_SEL_PIPE_SHIFT)
 #define   DISPPLANE_SRC_KEY_ENABLE		(1<<22)
 #define   DISPPLANE_SRC_KEY_DISABLE		0
 #define   DISPPLANE_LINE_DOUBLE			(1<<20)
@@ -8276,7 +8278,7 @@ enum {

 /* MIPI DSI registers */

-#define _MIPI_PORT(port, a, c)	((port) ? c : a)	/* ports A and C only */
+#define _MIPI_PORT(port, a, c)	(((port) == PORT_A) ? a : c)	/* ports A and C only */
 #define _MMIO_MIPI(port, a, c)	_MMIO(_MIPI_PORT(port, a, c))

 #define MIPIO_TXESC_CLK_DIV1			_MMIO(0x160004)

--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -12,6 +12,7 @@
 #include <linux/reservation.h>

 #include "i915_sw_fence.h"
+#include "i915_selftest.h"

 #define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */

@@ -120,34 +121,6 @@ void i915_sw_fence_fini(struct i915_sw_fence *fence)
 }
 #endif

-static void i915_sw_fence_release(struct kref *kref)
-{
-	struct i915_sw_fence *fence = container_of(kref, typeof(*fence), kref);
-
-	WARN_ON(atomic_read(&fence->pending) > 0);
-	debug_fence_destroy(fence);
-
-	if (fence->flags & I915_SW_FENCE_MASK) {
-		__i915_sw_fence_notify(fence, FENCE_FREE);
-	} else {
-		i915_sw_fence_fini(fence);
-		kfree(fence);
-	}
-}
-
-static void i915_sw_fence_put(struct i915_sw_fence *fence)
-{
-	debug_fence_assert(fence);
-	kref_put(&fence->kref, i915_sw_fence_release);
-}
-
-static struct i915_sw_fence *i915_sw_fence_get(struct i915_sw_fence *fence)
-{
-	debug_fence_assert(fence);
-	kref_get(&fence->kref);
-	return fence;
-}
-
 static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence,
 					struct list_head *continuation)
 {
@@ -202,13 +175,15 @@ static void __i915_sw_fence_complete(struct i915_sw_fence *fence,

 	debug_fence_set_state(fence, DEBUG_FENCE_IDLE, DEBUG_FENCE_NOTIFY);

-	if (fence->flags & I915_SW_FENCE_MASK &&
-	    __i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
+	if (__i915_sw_fence_notify(fence, FENCE_COMPLETE) != NOTIFY_DONE)
 		return;

 	debug_fence_set_state(fence, DEBUG_FENCE_NOTIFY, DEBUG_FENCE_IDLE);

 	__i915_sw_fence_wake_up_all(fence, continuation);
+
+	debug_fence_destroy(fence);
+	__i915_sw_fence_notify(fence, FENCE_FREE);
 }

 static void i915_sw_fence_complete(struct i915_sw_fence *fence)
@@ -232,33 +207,26 @@ void __i915_sw_fence_init(struct i915_sw_fence *fence,
 			  const char *name,
 			  struct lock_class_key *key)
 {
-	BUG_ON((unsigned long)fn & ~I915_SW_FENCE_MASK);
+	BUG_ON(!fn || (unsigned long)fn & ~I915_SW_FENCE_MASK);

 	debug_fence_init(fence);

 	__init_waitqueue_head(&fence->wait, name, key);
-	kref_init(&fence->kref);
 	atomic_set(&fence->pending, 1);
 	fence->flags = (unsigned long)fn;
 }

-static void __i915_sw_fence_commit(struct i915_sw_fence *fence)
-{
-	i915_sw_fence_complete(fence);
-	i915_sw_fence_put(fence);
-}
-
 void i915_sw_fence_commit(struct i915_sw_fence *fence)
 {
 	debug_fence_activate(fence);
-	__i915_sw_fence_commit(fence);
+	i915_sw_fence_complete(fence);
 }

 static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void *key)
 {
 	list_del(&wq->task_list);
 	__i915_sw_fence_complete(wq->private, key);
-	i915_sw_fence_put(wq->private);
+
 	if (wq->flags & I915_SW_FENCE_FLAG_ALLOC)
 		kfree(wq);
 	return 0;
@@ -307,7 +275,7 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence,
 	unsigned long flags;
 	bool err;

-	if (!IS_ENABLED(CONFIG_I915_SW_FENCE_CHECK_DAG))
+	if (!IS_ENABLED(CONFIG_DRM_I915_SW_FENCE_CHECK_DAG))
 		return false;

 	spin_lock_irqsave(&i915_sw_fence_lock, flags);
@@ -353,7 +321,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 	INIT_LIST_HEAD(&wq->task_list);
 	wq->flags = pending;
 	wq->func = i915_sw_fence_wake;
-	wq->private = i915_sw_fence_get(fence);
+	wq->private = fence;

 	i915_sw_fence_await(fence);

@@ -402,7 +370,7 @@ static void timer_i915_sw_fence_wake(unsigned long data)
 	dma_fence_put(cb->dma);
 	cb->dma = NULL;

-	__i915_sw_fence_commit(cb->fence);
+	i915_sw_fence_complete(cb->fence);
 	cb->timer.function = NULL;
 }

@@ -413,7 +381,7 @@ static void dma_i915_sw_fence_wake(struct dma_fence *dma,

 	del_timer_sync(&cb->timer);
 	if (cb->timer.function)
-		__i915_sw_fence_commit(cb->fence);
+		i915_sw_fence_complete(cb->fence);
 	dma_fence_put(cb->dma);

 	kfree(cb);
@@ -440,7 +408,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
 		return dma_fence_wait(dma, false);
 	}

-	cb->fence = i915_sw_fence_get(fence);
+	cb->fence = fence;
 	i915_sw_fence_await(fence);

 	cb->dma = NULL;
@@ -523,3 +491,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,

 	return ret;
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_sw_fence.c"
+#endif
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -23,7 +23,6 @@ struct reservation_object;
 struct i915_sw_fence {
 	wait_queue_head_t wait;
 	unsigned long flags;
-	struct kref kref;
 	atomic_t pending;
 };


--- a/drivers/gpu/drm/i915/i915_syncmap.c
+++ b/drivers/gpu/drm/i915/i915_syncmap.c
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "i915_syncmap.h"
+
+#include "i915_gem.h" /* GEM_BUG_ON() */
+#include "i915_selftest.h"
+
+#define SHIFT ilog2(KSYNCMAP)
+#define MASK (KSYNCMAP - 1)
+
+/*
+ * struct i915_syncmap is a layer of a radixtree that maps a u64 fence
+ * context id to the last u32 fence seqno waited upon from that context.
+ * Unlike lib/radixtree it uses a parent pointer that allows traversal back to
+ * the root. This allows us to access the whole tree via a single pointer
+ * to the most recently used layer. We expect fence contexts to be dense
+ * and most reuse to be on the same i915_gem_context but on neighbouring
+ * engines (i.e. on adjacent contexts) and reuse the same leaf, a very
+ * effective lookup cache. If the new lookup is not on the same leaf, we
+ * expect it to be on the neighbouring branch.
+ *
+ * A leaf holds an array of u32 seqno, and has height 0. The bitmap field
+ * allows us to store whether a particular seqno is valid (i.e. allows us
+ * to distinguish unset from 0).
+ *
+ * A branch holds an array of layer pointers, and has height > 0, and always
+ * has at least 2 layers (either branches or leaves) below it.
+ *
+ * For example,
+ *	for x in
+ *	  0 1 2 0x10 0x11 0x200 0x201
+ *	  0x500000 0x500001 0x503000 0x503001
+ *	  0xE<<60:
+ *		i915_syncmap_set(&sync, x, lower_32_bits(x));
+ * will build a tree like:
+ *	0xXXXXXXXXXXXXXXXX
+ *	0-> 0x0000000000XXXXXX
+ *	|   0-> 0x0000000000000XXX
+ *	|   |   0-> 0x00000000000000XX
+ *	|   |   |   0-> 0x000000000000000X 0:0, 1:1, 2:2
+ *	|   |   |   1-> 0x000000000000001X 0:10, 1:11
+ *	|   |   2-> 0x000000000000020X 0:200, 1:201
+ *	|   5-> 0x000000000050XXXX
+ *	|       0-> 0x000000000050000X 0:500000, 1:500001
+ *	|       3-> 0x000000000050300X 0:503000, 1:503001
+ *	e-> 0xe00000000000000X e:e
+ */
+
+struct i915_syncmap {
+	u64 prefix;
+	unsigned int height;
+	unsigned int bitmap;
+	struct i915_syncmap *parent;
+	/*
+	 * Following this header is an array of either seqno or child pointers:
+	 * union {
+	 *	u32 seqno[KSYNCMAP];
+	 *	struct i915_syncmap *child[KSYNCMAP];
+	 * };
+	 */
+};
+
+/**
+ * i915_syncmap_init -- initialise the #i915_syncmap
+ * @root - pointer to the #i915_syncmap
+ */
+void i915_syncmap_init(struct i915_syncmap **root)
+{
+	BUILD_BUG_ON_NOT_POWER_OF_2(KSYNCMAP);
+	BUILD_BUG_ON_NOT_POWER_OF_2(SHIFT);
+	BUILD_BUG_ON(KSYNCMAP > BITS_PER_BYTE * sizeof((*root)->bitmap));
+	*root = NULL;
+}
+
+static inline u32 *__sync_seqno(struct i915_syncmap *p)
+{
+	GEM_BUG_ON(p->height);
+	return (u32 *)(p + 1);
+}
+
+static inline struct i915_syncmap **__sync_child(struct i915_syncmap *p)
+{
+	GEM_BUG_ON(!p->height);
+	return (struct i915_syncmap **)(p + 1);
+}
+
+static inline unsigned int
+__sync_branch_idx(const struct i915_syncmap *p, u64 id)
+{
+	return (id >> p->height) & MASK;
+}
+
+static inline unsigned int
+__sync_leaf_idx(const struct i915_syncmap *p, u64 id)
+{
+	GEM_BUG_ON(p->height);
+	return id & MASK;
+}
+
+static inline u64 __sync_branch_prefix(const struct i915_syncmap *p, u64 id)
+{
+	return id >> p->height >> SHIFT;
+}
+
+static inline u64 __sync_leaf_prefix(const struct i915_syncmap *p, u64 id)
+{
+	GEM_BUG_ON(p->height);
+	return id >> SHIFT;
+}
+
+static inline bool seqno_later(u32 a, u32 b)
+{
+	return (s32)(a - b) >= 0;
+}
+
+/**
+ * i915_syncmap_is_later -- compare against the last know sync point
+ * @root - pointer to the #i915_syncmap
+ * @id - the context id (other timeline) we are synchronising to
+ * @seqno - the sequence number along the other timeline
+ *
+ * If we have already synchronised this @root timeline with another (@id) then
+ * we can omit any repeated or earlier synchronisation requests. If the two
+ * timelines are already coupled, we can also omit the dependency between the
+ * two as that is already known via the timeline.
+ *
+ * Returns true if the two timelines are already synchronised wrt to @seqno,
+ * false if not and the synchronisation must be emitted.
+ */
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p;
+	unsigned int idx;
+
+	p = *root;
+	if (!p)
+		return false;
+
+	if (likely(__sync_leaf_prefix(p, id) == p->prefix))
+		goto found;
+
+	/* First climb the tree back to a parent branch */
+	do {
+		p = p->parent;
+		if (!p)
+			return false;
+
+		if (__sync_branch_prefix(p, id) == p->prefix)
+			break;
+	} while (1);
+
+	/* And then descend again until we find our leaf */
+	do {
+		if (!p->height)
+			break;
+
+		p = __sync_child(p)[__sync_branch_idx(p, id)];
+		if (!p)
+			return false;
+
+		if (__sync_branch_prefix(p, id) != p->prefix)
+			return false;
+	} while (1);
+
+	*root = p;
+found:
+	idx = __sync_leaf_idx(p, id);
+	if (!(p->bitmap & BIT(idx)))
+		return false;
+
+	return seqno_later(__sync_seqno(p)[idx], seqno);
+}
+
+static struct i915_syncmap *
+__sync_alloc_leaf(struct i915_syncmap *parent, u64 id)
+{
+	struct i915_syncmap *p;
+
+	p = kmalloc(sizeof(*p) + KSYNCMAP * sizeof(u32), GFP_KERNEL);
+	if (unlikely(!p))
+		return NULL;
+
+	p->parent = parent;
+	p->height = 0;
+	p->bitmap = 0;
+	p->prefix = __sync_leaf_prefix(p, id);
+	return p;
+}
+
+static inline void __sync_set_seqno(struct i915_syncmap *p, u64 id, u32 seqno)
+{
+	unsigned int idx = __sync_leaf_idx(p, id);
+
+	p->bitmap |= BIT(idx);
+	__sync_seqno(p)[idx] = seqno;
+}
+
+static inline void __sync_set_child(struct i915_syncmap *p,
+				    unsigned int idx,
+				    struct i915_syncmap *child)
+{
+	p->bitmap |= BIT(idx);
+	__sync_child(p)[idx] = child;
+}
+
+static noinline int __sync_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p = *root;
+	unsigned int idx;
+
+	if (!p) {
+		p = __sync_alloc_leaf(NULL, id);
+		if (unlikely(!p))
+			return -ENOMEM;
+
+		goto found;
+	}
+
+	/* Caller handled the likely cached case */
+	GEM_BUG_ON(__sync_leaf_prefix(p, id) == p->prefix);
+
+	/* Climb back up the tree until we find a common prefix */
+	do {
+		if (!p->parent)
+			break;
+
+		p = p->parent;
+
+		if (__sync_branch_prefix(p, id) == p->prefix)
+			break;
+	} while (1);
+
+	/*
+	 * No shortcut, we have to descend the tree to find the right layer
+	 * containing this fence.
+	 *
+	 * Each layer in the tree holds 16 (KSYNCMAP) pointers, either fences
+	 * or lower layers. Leaf nodes (height = 0) contain the fences, all
+	 * other nodes (height > 0) are internal layers that point to a lower
+	 * node. Each internal layer has at least 2 descendents.
+	 *
+	 * Starting at the top, we check whether the current prefix matches. If
+	 * it doesn't, we have gone past our target and need to insert a join
+	 * into the tree, and a new leaf node for the target as a descendent
+	 * of the join, as well as the original layer.
+	 *
+	 * The matching prefix means we are still following the right branch
+	 * of the tree. If it has height 0, we have found our leaf and just
+	 * need to replace the fence slot with ourselves. If the height is
+	 * not zero, our slot contains the next layer in the tree (unless
+	 * it is empty, in which case we can add ourselves as a new leaf).
+	 * As descend the tree the prefix grows (and height decreases).
+	 */
+	do {
+		struct i915_syncmap *next;
+
+		if (__sync_branch_prefix(p, id) != p->prefix) {
+			unsigned int above;
+
+			/* Insert a join above the current layer */
+			next = kzalloc(sizeof(*next) + KSYNCMAP * sizeof(next),
+				       GFP_KERNEL);
+			if (unlikely(!next))
+				return -ENOMEM;
+
+			/* Compute the height at which these two diverge */
+			above = fls64(__sync_branch_prefix(p, id) ^ p->prefix);
+			above = round_up(above, SHIFT);
+			next->height = above + p->height;
+			next->prefix = __sync_branch_prefix(next, id);
+
+			/* Insert the join into the parent */
+			if (p->parent) {
+				idx = __sync_branch_idx(p->parent, id);
+				__sync_child(p->parent)[idx] = next;
+				GEM_BUG_ON(!(p->parent->bitmap & BIT(idx)));
+			}
+			next->parent = p->parent;
+
+			/* Compute the idx of the other branch, not our id! */
+			idx = p->prefix >> (above - SHIFT) & MASK;
+			__sync_set_child(next, idx, p);
+			p->parent = next;
+
+			/* Ascend to the join */
+			p = next;
+		} else {
+			if (!p->height)
+				break;
+		}
+
+		/* Descend into the next layer */
+		GEM_BUG_ON(!p->height);
+		idx = __sync_branch_idx(p, id);
+		next = __sync_child(p)[idx];
+		if (!next) {
+			next = __sync_alloc_leaf(p, id);
+			if (unlikely(!next))
+				return -ENOMEM;
+
+			__sync_set_child(p, idx, next);
+			p = next;
+			break;
+		}
+
+		p = next;
+	} while (1);
+
+found:
+	GEM_BUG_ON(p->prefix != __sync_leaf_prefix(p, id));
+	__sync_set_seqno(p, id, seqno);
+	*root = p;
+	return 0;
+}
+
+/**
+ * i915_syncmap_set -- mark the most recent syncpoint between contexts
+ * @root - pointer to the #i915_syncmap
+ * @id - the context id (other timeline) we have synchronised to
+ * @seqno - the sequence number along the other timeline
+ *
+ * When we synchronise this @root timeline with another (@id), we also know
+ * that we have synchronized with all previous seqno along that timeline. If
+ * we then have a request to synchronise with the same seqno or older, we can
+ * omit it, see i915_syncmap_is_later()
+ *
+ * Returns 0 on success, or a negative error code.
+ */
+int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno)
+{
+	struct i915_syncmap *p = *root;
+
+	/*
+	 * We expect to be called in sequence following is_later(id), which
+	 * should have preloaded the root for us.
+	 */
+	if (likely(p && __sync_leaf_prefix(p, id) == p->prefix)) {
+		__sync_set_seqno(p, id, seqno);
+		return 0;
+	}
+
+	return __sync_set(root, id, seqno);
+}
+
+static void __sync_free(struct i915_syncmap *p)
+{
+	if (p->height) {
+		unsigned int i;
+
+		while ((i = ffs(p->bitmap))) {
+			p->bitmap &= ~0u << i;
+			__sync_free(__sync_child(p)[i - 1]);
+		}
+	}
+
+	kfree(p);
+}
+
+/**
+ * i915_syncmap_free -- free all memory associated with the syncmap
+ * @root - pointer to the #i915_syncmap
+ *
+ * Either when the timeline is to be freed and we no longer need the sync
+ * point tracking, or when the fences are all known to be signaled and the
+ * sync point tracking is redundant, we can free the #i915_syncmap to recover
+ * its allocations.
+ *
+ * Will reinitialise the @root pointer so that the #i915_syncmap is ready for
+ * reuse.
+ */
+void i915_syncmap_free(struct i915_syncmap **root)
+{
+	struct i915_syncmap *p;
+
+	p = *root;
+	if (!p)
+		return;
+
+	while (p->parent)
+		p = p->parent;
+
+	__sync_free(p);
+	*root = NULL;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_syncmap.c"
+#endif
--- a/drivers/gpu/drm/i915/i915_syncmap.h
+++ b/drivers/gpu/drm/i915/i915_syncmap.h
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __I915_SYNCMAP_H__
+#define __I915_SYNCMAP_H__
+
+#include <linux/types.h>
+
+struct i915_syncmap;
+#define KSYNCMAP 16 /* radix of the tree, how many slots in each layer */
+
+void i915_syncmap_init(struct i915_syncmap **root);
+int i915_syncmap_set(struct i915_syncmap **root, u64 id, u32 seqno);
+bool i915_syncmap_is_later(struct i915_syncmap **root, u64 id, u32 seqno);
+void i915_syncmap_free(struct i915_syncmap **root);
+
+#endif /* __I915_SYNCMAP_H__ */
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -181,13 +181,10 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
 	struct drm_device *dev = &dev_priv->drm;
 	struct i915_gem_context *ctx;
-	u32 *temp = NULL; /* Just here to make handling failures easy */
 	int slice = (int)(uintptr_t)attr->private;
+	u32 **remap_info;
 	int ret;

-	if (!HAS_HW_CONTEXTS(dev_priv))
-		return -ENXIO;
-
 	ret = l3_access_valid(dev_priv, offset);
 	if (ret)
 		return ret;
@@ -196,11 +193,12 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	if (ret)
 		return ret;

-	if (!dev_priv->l3_parity.remap_info[slice]) {
-		temp = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
-		if (!temp) {
-			mutex_unlock(&dev->struct_mutex);
-			return -ENOMEM;
+	remap_info = &dev_priv->l3_parity.remap_info[slice];
+	if (!*remap_info) {
+		*remap_info = kzalloc(GEN7_L3LOG_SIZE, GFP_KERNEL);
+		if (!*remap_info) {
+			ret = -ENOMEM;
+			goto out;
 		}
 	}

@@ -208,18 +206,18 @@ i915_l3_write(struct file *filp, struct kobject *kobj,
 	 * aren't propagated. Since I cannot find a stable way to reset the GPU
 	 * at this point it is left as a TODO.
 	*/
-	if (temp)
-		dev_priv->l3_parity.remap_info[slice] = temp;
-
-	memcpy(dev_priv->l3_parity.remap_info[slice] + (offset/4), buf, count);
+	memcpy(*remap_info + (offset/4), buf, count);

 	/* NB: We defer the remapping until we switch to the context */
 	list_for_each_entry(ctx, &dev_priv->context_list, link)
 		ctx->remap_slice |= (1<<slice);

+	ret = count;
+
+out:
 	mutex_unlock(&dev->struct_mutex);

-	return count;
+	return ret;
 }

 static struct bin_attribute dpf_attrs = {

--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -89,6 +89,55 @@ TRACE_EVENT(intel_memory_cxsr,
 		      __entry->frame[PIPE_C], __entry->scanline[PIPE_C])
 );

+TRACE_EVENT(g4x_wm,
+	    TP_PROTO(struct intel_crtc *crtc, const struct g4x_wm_values *wm),
+	    TP_ARGS(crtc, wm),
+
+	    TP_STRUCT__entry(
+			     __field(enum pipe, pipe)
+			     __field(u32, frame)
+			     __field(u32, scanline)
+			     __field(u16, primary)
+			     __field(u16, sprite)
+			     __field(u16, cursor)
+			     __field(u16, sr_plane)
+			     __field(u16, sr_cursor)
+			     __field(u16, sr_fbc)
+			     __field(u16, hpll_plane)
+			     __field(u16, hpll_cursor)
+			     __field(u16, hpll_fbc)
+			     __field(bool, cxsr)
+			     __field(bool, hpll)
+			     __field(bool, fbc)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->pipe = crtc->pipe;
+			   __entry->frame = crtc->base.dev->driver->get_vblank_counter(crtc->base.dev,
+										       crtc->pipe);
+			   __entry->scanline = intel_get_crtc_scanline(crtc);
+			   __entry->primary = wm->pipe[crtc->pipe].plane[PLANE_PRIMARY];
+			   __entry->sprite = wm->pipe[crtc->pipe].plane[PLANE_SPRITE0];
+			   __entry->cursor = wm->pipe[crtc->pipe].plane[PLANE_CURSOR];
+			   __entry->sr_plane = wm->sr.plane;
+			   __entry->sr_cursor = wm->sr.cursor;
+			   __entry->sr_fbc = wm->sr.fbc;
+			   __entry->hpll_plane = wm->hpll.plane;
+			   __entry->hpll_cursor = wm->hpll.cursor;
+			   __entry->hpll_fbc = wm->hpll.fbc;
+			   __entry->cxsr = wm->cxsr;
+			   __entry->hpll = wm->hpll_en;
+			   __entry->fbc = wm->fbc_en;
+			   ),
+
+	    TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
+		      pipe_name(__entry->pipe), __entry->frame, __entry->scanline,
+		      __entry->primary, __entry->sprite, __entry->cursor,
+		      yesno(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc,
+		      yesno(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc,
+		      yesno(__entry->fbc))
+);
+
 TRACE_EVENT(vlv_wm,
 	    TP_PROTO(struct intel_crtc *crtc, const struct vlv_wm_values *wm),
 	    TP_ARGS(crtc, wm),

--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -70,20 +70,27 @@
 #define overflows_type(x, T) \
 	(sizeof(x) > sizeof(T) && (x) >> (sizeof(T) * BITS_PER_BYTE))

-#define ptr_mask_bits(ptr) ({						\
+#define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
-	(typeof(ptr))(__v & PAGE_MASK);					\
+	(typeof(ptr))(__v & -BIT(n));					\
 })

-#define ptr_unpack_bits(ptr, bits) ({					\
+#define ptr_unmask_bits(ptr, n) ((unsigned long)(ptr) & (BIT(n) - 1))
+
+#define ptr_unpack_bits(ptr, bits, n) ({				\
 	unsigned long __v = (unsigned long)(ptr);			\
-	(bits) = __v & ~PAGE_MASK;					\
-	(typeof(ptr))(__v & PAGE_MASK);					\
+	*(bits) = __v & (BIT(n) - 1);					\
+	(typeof(ptr))(__v & -BIT(n));					\
 })

-#define ptr_pack_bits(ptr, bits)					\
+#define ptr_pack_bits(ptr, bits, n)					\
 	((typeof(ptr))((unsigned long)(ptr) | (bits)))

+#define page_mask_bits(ptr) ptr_mask_bits(ptr, PAGE_SHIFT)
+#define page_unmask_bits(ptr) ptr_unmask_bits(ptr, PAGE_SHIFT)
+#define page_pack_bits(ptr, bits) ptr_pack_bits(ptr, bits, PAGE_SHIFT)
+#define page_unpack_bits(ptr, bits) ptr_unpack_bits(ptr, bits, PAGE_SHIFT)
+
 #define ptr_offset(ptr, member) offsetof(typeof(*(ptr)), member)

 #define fetch_and_zero(ptr) ({						\
@@ -92,4 +99,19 @@
 	__T;								\
 })

+#define __mask_next_bit(mask) ({					\
+	int __idx = ffs(mask) - 1;					\
+	mask &= ~BIT(__idx);						\
+	__idx;								\
+})
+
+#include <linux/list.h>
+
+static inline void __list_del_many(struct list_head *head,
+				   struct list_head *first)
+{
+	first->prev = head;
+	WRITE_ONCE(head->next, first);
+}
+
 #endif /* !__I915_UTILS_H */
--- a/drivers/gpu/drm/i915/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/intel_atomic_plane.c
@@ -102,23 +102,7 @@ void
 intel_plane_destroy_state(struct drm_plane *plane,
 			  struct drm_plane_state *state)
 {
-	struct i915_vma *vma;
-
-	vma = fetch_and_zero(&to_intel_plane_state(state)->vma);
-
-	/*
-	 * FIXME: Normally intel_cleanup_plane_fb handles destruction of vma.
-	 * We currently don't clear all planes during driver unload, so we have
-	 * to be able to unpin vma here for now.
-	 *
-	 * Normally this can only happen during unload when kmscon is disabled
-	 * and userspace doesn't attempt to set a framebuffer at all.
-	 */
-	if (vma) {
-		mutex_lock(&plane->dev->struct_mutex);
-		intel_unpin_fb_vma(vma);
-		mutex_unlock(&plane->dev->struct_mutex);
-	}
+	WARN_ON(to_intel_plane_state(state)->vma);

 	drm_atomic_helper_plane_destroy_state(plane, state);
 }
@@ -185,7 +169,7 @@ int intel_plane_atomic_check_with_state(struct intel_crtc_state *crtc_state,
 	}

 	intel_state->base.visible = false;
-	ret = intel_plane->check_plane(plane, crtc_state, intel_state);
+	ret = intel_plane->check_plane(intel_plane, crtc_state, intel_state);
 	if (ret)
 		return ret;

@@ -235,14 +219,14 @@ static void intel_plane_atomic_update(struct drm_plane *plane,
 		trace_intel_update_plane(plane,
 					 to_intel_crtc(crtc));

-		intel_plane->update_plane(plane,
+		intel_plane->update_plane(intel_plane,
 					  to_intel_crtc_state(crtc->state),
 					  intel_state);
 	} else {
 		trace_intel_disable_plane(plane,
 					  to_intel_crtc(crtc));

-		intel_plane->disable_plane(plane, crtc);
+		intel_plane->disable_plane(intel_plane, to_intel_crtc(crtc));
 	}
 }


--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -632,20 +632,9 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder,
 						 (int) port, (int) pipe);
 	}

-	switch (intel_encoder->type) {
-	case INTEL_OUTPUT_HDMI:
-		intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
-				       crtc_state->port_clock,
-				       false, 0);
-		break;
-	case INTEL_OUTPUT_DP:
-		intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
-				       adjusted_mode->crtc_clock,
-				       true, crtc_state->port_clock);
-		break;
-	default:
-		break;
-	}
+	intel_lpe_audio_notify(dev_priv, pipe, port, connector->eld,
+			       crtc_state->port_clock,
+			       intel_encoder->type == INTEL_OUTPUT_DP);
 }

 /**
@@ -680,7 +669,7 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder)
 						 (int) port, (int) pipe);
 	}

-	intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0);
+	intel_lpe_audio_notify(dev_priv, pipe, port, NULL, 0, false);
 }

 /**

--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -64,10 +64,12 @@ static unsigned long wait_timeout(void)

 static noinline void missed_breadcrumb(struct intel_engine_cs *engine)
 {
-	DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s\n",
+	DRM_DEBUG_DRIVER("%s missed breadcrumb at %pF, irq posted? %s, current seqno=%x, last=%x\n",
 			 engine->name, __builtin_return_address(0),
 			 yesno(test_bit(ENGINE_IRQ_BREADCRUMB,
-					&engine->irq_posted)));
+					&engine->irq_posted)),
+			 intel_engine_get_seqno(engine),
+			 intel_engine_last_submit(engine));

 	set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
 }
@@ -665,12 +667,13 @@ static int intel_breadcrumbs_signaler(void *arg)
 	return 0;
 }

-void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
+void intel_engine_enable_signaling(struct drm_i915_gem_request *request,
+				   bool wakeup)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	struct rb_node *parent, **p;
-	bool first, wakeup;
+	bool first;
 	u32 seqno;

 	/* Note that we may be called from an interrupt handler on another
@@ -703,7 +706,7 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request)
 	 * If we are the oldest waiter, enable the irq (after which we
 	 * must double check that the seqno did not complete).
 	 */
-	wakeup = __intel_engine_add_wait(engine, &request->signaling.wait);
+	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);

 	/* Now insert ourselves into the retirement ordered list of signals
 	 * on this engine. We track the oldest seqno as that will be the

--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1071,9 +1071,15 @@ static int bxt_calc_cdclk(int max_pixclk)

 static int glk_calc_cdclk(int max_pixclk)
 {
-	if (max_pixclk > 2 * 158400)
+	/*
+	 * FIXME: Avoid using a pixel clock that is more than 99% of the cdclk
+	 * as a temporary workaround. Use a higher cdclk instead. (Note that
+	 * intel_compute_max_dotclk() limits the max pixel clock to 99% of max
+	 * cdclk.)
+	 */
+	if (max_pixclk > DIV_ROUND_UP(2 * 158400 * 99, 100))
 		return 316800;
-	else if (max_pixclk > 2 * 79200)
+	else if (max_pixclk > DIV_ROUND_UP(2 * 79200 * 99, 100))
 		return 158400;
 	else
 		return 79200;
@@ -1664,7 +1670,11 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv)
 	int max_cdclk_freq = dev_priv->max_cdclk_freq;

 	if (IS_GEMINILAKE(dev_priv))
-		return 2 * max_cdclk_freq;
+		/*
+		 * FIXME: Limiting to 99% as a temporary workaround. See
+		 * glk_calc_cdclk() for details.
+		 */
+		return 2 * max_cdclk_freq * 99 / 100;
 	else if (INTEL_INFO(dev_priv)->gen >= 9 ||
 		 IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
 		return max_cdclk_freq;
@@ -1798,13 +1808,11 @@ static int g4x_hrawclk(struct drm_i915_private *dev_priv)
 	case CLKCFG_FSB_800:
 		return 200000;
 	case CLKCFG_FSB_1067:
+	case CLKCFG_FSB_1067_ALT:
 		return 266667;
 	case CLKCFG_FSB_1333:
+	case CLKCFG_FSB_1333_ALT:
 		return 333333;
-	/* these two are just a guess; one of them might be right */
-	case CLKCFG_FSB_1600:
-	case CLKCFG_FSB_1600_ALT:
-		return 400000;
 	default:
 		return 133333;
 	}

--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -777,13 +777,6 @@ static int intel_crt_get_modes(struct drm_connector *connector)
 	return ret;
 }

-static int intel_crt_set_property(struct drm_connector *connector,
-				  struct drm_property *property,
-				  uint64_t value)
-{
-	return 0;
-}
-
 void intel_crt_reset(struct drm_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->dev);
@@ -814,10 +807,9 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = {
 	.late_register = intel_connector_register,
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_crt_destroy,
-	.set_property = intel_crt_set_property,
+	.set_property = drm_atomic_helper_connector_set_property,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_get_property = intel_connector_atomic_get_property,
 };

 static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = {

--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -337,7 +337,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
 		for_each_pipe(dev_priv, pipe)
 			info->num_sprites[pipe] = 2;
-	} else if (INTEL_GEN(dev_priv) >= 5) {
+	} else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) {
 		for_each_pipe(dev_priv, pipe)
 			info->num_sprites[pipe] = 1;
 	}

--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
--- a/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
+++ b/drivers/gpu/drm/i915/intel_dp_aux_backlight.c
@@ -28,6 +28,10 @@ static void set_aux_backlight_enable(struct intel_dp *intel_dp, bool enable)
 {
 	uint8_t reg_val = 0;

+	/* Early return when display use other mechanism to enable backlight. */
+	if (!(intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP))
+		return;
+
 	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_EDP_DISPLAY_CONTROL_REGISTER,
 			      &reg_val) < 0) {
 		DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
@@ -97,15 +101,37 @@ static void intel_dp_aux_enable_backlight(struct intel_connector *connector)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&connector->encoder->base);
 	uint8_t dpcd_buf = 0;
+	uint8_t edp_backlight_mode = 0;

-	set_aux_backlight_enable(intel_dp, true);
+	if (drm_dp_dpcd_readb(&intel_dp->aux,
+			DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) != 1) {
+		DRM_DEBUG_KMS("Failed to read DPCD register 0x%x\n",
+			      DP_EDP_BACKLIGHT_MODE_SET_REGISTER);
+		return;
+	}
+
+	edp_backlight_mode = dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+
+	switch (edp_backlight_mode) {
+	case DP_EDP_BACKLIGHT_CONTROL_MODE_PWM:
+	case DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET:
+	case DP_EDP_BACKLIGHT_CONTROL_MODE_PRODUCT:
+		dpcd_buf &= ~DP_EDP_BACKLIGHT_CONTROL_MODE_MASK;
+		dpcd_buf |= DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD;
+		if (drm_dp_dpcd_writeb(&intel_dp->aux,
+			DP_EDP_BACKLIGHT_MODE_SET_REGISTER, dpcd_buf) < 0) {
+			DRM_DEBUG_KMS("Failed to write aux backlight mode\n");
+		}
+		break;
+
+	/* Do nothing when it is already DPCD mode */
+	case DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD:
+	default:
+		break;
+	}

-	if ((drm_dp_dpcd_readb(&intel_dp->aux,
-			       DP_EDP_BACKLIGHT_MODE_SET_REGISTER, &dpcd_buf) == 1) &&
-	    ((dpcd_buf & DP_EDP_BACKLIGHT_CONTROL_MODE_MASK) ==
-	     DP_EDP_BACKLIGHT_CONTROL_MODE_PRESET))
-		drm_dp_dpcd_writeb(&intel_dp->aux, DP_EDP_BACKLIGHT_MODE_SET_REGISTER,
-				   (dpcd_buf | DP_EDP_BACKLIGHT_CONTROL_MODE_DPCD));
+	set_aux_backlight_enable(intel_dp, true);
+	intel_dp_aux_set_backlight(connector, connector->panel.backlight.level);
 }

 static void intel_dp_aux_disable_backlight(struct intel_connector *connector)
@@ -143,9 +169,8 @@ intel_dp_aux_display_control_capable(struct intel_connector *connector)
 	 * the panel can support backlight control over the aux channel
 	 */
 	if (intel_dp->edp_dpcd[1] & DP_EDP_TCON_BACKLIGHT_ADJUSTMENT_CAP &&
-	    (intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_AUX_ENABLE_CAP) &&
-	    !((intel_dp->edp_dpcd[1] & DP_EDP_BACKLIGHT_PIN_ENABLE_CAP) ||
-	      (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP))) {
+	    (intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_AUX_SET_CAP) &&
+	    !(intel_dp->edp_dpcd[2] & DP_EDP_BACKLIGHT_BRIGHTNESS_PWM_PIN_CAP)) {
 		DRM_DEBUG_KMS("AUX Backlight Control Supported!\n");
 		return true;
 	}

--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
--- a/drivers/gpu/drm/i915/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c
--- a/drivers/gpu/drm/i915/intel_dvo.c
+++ b/drivers/gpu/drm/i915/intel_dvo.c
@@ -350,7 +350,7 @@ static const struct drm_connector_funcs intel_dvo_connector_funcs = {
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_dvo_destroy,
 	.fill_modes = drm_helper_probe_single_connector_modes,
-	.atomic_get_property = intel_connector_atomic_get_property,
+	.set_property = drm_atomic_helper_connector_set_property,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
 };

--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
--- a/drivers/gpu/drm/i915/intel_guc_ct.c
+++ b/drivers/gpu/drm/i915/intel_guc_ct.c
--- a/drivers/gpu/drm/i915/intel_guc_ct.h
+++ b/drivers/gpu/drm/i915/intel_guc_ct.h
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
--- a/drivers/gpu/drm/i915/intel_uncore.h
+++ b/drivers/gpu/drm/i915/intel_uncore.h
--- a/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_coherency.c
--- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c
--- a/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_dmabuf.c
--- a/drivers/gpu/drm/i915/selftests/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_object.c
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
--- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
--- a/drivers/gpu/drm/i915/selftests/i915_random.c
+++ b/drivers/gpu/drm/i915/selftests/i915_random.c
--- a/drivers/gpu/drm/i915/selftests/i915_random.h
+++ b/drivers/gpu/drm/i915/selftests/i915_random.h
--- a/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/selftests/i915_sw_fence.c
--- a/drivers/gpu/drm/i915/selftests/i915_syncmap.c
+++ b/drivers/gpu/drm/i915/selftests/i915_syncmap.c
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h
--- a/drivers/gpu/drm/i915/selftests/mock_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c
--- a/drivers/gpu/drm/i915/selftests/mock_uncore.h
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.h
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
--- a/include/drm/intel_lpe_audio.h
+++ b/include/drm/intel_lpe_audio.h
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
--- a/sound/x86/intel_hdmi_audio.c
+++ b/sound/x86/intel_hdmi_audio.c
--- a/sound/x86/intel_hdmi_audio.h
+++ b/sound/x86/intel_hdmi_audio.h