diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 1238b75fe3f8966d021eb4081b1366029559c65a..d26a092c70e8c8fe2a14df28dd22e4253f739a16 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -2537,7 +2537,8 @@ static int scan_workload(struct intel_vgpu_workload *workload)
 	s.rb_va = workload->shadow_ring_buffer_va;
 	s.workload = workload;
 
-	if (bypass_scan_mask & (1 << workload->ring_id))
+	if ((bypass_scan_mask & (1 << workload->ring_id)) ||
+		gma_head == gma_tail)
 		return 0;
 
 	ret = ip_gma_set(&s, gma_head);
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c
index 7e1da1c563ca8453cf2eab237994a6b8f5eb6c88..bda85dff7b2a998d68d1485479c9687eab206600 100644
--- a/drivers/gpu/drm/i915/gvt/edid.c
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -502,8 +502,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
 	 * ACK of I2C_WRITE
 	 * returned byte if it is READ
 	 */
-
-	aux_data_for_write |= (GVT_AUX_I2C_REPLY_ACK & 0xff) << 24;
+	aux_data_for_write |= GVT_AUX_I2C_REPLY_ACK << 24;
 	vgpu_vreg(vgpu, offset + 4) = aux_data_for_write;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/edid.h b/drivers/gpu/drm/i915/gvt/edid.h
index de366b1d5196e7dc8cc664951661f895498a5478..f6dfc8b795ec2f3d57d3314052b72dd98eabd599 100644
--- a/drivers/gpu/drm/i915/gvt/edid.h
+++ b/drivers/gpu/drm/i915/gvt/edid.h
@@ -44,7 +44,7 @@
 #define GVT_AUX_I2C_READ			0x1
 #define GVT_AUX_I2C_STATUS			0x2
 #define GVT_AUX_I2C_MOT				0x4
-#define GVT_AUX_I2C_REPLY_ACK			(0x0 << 6)
+#define GVT_AUX_I2C_REPLY_ACK			0x0
 
 struct intel_vgpu_edid_data {
 	bool data_valid;
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index c1f6019d889572938f1019f78078654f41958d98..f32bb6f6495ce0aafddf35d920298c5d315af9bc 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -838,23 +838,21 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 }
 
 void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
-		unsigned long ring_bitmap)
+		unsigned long engine_mask)
 {
-	int bit;
-	struct list_head *pos, *n;
-	struct intel_vgpu_workload *workload = NULL;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine;
+	struct intel_vgpu_workload *pos, *n;
+	unsigned int tmp;
 
-	for_each_set_bit(bit, &ring_bitmap, sizeof(ring_bitmap) * 8) {
-		if (bit >= I915_NUM_ENGINES)
-			break;
+	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
 		/* free the unsubmited workload in the queue */
-		list_for_each_safe(pos, n, &vgpu->workload_q_head[bit]) {
-			workload = container_of(pos,
-					struct intel_vgpu_workload, list);
-			list_del_init(&workload->list);
-			free_workload(workload);
+		list_for_each_entry_safe(pos, n,
+			&vgpu->workload_q_head[engine->id], list) {
+			list_del_init(&pos->list);
+			free_workload(pos);
 		}
 
-		init_vgpu_execlist(vgpu, bit);
+		init_vgpu_execlist(vgpu, engine->id);
 	}
 }
diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h
index 635f31c6dcc161bb8af01489760646edbfc174a3..7eced40a1e309fe02b564e719847b6bbf424acf9 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.h
+++ b/drivers/gpu/drm/i915/gvt/execlist.h
@@ -183,6 +183,6 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu);
 int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id);
 
 void intel_vgpu_reset_execlist(struct intel_vgpu *vgpu,
-		unsigned long ring_bitmap);
+		unsigned long engine_mask);
 
 #endif /*_GVT_EXECLIST_H_*/
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 1b3db0c7a6db89412e19125e3452f5b8c7aa29b8..522809710312c25767209656133651971dd6a01f 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1279,14 +1279,12 @@ static int skl_misc_ctl_write(struct intel_vgpu *vgpu, unsigned int offset,
 	case 0x4ddc:
 		vgpu_vreg(vgpu, offset) = 0x8000003c;
 		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl */
-		if (IS_SKL_REVID(dev_priv, SKL_REVID_C0, REVID_FOREVER))
-			I915_WRITE(reg, vgpu_vreg(vgpu, offset));
+		I915_WRITE(reg, vgpu_vreg(vgpu, offset));
 		break;
 	case 0x42080:
 		vgpu_vreg(vgpu, offset) = 0x8000;
 		/* WaCompressedResourceDisplayNewHashMode:skl */
-		if (IS_SKL_REVID(dev_priv, SKL_REVID_E0, REVID_FOREVER))
-			I915_WRITE(reg, vgpu_vreg(vgpu, offset));
+		I915_WRITE(reg, vgpu_vreg(vgpu, offset));
 		break;
 	default:
 		return -EINVAL;
@@ -1372,6 +1370,7 @@ static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu,
 	unsigned int id = 0;
 
 	write_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg(vgpu, offset) = 0;
 
 	switch (offset) {
 	case 0x4260:
@@ -1508,7 +1507,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 
 	MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
 	MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK, NULL, NULL);
@@ -1517,7 +1516,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(0x2470, D_ALL, F_MODE_MASK, NULL, NULL);
 	MMIO_D(GAM_ECOCHK, D_ALL);
 	MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(0x9030, D_ALL);
 	MMIO_D(0x20a0, D_ALL);
 	MMIO_D(0x2420, D_ALL);
@@ -1526,7 +1525,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x2438, D_ALL);
 	MMIO_D(0x243c, D_ALL);
 	MMIO_DFH(0x7018, D_ALL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0xe184, D_ALL, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0xe100, D_ALL, F_MODE_MASK, NULL, NULL);
 
 	/* display */
@@ -2139,6 +2138,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN6_MBCTL, D_ALL);
 	MMIO_D(0x911c, D_ALL);
 	MMIO_D(0x9120, D_ALL);
+	MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL);
 
 	MMIO_D(GAB_CTL, D_ALL);
 	MMIO_D(0x48800, D_ALL);
@@ -2377,7 +2377,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, NULL);
 	MMIO_GM(0x1c080, D_BDW_PLUS, NULL, NULL);
 
-	MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
 	MMIO_D(CHICKEN_PIPESL_1(PIPE_A), D_BDW);
 	MMIO_D(CHICKEN_PIPESL_1(PIPE_B), D_BDW);
@@ -2396,7 +2396,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS);
 
 	MMIO_D(0xfdc, D_BDW);
-	MMIO_D(GEN8_ROW_CHICKEN, D_BDW_PLUS);
+	MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_D(GEN7_ROW_CHICKEN2, D_BDW_PLUS);
 	MMIO_D(GEN8_UCGCTL6, D_BDW_PLUS);
 
@@ -2407,10 +2407,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0xb10c, D_BDW);
 	MMIO_D(0xb110, D_BDW);
 
-	MMIO_DH(0x24d0, D_BDW_PLUS, NULL, NULL);
-	MMIO_DH(0x24d4, D_BDW_PLUS, NULL, NULL);
-	MMIO_DH(0x24d8, D_BDW_PLUS, NULL, NULL);
-	MMIO_DH(0x24dc, D_BDW_PLUS, NULL, NULL);
+	MMIO_DFH(0x24d0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x24d4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x24d8, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x24dc, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 
 	MMIO_D(0x83a4, D_BDW);
 	MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS);
@@ -2424,9 +2424,9 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x6e570, D_BDW_PLUS);
 	MMIO_D(0x65f10, D_BDW_PLUS);
 
-	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0xe180, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
 
 	MMIO_D(0x2248, D_BDW);
@@ -2457,6 +2457,7 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0xa210, D_SKL_PLUS);
 	MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
+	MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DH(0x4ddc, D_SKL, NULL, skl_misc_ctl_write);
 	MMIO_DH(0x42080, D_SKL, NULL, skl_misc_ctl_write);
 	MMIO_D(0x45504, D_SKL);
@@ -2606,8 +2607,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x51000, D_SKL);
 	MMIO_D(0x6c00c, D_SKL);
 
-	MMIO_F(0xc800, 0x7f8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(0xb020, 0x80, 0, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
+	MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
 
 	MMIO_D(0xd08, D_SKL);
 	MMIO_D(0x20e0, D_SKL);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 5bf4d73d57d9de434c344a4407af1cdf5cba8416..dc036503315741f266223486922ca4ed67afc6de 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -43,20 +43,16 @@
 #include "i915_drv.h"
 #include "gvt.h"
 
-#if IS_ENABLED(CONFIG_VFIO_MDEV)
-#include <linux/mdev.h>
-#else
-static inline long vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
+static inline long kvmgt_pin_pages(struct device *dev, unsigned long *user_pfn,
 			long npage, int prot, unsigned long *phys_pfn)
 {
 	return 0;
 }
-static inline long vfio_unpin_pages(struct device *dev, unsigned long *pfn,
+static inline long kvmgt_unpin_pages(struct device *dev, unsigned long *pfn,
 			long npage)
 {
 	return 0;
 }
-#endif
 
 static const struct intel_gvt_ops *intel_gvt_ops;
 
@@ -183,7 +179,7 @@ static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
 	}
 
 	pfn = this->pfn;
-	WARN_ON((vfio_unpin_pages(dev, &pfn, 1) != 1));
+	WARN_ON((kvmgt_unpin_pages(dev, &pfn, 1) != 1));
 	__gvt_cache_remove_entry(vgpu, this);
 	mutex_unlock(&vgpu->vdev.cache_lock);
 }
@@ -206,7 +202,7 @@ static void gvt_cache_destroy(struct intel_vgpu *vgpu)
 		dma = rb_entry(node, struct gvt_dma, node);
 		pfn = dma->pfn;
 
-		vfio_unpin_pages(dev, &pfn, 1);
+		kvmgt_unpin_pages(dev, &pfn, 1);
 		__gvt_cache_remove_entry(vgpu, dma);
 	}
 	mutex_unlock(&vgpu->vdev.cache_lock);
@@ -512,8 +508,8 @@ static unsigned long kvmgt_gfn_to_pfn(unsigned long handle, unsigned long gfn)
 	if (pfn != 0)
 		return pfn;
 
-	rc = vfio_pin_pages(info->vgpu->vdev.mdev, &gfn, 1,
-				IOMMU_READ | IOMMU_WRITE, &pfn);
+	rc = kvmgt_pin_pages(info->vgpu->vdev.mdev, &gfn, 1,
+			     IOMMU_READ | IOMMU_WRITE, &pfn);
 	if (rc != 1) {
 		gvt_err("kvmgt_pin_pages failed for gfn: 0x%lx\n", gfn);
 		return 0;
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index 7d87c43661c58627012bbc6d870b70b9a9854678..f898df38dd9a79f8b6dbc208ed803dba76b78635 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -160,8 +160,6 @@ static int shadow_context_status_change(struct notifier_block *nb,
 
 static int dispatch_workload(struct intel_vgpu_workload *workload)
 {
-	struct intel_vgpu *vgpu = workload->vgpu;
-	struct intel_gvt *gvt = vgpu->gvt;
 	int ring_id = workload->ring_id;
 	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
@@ -174,6 +172,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	shadow_ctx->desc_template = workload->ctx_desc.addressing_mode <<
 				    GEN8_CTX_ADDRESSING_MODE_SHIFT;
 
+	mutex_lock(&dev_priv->drm.struct_mutex);
+
 	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
 	if (IS_ERR(rq)) {
 		gvt_err("fail to allocate gem request\n");
@@ -185,40 +185,35 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 
 	workload->req = i915_gem_request_get(rq);
 
-	mutex_lock(&gvt->lock);
-
 	ret = intel_gvt_scan_and_shadow_workload(workload);
 	if (ret)
-		goto err;
+		goto out;
 
 	ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
 	if (ret)
-		goto err;
+		goto out;
 
 	ret = populate_shadow_context(workload);
 	if (ret)
-		goto err;
+		goto out;
 
 	if (workload->prepare) {
 		ret = workload->prepare(workload);
 		if (ret)
-			goto err;
+			goto out;
 	}
 
-	mutex_unlock(&gvt->lock);
-
 	gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
 			ring_id, workload->req);
 
-	i915_add_request_no_flush(rq);
+	ret = 0;
 	workload->dispatched = true;
-	return 0;
-err:
-	workload->status = ret;
-
-	mutex_unlock(&gvt->lock);
+out:
+	if (ret)
+		workload->status = ret;
 
 	i915_add_request_no_flush(rq);
+	mutex_unlock(&dev_priv->drm.struct_mutex);
 	return ret;
 }
 
@@ -438,9 +433,9 @@ static int workload_thread(void *priv)
 			intel_uncore_forcewake_get(gvt->dev_priv,
 					FORCEWAKE_ALL);
 
-		mutex_lock(&gvt->dev_priv->drm.struct_mutex);
+		mutex_lock(&gvt->lock);
 		ret = dispatch_workload(workload);
-		mutex_unlock(&gvt->dev_priv->drm.struct_mutex);
+		mutex_unlock(&gvt->lock);
 
 		if (ret) {
 			gvt_err("fail to dispatch workload, skip\n");
@@ -463,9 +458,7 @@ static int workload_thread(void *priv)
 		gvt_dbg_sched("will complete workload %p\n, status: %d\n",
 				workload, workload->status);
 
-		mutex_lock(&gvt->dev_priv->drm.struct_mutex);
 		complete_current_workload(gvt, ring_id);
-		mutex_unlock(&gvt->dev_priv->drm.struct_mutex);
 
 		i915_gem_request_put(fetch_and_zero(&workload->req));
 
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 3a15feadc1df67959680c2589715f573cfacd2ee..4f64845d8a4c1fd9af8133157dd624f1268258fe 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -46,9 +46,13 @@ int setup_vgpu_mmio(struct intel_vgpu *vgpu)
 	struct intel_gvt *gvt = vgpu->gvt;
 	const struct intel_gvt_device_info *info = &gvt->device_info;
 
-	vgpu->mmio.vreg = vzalloc(info->mmio_size * 2);
-	if (!vgpu->mmio.vreg)
-		return -ENOMEM;
+	if (vgpu->mmio.vreg)
+		memset(vgpu->mmio.vreg, 0, info->mmio_size * 2);
+	else {
+		vgpu->mmio.vreg = vzalloc(info->mmio_size * 2);
+		if (!vgpu->mmio.vreg)
+			return -ENOMEM;
+	}
 
 	vgpu->mmio.sreg = vgpu->mmio.vreg + info->mmio_size;
 
@@ -95,6 +99,7 @@ static void setup_vgpu_cfg_space(struct intel_vgpu *vgpu,
 	 */
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4);
 	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4);
+	memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
 
 	for (i = 0; i < INTEL_GVT_MAX_BAR_NUM; i++) {
 		vgpu->cfg_space.bar[i].size = pci_resource_len(