diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 39aa318ad77917923ac24e09ec86a7d2925d2af8..69cc3bc20495f1f4ae8798a84b7bd4dac504cae7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1573,10 +1573,19 @@ gen6_ring_get_seqno(struct intel_engine_cs *engine, bool lazy_coherency)
 {
 	/* Workaround to force correct ordering between irq and seqno writes on
 	 * ivb (and maybe also on snb) by reading from a CS register (like
-	 * ACTHD) before reading the status page. */
+	 * ACTHD) before reading the status page.
+	 *
+	 * Note that this effectively stalls the read by the time it takes to
+	 * do a memory transaction, which more or less ensures that the write
+	 * from the GPU has sufficient time to invalidate the CPU cacheline.
+	 * Alternatively we could delay the interrupt from the CS ring to give
+	 * the write time to land, but that would incur a delay after every
+	 * batch, i.e. much more frequently than a delay taken only when
+	 * waiting for the interrupt (with the same net latency).
+	 */
 	if (!lazy_coherency) {
 		struct drm_i915_private *dev_priv = engine->dev->dev_private;
-		POSTING_READ(RING_ACTHD(engine->mmio_base));
+		POSTING_READ_FW(RING_ACTHD(engine->mmio_base));
 	}
 
 	return intel_read_status_page(engine, I915_GEM_HWS_INDEX);