diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 42a4b85b0eae3dd8d61d22dc65db6e9ae5a5c7ca..55cdb4d30a16fe25cea64bf6f10f6994a20e5a81 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -262,6 +262,25 @@ gen6_render_ring_flush(struct intel_ring_buffer *ring, return 0; } +static int +gen7_render_ring_cs_stall_wa(struct intel_ring_buffer *ring) +{ + int ret; + + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); + intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_STALL_AT_SCOREBOARD); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + + return 0; +} + static int gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 invalidate_domains, u32 flush_domains) @@ -271,6 +290,16 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, u32 scratch_addr = pc->gtt_offset + 128; int ret; + /* + * Ensure that any following seqno writes only happen when the render + * cache is indeed flushed. + * + * Workaround: 4th PIPE_CONTROL command (except the ones with only + * read-cache invalidate bits set) must have the CS_STALL bit set. We + * don't try to be clever and just set it unconditionally. + */ + flags |= PIPE_CONTROL_CS_STALL; + /* Just flush everything. Experiments have shown that reducing the * number of bits based on the write domains has little performance * impact. @@ -278,11 +307,6 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, if (flush_domains) { flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* - * Ensure that any following seqno writes only happen - * when the render cache is indeed flushed. - */ - flags |= PIPE_CONTROL_CS_STALL; } if (invalidate_domains) { flags |= PIPE_CONTROL_TLB_INVALIDATE; @@ -295,6 +319,11 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, * TLB invalidate requires a post-sync write. */ flags |= PIPE_CONTROL_QW_WRITE; + + /* Workaround: we must issue a pipe_control with CS-stall bit + * set before a pipe_control command that has the state cache + * invalidate bit set. */ + gen7_render_ring_cs_stall_wa(ring); } ret = intel_ring_begin(ring, 4);