提交 d0a57789 编写于 作者: C Chris Wilson 提交者: Daniel Vetter

drm/i915: Only insert the mb() before updating the fence parameter

With a fence, we only need to insert a memory barrier around the actual
fence alteration for CPU accesses through the GTT. Performing the
barrier in flush-fence was inserting unnecessary and expensive barriers
for never fenced objects.

Note removing the barriers from flush-fence, which was effectively a
barrier before every direct access through the GTT, revealed that we
where missing a barrier before the first access through the GTT. Lack of
that barrier was sufficient to cause GPU hangs.

v2: Add a couple more comments to explain the new barriers
Signed-off-by: NChris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: NDaniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: NJesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: NDaniel Vetter <daniel.vetter@ffwll.ch>
上级 1f83fee0
...@@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg, ...@@ -2611,9 +2611,22 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
POSTING_READ(FENCE_REG_830_0 + reg * 4); POSTING_READ(FENCE_REG_830_0 + reg * 4);
} }
inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj)
{
return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT;
}
static void i915_gem_write_fence(struct drm_device *dev, int reg, static void i915_gem_write_fence(struct drm_device *dev, int reg,
struct drm_i915_gem_object *obj) struct drm_i915_gem_object *obj)
{ {
struct drm_i915_private *dev_priv = dev->dev_private;
/* Ensure that all CPU reads are completed before installing a fence
* and all writes before removing the fence.
*/
if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj))
mb();
switch (INTEL_INFO(dev)->gen) { switch (INTEL_INFO(dev)->gen) {
case 7: case 7:
case 6: case 6:
...@@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, ...@@ -2623,6 +2636,12 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg,
case 2: i830_write_fence_reg(dev, reg, obj); break; case 2: i830_write_fence_reg(dev, reg, obj); break;
default: BUG(); default: BUG();
} }
/* And similarly be paranoid that no direct access to this region
* is reordered to before the fence is installed.
*/
if (i915_gem_object_needs_mb(obj))
mb();
} }
static inline int fence_number(struct drm_i915_private *dev_priv, static inline int fence_number(struct drm_i915_private *dev_priv,
...@@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, ...@@ -2652,7 +2671,7 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
} }
static int static int
i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
{ {
if (obj->last_fenced_seqno) { if (obj->last_fenced_seqno) {
int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno);
...@@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) ...@@ -2662,12 +2681,6 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj)
obj->last_fenced_seqno = 0; obj->last_fenced_seqno = 0;
} }
/* Ensure that all CPU reads are completed before installing a fence
* and all writes before removing the fence.
*/
if (obj->base.read_domains & I915_GEM_DOMAIN_GTT)
mb();
obj->fenced_gpu_access = false; obj->fenced_gpu_access = false;
return 0; return 0;
} }
...@@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj) ...@@ -2678,7 +2691,7 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
struct drm_i915_private *dev_priv = obj->base.dev->dev_private; struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
int ret; int ret;
ret = i915_gem_object_flush_fence(obj); ret = i915_gem_object_wait_fence(obj);
if (ret) if (ret)
return ret; return ret;
...@@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) ...@@ -2752,7 +2765,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
* will need to serialise the write to the associated fence register? * will need to serialise the write to the associated fence register?
*/ */
if (obj->fence_dirty) { if (obj->fence_dirty) {
ret = i915_gem_object_flush_fence(obj); ret = i915_gem_object_wait_fence(obj);
if (ret) if (ret)
return ret; return ret;
} }
...@@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) ...@@ -2773,7 +2786,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
if (reg->obj) { if (reg->obj) {
struct drm_i915_gem_object *old = reg->obj; struct drm_i915_gem_object *old = reg->obj;
ret = i915_gem_object_flush_fence(old); ret = i915_gem_object_wait_fence(old);
if (ret) if (ret)
return ret; return ret;
...@@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) ...@@ -3068,6 +3081,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
i915_gem_object_flush_cpu_write_domain(obj); i915_gem_object_flush_cpu_write_domain(obj);
/* Serialise direct access to this object with the barriers for
* coherent writes from the GPU, by effectively invalidating the
* GTT domain upon first access.
*/
if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
mb();
old_write_domain = obj->base.write_domain; old_write_domain = obj->base.write_domain;
old_read_domains = obj->base.read_domains; old_read_domains = obj->base.read_domains;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册