diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index dc10bc43864e30497122ac98e746a3896573c14e..9838d11e43ee622ec770edb2e3ec719da48fe5f9 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -251,7 +251,6 @@ static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { return (HAS_LLC(obj->base.dev) || obj->base.write_domain == I915_GEM_DOMAIN_CPU || - !obj->map_and_fenceable || obj->cache_level != I915_CACHE_NONE); } @@ -337,6 +336,51 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, return 0; } +static void +clflush_write32(void *addr, uint32_t value) +{ + /* This is not a fast path, so KISS. */ + drm_clflush_virt_range(addr, sizeof(uint32_t)); + *(uint32_t *)addr = value; + drm_clflush_virt_range(addr, sizeof(uint32_t)); +} + +static int +relocate_entry_clflush(struct drm_i915_gem_object *obj, + struct drm_i915_gem_relocation_entry *reloc, + uint64_t target_offset) +{ + struct drm_device *dev = obj->base.dev; + uint32_t page_offset = offset_in_page(reloc->offset); + uint64_t delta = (int)reloc->delta + target_offset; + char *vaddr; + int ret; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ret; + + vaddr = kmap_atomic(i915_gem_object_get_page(obj, + reloc->offset >> PAGE_SHIFT)); + clflush_write32(vaddr + page_offset, lower_32_bits(delta)); + + if (INTEL_INFO(dev)->gen >= 8) { + page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + + if (page_offset == 0) { + kunmap_atomic(vaddr); + vaddr = kmap_atomic(i915_gem_object_get_page(obj, + (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); + } + + clflush_write32(vaddr + page_offset, upper_32_bits(delta)); + } + + kunmap_atomic(vaddr); + + return 0; +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, @@ -426,8 +470,14 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (use_cpu_reloc(obj)) ret = relocate_entry_cpu(obj, reloc, target_offset); - else + else if (obj->map_and_fenceable) ret = relocate_entry_gtt(obj, reloc, target_offset); + else if (cpu_has_clflush) + ret = relocate_entry_clflush(obj, reloc, target_offset); + else { + WARN_ONCE(1, "Impossible case in relocation handling\n"); + ret = -ENODEV; + } if (ret) return ret; @@ -525,6 +575,12 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) return ret; } +static bool only_mappable_for_reloc(unsigned int flags) +{ + return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) == + __EXEC_OBJECT_NEEDS_MAP; +} + static int i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *ring, @@ -536,14 +592,21 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, int ret; flags = 0; - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) - flags |= PIN_GLOBAL | PIN_MAPPABLE; - if (entry->flags & EXEC_OBJECT_NEEDS_GTT) - flags |= PIN_GLOBAL; - if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) - flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + if (!drm_mm_node_allocated(&vma->node)) { + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) + flags |= PIN_GLOBAL | PIN_MAPPABLE; + if (entry->flags & EXEC_OBJECT_NEEDS_GTT) + flags |= PIN_GLOBAL; + if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) + flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; + } ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); + if ((ret == -ENOSPC || ret == -E2BIG) && + only_mappable_for_reloc(entry->flags)) + ret = i915_gem_object_pin(obj, vma->vm, + entry->alignment, + flags & ~(PIN_GLOBAL | PIN_MAPPABLE)); if (ret) return ret; @@ -605,13 +668,14 @@ eb_vma_misplaced(struct i915_vma *vma) vma->node.start & (entry->alignment - 1)) return true; - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) - return true; - if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && vma->node.start < BATCH_OFFSET_BIAS) return true; + /* avoid costly ping-pong once a batch bo ended up non-mappable */ + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) + return !only_mappable_for_reloc(entry->flags); + return false; }