diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 38390f7c6ab679ef3ed3ce0c4f9e28b7fa1e11b5..4dfbb80f0fd50263c9b0a0e21cce67faddfaecde 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -1686,7 +1686,7 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev, } EXPORT_SYMBOL(intel_gmch_probe); -const struct intel_gtt *intel_gtt_get(void) +struct intel_gtt *intel_gtt_get(void) { return &intel_private.base; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ff06e3239ada6f6d99d7877fa223f6f74bbeac52..1eea5be43617df03fc7ab97e9a6c942db45937a8 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1496,19 +1496,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) goto free_priv; } - ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); - if (!ret) { - DRM_ERROR("failed to set up gmch\n"); - ret = -EIO; + ret = i915_gem_gtt_init(dev); + if (ret) goto put_bridge; - } - - dev_priv->mm.gtt = intel_gtt_get(); - if (!dev_priv->mm.gtt) { - DRM_ERROR("Failed to initialize GTT\n"); - ret = -ENODEV; - goto put_gmch; - } i915_kick_out_firmware_fb(dev_priv); @@ -1683,7 +1673,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) out_rmmap: pci_iounmap(dev->pdev, dev_priv->regs); put_gmch: - intel_gmch_remove(); + i915_gem_gtt_fini(dev); put_bridge: pci_dev_put(dev_priv->bridge_dev); free_priv: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c4339c2b1b57e8bffa9e4b95d42e88e23e6c89a9..f316916fe65e43a4ce6cfb9e845461a8774940f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -746,7 +746,7 @@ typedef struct drm_i915_private { struct { /** Bridge to intel-gtt-ko */ - const struct intel_gtt *gtt; + struct intel_gtt *gtt; /** Memory allocator for GTT stolen memory */ struct drm_mm stolen; /** Memory allocator for GTT */ @@ -1538,6 +1538,14 @@ void i915_gem_init_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, unsigned long end); +int i915_gem_gtt_init(struct drm_device *dev); +void i915_gem_gtt_fini(struct drm_device *dev); +extern inline void i915_gem_chipset_flush(struct drm_device *dev) +{ + if (INTEL_INFO(dev)->gen < 6) + intel_gtt_chipset_flush(); +} + /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct drm_device *dev, int min_size, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d8eaebfea93e4128b4354d488900eb5fa4e960d4..c161fdbd830fb406f68fb80959128f33bfed6c8d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -845,12 +845,12 @@ i915_gem_shmem_pwrite(struct drm_device *dev, * domain anymore. */ if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { i915_gem_clflush_object(obj); - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); } } if (needs_clflush_after) - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); return ret; } @@ -3058,7 +3058,7 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) return; i915_gem_clflush_object(obj); - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(obj->base.dev); old_write_domain = obj->base.write_domain; obj->base.write_domain = 0; @@ -3959,7 +3959,7 @@ i915_gem_init_hw(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; int ret; - if (!intel_enable_gtt()) + if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt()) return -EIO; if (IS_HASWELL(dev) && (I915_READ(0x120010) == 1)) @@ -4294,7 +4294,7 @@ void i915_gem_detach_phys_object(struct drm_device *dev, page_cache_release(page); } } - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); obj->phys_obj->cur_obj = NULL; obj->phys_obj = NULL; @@ -4381,7 +4381,7 @@ i915_gem_phys_pwrite(struct drm_device *dev, return -EFAULT; } - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 91d43d5c4526ac05a431e070570d729f800b4993..d80e9dd00c48ffe90094b8b668ac37ca55ce0aed 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -672,7 +672,7 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, } if (flush_domains & I915_GEM_DOMAIN_CPU) - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(ring->dev); if (flush_domains & I915_GEM_DOMAIN_GTT) wmb(); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 06202fd6dbdd0e6cd5bfd523ad1e73989d3aad5a..e74be0c2c6a5145764282b27a6c3c81568ce8675 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -262,26 +262,6 @@ void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt, obj->base.size >> PAGE_SHIFT); } -/* XXX kill agp_type! */ -static unsigned int cache_level_to_agp_type(struct drm_device *dev, - enum i915_cache_level cache_level) -{ - switch (cache_level) { - case I915_CACHE_LLC_MLC: - /* Older chipsets do not have this extra level of CPU - * cacheing, so fallthrough and request the PTE simply - * as cached. - */ - if (INTEL_INFO(dev)->gen >= 6 && !IS_HASWELL(dev)) - return AGP_USER_CACHED_MEMORY_LLC_MLC; - case I915_CACHE_LLC: - return AGP_USER_CACHED_MEMORY; - default: - case I915_CACHE_NONE: - return AGP_USER_MEMORY; - } -} - static bool do_idling(struct drm_i915_private *dev_priv) { bool ret = dev_priv->mm.interruptible; @@ -304,13 +284,38 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible) dev_priv->mm.interruptible = interruptible; } + +static void i915_ggtt_clear_range(struct drm_device *dev, + unsigned first_entry, + unsigned num_entries) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + gtt_pte_t scratch_pte; + volatile void __iomem *gtt_base = dev_priv->mm.gtt->gtt + first_entry; + const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; + + if (INTEL_INFO(dev)->gen < 6) { + intel_gtt_clear_range(first_entry, num_entries); + return; + } + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC); + memset_io(gtt_base, scratch_pte, num_entries * sizeof(scratch_pte)); + readl(gtt_base); +} + void i915_gem_restore_gtt_mappings(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; /* First fill our portion of the GTT with scratch pages */ - intel_gtt_clear_range(dev_priv->mm.gtt_start / PAGE_SIZE, + i915_ggtt_clear_range(dev, dev_priv->mm.gtt_start / PAGE_SIZE, (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE); list_for_each_entry(obj, &dev_priv->mm.bound_list, gtt_list) { @@ -318,7 +323,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) i915_gem_gtt_bind_object(obj, obj->cache_level); } - intel_gtt_chipset_flush(); + i915_gem_chipset_flush(dev); } int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) @@ -334,21 +339,69 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) return 0; } +/* + * Binds an object into the global gtt with the specified cache level. The object + * will be accessible to the GPU via commands whose operands reference offsets + * within the global GTT as well as accessible by the GPU through the GMADR + * mapped BAR (dev_priv->mm.gtt->gtt). + */ +static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj, + enum i915_cache_level level) +{ + struct drm_device *dev = obj->base.dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct sg_table *st = obj->pages; + struct scatterlist *sg = st->sgl; + const int first_entry = obj->gtt_space->start >> PAGE_SHIFT; + const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry; + gtt_pte_t __iomem *gtt_entries = dev_priv->mm.gtt->gtt + first_entry; + int unused, i = 0; + unsigned int len, m = 0; + dma_addr_t addr; + + for_each_sg(st->sgl, sg, st->nents, unused) { + len = sg_dma_len(sg) >> PAGE_SHIFT; + for (m = 0; m < len; m++) { + addr = sg_dma_address(sg) + (m << PAGE_SHIFT); + gtt_entries[i] = pte_encode(dev, addr, level); + i++; + } + } + + BUG_ON(i > max_entries); + BUG_ON(i != obj->base.size / PAGE_SIZE); + + /* XXX: This serves as a posting read to make sure that the PTE has + * actually been updated. There is some concern that even though + * registers and PTEs are within the same BAR that they are potentially + * of NUMA access patterns. Therefore, even with the way we assume + * hardware should work, we must keep this posting read for paranoia. + */ + if (i != 0) + WARN_ON(readl(>t_entries[i-1]) != pte_encode(dev, addr, level)); +} + void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { struct drm_device *dev = obj->base.dev; - unsigned int agp_type = cache_level_to_agp_type(dev, cache_level); + if (INTEL_INFO(dev)->gen < 6) { + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + intel_gtt_insert_sg_entries(obj->pages, + obj->gtt_space->start >> PAGE_SHIFT, + flags); + } else { + gen6_ggtt_bind_object(obj, cache_level); + } - intel_gtt_insert_sg_entries(obj->pages, - obj->gtt_space->start >> PAGE_SHIFT, - agp_type); obj->has_global_gtt_mapping = 1; } void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj) { - intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT, + i915_ggtt_clear_range(obj->base.dev, + obj->gtt_space->start >> PAGE_SHIFT, obj->base.size >> PAGE_SHIFT); obj->has_global_gtt_mapping = 0; @@ -406,5 +459,153 @@ void i915_gem_init_global_gtt(struct drm_device *dev, dev_priv->mm.mappable_gtt_total = min(end, mappable_end) - start; /* ... but ensure that we clear the entire range. */ - intel_gtt_clear_range(start / PAGE_SIZE, (end-start) / PAGE_SIZE); + i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE); +} + +static int setup_scratch_page(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct page *page; + dma_addr_t dma_addr; + + page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); + if (page == NULL) + return -ENOMEM; + get_page(page); + set_pages_uc(page, 1); + +#ifdef CONFIG_INTEL_IOMMU + dma_addr = pci_map_page(dev->pdev, page, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(dev->pdev, dma_addr)) + return -EINVAL; +#else + dma_addr = page_to_phys(page); +#endif + dev_priv->mm.gtt->scratch_page = page; + dev_priv->mm.gtt->scratch_page_dma = dma_addr; + + return 0; +} + +static void teardown_scratch_page(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + set_pages_wb(dev_priv->mm.gtt->scratch_page, 1); + pci_unmap_page(dev->pdev, dev_priv->mm.gtt->scratch_page_dma, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + put_page(dev_priv->mm.gtt->scratch_page); + __free_page(dev_priv->mm.gtt->scratch_page); +} + +static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; + return snb_gmch_ctl << 20; +} + +static inline unsigned int gen6_get_stolen_size(u16 snb_gmch_ctl) +{ + snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; + snb_gmch_ctl &= SNB_GMCH_GMS_MASK; + return snb_gmch_ctl << 25; /* 32 MB units */ +} + +int i915_gem_gtt_init(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + phys_addr_t gtt_bus_addr; + u16 snb_gmch_ctl; + u32 tmp; + int ret; + + /* On modern platforms we need not worry ourself with the legacy + * hostbridge query stuff. Skip it entirely + */ + if (INTEL_INFO(dev)->gen < 6) { + ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL); + if (!ret) { + DRM_ERROR("failed to set up gmch\n"); + return -EIO; + } + + dev_priv->mm.gtt = intel_gtt_get(); + if (!dev_priv->mm.gtt) { + DRM_ERROR("Failed to initialize GTT\n"); + intel_gmch_remove(); + return -ENODEV; + } + return 0; + } + + dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL); + if (!dev_priv->mm.gtt) + return -ENOMEM; + + if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) + pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); + + pci_read_config_dword(dev->pdev, PCI_BASE_ADDRESS_0, &tmp); + /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */ + gtt_bus_addr = (tmp & PCI_BASE_ADDRESS_MEM_MASK) + (2<<20); + + pci_read_config_dword(dev->pdev, PCI_BASE_ADDRESS_2, &tmp); + dev_priv->mm.gtt->gma_bus_addr = tmp & PCI_BASE_ADDRESS_MEM_MASK; + + /* i9xx_setup */ + pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + dev_priv->mm.gtt->gtt_total_entries = + gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t); + dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); + + dev_priv->mm.gtt->gtt_mappable_entries = pci_resource_len(dev->pdev, 2) >> PAGE_SHIFT; + /* 64/512MB is the current min/max we actually know of, but this is just a + * coarse sanity check. + */ + if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 || + dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) { + DRM_ERROR("Unknown GMADR entries (%d)\n", + dev_priv->mm.gtt->gtt_mappable_entries); + ret = -ENXIO; + goto err_out; + } + + ret = setup_scratch_page(dev); + if (ret) { + DRM_ERROR("Scratch setup failed\n"); + goto err_out; + } + + dev_priv->mm.gtt->gtt = ioremap(gtt_bus_addr, + dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t)); + if (!dev_priv->mm.gtt->gtt) { + DRM_ERROR("Failed to map the gtt page table\n"); + teardown_scratch_page(dev); + ret = -ENOMEM; + goto err_out; + } + + /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */ + DRM_INFO("Memory Usable by graphics device = %dK\n", dev_priv->mm.gtt->gtt_total_entries >> 10); + DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8); + DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20); + + return 0; + +err_out: + kfree(dev_priv->mm.gtt); + if (INTEL_INFO(dev)->gen < 6) + intel_gmch_remove(); + return ret; +} + +void i915_gem_gtt_fini(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + iounmap(dev_priv->mm.gtt->gtt); + teardown_scratch_page(dev); + if (INTEL_INFO(dev)->gen < 6) + intel_gmch_remove(); + kfree(dev_priv->mm.gtt); } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0866ac3d0a3f3e272dd98759f3a0d82f7a5d2d38..449403f60e4fdccac161f29b9937ab55f8834e75 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -41,6 +41,12 @@ */ #define INTEL_GMCH_CTRL 0x52 #define INTEL_GMCH_VGA_DISABLE (1 << 1) +#define SNB_GMCH_CTRL 0x50 +#define SNB_GMCH_GGMS_SHIFT 8 /* GTT Graphics Memory Size */ +#define SNB_GMCH_GGMS_MASK 0x3 +#define SNB_GMCH_GMS_SHIFT 3 /* Graphics Mode Select */ +#define SNB_GMCH_GMS_MASK 0x1f + /* PCI config space */ diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h index 2e37e9f02e7141429af5860144dde1f863342b86..94e8f2c7f9e12407181f3b195da3d38407e8823c 100644 --- a/include/drm/intel-gtt.h +++ b/include/drm/intel-gtt.h @@ -3,7 +3,7 @@ #ifndef _DRM_INTEL_GTT_H #define _DRM_INTEL_GTT_H -const struct intel_gtt { +struct intel_gtt { /* Size of memory reserved for graphics by the BIOS */ unsigned int stolen_size; /* Total number of gtt entries. */ @@ -17,6 +17,7 @@ const struct intel_gtt { unsigned int do_idle_maps : 1; /* Share the scratch page dma with ppgtts. */ dma_addr_t scratch_page_dma; + struct page *scratch_page; /* for ppgtt PDE access */ u32 __iomem *gtt; /* needed for ioremap in drm/i915 */