diff --git a/drivers/gpu/drm/gma500/gtt.c b/drivers/gpu/drm/gma500/gtt.c new file mode 100644 index 0000000000000000000000000000000000000000..461ead251bbdc4c2f112afdbbfa8c5560a1ed98d --- /dev/null +++ b/drivers/gpu/drm/gma500/gtt.c @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2007, Intel Corporation. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Authors: Thomas Hellstrom + * Alan Cox + */ + +#include +#include "psb_drv.h" + + +/* + * GTT resource allocator - manage page mappings in GTT space + */ + +/** + * psb_gtt_mask_pte - generate GTT pte entry + * @pfn: page number to encode + * @type: type of memory in the GTT + * + * Set the GTT entry for the appropriate memory type. + */ +static inline uint32_t psb_gtt_mask_pte(uint32_t pfn, int type) +{ + uint32_t mask = PSB_PTE_VALID; + + if (type & PSB_MMU_CACHED_MEMORY) + mask |= PSB_PTE_CACHED; + if (type & PSB_MMU_RO_MEMORY) + mask |= PSB_PTE_RO; + if (type & PSB_MMU_WO_MEMORY) + mask |= PSB_PTE_WO; + + return (pfn << PAGE_SHIFT) | mask; +} + +/** + * psb_gtt_entry - find the GTT entries for a gtt_range + * @dev: our DRM device + * @r: our GTT range + * + * Given a gtt_range object return the GTT offset of the page table + * entries for this gtt_range + */ +u32 *psb_gtt_entry(struct drm_device *dev, struct gtt_range *r) +{ + struct drm_psb_private *dev_priv = dev->dev_private; + unsigned long offset; + + offset = r->resource.start - dev_priv->gtt_mem->start; + + return dev_priv->gtt_map + (offset >> PAGE_SHIFT); +} + +/** + * psb_gtt_insert - put an object into the GTT + * @dev: our DRM device + * @r: our GTT range + * + * Take our preallocated GTT range and insert the GEM object into + * the GTT. + * + * FIXME: gtt lock ? + */ +static int psb_gtt_insert(struct drm_device *dev, struct gtt_range *r) +{ + u32 *gtt_slot, pte; + struct page **pages; + int i; + + if (r->pages == NULL) { + WARN_ON(1); + return -EINVAL; + } + + WARN_ON(r->stolen); /* refcount these maybe ? */ + + gtt_slot = psb_gtt_entry(dev, r); + pages = r->pages; + + /* Make sure changes are visible to the GPU */ + set_pages_array_uc(pages, r->npage); + + /* Write our page entries into the GTT itself */ + for (i = 0; i < r->npage; i++) { + pte = psb_gtt_mask_pte(page_to_pfn(*pages++), 0/*type*/); + iowrite32(pte, gtt_slot++); + } + /* Make sure all the entries are set before we return */ + ioread32(gtt_slot - 1); + return 0; +} + +/** + * psb_gtt_remove - remove an object from the GTT + * @dev: our DRM device + * @r: our GTT range + * + * Remove a preallocated GTT range from the GTT. Overwrite all the + * page table entries with the dummy page + */ + +static void psb_gtt_remove(struct drm_device *dev, struct gtt_range *r) +{ + struct drm_psb_private *dev_priv = dev->dev_private; + u32 *gtt_slot, pte; + int i; + + WARN_ON(r->stolen); + + gtt_slot = psb_gtt_entry(dev, r); + pte = psb_gtt_mask_pte(page_to_pfn(dev_priv->scratch_page), 0); + + for (i = 0; i < r->npage; i++) + iowrite32(pte, gtt_slot++); + ioread32(gtt_slot - 1); + set_pages_array_wb(r->pages, r->npage); +} + +/** + * psb_gtt_attach_pages - attach and pin GEM pages + * @gt: the gtt range + * + * Pin and build an in kernel list of the pages that back our GEM object. + * While we hold this the pages cannot be swapped out + */ +static int psb_gtt_attach_pages(struct gtt_range *gt) +{ + struct inode *inode; + struct address_space *mapping; + int i; + struct page *p; + int pages = gt->gem.size / PAGE_SIZE; + + WARN_ON(gt->pages); + + /* This is the shared memory object that backs the GEM resource */ + inode = gt->gem.filp->f_path.dentry->d_inode; + mapping = inode->i_mapping; + + gt->pages = kmalloc(pages * sizeof(struct page *), GFP_KERNEL); + if (gt->pages == NULL) + return -ENOMEM; + gt->npage = pages; + + for (i = 0; i < pages; i++) { + /* FIXME: review flags later */ + p = read_cache_page_gfp(mapping, i, + __GFP_COLD | GFP_KERNEL); + if (IS_ERR(p)) + goto err; + gt->pages[i] = p; + } + return 0; + +err: + while (i--) + page_cache_release(gt->pages[i]); + kfree(gt->pages); + gt->pages = NULL; + return PTR_ERR(p); +} + +/** + * psb_gtt_detach_pages - attach and pin GEM pages + * @gt: the gtt range + * + * Undo the effect of psb_gtt_attach_pages. At this point the pages + * must have been removed from the GTT as they could now be paged out + * and move bus address. + */ +static void psb_gtt_detach_pages(struct gtt_range *gt) +{ + int i; + for (i = 0; i < gt->npage; i++) { + /* FIXME: do we need to force dirty */ + set_page_dirty(gt->pages[i]); + page_cache_release(gt->pages[i]); + } + kfree(gt->pages); + gt->pages = NULL; +} + +/** + * psb_gtt_pin - pin pages into the GTT + * @gt: range to pin + * + * Pin a set of pages into the GTT. The pins are refcounted so that + * multiple pins need multiple unpins to undo. + * + * Non GEM backed objects treat this as a no-op as they are always GTT + * backed objects. + */ +int psb_gtt_pin(struct gtt_range *gt) +{ + int ret = 0; + struct drm_device *dev = gt->gem.dev; + struct drm_psb_private *dev_priv = dev->dev_private; + + mutex_lock(&dev_priv->gtt_mutex); + + if (gt->in_gart == 0 && gt->stolen == 0) { + ret = psb_gtt_attach_pages(gt); + if (ret < 0) + goto out; + ret = psb_gtt_insert(dev, gt); + if (ret < 0) { + psb_gtt_detach_pages(gt); + goto out; + } + } + gt->in_gart++; +out: + mutex_unlock(&dev_priv->gtt_mutex); + return ret; +} + +/** + * psb_gtt_unpin - Drop a GTT pin requirement + * @gt: range to pin + * + * Undoes the effect of psb_gtt_pin. On the last drop the GEM object + * will be removed from the GTT which will also drop the page references + * and allow the VM to clean up or page stuff. + * + * Non GEM backed objects treat this as a no-op as they are always GTT + * backed objects. + */ +void psb_gtt_unpin(struct gtt_range *gt) +{ + struct drm_device *dev = gt->gem.dev; + struct drm_psb_private *dev_priv = dev->dev_private; + + mutex_lock(&dev_priv->gtt_mutex); + + WARN_ON(!gt->in_gart); + + gt->in_gart--; + if (gt->in_gart == 0 && gt->stolen == 0) { + psb_gtt_remove(dev, gt); + psb_gtt_detach_pages(gt); + } + mutex_unlock(&dev_priv->gtt_mutex); +} + +/* + * GTT resource allocator - allocate and manage GTT address space + */ + +/** + * psb_gtt_alloc_range - allocate GTT address space + * @dev: Our DRM device + * @len: length (bytes) of address space required + * @name: resource name + * @backed: resource should be backed by stolen pages + * + * Ask the kernel core to find us a suitable range of addresses + * to use for a GTT mapping. + * + * Returns a gtt_range structure describing the object, or NULL on + * error. On successful return the resource is both allocated and marked + * as in use. + */ +struct gtt_range *psb_gtt_alloc_range(struct drm_device *dev, int len, + const char *name, int backed) +{ + struct drm_psb_private *dev_priv = dev->dev_private; + struct gtt_range *gt; + struct resource *r = dev_priv->gtt_mem; + int ret; + unsigned long start, end; + + if (backed) { + /* The start of the GTT is the stolen pages */ + start = r->start; + end = r->start + dev_priv->gtt.stolen_size - 1; + } else { + /* The rest we will use for GEM backed objects */ + start = r->start + dev_priv->gtt.stolen_size; + end = r->end; + } + + gt = kzalloc(sizeof(struct gtt_range), GFP_KERNEL); + if (gt == NULL) + return NULL; + gt->resource.name = name; + gt->stolen = backed; + gt->in_gart = backed; + /* Ensure this is set for non GEM objects */ + gt->gem.dev = dev; + ret = allocate_resource(dev_priv->gtt_mem, >->resource, + len, start, end, PAGE_SIZE, NULL, NULL); + if (ret == 0) { + gt->offset = gt->resource.start - r->start; + return gt; + } + kfree(gt); + return NULL; +} + +/** + * psb_gtt_free_range - release GTT address space + * @dev: our DRM device + * @gt: a mapping created with psb_gtt_alloc_range + * + * Release a resource that was allocated with psb_gtt_alloc_range. If the + * object has been pinned by mmap users we clean this up here currently. + */ +void psb_gtt_free_range(struct drm_device *dev, struct gtt_range *gt) +{ + /* Undo the mmap pin if we are destroying the object */ + if (gt->mmapping) { + psb_gtt_unpin(gt); + gt->mmapping = 0; + } + WARN_ON(gt->in_gart && !gt->stolen); + release_resource(>->resource); + kfree(gt); +} + +void psb_gtt_alloc(struct drm_device *dev) +{ + struct drm_psb_private *dev_priv = dev->dev_private; + init_rwsem(&dev_priv->gtt.sem); +} + +void psb_gtt_takedown(struct drm_device *dev) +{ + struct drm_psb_private *dev_priv = dev->dev_private; + + if (dev_priv->gtt_map) { + iounmap(dev_priv->gtt_map); + dev_priv->gtt_map = NULL; + } + if (dev_priv->gtt_initialized) { + pci_write_config_word(dev->pdev, PSB_GMCH_CTRL, + dev_priv->gmch_ctrl); + PSB_WVDC32(dev_priv->pge_ctl, PSB_PGETBL_CTL); + (void) PSB_RVDC32(PSB_PGETBL_CTL); + } + if (dev_priv->vram_addr) + iounmap(dev_priv->gtt_map); +} + +int psb_gtt_init(struct drm_device *dev, int resume) +{ + struct drm_psb_private *dev_priv = dev->dev_private; + unsigned gtt_pages; + unsigned long stolen_size, vram_stolen_size; + unsigned i, num_pages; + unsigned pfn_base; + uint32_t vram_pages; + uint32_t dvmt_mode = 0; + struct psb_gtt *pg; + + int ret = 0; + uint32_t pte; + + mutex_init(&dev_priv->gtt_mutex); + + psb_gtt_alloc(dev); + pg = &dev_priv->gtt; + + /* Enable the GTT */ + pci_read_config_word(dev->pdev, PSB_GMCH_CTRL, &dev_priv->gmch_ctrl); + pci_write_config_word(dev->pdev, PSB_GMCH_CTRL, + dev_priv->gmch_ctrl | _PSB_GMCH_ENABLED); + + dev_priv->pge_ctl = PSB_RVDC32(PSB_PGETBL_CTL); + PSB_WVDC32(dev_priv->pge_ctl | _PSB_PGETBL_ENABLED, PSB_PGETBL_CTL); + (void) PSB_RVDC32(PSB_PGETBL_CTL); + + /* The root resource we allocate address space from */ + dev_priv->gtt_initialized = 1; + + pg->gtt_phys_start = dev_priv->pge_ctl & PAGE_MASK; + + /* + * FIXME: video mmu has hw bug to access 0x0D0000000, + * then make gatt start at 0x0e000,0000 + */ + pg->mmu_gatt_start = 0xE0000000; + + pg->gtt_start = pci_resource_start(dev->pdev, PSB_GTT_RESOURCE); + gtt_pages = pci_resource_len(dev->pdev, PSB_GTT_RESOURCE) + >> PAGE_SHIFT; + /* CDV workaround */ + if (pg->gtt_start == 0 || gtt_pages == 0) { + dev_err(dev->dev, "GTT PCI BAR not initialized.\n"); + gtt_pages = 64; + pg->gtt_start = dev_priv->pge_ctl; + } + + pg->gatt_start = pci_resource_start(dev->pdev, PSB_GATT_RESOURCE); + pg->gatt_pages = pci_resource_len(dev->pdev, PSB_GATT_RESOURCE) + >> PAGE_SHIFT; + dev_priv->gtt_mem = &dev->pdev->resource[PSB_GATT_RESOURCE]; + + if (pg->gatt_pages == 0 || pg->gatt_start == 0) { + static struct resource fudge; /* Preferably peppermint */ + + /* This can occur on CDV SDV systems. Fudge it in this case. + We really don't care what imaginary space is being allocated + at this point */ + dev_err(dev->dev, "GATT PCI BAR not initialized.\n"); + pg->gatt_start = 0x40000000; + pg->gatt_pages = (128 * 1024 * 1024) >> PAGE_SHIFT; + fudge.start = 0x40000000; + fudge.end = 0x40000000 + 128 * 1024 * 1024 - 1; + fudge.name = "fudge"; + fudge.flags = IORESOURCE_MEM; + dev_priv->gtt_mem = &fudge; + } + + pci_read_config_dword(dev->pdev, PSB_BSM, &dev_priv->stolen_base); + vram_stolen_size = pg->gtt_phys_start - dev_priv->stolen_base + - PAGE_SIZE; + + stolen_size = vram_stolen_size; + + printk(KERN_INFO "Stolen memory information\n"); + printk(KERN_INFO " base in RAM: 0x%x\n", dev_priv->stolen_base); + printk(KERN_INFO " size: %luK, calculated by (GTT RAM base) - (Stolen base), seems wrong\n", + vram_stolen_size/1024); + dvmt_mode = (dev_priv->gmch_ctrl >> 4) & 0x7; + printk(KERN_INFO " the correct size should be: %dM(dvmt mode=%d)\n", + (dvmt_mode == 1) ? 1 : (2 << (dvmt_mode - 1)), dvmt_mode); + + if (resume && (gtt_pages != pg->gtt_pages) && + (stolen_size != pg->stolen_size)) { + dev_err(dev->dev, "GTT resume error.\n"); + ret = -EINVAL; + goto out_err; + } + + pg->gtt_pages = gtt_pages; + pg->stolen_size = stolen_size; + dev_priv->vram_stolen_size = vram_stolen_size; + + /* + * Map the GTT and the stolen memory area + */ + dev_priv->gtt_map = ioremap_nocache(pg->gtt_phys_start, + gtt_pages << PAGE_SHIFT); + if (!dev_priv->gtt_map) { + dev_err(dev->dev, "Failure to map gtt.\n"); + ret = -ENOMEM; + goto out_err; + } + + dev_priv->vram_addr = ioremap_wc(dev_priv->stolen_base, stolen_size); + if (!dev_priv->vram_addr) { + dev_err(dev->dev, "Failure to map stolen base.\n"); + ret = -ENOMEM; + goto out_err; + } + + /* + * Insert vram stolen pages into the GTT + */ + + pfn_base = dev_priv->stolen_base >> PAGE_SHIFT; + vram_pages = num_pages = vram_stolen_size >> PAGE_SHIFT; + printk(KERN_INFO"Set up %d stolen pages starting at 0x%08x, GTT offset %dK\n", + num_pages, pfn_base << PAGE_SHIFT, 0); + for (i = 0; i < num_pages; ++i) { + pte = psb_gtt_mask_pte(pfn_base + i, 0); + iowrite32(pte, dev_priv->gtt_map + i); + } + + /* + * Init rest of GTT to the scratch page to avoid accidents or scribbles + */ + + pfn_base = page_to_pfn(dev_priv->scratch_page); + pte = psb_gtt_mask_pte(pfn_base, 0); + for (; i < gtt_pages; ++i) + iowrite32(pte, dev_priv->gtt_map + i); + + (void) ioread32(dev_priv->gtt_map + i - 1); + return 0; + +out_err: + psb_gtt_takedown(dev); + return ret; +} diff --git a/drivers/gpu/drm/gma500/gtt.h b/drivers/gpu/drm/gma500/gtt.h new file mode 100644 index 0000000000000000000000000000000000000000..e0e1cb6f9bd654827fe12115b72dd588b4de18e2 --- /dev/null +++ b/drivers/gpu/drm/gma500/gtt.h @@ -0,0 +1,61 @@ +/************************************************************************** + * Copyright (c) 2007-2008, Intel Corporation. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + **************************************************************************/ + +#ifndef _PSB_GTT_H_ +#define _PSB_GTT_H_ + +#include + +/* This wants cleaning up with respect to the psb_dev and un-needed stuff */ +struct psb_gtt { + uint32_t gatt_start; + uint32_t mmu_gatt_start; + uint32_t gtt_start; + uint32_t gtt_phys_start; + unsigned gtt_pages; + unsigned gatt_pages; + unsigned long stolen_size; + unsigned long vram_stolen_size; + struct rw_semaphore sem; +}; + +/* Exported functions */ +extern int psb_gtt_init(struct drm_device *dev, int resume); +extern void psb_gtt_takedown(struct drm_device *dev); + +/* Each gtt_range describes an allocation in the GTT area */ +struct gtt_range { + struct resource resource; /* Resource for our allocation */ + u32 offset; /* GTT offset of our object */ + struct drm_gem_object gem; /* GEM high level stuff */ + int in_gart; /* Currently in the GART (ref ct) */ + bool stolen; /* Backed from stolen RAM */ + bool mmapping; /* Is mmappable */ + struct page **pages; /* Backing pages if present */ + int npage; /* Number of backing pages */ +}; + +extern struct gtt_range *psb_gtt_alloc_range(struct drm_device *dev, int len, + const char *name, int backed); +extern void psb_gtt_kref_put(struct gtt_range *gt); +extern void psb_gtt_free_range(struct drm_device *dev, struct gtt_range *gt); +extern int psb_gtt_pin(struct gtt_range *gt); +extern void psb_gtt_unpin(struct gtt_range *gt); + +#endif diff --git a/drivers/gpu/drm/gma500/mmu.c b/drivers/gpu/drm/gma500/mmu.c new file mode 100644 index 0000000000000000000000000000000000000000..c904d73b1de3f99e9d391ccc0fd6a81d4635f104 --- /dev/null +++ b/drivers/gpu/drm/gma500/mmu.c @@ -0,0 +1,858 @@ +/************************************************************************** + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + **************************************************************************/ +#include +#include "psb_drv.h" +#include "psb_reg.h" + +/* + * Code for the SGX MMU: + */ + +/* + * clflush on one processor only: + * clflush should apparently flush the cache line on all processors in an + * SMP system. + */ + +/* + * kmap atomic: + * The usage of the slots must be completely encapsulated within a spinlock, and + * no other functions that may be using the locks for other purposed may be + * called from within the locked region. + * Since the slots are per processor, this will guarantee that we are the only + * user. + */ + +/* + * TODO: Inserting ptes from an interrupt handler: + * This may be desirable for some SGX functionality where the GPU can fault in + * needed pages. For that, we need to make an atomic insert_pages function, that + * may fail. + * If it fails, the caller need to insert the page using a workqueue function, + * but on average it should be fast. + */ + +struct psb_mmu_driver { + /* protects driver- and pd structures. Always take in read mode + * before taking the page table spinlock. + */ + struct rw_semaphore sem; + + /* protects page tables, directory tables and pt tables. + * and pt structures. + */ + spinlock_t lock; + + atomic_t needs_tlbflush; + + uint8_t __iomem *register_map; + struct psb_mmu_pd *default_pd; + /*uint32_t bif_ctrl;*/ + int has_clflush; + int clflush_add; + unsigned long clflush_mask; + + struct drm_psb_private *dev_priv; +}; + +struct psb_mmu_pd; + +struct psb_mmu_pt { + struct psb_mmu_pd *pd; + uint32_t index; + uint32_t count; + struct page *p; + uint32_t *v; +}; + +struct psb_mmu_pd { + struct psb_mmu_driver *driver; + int hw_context; + struct psb_mmu_pt **tables; + struct page *p; + struct page *dummy_pt; + struct page *dummy_page; + uint32_t pd_mask; + uint32_t invalid_pde; + uint32_t invalid_pte; +}; + +static inline uint32_t psb_mmu_pt_index(uint32_t offset) +{ + return (offset >> PSB_PTE_SHIFT) & 0x3FF; +} + +static inline uint32_t psb_mmu_pd_index(uint32_t offset) +{ + return offset >> PSB_PDE_SHIFT; +} + +static inline void psb_clflush(void *addr) +{ + __asm__ __volatile__("clflush (%0)\n" : : "r"(addr) : "memory"); +} + +static inline void psb_mmu_clflush(struct psb_mmu_driver *driver, + void *addr) +{ + if (!driver->has_clflush) + return; + + mb(); + psb_clflush(addr); + mb(); +} + +static void psb_page_clflush(struct psb_mmu_driver *driver, struct page* page) +{ + uint32_t clflush_add = driver->clflush_add >> PAGE_SHIFT; + uint32_t clflush_count = PAGE_SIZE / clflush_add; + int i; + uint8_t *clf; + + clf = kmap_atomic(page, KM_USER0); + mb(); + for (i = 0; i < clflush_count; ++i) { + psb_clflush(clf); + clf += clflush_add; + } + mb(); + kunmap_atomic(clf, KM_USER0); +} + +static void psb_pages_clflush(struct psb_mmu_driver *driver, + struct page *page[], unsigned long num_pages) +{ + int i; + + if (!driver->has_clflush) + return ; + + for (i = 0; i < num_pages; i++) + psb_page_clflush(driver, *page++); +} + +static void psb_mmu_flush_pd_locked(struct psb_mmu_driver *driver, + int force) +{ + atomic_set(&driver->needs_tlbflush, 0); +} + +static void psb_mmu_flush_pd(struct psb_mmu_driver *driver, int force) +{ + down_write(&driver->sem); + psb_mmu_flush_pd_locked(driver, force); + up_write(&driver->sem); +} + +void psb_mmu_flush(struct psb_mmu_driver *driver, int rc_prot) +{ + if (rc_prot) + down_write(&driver->sem); + if (rc_prot) + up_write(&driver->sem); +} + +void psb_mmu_set_pd_context(struct psb_mmu_pd *pd, int hw_context) +{ + /*ttm_tt_cache_flush(&pd->p, 1);*/ + psb_pages_clflush(pd->driver, &pd->p, 1); + down_write(&pd->driver->sem); + wmb(); + psb_mmu_flush_pd_locked(pd->driver, 1); + pd->hw_context = hw_context; + up_write(&pd->driver->sem); + +} + +static inline unsigned long psb_pd_addr_end(unsigned long addr, + unsigned long end) +{ + + addr = (addr + PSB_PDE_MASK + 1) & ~PSB_PDE_MASK; + return (addr < end) ? addr : end; +} + +static inline uint32_t psb_mmu_mask_pte(uint32_t pfn, int type) +{ + uint32_t mask = PSB_PTE_VALID; + + if (type & PSB_MMU_CACHED_MEMORY) + mask |= PSB_PTE_CACHED; + if (type & PSB_MMU_RO_MEMORY) + mask |= PSB_PTE_RO; + if (type & PSB_MMU_WO_MEMORY) + mask |= PSB_PTE_WO; + + return (pfn << PAGE_SHIFT) | mask; +} + +struct psb_mmu_pd *psb_mmu_alloc_pd(struct psb_mmu_driver *driver, + int trap_pagefaults, int invalid_type) +{ + struct psb_mmu_pd *pd = kmalloc(sizeof(*pd), GFP_KERNEL); + uint32_t *v; + int i; + + if (!pd) + return NULL; + + pd->p = alloc_page(GFP_DMA32); + if (!pd->p) + goto out_err1; + pd->dummy_pt = alloc_page(GFP_DMA32); + if (!pd->dummy_pt) + goto out_err2; + pd->dummy_page = alloc_page(GFP_DMA32); + if (!pd->dummy_page) + goto out_err3; + + if (!trap_pagefaults) { + pd->invalid_pde = + psb_mmu_mask_pte(page_to_pfn(pd->dummy_pt), + invalid_type); + pd->invalid_pte = + psb_mmu_mask_pte(page_to_pfn(pd->dummy_page), + invalid_type); + } else { + pd->invalid_pde = 0; + pd->invalid_pte = 0; + } + + v = kmap(pd->dummy_pt); + for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) + v[i] = pd->invalid_pte; + + kunmap(pd->dummy_pt); + + v = kmap(pd->p); + for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) + v[i] = pd->invalid_pde; + + kunmap(pd->p); + + clear_page(kmap(pd->dummy_page)); + kunmap(pd->dummy_page); + + pd->tables = vmalloc_user(sizeof(struct psb_mmu_pt *) * 1024); + if (!pd->tables) + goto out_err4; + + pd->hw_context = -1; + pd->pd_mask = PSB_PTE_VALID; + pd->driver = driver; + + return pd; + +out_err4: + __free_page(pd->dummy_page); +out_err3: + __free_page(pd->dummy_pt); +out_err2: + __free_page(pd->p); +out_err1: + kfree(pd); + return NULL; +} + +void psb_mmu_free_pt(struct psb_mmu_pt *pt) +{ + __free_page(pt->p); + kfree(pt); +} + +void psb_mmu_free_pagedir(struct psb_mmu_pd *pd) +{ + struct psb_mmu_driver *driver = pd->driver; + struct psb_mmu_pt *pt; + int i; + + down_write(&driver->sem); + if (pd->hw_context != -1) + psb_mmu_flush_pd_locked(driver, 1); + + /* Should take the spinlock here, but we don't need to do that + since we have the semaphore in write mode. */ + + for (i = 0; i < 1024; ++i) { + pt = pd->tables[i]; + if (pt) + psb_mmu_free_pt(pt); + } + + vfree(pd->tables); + __free_page(pd->dummy_page); + __free_page(pd->dummy_pt); + __free_page(pd->p); + kfree(pd); + up_write(&driver->sem); +} + +static struct psb_mmu_pt *psb_mmu_alloc_pt(struct psb_mmu_pd *pd) +{ + struct psb_mmu_pt *pt = kmalloc(sizeof(*pt), GFP_KERNEL); + void *v; + uint32_t clflush_add = pd->driver->clflush_add >> PAGE_SHIFT; + uint32_t clflush_count = PAGE_SIZE / clflush_add; + spinlock_t *lock = &pd->driver->lock; + uint8_t *clf; + uint32_t *ptes; + int i; + + if (!pt) + return NULL; + + pt->p = alloc_page(GFP_DMA32); + if (!pt->p) { + kfree(pt); + return NULL; + } + + spin_lock(lock); + + v = kmap_atomic(pt->p, KM_USER0); + clf = (uint8_t *) v; + ptes = (uint32_t *) v; + for (i = 0; i < (PAGE_SIZE / sizeof(uint32_t)); ++i) + *ptes++ = pd->invalid_pte; + + + if (pd->driver->has_clflush && pd->hw_context != -1) { + mb(); + for (i = 0; i < clflush_count; ++i) { + psb_clflush(clf); + clf += clflush_add; + } + mb(); + } + + kunmap_atomic(v, KM_USER0); + spin_unlock(lock); + + pt->count = 0; + pt->pd = pd; + pt->index = 0; + + return pt; +} + +struct psb_mmu_pt *psb_mmu_pt_alloc_map_lock(struct psb_mmu_pd *pd, + unsigned long addr) +{ + uint32_t index = psb_mmu_pd_index(addr); + struct psb_mmu_pt *pt; + uint32_t *v; + spinlock_t *lock = &pd->driver->lock; + + spin_lock(lock); + pt = pd->tables[index]; + while (!pt) { + spin_unlock(lock); + pt = psb_mmu_alloc_pt(pd); + if (!pt) + return NULL; + spin_lock(lock); + + if (pd->tables[index]) { + spin_unlock(lock); + psb_mmu_free_pt(pt); + spin_lock(lock); + pt = pd->tables[index]; + continue; + } + + v = kmap_atomic(pd->p, KM_USER0); + pd->tables[index] = pt; + v[index] = (page_to_pfn(pt->p) << 12) | pd->pd_mask; + pt->index = index; + kunmap_atomic((void *) v, KM_USER0); + + if (pd->hw_context != -1) { + psb_mmu_clflush(pd->driver, (void *) &v[index]); + atomic_set(&pd->driver->needs_tlbflush, 1); + } + } + pt->v = kmap_atomic(pt->p, KM_USER0); + return pt; +} + +static struct psb_mmu_pt *psb_mmu_pt_map_lock(struct psb_mmu_pd *pd, + unsigned long addr) +{ + uint32_t index = psb_mmu_pd_index(addr); + struct psb_mmu_pt *pt; + spinlock_t *lock = &pd->driver->lock; + + spin_lock(lock); + pt = pd->tables[index]; + if (!pt) { + spin_unlock(lock); + return NULL; + } + pt->v = kmap_atomic(pt->p, KM_USER0); + return pt; +} + +static void psb_mmu_pt_unmap_unlock(struct psb_mmu_pt *pt) +{ + struct psb_mmu_pd *pd = pt->pd; + uint32_t *v; + + kunmap_atomic(pt->v, KM_USER0); + if (pt->count == 0) { + v = kmap_atomic(pd->p, KM_USER0); + v[pt->index] = pd->invalid_pde; + pd->tables[pt->index] = NULL; + + if (pd->hw_context != -1) { + psb_mmu_clflush(pd->driver, + (void *) &v[pt->index]); + atomic_set(&pd->driver->needs_tlbflush, 1); + } + kunmap_atomic(pt->v, KM_USER0); + spin_unlock(&pd->driver->lock); + psb_mmu_free_pt(pt); + return; + } + spin_unlock(&pd->driver->lock); +} + +static inline void psb_mmu_set_pte(struct psb_mmu_pt *pt, + unsigned long addr, uint32_t pte) +{ + pt->v[psb_mmu_pt_index(addr)] = pte; +} + +static inline void psb_mmu_invalidate_pte(struct psb_mmu_pt *pt, + unsigned long addr) +{ + pt->v[psb_mmu_pt_index(addr)] = pt->pd->invalid_pte; +} + + +void psb_mmu_mirror_gtt(struct psb_mmu_pd *pd, + uint32_t mmu_offset, uint32_t gtt_start, + uint32_t gtt_pages) +{ + uint32_t *v; + uint32_t start = psb_mmu_pd_index(mmu_offset); + struct psb_mmu_driver *driver = pd->driver; + int num_pages = gtt_pages; + + down_read(&driver->sem); + spin_lock(&driver->lock); + + v = kmap_atomic(pd->p, KM_USER0); + v += start; + + while (gtt_pages--) { + *v++ = gtt_start | pd->pd_mask; + gtt_start += PAGE_SIZE; + } + + /*ttm_tt_cache_flush(&pd->p, num_pages);*/ + psb_pages_clflush(pd->driver, &pd->p, num_pages); + kunmap_atomic(v, KM_USER0); + spin_unlock(&driver->lock); + + if (pd->hw_context != -1) + atomic_set(&pd->driver->needs_tlbflush, 1); + + up_read(&pd->driver->sem); + psb_mmu_flush_pd(pd->driver, 0); +} + +struct psb_mmu_pd *psb_mmu_get_default_pd(struct psb_mmu_driver *driver) +{ + struct psb_mmu_pd *pd; + + /* down_read(&driver->sem); */ + pd = driver->default_pd; + /* up_read(&driver->sem); */ + + return pd; +} + +/* Returns the physical address of the PD shared by sgx/msvdx */ +uint32_t psb_get_default_pd_addr(struct psb_mmu_driver *driver) +{ + struct psb_mmu_pd *pd; + + pd = psb_mmu_get_default_pd(driver); + return page_to_pfn(pd->p) << PAGE_SHIFT; +} + +void psb_mmu_driver_takedown(struct psb_mmu_driver *driver) +{ + psb_mmu_free_pagedir(driver->default_pd); + kfree(driver); +} + +struct psb_mmu_driver *psb_mmu_driver_init(uint8_t __iomem * registers, + int trap_pagefaults, + int invalid_type, + struct drm_psb_private *dev_priv) +{ + struct psb_mmu_driver *driver; + + driver = kmalloc(sizeof(*driver), GFP_KERNEL); + + if (!driver) + return NULL; + driver->dev_priv = dev_priv; + + driver->default_pd = psb_mmu_alloc_pd(driver, trap_pagefaults, + invalid_type); + if (!driver->default_pd) + goto out_err1; + + spin_lock_init(&driver->lock); + init_rwsem(&driver->sem); + down_write(&driver->sem); + driver->register_map = registers; + atomic_set(&driver->needs_tlbflush, 1); + + driver->has_clflush = 0; + + if (boot_cpu_has(X86_FEATURE_CLFLSH)) { + uint32_t tfms, misc, cap0, cap4, clflush_size; + + /* + * clflush size is determined at kernel setup for x86_64 + * but not for i386. We have to do it here. + */ + + cpuid(0x00000001, &tfms, &misc, &cap0, &cap4); + clflush_size = ((misc >> 8) & 0xff) * 8; + driver->has_clflush = 1; + driver->clflush_add = + PAGE_SIZE * clflush_size / sizeof(uint32_t); + driver->clflush_mask = driver->clflush_add - 1; + driver->clflush_mask = ~driver->clflush_mask; + } + + up_write(&driver->sem); + return driver; + +out_err1: + kfree(driver); + return NULL; +} + +static void psb_mmu_flush_ptes(struct psb_mmu_pd *pd, + unsigned long address, uint32_t num_pages, + uint32_t desired_tile_stride, + uint32_t hw_tile_stride) +{ + struct psb_mmu_pt *pt; + uint32_t rows = 1; + uint32_t i; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long add; + unsigned long row_add; + unsigned long clflush_add = pd->driver->clflush_add; + unsigned long clflush_mask = pd->driver->clflush_mask; + + if (!pd->driver->has_clflush) { + /*ttm_tt_cache_flush(&pd->p, num_pages);*/ + psb_pages_clflush(pd->driver, &pd->p, num_pages); + return; + } + + if (hw_tile_stride) + rows = num_pages / desired_tile_stride; + else + desired_tile_stride = num_pages; + + add = desired_tile_stride << PAGE_SHIFT; + row_add = hw_tile_stride << PAGE_SHIFT; + mb(); + for (i = 0; i < rows; ++i) { + + addr = address; + end = addr + add; + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_map_lock(pd, addr); + if (!pt) + continue; + do { + psb_clflush(&pt->v + [psb_mmu_pt_index(addr)]); + } while (addr += + clflush_add, + (addr & clflush_mask) < next); + + psb_mmu_pt_unmap_unlock(pt); + } while (addr = next, next != end); + address += row_add; + } + mb(); +} + +void psb_mmu_remove_pfn_sequence(struct psb_mmu_pd *pd, + unsigned long address, uint32_t num_pages) +{ + struct psb_mmu_pt *pt; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long f_address = address; + + down_read(&pd->driver->sem); + + addr = address; + end = addr + (num_pages << PAGE_SHIFT); + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_alloc_map_lock(pd, addr); + if (!pt) + goto out; + do { + psb_mmu_invalidate_pte(pt, addr); + --pt->count; + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + +out: + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1); + + up_read(&pd->driver->sem); + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 0); + + return; +} + +void psb_mmu_remove_pages(struct psb_mmu_pd *pd, unsigned long address, + uint32_t num_pages, uint32_t desired_tile_stride, + uint32_t hw_tile_stride) +{ + struct psb_mmu_pt *pt; + uint32_t rows = 1; + uint32_t i; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long add; + unsigned long row_add; + unsigned long f_address = address; + + if (hw_tile_stride) + rows = num_pages / desired_tile_stride; + else + desired_tile_stride = num_pages; + + add = desired_tile_stride << PAGE_SHIFT; + row_add = hw_tile_stride << PAGE_SHIFT; + + /* down_read(&pd->driver->sem); */ + + /* Make sure we only need to flush this processor's cache */ + + for (i = 0; i < rows; ++i) { + + addr = address; + end = addr + add; + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_map_lock(pd, addr); + if (!pt) + continue; + do { + psb_mmu_invalidate_pte(pt, addr); + --pt->count; + + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + address += row_add; + } + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, + desired_tile_stride, hw_tile_stride); + + /* up_read(&pd->driver->sem); */ + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 0); +} + +int psb_mmu_insert_pfn_sequence(struct psb_mmu_pd *pd, uint32_t start_pfn, + unsigned long address, uint32_t num_pages, + int type) +{ + struct psb_mmu_pt *pt; + uint32_t pte; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long f_address = address; + int ret = 0; + + down_read(&pd->driver->sem); + + addr = address; + end = addr + (num_pages << PAGE_SHIFT); + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_alloc_map_lock(pd, addr); + if (!pt) { + ret = -ENOMEM; + goto out; + } + do { + pte = psb_mmu_mask_pte(start_pfn++, type); + psb_mmu_set_pte(pt, addr, pte); + pt->count++; + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + +out: + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, 1, 1); + + up_read(&pd->driver->sem); + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 1); + + return ret; +} + +int psb_mmu_insert_pages(struct psb_mmu_pd *pd, struct page **pages, + unsigned long address, uint32_t num_pages, + uint32_t desired_tile_stride, + uint32_t hw_tile_stride, int type) +{ + struct psb_mmu_pt *pt; + uint32_t rows = 1; + uint32_t i; + uint32_t pte; + unsigned long addr; + unsigned long end; + unsigned long next; + unsigned long add; + unsigned long row_add; + unsigned long f_address = address; + int ret = 0; + + if (hw_tile_stride) { + if (num_pages % desired_tile_stride != 0) + return -EINVAL; + rows = num_pages / desired_tile_stride; + } else { + desired_tile_stride = num_pages; + } + + add = desired_tile_stride << PAGE_SHIFT; + row_add = hw_tile_stride << PAGE_SHIFT; + + down_read(&pd->driver->sem); + + for (i = 0; i < rows; ++i) { + + addr = address; + end = addr + add; + + do { + next = psb_pd_addr_end(addr, end); + pt = psb_mmu_pt_alloc_map_lock(pd, addr); + if (!pt) { + ret = -ENOMEM; + goto out; + } + do { + pte = + psb_mmu_mask_pte(page_to_pfn(*pages++), + type); + psb_mmu_set_pte(pt, addr, pte); + pt->count++; + } while (addr += PAGE_SIZE, addr < next); + psb_mmu_pt_unmap_unlock(pt); + + } while (addr = next, next != end); + + address += row_add; + } +out: + if (pd->hw_context != -1) + psb_mmu_flush_ptes(pd, f_address, num_pages, + desired_tile_stride, hw_tile_stride); + + up_read(&pd->driver->sem); + + if (pd->hw_context != -1) + psb_mmu_flush(pd->driver, 1); + + return ret; +} + +int psb_mmu_virtual_to_pfn(struct psb_mmu_pd *pd, uint32_t virtual, + unsigned long *pfn) +{ + int ret; + struct psb_mmu_pt *pt; + uint32_t tmp; + spinlock_t *lock = &pd->driver->lock; + + down_read(&pd->driver->sem); + pt = psb_mmu_pt_map_lock(pd, virtual); + if (!pt) { + uint32_t *v; + + spin_lock(lock); + v = kmap_atomic(pd->p, KM_USER0); + tmp = v[psb_mmu_pd_index(virtual)]; + kunmap_atomic(v, KM_USER0); + spin_unlock(lock); + + if (tmp != pd->invalid_pde || !(tmp & PSB_PTE_VALID) || + !(pd->invalid_pte & PSB_PTE_VALID)) { + ret = -EINVAL; + goto out; + } + ret = 0; + *pfn = pd->invalid_pte >> PAGE_SHIFT; + goto out; + } + tmp = pt->v[psb_mmu_pt_index(virtual)]; + if (!(tmp & PSB_PTE_VALID)) { + ret = -EINVAL; + } else { + ret = 0; + *pfn = tmp >> PAGE_SHIFT; + } + psb_mmu_pt_unmap_unlock(pt); +out: + up_read(&pd->driver->sem); + return ret; +}