diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c index 90a5907080a122938202a35154b777968714c046..d3276ebcfb4715a118268a8cb6c92640a33afcca 100644 --- a/arch/sparc64/kernel/iommu.c +++ b/arch/sparc64/kernel/iommu.c @@ -512,124 +512,209 @@ static void dma_4u_unmap_single(struct device *dev, dma_addr_t bus_addr, static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction) { - unsigned long flags, ctx, i, npages, iopte_protection; - struct scatterlist *sg; + struct scatterlist *s, *outs, *segstart; + unsigned long flags, handle, prot, ctx; + dma_addr_t dma_next = 0, dma_addr; + unsigned int max_seg_size; + int outcount, incount, i; struct strbuf *strbuf; struct iommu *iommu; - iopte_t *base; - u32 dma_base; - - /* Fast path single entry scatterlists. */ - if (nelems == 1) { - sglist->dma_address = - dma_4u_map_single(dev, sg_virt(sglist), - sglist->length, direction); - if (unlikely(sglist->dma_address == DMA_ERROR_CODE)) - return 0; - sglist->dma_length = sglist->length; - return 1; - } + + BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; strbuf = dev->archdata.stc; - - if (unlikely(direction == DMA_NONE)) - goto bad_no_ctx; - - npages = calc_npages(sglist, nelems); + if (nelems == 0 || !iommu) + return 0; spin_lock_irqsave(&iommu->lock, flags); - base = alloc_npages(dev, iommu, npages); ctx = 0; if (iommu->iommu_ctxflush) ctx = iommu_alloc_ctx(iommu); - spin_unlock_irqrestore(&iommu->lock, flags); - - if (base == NULL) - goto bad; - - dma_base = iommu->page_table_map_base + - ((base - iommu->page_table) << IO_PAGE_SHIFT); - if (strbuf->strbuf_enabled) - iopte_protection = IOPTE_STREAMING(ctx); + prot = IOPTE_STREAMING(ctx); else - iopte_protection = IOPTE_CONSISTENT(ctx); + prot = IOPTE_CONSISTENT(ctx); if (direction != DMA_TO_DEVICE) - iopte_protection |= IOPTE_WRITE; - - for_each_sg(sglist, sg, nelems, i) { - unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg); - unsigned long slen = sg->length; - unsigned long this_npages; + prot |= IOPTE_WRITE; + + outs = s = segstart = &sglist[0]; + outcount = 1; + incount = nelems; + handle = 0; + + /* Init first segment length for backout at failure */ + outs->dma_length = 0; + + max_seg_size = dma_get_max_seg_size(dev); + for_each_sg(sglist, s, nelems, i) { + unsigned long paddr, npages, entry, slen; + iopte_t *base; + + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); + npages = iommu_num_pages(paddr, slen); + entry = iommu_range_alloc(dev, iommu, npages, &handle); + + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" + " npages %lx\n", iommu, paddr, npages); + goto iommu_map_failed; + } - this_npages = iommu_num_pages(paddr, slen); + base = iommu->page_table + entry; - sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK); - sg->dma_length = slen; + /* Convert entry to a dma_addr_t */ + dma_addr = iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT); + dma_addr |= (s->offset & ~IO_PAGE_MASK); + /* Insert into HW table */ paddr &= IO_PAGE_MASK; - while (this_npages--) { - iopte_val(*base) = iopte_protection | paddr; - + while (npages--) { + iopte_val(*base) = prot | paddr; base++; paddr += IO_PAGE_SIZE; - dma_base += IO_PAGE_SIZE; } + + /* If we are in an open segment, try merging */ + if (segstart != s) { + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if ((dma_addr != dma_next) || + (outs->dma_length + s->length > max_seg_size)) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; + outs = sg_next(outs); + } else { + outs->dma_length += s->length; + } + } + + if (segstart == s) { + /* This is a new segment, fill entries */ + outs->dma_address = dma_addr; + outs->dma_length = slen; + } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; } - return nelems; + spin_unlock_irqrestore(&iommu->lock, flags); + + if (outcount < incount) { + outs = sg_next(outs); + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + + return outcount; + +iommu_map_failed: + for_each_sg(sglist, s, nelems, i) { + if (s->dma_length != 0) { + unsigned long vaddr, npages, entry, i; + iopte_t *base; + + vaddr = s->dma_address & IO_PAGE_MASK; + npages = iommu_num_pages(s->dma_address, s->dma_length); + iommu_range_free(iommu, vaddr, npages); + + entry = (vaddr - iommu->page_table_map_base) + >> IO_PAGE_SHIFT; + base = iommu->page_table + entry; + + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); + + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + } + if (s == outs) + break; + } + spin_unlock_irqrestore(&iommu->lock, flags); -bad: - iommu_free_ctx(iommu, ctx); -bad_no_ctx: - if (printk_ratelimit()) - WARN_ON(1); return 0; } +/* If contexts are being used, they are the same in all of the mappings + * we make for a particular SG. + */ +static unsigned long fetch_sg_ctx(struct iommu *iommu, struct scatterlist *sg) +{ + unsigned long ctx = 0; + + if (iommu->iommu_ctxflush) { + iopte_t *base; + u32 bus_addr; + + bus_addr = sg->dma_address & IO_PAGE_MASK; + base = iommu->page_table + + ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + + ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; + } + return ctx; +} + static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction) { - unsigned long flags, ctx, i, npages; + unsigned long flags, ctx; + struct scatterlist *sg; struct strbuf *strbuf; struct iommu *iommu; - iopte_t *base; - u32 bus_addr; - if (unlikely(direction == DMA_NONE)) { - if (printk_ratelimit()) - WARN_ON(1); - } + BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; strbuf = dev->archdata.stc; - bus_addr = sglist->dma_address & IO_PAGE_MASK; + ctx = fetch_sg_ctx(iommu, sglist); - npages = calc_npages(sglist, nelems); + spin_lock_irqsave(&iommu->lock, flags); - base = iommu->page_table + - ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + sg = sglist; + while (nelems--) { + dma_addr_t dma_handle = sg->dma_address; + unsigned int len = sg->dma_length; + unsigned long npages, entry; + iopte_t *base; + int i; - spin_lock_irqsave(&iommu->lock, flags); + if (!len) + break; + npages = iommu_num_pages(dma_handle, len); + iommu_range_free(iommu, dma_handle, npages); - /* Record the context, if any. */ - ctx = 0; - if (iommu->iommu_ctxflush) - ctx = (iopte_val(*base) & IOPTE_CONTEXT) >> 47UL; + entry = ((dma_handle - iommu->page_table_map_base) + >> IO_PAGE_SHIFT); + base = iommu->page_table + entry; - /* Step 1: Kick data out of streaming buffers if necessary. */ - if (strbuf->strbuf_enabled) - strbuf_flush(strbuf, iommu, bus_addr, ctx, npages, direction); + dma_handle &= IO_PAGE_MASK; + if (strbuf->strbuf_enabled) + strbuf_flush(strbuf, iommu, dma_handle, ctx, + npages, direction); - /* Step 2: Clear out the TSB entries. */ - for (i = 0; i < npages; i++) - iopte_make_dummy(iommu, base + i); + for (i = 0; i < npages; i++) + iopte_make_dummy(iommu, base + i); - iommu_range_free(iommu, bus_addr, npages); + sg = sg_next(sg); + } iommu_free_ctx(iommu, ctx); diff --git a/arch/sparc64/kernel/pci_sun4v.c b/arch/sparc64/kernel/pci_sun4v.c index c8b6199a5dc4b405b4153eb0b084842266391b71..ddca6c6c0b492709193e1a3c8a86dcb30f884fbf 100644 --- a/arch/sparc64/kernel/pci_sun4v.c +++ b/arch/sparc64/kernel/pci_sun4v.c @@ -89,6 +89,17 @@ static long iommu_batch_flush(struct iommu_batch *p) return 0; } +static inline void iommu_batch_new_entry(unsigned long entry) +{ + struct iommu_batch *p = &__get_cpu_var(iommu_batch); + + if (p->entry + p->npages == entry) + return; + if (p->entry != ~0UL) + iommu_batch_flush(p); + p->entry = entry; +} + /* Interrupts must be disabled. */ static inline long iommu_batch_add(u64 phys_page) { @@ -320,88 +331,131 @@ static void dma_4v_unmap_single(struct device *dev, dma_addr_t bus_addr, static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction) { - unsigned long flags, npages, i, prot; - u32 dma_base, orig_dma_base; - struct scatterlist *sg; + struct scatterlist *s, *outs, *segstart; + unsigned long flags, handle, prot; + dma_addr_t dma_next = 0, dma_addr; + unsigned int max_seg_size; + int outcount, incount, i; struct iommu *iommu; - long entry, err; - - /* Fast path single entry scatterlists. */ - if (nelems == 1) { - sglist->dma_address = - dma_4v_map_single(dev, sg_virt(sglist), - sglist->length, direction); - if (unlikely(sglist->dma_address == DMA_ERROR_CODE)) - return 0; - sglist->dma_length = sglist->length; - return 1; - } + long err; + + BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; + if (nelems == 0 || !iommu) + return 0; - if (unlikely(direction == DMA_NONE)) - goto bad; - - npages = calc_npages(sglist, nelems); + prot = HV_PCI_MAP_ATTR_READ; + if (direction != DMA_TO_DEVICE) + prot |= HV_PCI_MAP_ATTR_WRITE; - spin_lock_irqsave(&iommu->lock, flags); - entry = iommu_range_alloc(dev, iommu, npages, NULL); - spin_unlock_irqrestore(&iommu->lock, flags); + outs = s = segstart = &sglist[0]; + outcount = 1; + incount = nelems; + handle = 0; - if (unlikely(entry == DMA_ERROR_CODE)) - goto bad; + /* Init first segment length for backout at failure */ + outs->dma_length = 0; - orig_dma_base = dma_base = iommu->page_table_map_base + - (entry << IO_PAGE_SHIFT); + spin_lock_irqsave(&iommu->lock, flags); - prot = HV_PCI_MAP_ATTR_READ; - if (direction != DMA_TO_DEVICE) - prot |= HV_PCI_MAP_ATTR_WRITE; + iommu_batch_start(dev, prot, ~0UL); - local_irq_save(flags); + max_seg_size = dma_get_max_seg_size(dev); + for_each_sg(sglist, s, nelems, i) { + unsigned long paddr, npages, entry, slen; - iommu_batch_start(dev, prot, entry); + slen = s->length; + /* Sanity check */ + if (slen == 0) { + dma_next = 0; + continue; + } + /* Allocate iommu entries for that segment */ + paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); + npages = iommu_num_pages(paddr, slen); + entry = iommu_range_alloc(dev, iommu, npages, &handle); - for_each_sg(sglist, sg, nelems, i) { - unsigned long paddr = SG_ENT_PHYS_ADDRESS(sg); - unsigned long slen = sg->length; - unsigned long this_npages; + /* Handle failure */ + if (unlikely(entry == DMA_ERROR_CODE)) { + if (printk_ratelimit()) + printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" + " npages %lx\n", iommu, paddr, npages); + goto iommu_map_failed; + } - this_npages = iommu_num_pages(paddr, slen); + iommu_batch_new_entry(entry); - sg->dma_address = dma_base | (paddr & ~IO_PAGE_MASK); - sg->dma_length = slen; + /* Convert entry to a dma_addr_t */ + dma_addr = iommu->page_table_map_base + + (entry << IO_PAGE_SHIFT); + dma_addr |= (s->offset & ~IO_PAGE_MASK); + /* Insert into HW table */ paddr &= IO_PAGE_MASK; - while (this_npages--) { + while (npages--) { err = iommu_batch_add(paddr); - if (unlikely(err < 0L)) { - local_irq_restore(flags); + if (unlikely(err < 0L)) goto iommu_map_failed; + paddr += IO_PAGE_SIZE; + } + + /* If we are in an open segment, try merging */ + if (segstart != s) { + /* We cannot merge if: + * - allocated dma_addr isn't contiguous to previous allocation + */ + if ((dma_addr != dma_next) || + (outs->dma_length + s->length > max_seg_size)) { + /* Can't merge: create a new segment */ + segstart = s; + outcount++; + outs = sg_next(outs); + } else { + outs->dma_length += s->length; } + } - paddr += IO_PAGE_SIZE; - dma_base += IO_PAGE_SIZE; + if (segstart == s) { + /* This is a new segment, fill entries */ + outs->dma_address = dma_addr; + outs->dma_length = slen; } + + /* Calculate next page pointer for contiguous check */ + dma_next = dma_addr + slen; } err = iommu_batch_end(); - local_irq_restore(flags); - if (unlikely(err < 0L)) goto iommu_map_failed; - return nelems; + spin_unlock_irqrestore(&iommu->lock, flags); -bad: - if (printk_ratelimit()) - WARN_ON(1); - return 0; + if (outcount < incount) { + outs = sg_next(outs); + outs->dma_address = DMA_ERROR_CODE; + outs->dma_length = 0; + } + + return outcount; iommu_map_failed: - spin_lock_irqsave(&iommu->lock, flags); - iommu_range_free(iommu, orig_dma_base, npages); + for_each_sg(sglist, s, nelems, i) { + if (s->dma_length != 0) { + unsigned long vaddr, npages; + + vaddr = s->dma_address & IO_PAGE_MASK; + npages = iommu_num_pages(s->dma_address, s->dma_length); + iommu_range_free(iommu, vaddr, npages); + /* XXX demap? XXX */ + s->dma_address = DMA_ERROR_CODE; + s->dma_length = 0; + } + if (s == outs) + break; + } spin_unlock_irqrestore(&iommu->lock, flags); return 0; @@ -410,39 +464,43 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction direction) { - unsigned long flags, npages; struct pci_pbm_info *pbm; - u32 devhandle, bus_addr; + struct scatterlist *sg; struct iommu *iommu; - long entry; + unsigned long flags; + u32 devhandle; - if (unlikely(direction == DMA_NONE)) { - if (printk_ratelimit()) - WARN_ON(1); - } + BUG_ON(direction == DMA_NONE); iommu = dev->archdata.iommu; pbm = dev->archdata.host_controller; devhandle = pbm->devhandle; - bus_addr = sglist->dma_address & IO_PAGE_MASK; - - npages = calc_npages(sglist, nelems); - - entry = ((bus_addr - iommu->page_table_map_base) >> IO_PAGE_SHIFT); - spin_lock_irqsave(&iommu->lock, flags); - iommu_range_free(iommu, bus_addr, npages); - - do { - unsigned long num; + sg = sglist; + while (nelems--) { + dma_addr_t dma_handle = sg->dma_address; + unsigned int len = sg->dma_length; + unsigned long npages, entry; + + if (!len) + break; + npages = iommu_num_pages(dma_handle, len); + iommu_range_free(iommu, dma_handle, npages); + + entry = ((dma_handle - iommu->page_table_map_base) >> IO_PAGE_SHIFT); + while (npages) { + unsigned long num; + + num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), + npages); + entry += num; + npages -= num; + } - num = pci_sun4v_iommu_demap(devhandle, HV_PCI_TSBID(0, entry), - npages); - entry += num; - npages -= num; - } while (npages != 0); + sg = sg_next(sg); + } spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/include/asm-sparc64/io.h b/include/asm-sparc64/io.h index b6ece223562de86719dc403595eca551140be570..c299b853b5babae9520be2238620319cf490c149 100644 --- a/include/asm-sparc64/io.h +++ b/include/asm-sparc64/io.h @@ -16,7 +16,7 @@ /* BIO layer definitions. */ extern unsigned long kern_base, kern_size; #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) -#define BIO_VMERGE_BOUNDARY 0 +#define BIO_VMERGE_BOUNDARY 8192 static inline u8 _inb(unsigned long addr) {