Merge tag 'dma-mapping-4.19' of git://git.infradead.org/users/hch/dma-mapping

Pull dma-mapping updates from Christoph Hellwig: - a series from Robin to fix bus imposed dma limits by adding a separate mask for them to struct device instead of trying to squeeze a second meaning out of the existing dma mask as we did before. This has ACKs from the various other subsystems touched - a small swiotlb cleanup from Kees (acked by Konrad) - conversion of nios2 and sh to the new generic dma-noncoherent code. Various other architecture conversions will come through the architectures maintainers trees. * tag 'dma-mapping-4.19' of git://git.infradead.org/users/hch/dma-mapping: sh: use generic dma_noncoherent_ops sh: split arch/sh/mm/consistent.c sh: use dma_direct_ops for the CONFIG_DMA_COHERENT case sh: introduce a sh_cacheop_vaddr helper sh: simplify get_arch_dma_ops OF: Don't set default coherent DMA mask ACPI/IORT: Don't set default coherent DMA mask iommu/dma: Respect bus DMA limit for IOVAs of/device: Set bus DMA mask as appropriate ACPI/IORT: Set bus DMA mask as appropriate dma-mapping: Generalise dma_32bit_limit flag ACPI/IORT: Support address size limit for root complexes of/platform: Initialise default DMA masks nios2: use generic dma_noncoherent_ops swiotlb: clean up reporting dma-mapping: relax warning for per-device areas

Merge tag 'dma-mapping-4.19' of git://git.infradead.org/users/hch/dma-mapping
Pull dma-mapping updates from Christoph Hellwig: - a series from Robin to fix bus imposed dma limits by adding a separate mask for them to struct device instead of trying to squeeze a second meaning out of the existing dma mask as we did before. This has ACKs from the various other subsystems touched - a small swiotlb cleanup from Kees (acked by Konrad) - conversion of nios2 and sh to the new generic dma-noncoherent code. Various other architecture conversions will come through the architectures maintainers trees. * tag 'dma-mapping-4.19' of git://git.infradead.org/users/hch/dma-mapping: sh: use generic dma_noncoherent_ops sh: split arch/sh/mm/consistent.c sh: use dma_direct_ops for the CONFIG_DMA_COHERENT case sh: introduce a sh_cacheop_vaddr helper sh: simplify get_arch_dma_ops OF: Don't set default coherent DMA mask ACPI/IORT: Don't set default coherent DMA mask iommu/dma: Respect bus DMA limit for IOVAs of/device: Set bus DMA mask as appropriate ACPI/IORT: Set bus DMA mask as appropriate dma-mapping: Generalise dma_32bit_limit flag ACPI/IORT: Support address size limit for root complexes of/platform: Initialise default DMA masks nios2: use generic dma_noncoherent_ops swiotlb: clean up reporting dma-mapping: relax warning for per-device areas
f66dc723 · Linus Torvalds · b219a1d2 · 6fa1d28e · f66dc723 · f66dc723
23 changed file
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
 # SPDX-License-Identifier: GPL-2.0
 config NIOS2
 	def_bool y
+	select ARCH_HAS_SYNC_DMA_FOR_CPU
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select DMA_NONCOHERENT_OPS
 	select TIMER_OF
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS

--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += current.h
 generic-y += device.h
 generic-y += div64.h
 generic-y += dma.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += extable.h

--- a/arch/nios2/include/asm/dma-mapping.h
+++ b/arch/nios2/include/asm/dma-mapping.h
-/*
- * Copyright (C) 2011 Tobias Klauser <tklauser@distanz.ch>
- * Copyright (C) 2009 Wind River Systems Inc
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License.  See the file COPYING in the main directory of this
- * archive for more details.
- */
-
-#ifndef _ASM_NIOS2_DMA_MAPPING_H
-#define _ASM_NIOS2_DMA_MAPPING_H
-
-extern const struct dma_map_ops nios2_dma_ops;
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return &nios2_dma_ops;
-}
-
-#endif /* _ASM_NIOS2_DMA_MAPPING_H */
--- a/arch/nios2/mm/dma-mapping.c
+++ b/arch/nios2/mm/dma-mapping.c
@@ -12,18 +12,18 @@

 #include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/export.h>
 #include <linux/string.h>
-#include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 #include <linux/cache.h>
 #include <asm/cacheflush.h>

-static inline void __dma_sync_for_device(void *vaddr, size_t size,
-			      enum dma_data_direction direction)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	switch (direction) {
+	void *vaddr = phys_to_virt(paddr);
+
+	switch (dir) {
 	case DMA_FROM_DEVICE:
 		invalidate_dcache_range((unsigned long)vaddr,
 			(unsigned long)(vaddr + size));
@@ -42,10 +42,12 @@ static inline void __dma_sync_for_device(void *vaddr, size_t size,
 	}
 }

-static inline void __dma_sync_for_cpu(void *vaddr, size_t size,
-			      enum dma_data_direction direction)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
 {
-	switch (direction) {
+	void *vaddr = phys_to_virt(paddr);
+
+	switch (dir) {
 	case DMA_BIDIRECTIONAL:
 	case DMA_FROM_DEVICE:
 		invalidate_dcache_range((unsigned long)vaddr,
@@ -58,8 +60,8 @@ static inline void __dma_sync_for_cpu(void *vaddr, size_t size,
 	}
 }

-static void *nios2_dma_alloc(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
 {
 	void *ret;

@@ -80,125 +82,10 @@ static void *nios2_dma_alloc(struct device *dev, size_t size,
 	return ret;
 }

-static void nios2_dma_free(struct device *dev, size_t size, void *vaddr,
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 		dma_addr_t dma_handle, unsigned long attrs)
 {
 	unsigned long addr = (unsigned long) CAC_ADDR((unsigned long) vaddr);

 	free_pages(addr, get_order(size));
 }
-
-static int nios2_dma_map_sg(struct device *dev, struct scatterlist *sg,
-		int nents, enum dma_data_direction direction,
-		unsigned long attrs)
-{
-	int i;
-
-	for_each_sg(sg, sg, nents, i) {
-		void *addr = sg_virt(sg);
-
-		if (!addr)
-			continue;
-
-		sg->dma_address = sg_phys(sg);
-
-		if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-			continue;
-
-		__dma_sync_for_device(addr, sg->length, direction);
-	}
-
-	return nents;
-}
-
-static dma_addr_t nios2_dma_map_page(struct device *dev, struct page *page,
-			unsigned long offset, size_t size,
-			enum dma_data_direction direction,
-			unsigned long attrs)
-{
-	void *addr = page_address(page) + offset;
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		__dma_sync_for_device(addr, size, direction);
-
-	return page_to_phys(page) + offset;
-}
-
-static void nios2_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
-		size_t size, enum dma_data_direction direction,
-		unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		__dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
-}
-
-static void nios2_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
-		int nhwentries, enum dma_data_direction direction,
-		unsigned long attrs)
-{
-	void *addr;
-	int i;
-
-	if (direction == DMA_TO_DEVICE)
-		return;
-
-	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
-		return;
-
-	for_each_sg(sg, sg, nhwentries, i) {
-		addr = sg_virt(sg);
-		if (addr)
-			__dma_sync_for_cpu(addr, sg->length, direction);
-	}
-}
-
-static void nios2_dma_sync_single_for_cpu(struct device *dev,
-		dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	__dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
-}
-
-static void nios2_dma_sync_single_for_device(struct device *dev,
-		dma_addr_t dma_handle, size_t size,
-		enum dma_data_direction direction)
-{
-	__dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
-}
-
-static void nios2_dma_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sg, int nelems,
-		enum dma_data_direction direction)
-{
-	int i;
-
-	/* Make sure that gcc doesn't leave the empty loop body.  */
-	for_each_sg(sg, sg, nelems, i)
-		__dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
-}
-
-static void nios2_dma_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sg, int nelems,
-		enum dma_data_direction direction)
-{
-	int i;
-
-	/* Make sure that gcc doesn't leave the empty loop body.  */
-	for_each_sg(sg, sg, nelems, i)
-		__dma_sync_for_device(sg_virt(sg), sg->length, direction);
-
-}
-
-const struct dma_map_ops nios2_dma_ops = {
-	.alloc			= nios2_dma_alloc,
-	.free			= nios2_dma_free,
-	.map_page		= nios2_dma_map_page,
-	.unmap_page		= nios2_dma_unmap_page,
-	.map_sg			= nios2_dma_map_sg,
-	.unmap_sg		= nios2_dma_unmap_sg,
-	.sync_single_for_device	= nios2_dma_sync_single_for_device,
-	.sync_single_for_cpu	= nios2_dma_sync_single_for_cpu,
-	.sync_sg_for_cpu	= nios2_dma_sync_sg_for_cpu,
-	.sync_sg_for_device	= nios2_dma_sync_sg_for_device,
-};
-EXPORT_SYMBOL(nios2_dma_ops);
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -51,7 +51,6 @@ config SUPERH
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_NMI
-	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH

 	help
@@ -159,10 +158,13 @@ config SWAP_IO_SPACE
 	bool

 config DMA_COHERENT
+	select DMA_DIRECT_OPS
 	bool

 config DMA_NONCOHERENT
 	def_bool !DMA_COHERENT
+	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+	select DMA_NONCOHERENT_OPS

 config PGTABLE_LEVELS
 	default 3 if X2TLB

--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -2,6 +2,7 @@ generic-y += compat.h
 generic-y += current.h
 generic-y += delay.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += irq_regs.h

--- a/arch/sh/include/asm/cacheflush.h
+++ b/arch/sh/include/asm/cacheflush.h
@@ -101,5 +101,12 @@ void kunmap_coherent(void *kvaddr);

 void cpu_cache_init(void);

+static inline void *sh_cacheop_vaddr(void *vaddr)
+{
+	if (__in_29bit_mode())
+		vaddr = (void *)CAC_ADDR((unsigned long)vaddr);
+	return vaddr;
+}
+
 #endif /* __KERNEL__ */
 #endif /* __ASM_SH_CACHEFLUSH_H */
--- a/arch/sh/include/asm/dma-mapping.h
+++ b/arch/sh/include/asm/dma-mapping.h
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SH_DMA_MAPPING_H
-#define __ASM_SH_DMA_MAPPING_H
-
-extern const struct dma_map_ops *dma_ops;
-extern void no_iommu_init(void);
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-	return dma_ops;
-}
-
-extern void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-					dma_addr_t *dma_addr, gfp_t flag,
-					unsigned long attrs);
-extern void dma_generic_free_coherent(struct device *dev, size_t size,
-				      void *vaddr, dma_addr_t dma_handle,
-				      unsigned long attrs);
-
-void sh_sync_dma_for_device(void *vaddr, size_t size,
-	    enum dma_data_direction dir);
-
-#endif /* __ASM_SH_DMA_MAPPING_H */
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -12,7 +12,7 @@ endif

 CFLAGS_REMOVE_return_address.o = -pg

-obj-y	:= debugtraps.o dma-nommu.o dumpstack.o 		\
+obj-y	:= debugtraps.o dumpstack.o 		\
 	   idle.o io.o irq.o irq_$(BITS).o kdebugfs.o			\
 	   machvec.o nmi_debug.o process.o				\
 	   process_$(BITS).o ptrace.o ptrace_$(BITS).o			\
@@ -45,7 +45,7 @@ obj-$(CONFIG_DUMP_CODE)		+= disassemble.o
 obj-$(CONFIG_HIBERNATION)	+= swsusp.o
 obj-$(CONFIG_DWARF_UNWINDER)	+= dwarf.o
 obj-$(CONFIG_PERF_EVENTS)	+= perf_event.o perf_callchain.o
-
+obj-$(CONFIG_DMA_NONCOHERENT)	+= dma-coherent.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)		+= hw_breakpoint.o

 ccflags-y := -Werror
--- a/arch/sh/kernel/dma-coherent.c
+++ b/arch/sh/kernel/dma-coherent.c
+/*
+ * Copyright (C) 2004 - 2007  Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+#include <asm/addrspace.h>
+
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+		gfp_t gfp, unsigned long attrs)
+{
+	void *ret, *ret_nocache;
+	int order = get_order(size);
+
+	gfp |= __GFP_ZERO;
+
+	ret = (void *)__get_free_pages(gfp, order);
+	if (!ret)
+		return NULL;
+
+	/*
+	 * Pages from the page allocator may have data present in
+	 * cache. So flush the cache before using uncached memory.
+	 */
+	arch_sync_dma_for_device(dev, virt_to_phys(ret), size,
+			DMA_BIDIRECTIONAL);
+
+	ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
+	if (!ret_nocache) {
+		free_pages((unsigned long)ret, order);
+		return NULL;
+	}
+
+	split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
+
+	*dma_handle = virt_to_phys(ret);
+	if (!WARN_ON(!dev))
+		*dma_handle -= PFN_PHYS(dev->dma_pfn_offset);
+
+	return ret_nocache;
+}
+
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+		dma_addr_t dma_handle, unsigned long attrs)
+{
+	int order = get_order(size);
+	unsigned long pfn = (dma_handle >> PAGE_SHIFT);
+	int k;
+
+	if (!WARN_ON(!dev))
+		pfn += dev->dma_pfn_offset;
+
+	for (k = 0; k < (1 << order); k++)
+		__free_pages(pfn_to_page(pfn + k), 0);
+
+	iounmap(vaddr);
+}
+
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+		size_t size, enum dma_data_direction dir)
+{
+	void *addr = sh_cacheop_vaddr(phys_to_virt(paddr));
+
+	switch (dir) {
+	case DMA_FROM_DEVICE:		/* invalidate only */
+		__flush_invalidate_region(addr, size);
+		break;
+	case DMA_TO_DEVICE:		/* writeback only */
+		__flush_wback_region(addr, size);
+		break;
+	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
+		__flush_purge_region(addr, size);
+		break;
+	default:
+		BUG();
+	}
+}
--- a/arch/sh/kernel/dma-nommu.c
+++ b/arch/sh/kernel/dma-nommu.c
-/*
- * DMA mapping support for platforms lacking IOMMUs.
- *
- * Copyright (C) 2009  Paul Mundt
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/dma-mapping.h>
-#include <linux/io.h>
-#include <asm/cacheflush.h>
-
-static dma_addr_t nommu_map_page(struct device *dev, struct page *page,
-				 unsigned long offset, size_t size,
-				 enum dma_data_direction dir,
-				 unsigned long attrs)
-{
-	dma_addr_t addr = page_to_phys(page) + offset
-		- PFN_PHYS(dev->dma_pfn_offset);
-
-	WARN_ON(size == 0);
-
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		sh_sync_dma_for_device(page_address(page) + offset, size, dir);
-
-	return addr;
-}
-
-static int nommu_map_sg(struct device *dev, struct scatterlist *sg,
-			int nents, enum dma_data_direction dir,
-			unsigned long attrs)
-{
-	struct scatterlist *s;
-	int i;
-
-	WARN_ON(nents == 0 || sg[0].length == 0);
-
-	for_each_sg(sg, s, nents, i) {
-		dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset);
-
-		BUG_ON(!sg_page(s));
-
-		if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-			sh_sync_dma_for_device(sg_virt(s), s->length, dir);
-
-		s->dma_address = sg_phys(s) - offset;
-		s->dma_length = s->length;
-	}
-
-	return nents;
-}
-
-#ifdef CONFIG_DMA_NONCOHERENT
-static void nommu_sync_single_for_device(struct device *dev, dma_addr_t addr,
-			      size_t size, enum dma_data_direction dir)
-{
-	sh_sync_dma_for_device(phys_to_virt(addr), size, dir);
-}
-
-static void nommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-			  int nelems, enum dma_data_direction dir)
-{
-	struct scatterlist *s;
-	int i;
-
-	for_each_sg(sg, s, nelems, i)
-		sh_sync_dma_for_device(sg_virt(s), s->length, dir);
-}
-#endif
-
-const struct dma_map_ops nommu_dma_ops = {
-	.alloc			= dma_generic_alloc_coherent,
-	.free			= dma_generic_free_coherent,
-	.map_page		= nommu_map_page,
-	.map_sg			= nommu_map_sg,
-#ifdef CONFIG_DMA_NONCOHERENT
-	.sync_single_for_device	= nommu_sync_single_for_device,
-	.sync_sg_for_device	= nommu_sync_sg_for_device,
-#endif
-};
-
-void __init no_iommu_init(void)
-{
-	if (dma_ops)
-		return;
-	dma_ops = &nommu_dma_ops;
-}
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
 /*
- * arch/sh/mm/consistent.c
- *
 * Copyright (C) 2004 - 2007  Paul Mundt
 *
- * Declared coherent memory functions based on arch/x86/kernel/pci-dma_32.c
- *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
@@ -13,90 +9,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/dma-debug.h>
 #include <linux/io.h>
-#include <linux/module.h>
-#include <linux/gfp.h>
-#include <asm/cacheflush.h>
-#include <asm/addrspace.h>
-
-const struct dma_map_ops *dma_ops;
-EXPORT_SYMBOL(dma_ops);
-
-void *dma_generic_alloc_coherent(struct device *dev, size_t size,
-				 dma_addr_t *dma_handle, gfp_t gfp,
-				 unsigned long attrs)
-{
-	void *ret, *ret_nocache;
-	int order = get_order(size);
-
-	gfp |= __GFP_ZERO;
-
-	ret = (void *)__get_free_pages(gfp, order);
-	if (!ret)
-		return NULL;
-
-	/*
-	 * Pages from the page allocator may have data present in
-	 * cache. So flush the cache before using uncached memory.
-	 */
-	sh_sync_dma_for_device(ret, size, DMA_BIDIRECTIONAL);
-
-	ret_nocache = (void __force *)ioremap_nocache(virt_to_phys(ret), size);
-	if (!ret_nocache) {
-		free_pages((unsigned long)ret, order);
-		return NULL;
-	}
-
-	split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order);
-
-	*dma_handle = virt_to_phys(ret);
-	if (!WARN_ON(!dev))
-		*dma_handle -= PFN_PHYS(dev->dma_pfn_offset);
-
-	return ret_nocache;
-}
-
-void dma_generic_free_coherent(struct device *dev, size_t size,
-			       void *vaddr, dma_addr_t dma_handle,
-			       unsigned long attrs)
-{
-	int order = get_order(size);
-	unsigned long pfn = dma_handle >> PAGE_SHIFT;
-	int k;
-
-	if (!WARN_ON(!dev))
-		pfn += dev->dma_pfn_offset;
-
-	for (k = 0; k < (1 << order); k++)
-		__free_pages(pfn_to_page(pfn + k), 0);
-
-	iounmap(vaddr);
-}
-
-void sh_sync_dma_for_device(void *vaddr, size_t size,
-		    enum dma_data_direction direction)
-{
-	void *addr;
-
-	addr = __in_29bit_mode() ?
-	       (void *)CAC_ADDR((unsigned long)vaddr) : vaddr;
-
-	switch (direction) {
-	case DMA_FROM_DEVICE:		/* invalidate only */
-		__flush_invalidate_region(addr, size);
-		break;
-	case DMA_TO_DEVICE:		/* writeback only */
-		__flush_wback_region(addr, size);
-		break;
-	case DMA_BIDIRECTIONAL:		/* writeback and invalidate */
-		__flush_purge_region(addr, size);
-		break;
-	default:
-		BUG();
-	}
-}
-EXPORT_SYMBOL(sh_sync_dma_for_device);

 static int __init memchunk_setup(char *str)
 {

--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -339,22 +339,12 @@ void __init paging_init(void)
 	free_area_init_nodes(max_zone_pfns);
 }

-/*
- * Early initialization for any I/O MMUs we might have.
- */
-static void __init iommu_init(void)
-{
-	no_iommu_init();
-}
-
 unsigned int mem_init_done = 0;

 void __init mem_init(void)
 {
 	pg_data_t *pgdat;

-	iommu_init();
-
 	high_memory = NULL;
 	for_each_online_pgdat(pgdat)
 		high_memory = max_t(void *, high_memory,

--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -175,7 +175,7 @@ rootfs_initcall(pci_iommu_init);

 static int via_no_dac_cb(struct pci_dev *pdev, void *data)
 {
-	pdev->dev.dma_32bit_limit = true;
+	pdev->dev.bus_dma_mask = DMA_BIT_MASK(32);
 	return 0;
 }


--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -947,6 +947,24 @@ static int nc_dma_get_range(struct device *dev, u64 *size)
 	return 0;
 }

+static int rc_dma_get_range(struct device *dev, u64 *size)
+{
+	struct acpi_iort_node *node;
+	struct acpi_iort_root_complex *rc;
+
+	node = iort_scan_node(ACPI_IORT_NODE_PCI_ROOT_COMPLEX,
+			      iort_match_node_callback, dev);
+	if (!node || node->revision < 1)
+		return -ENODEV;
+
+	rc = (struct acpi_iort_root_complex *)node->node_data;
+
+	*size = rc->memory_address_limit >= 64 ? U64_MAX :
+			1ULL<<rc->memory_address_limit;
+
+	return 0;
+}
+
 /**
 * iort_dma_setup() - Set-up device DMA parameters.
 *
@@ -960,25 +978,28 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
 	int ret, msb;

 	/*
-	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
-	 * setup the correct supported mask.
+	 * If @dev is expected to be DMA-capable then the bus code that created
+	 * it should have initialised its dma_mask pointer by this point. For
+	 * now, we'll continue the legacy behaviour of coercing it to the
+	 * coherent mask if not, but we'll no longer do so quietly.
 	 */
-	if (!dev->coherent_dma_mask)
-		dev->coherent_dma_mask = DMA_BIT_MASK(32);
-
-	/*
-	 * Set it to coherent_dma_mask by default if the architecture
-	 * code has not set it.
-	 */
-	if (!dev->dma_mask)
+	if (!dev->dma_mask) {
+		dev_warn(dev, "DMA mask not set\n");
 		dev->dma_mask = &dev->coherent_dma_mask;
+	}

-	size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+	if (dev->coherent_dma_mask)
+		size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+	else
+		size = 1ULL << 32;

-	if (dev_is_pci(dev))
+	if (dev_is_pci(dev)) {
 		ret = acpi_dma_get_range(dev, &dmaaddr, &offset, &size);
-	else
+		if (ret == -ENODEV)
+			ret = rc_dma_get_range(dev, &size);
+	} else {
 		ret = nc_dma_get_range(dev, &size);
+	}

 	if (!ret) {
 		msb = fls64(dmaaddr + size - 1);
@@ -993,6 +1014,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size)
 		 * Limit coherent and dma mask based on size
 		 * retrieved from firmware.
 		 */
+		dev->bus_dma_mask = mask;
 		dev->coherent_dma_mask = mask;
 		*dev->dma_mask = mask;
 	}

--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -367,6 +367,9 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
 	if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
 		iova_len = roundup_pow_of_two(iova_len);

+	if (dev->bus_dma_mask)
+		dma_limit &= dev->bus_dma_mask;
+
 	if (domain->geometry.force_aperture)
 		dma_limit = min(dma_limit, domain->geometry.aperture_end);


--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -127,20 +127,20 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 	}

 	/*
-	 * Set default coherent_dma_mask to 32 bit.  Drivers are expected to
-	 * setup the correct supported mask.
+	 * If @dev is expected to be DMA-capable then the bus code that created
+	 * it should have initialised its dma_mask pointer by this point. For
+	 * now, we'll continue the legacy behaviour of coercing it to the
+	 * coherent mask if not, but we'll no longer do so quietly.
 	 */
-	if (!dev->coherent_dma_mask)
-		dev->coherent_dma_mask = DMA_BIT_MASK(32);
-	/*
-	 * Set it to coherent_dma_mask by default if the architecture
-	 * code has not set it.
-	 */
-	if (!dev->dma_mask)
+	if (!dev->dma_mask) {
+		dev_warn(dev, "DMA mask not set\n");
 		dev->dma_mask = &dev->coherent_dma_mask;
+	}

-	if (!size)
+	if (!size && dev->coherent_dma_mask)
 		size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1);
+	else if (!size)
+		size = 1ULL << 32;

 	dev->dma_pfn_offset = offset;

@@ -149,6 +149,7 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 	 * set by the driver.
 	 */
 	mask = DMA_BIT_MASK(ilog2(dma_addr + size - 1) + 1);
+	dev->bus_dma_mask = mask;
 	dev->coherent_dma_mask &= mask;
 	*dev->dma_mask &= mask;


--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -185,6 +185,9 @@ static struct platform_device *of_platform_device_create_pdata(
 	if (!dev)
 		goto err_clear_flag;

+	dev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
+	if (!dev->dev.dma_mask)
+		dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
 	dev->dev.bus = &platform_bus_type;
 	dev->dev.platform_data = platform_data;
 	of_msi_configure(&dev->dev, dev->dev.of_node);

--- a/drivers/sh/maple/maple.c
+++ b/drivers/sh/maple/maple.c
@@ -300,8 +300,8 @@ static void maple_send(void)
 	mutex_unlock(&maple_wlist_lock);
 	if (maple_packets > 0) {
 		for (i = 0; i < (1 << MAPLE_DMA_PAGES); i++)
-			sh_sync_dma_for_device(maple_sendbuf + i * PAGE_SIZE,
-				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__flush_purge_region(maple_sendbuf + i * PAGE_SIZE,
+					PAGE_SIZE);
 	}

 finish:
@@ -642,7 +642,8 @@ static void maple_dma_handler(struct work_struct *work)
 		list_for_each_entry_safe(mq, nmq, &maple_sentq, list) {
 			mdev = mq->dev;
 			recvbuf = mq->recvbuf->buf;
-			sh_sync_dma_for_device(recvbuf, 0x400, DMA_FROM_DEVICE);
+			__flush_invalidate_region(sh_cacheop_vaddr(recvbuf),
+					0x400);
 			code = recvbuf[0];
 			kfree(mq->sendbuf);
 			list_del_init(&mq->list);

--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -886,6 +886,8 @@ struct dev_links_info {
 * @coherent_dma_mask: Like dma_mask, but for alloc_coherent mapping as not all
 * 		hardware supports 64-bit addresses for consistent allocations
 * 		such descriptors.
+ * @bus_dma_mask: Mask of an upstream bridge or bus which imposes a smaller DMA
+ *		limit than the device itself supports.
 * @dma_pfn_offset: offset of DMA memory range relatively of RAM
 * @dma_parms:	A low level driver may set these to teach IOMMU code about
 * 		segment limitations.
@@ -912,8 +914,6 @@ struct dev_links_info {
 * @offline:	Set after successful invocation of bus type's .offline().
 * @of_node_reused: Set if the device-tree node is shared with an ancestor
 *              device.
- * @dma_32bit_limit: bridge limited to 32bit DMA even if the device itself
- *		indicates support for a higher limit in the dma_mask field.
 *
 * At the lowest level, every device in a Linux system is represented by an
 * instance of struct device. The device structure contains the information
@@ -967,6 +967,7 @@ struct device {
 					     not all hardware supports
 					     64 bit addresses for consistent
 					     allocations such descriptors. */
+	u64		bus_dma_mask;	/* upstream dma_mask constraint */
 	unsigned long	dma_pfn_offset;

 	struct device_dma_parameters *dma_parms;
@@ -1002,7 +1003,6 @@ struct device {
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
-	bool			dma_32bit_limit:1;
 };

 static inline struct device *kobj_to_dev(struct kobject *kobj)

--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -538,10 +538,17 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 	const struct dma_map_ops *ops = get_dma_ops(dev);

 	BUG_ON(!ops);
-	WARN_ON(irqs_disabled());

 	if (dma_release_from_dev_coherent(dev, get_order(size), cpu_addr))
 		return;
+	/*
+	 * On non-coherent platforms which implement DMA-coherent buffers via
+	 * non-cacheable remaps, ops->free() may call vunmap(). Thus getting
+	 * this far in IRQ context is a) at risk of a BUG_ON() or trying to
+	 * sleep on some machines, and b) an indication that the driver is
+	 * probably misusing the coherent API anyway.
+	 */
+	WARN_ON(irqs_disabled());

 	if (!ops->free || !cpu_addr)
 		return;

--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -180,10 +180,10 @@ int dma_direct_supported(struct device *dev, u64 mask)
 		return 0;
 #endif
 	/*
-	 * Various PCI/PCIe bridges have broken support for > 32bit DMA even
-	 * if the device itself might support it.
+	 * Upstream PCI/PCIe bridges or SoC interconnects may not carry
+	 * as many DMA address bits as the device itself supports.
 	 */
-	if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+	if (dev->bus_dma_mask && mask > dev->bus_dma_mask)
 		return 0;
 	return 1;
 }

--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -17,6 +17,8 @@
 * 08/12/11 beckyb	Add highmem support
 */

+#define pr_fmt(fmt) "software IO TLB: " fmt
+
 #include <linux/cache.h>
 #include <linux/dma-direct.h>
 #include <linux/mm.h>
@@ -162,20 +164,16 @@ static bool no_iotlb_memory;
 void swiotlb_print_info(void)
 {
 	unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-	unsigned char *vstart, *vend;

 	if (no_iotlb_memory) {
-		pr_warn("software IO TLB: No low mem\n");
+		pr_warn("No low mem\n");
 		return;
 	}

-	vstart = phys_to_virt(io_tlb_start);
-	vend = phys_to_virt(io_tlb_end);
-
-	printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
+	pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n",
 	       (unsigned long long)io_tlb_start,
 	       (unsigned long long)io_tlb_end,
-	       bytes >> 20, vstart, vend - 1);
+	       bytes >> 20);
 }

 /*
@@ -275,7 +273,7 @@ swiotlb_init(int verbose)
 	if (io_tlb_start)
 		memblock_free_early(io_tlb_start,
 				    PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-	pr_warn("Cannot allocate SWIOTLB buffer");
+	pr_warn("Cannot allocate buffer");
 	no_iotlb_memory = true;
 }

@@ -317,8 +315,8 @@ swiotlb_late_init_with_default_size(size_t default_size)
 		return -ENOMEM;
 	}
 	if (order != get_order(bytes)) {
-		printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-		       "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+		pr_warn("only able to allocate %ld MB\n",
+			(PAGE_SIZE << order) >> 20);
 		io_tlb_nslabs = SLABS_PER_PAGE << order;
 	}
 	rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);