diff --git a/Documentation/ABI/testing/sysfs-platform-hidma b/Documentation/ABI/testing/sysfs-platform-hidma
index d36441538660df72816841a3b966b878d9f0a838..fca40a54df5972a05957303cf8a3d4ef38d5b2a8 100644
--- a/Documentation/ABI/testing/sysfs-platform-hidma
+++ b/Documentation/ABI/testing/sysfs-platform-hidma
@@ -2,7 +2,7 @@ What:		/sys/devices/platform/hidma-*/chid
 		/sys/devices/platform/QCOM8061:*/chid
 Date:		Dec 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains the ID of the channel within the HIDMA instance.
 		It is used to associate a given HIDMA channel with the
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma-mgmt b/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
index c2fb5d033f0eb634276377693ce11846881d31c5..3b6c5c9eabdc6a72e544e56041d0c12689bd7b69 100644
--- a/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
+++ b/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
@@ -2,7 +2,7 @@ What:		/sys/devices/platform/hidma-mgmt*/chanops/chan*/priority
 		/sys/devices/platform/QCOM8060:*/chanops/chan*/priority
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains either 0 or 1 and indicates if the DMA channel is a
 		low priority (0) or high priority (1) channel.
@@ -11,7 +11,7 @@ What:		/sys/devices/platform/hidma-mgmt*/chanops/chan*/weight
 		/sys/devices/platform/QCOM8060:*/chanops/chan*/weight
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains 0..15 and indicates the weight of the channel among
 		equal priority channels during round robin scheduling.
@@ -20,7 +20,7 @@ What:		/sys/devices/platform/hidma-mgmt*/chreset_timeout_cycles
 		/sys/devices/platform/QCOM8060:*/chreset_timeout_cycles
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains the platform specific cycle value to wait after a
 		reset command is issued. If the value is chosen too short,
@@ -32,7 +32,7 @@ What:		/sys/devices/platform/hidma-mgmt*/dma_channels
 		/sys/devices/platform/QCOM8060:*/dma_channels
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains the number of dma channels supported by one instance
 		of HIDMA hardware. The value may change from chip to chip.
@@ -41,7 +41,7 @@ What:		/sys/devices/platform/hidma-mgmt*/hw_version_major
 		/sys/devices/platform/QCOM8060:*/hw_version_major
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Version number major for the hardware.
 
@@ -49,7 +49,7 @@ What:		/sys/devices/platform/hidma-mgmt*/hw_version_minor
 		/sys/devices/platform/QCOM8060:*/hw_version_minor
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Version number minor for the hardware.
 
@@ -57,7 +57,7 @@ What:		/sys/devices/platform/hidma-mgmt*/max_rd_xactions
 		/sys/devices/platform/QCOM8060:*/max_rd_xactions
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains a value between 0 and 31. Maximum number of
 		read transactions that can be issued back to back.
@@ -69,7 +69,7 @@ What:		/sys/devices/platform/hidma-mgmt*/max_read_request
 		/sys/devices/platform/QCOM8060:*/max_read_request
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Size of each read request. The value needs to be a power
 		of two and can be between 128 and 1024.
@@ -78,7 +78,7 @@ What:		/sys/devices/platform/hidma-mgmt*/max_wr_xactions
 		/sys/devices/platform/QCOM8060:*/max_wr_xactions
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Contains a value between 0 and 31. Maximum number of
 		write transactions that can be issued back to back.
@@ -91,7 +91,7 @@ What:		/sys/devices/platform/hidma-mgmt*/max_write_request
 		/sys/devices/platform/QCOM8060:*/max_write_request
 Date:		Nov 2015
 KernelVersion:	4.4
-Contact:	"Sinan Kaya <okaya@cudeaurora.org>"
+Contact:	"Sinan Kaya <okaya@codeaurora.org>"
 Description:
 		Size of each write request. The value needs to be a power
 		of two and can be between 128 and 1024.
diff --git a/Documentation/devicetree/bindings/dma/stm32-dma.txt b/Documentation/devicetree/bindings/dma/stm32-dma.txt
index 70cd13f1588abea6f266afe65e3ad4b4f66683ca..4408af693d0cc75298e66c3c56af8f7521d3eb23 100644
--- a/Documentation/devicetree/bindings/dma/stm32-dma.txt
+++ b/Documentation/devicetree/bindings/dma/stm32-dma.txt
@@ -40,8 +40,7 @@ Example:
 
 DMA clients connected to the STM32 DMA controller must use the format
 described in the dma.txt file, using a five-cell specifier for each
-channel: a phandle plus four integer cells.
-The four cells in order are:
+channel: a phandle to the DMA controller plus the following four integer cells:
 
 1. The channel id
 2. The request line number
@@ -61,7 +60,7 @@ The four cells in order are:
 	0x1: medium
 	0x2: high
 	0x3: very high
-5. A 32bit mask specifying the DMA FIFO threshold configuration which are device
+4. A 32bit mask specifying the DMA FIFO threshold configuration which is device
    dependent:
  -bit 0-1: Fifo threshold
 	0x0: 1/4 full FIFO
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 263495d0adbdfd0a5e079c3ba5c6f71247ee7fde..d01d59812cf3ec8f171e09641ae2c1c787e0de06 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -157,7 +157,7 @@ config DMA_SUN4I
 
 config DMA_SUN6I
 	tristate "Allwinner A31 SoCs DMA support"
-	depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST
+	depends on MACH_SUN6I || MACH_SUN8I || (ARM64 && ARCH_SUNXI) || COMPILE_TEST
 	depends on RESET_CONTROLLER
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
@@ -458,7 +458,7 @@ config STM32_DMA
 	help
 	  Enable support for the on-chip DMA controller on STMicroelectronics
 	  STM32 MCUs.
-	  If you have a board based on such a MCU and wish to use DMA say Y or M
+	  If you have a board based on such a MCU and wish to use DMA say Y
 	  here.
 
 config S3C24XX_DMAC
@@ -571,12 +571,12 @@ config XILINX_ZYNQMP_DMA
 	  Enable support for Xilinx ZynqMP DMA controller.
 
 config ZX_DMA
-	tristate "ZTE ZX296702 DMA support"
+	tristate "ZTE ZX DMA support"
 	depends on ARCH_ZX || COMPILE_TEST
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-	  Support the DMA engine for ZTE ZX296702 platform devices.
+	  Support the DMA engine for ZTE ZX family platform devices.
 
 
 # driver files
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index a4fa3360e609d1ef81bf89dc7086b76e6ae52048..0b723e94d9e6d3751d1ceea4509b173cbd84b458 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -66,7 +66,7 @@ obj-$(CONFIG_TI_CPPI41) += cppi41.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o
 obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
-obj-$(CONFIG_ZX_DMA) += zx296702_dma.o
+obj-$(CONFIG_ZX_DMA) += zx_dma.o
 obj-$(CONFIG_ST_FDMA) += st_fdma.o
 
 obj-y += qcom/
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 6b535262ac5d76972bb525b49f3cf160fcf54aa0..24e0221fd66d1ff58eead62ee9f4a865eb87da03 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -65,7 +65,7 @@
 #include <linux/mempool.h>
 
 static DEFINE_MUTEX(dma_list_mutex);
-static DEFINE_IDR(dma_idr);
+static DEFINE_IDA(dma_ida);
 static LIST_HEAD(dma_device_list);
 static long dmaengine_ref_count;
 
@@ -162,7 +162,7 @@ static void chan_dev_release(struct device *dev)
 	chan_dev = container_of(dev, typeof(*chan_dev), device);
 	if (atomic_dec_and_test(chan_dev->idr_ref)) {
 		mutex_lock(&dma_list_mutex);
-		idr_remove(&dma_idr, chan_dev->dev_id);
+		ida_remove(&dma_ida, chan_dev->dev_id);
 		mutex_unlock(&dma_list_mutex);
 		kfree(chan_dev->idr_ref);
 	}
@@ -898,14 +898,15 @@ static int get_dma_id(struct dma_device *device)
 {
 	int rc;
 
-	mutex_lock(&dma_list_mutex);
-
-	rc = idr_alloc(&dma_idr, NULL, 0, 0, GFP_KERNEL);
-	if (rc >= 0)
-		device->dev_id = rc;
+	do {
+		if (!ida_pre_get(&dma_ida, GFP_KERNEL))
+			return -ENOMEM;
+		mutex_lock(&dma_list_mutex);
+		rc = ida_get_new(&dma_ida, &device->dev_id);
+		mutex_unlock(&dma_list_mutex);
+	} while (rc == -EAGAIN);
 
-	mutex_unlock(&dma_list_mutex);
-	return rc < 0 ? rc : 0;
+	return rc;
 }
 
 /**
@@ -1035,7 +1036,7 @@ int dma_async_device_register(struct dma_device *device)
 	/* if we never registered a channel just release the idr */
 	if (atomic_read(idr_ref) == 0) {
 		mutex_lock(&dma_list_mutex);
-		idr_remove(&dma_idr, device->dev_id);
+		ida_remove(&dma_ida, device->dev_id);
 		mutex_unlock(&dma_list_mutex);
 		kfree(idr_ref);
 		return rc;
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index e5adf5d1c34fcf53dfeab355ee647a97c2765661..e500950dad822ab84c52a6412649f740c321405b 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -138,16 +138,32 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	dwc->descs_allocated--;
 }
 
-static void dwc_initialize(struct dw_dma_chan *dwc)
+static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc)
+{
+	u32 cfghi = 0;
+	u32 cfglo = 0;
+
+	/* Set default burst alignment */
+	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+	/* Low 4 bits of the request lines */
+	cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
+	cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
+
+	/* Request line extension (2 bits) */
+	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
+	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc)
 {
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	u32 cfghi = DWC_CFGH_FIFO_MODE;
 	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
 	bool hs_polarity = dwc->dws.hs_polarity;
 
-	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
-		return;
-
 	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
 	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
 
@@ -156,6 +172,19 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 
 	channel_writel(dwc, CFG_LO, cfglo);
 	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void dwc_initialize(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
+		return;
+
+	if (dw->pdata->is_idma32)
+		dwc_initialize_chan_idma32(dwc);
+	else
+		dwc_initialize_chan_dw(dwc);
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
@@ -184,6 +213,37 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		cpu_relax();
 }
 
+static u32 bytes2block(struct dw_dma_chan *dwc, size_t bytes,
+			  unsigned int width, size_t *len)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	u32 block;
+
+	/* Always in bytes for iDMA 32-bit */
+	if (dw->pdata->is_idma32)
+		width = 0;
+
+	if ((bytes >> width) > dwc->block_size) {
+		block = dwc->block_size;
+		*len = block << width;
+	} else {
+		block = bytes >> width;
+		*len = bytes;
+	}
+
+	return block;
+}
+
+static size_t block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+	if (dw->pdata->is_idma32)
+		return IDMA32C_CTLH_BLOCK_TS(block);
+
+	return DWC_CTLH_BLOCK_TS(block) << width;
+}
+
 /*----------------------------------------------------------------------*/
 
 /* Perform single block transfer */
@@ -332,7 +392,7 @@ static inline u32 dwc_get_sent(struct dw_dma_chan *dwc)
 	u32 ctlhi = channel_readl(dwc, CTL_HI);
 	u32 ctllo = channel_readl(dwc, CTL_LO);
 
-	return (ctlhi & DWC_CTLH_BLOCK_TS_MASK) * (1 << (ctllo >> 4 & 7));
+	return block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
 }
 
 static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
@@ -692,10 +752,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 			| DWC_CTLL_FC_M2M;
 	prev = first = NULL;
 
-	for (offset = 0; offset < len; offset += xfer_count << src_width) {
-		xfer_count = min_t(size_t, (len - offset) >> src_width,
-					   dwc->block_size);
-
+	for (offset = 0; offset < len; offset += xfer_count) {
 		desc = dwc_desc_get(dwc);
 		if (!desc)
 			goto err_desc_get;
@@ -703,8 +760,8 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		lli_write(desc, sar, src + offset);
 		lli_write(desc, dar, dest + offset);
 		lli_write(desc, ctllo, ctllo);
-		lli_write(desc, ctlhi, xfer_count);
-		desc->len = xfer_count << src_width;
+		lli_write(desc, ctlhi, bytes2block(dwc, len - offset, src_width, &xfer_count));
+		desc->len = xfer_count;
 
 		if (!first) {
 			first = desc;
@@ -775,7 +832,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
-			u32		len, dlen, mem;
+			u32		len, mem;
+			size_t		dlen;
 
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
@@ -789,17 +847,8 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 			lli_write(desc, sar, mem);
 			lli_write(desc, dar, reg);
+			lli_write(desc, ctlhi, bytes2block(dwc, len, mem_width, &dlen));
 			lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
-			if ((len >> mem_width) > dwc->block_size) {
-				dlen = dwc->block_size << mem_width;
-				mem += dlen;
-				len -= dlen;
-			} else {
-				dlen = len;
-				len = 0;
-			}
-
-			lli_write(desc, ctlhi, dlen >> mem_width);
 			desc->len = dlen;
 
 			if (!first) {
@@ -809,6 +858,9 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 				list_add_tail(&desc->desc_node, &first->tx_list);
 			}
 			prev = desc;
+
+			mem += dlen;
+			len -= dlen;
 			total_len += dlen;
 
 			if (len)
@@ -828,13 +880,12 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct dw_desc	*desc;
-			u32		len, dlen, mem;
+			u32		len, mem;
+			size_t		dlen;
 
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
 
-			mem_width = __ffs(data_width | mem | len);
-
 slave_sg_fromdev_fill_desc:
 			desc = dwc_desc_get(dwc);
 			if (!desc)
@@ -842,16 +893,9 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 			lli_write(desc, sar, reg);
 			lli_write(desc, dar, mem);
+			lli_write(desc, ctlhi, bytes2block(dwc, len, reg_width, &dlen));
+			mem_width = __ffs(data_width | mem | dlen);
 			lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
-			if ((len >> reg_width) > dwc->block_size) {
-				dlen = dwc->block_size << reg_width;
-				mem += dlen;
-				len -= dlen;
-			} else {
-				dlen = len;
-				len = 0;
-			}
-			lli_write(desc, ctlhi, dlen >> reg_width);
 			desc->len = dlen;
 
 			if (!first) {
@@ -861,6 +905,9 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 				list_add_tail(&desc->desc_node, &first->tx_list);
 			}
 			prev = desc;
+
+			mem += dlen;
+			len -= dlen;
 			total_len += dlen;
 
 			if (len)
@@ -903,25 +950,20 @@ bool dw_dma_filter(struct dma_chan *chan, void *param)
 }
 EXPORT_SYMBOL_GPL(dw_dma_filter);
 
-/*
- * Fix sconfig's burst size according to dw_dmac. We need to convert them as:
- * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
- *
- * NOTE: burst size 2 is not supported by controller.
- *
- * This can be done by finding least significant bit set: n & (n - 1)
- */
-static inline void convert_burst(u32 *maxburst)
-{
-	if (*maxburst > 1)
-		*maxburst = fls(*maxburst) - 2;
-	else
-		*maxburst = 0;
-}
-
 static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 {
 	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+	struct dma_slave_config *sc = &dwc->dma_sconfig;
+	struct dw_dma *dw = to_dw_dma(chan->device);
+	/*
+	 * Fix sconfig's burst size according to dw_dmac. Convert it as:
+	 * DesignWare:  1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+	 * iDMA 32-bit: 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4.
+	 *
+	 * NOTE: burst size 2 is not supported by DesignWare controller.
+	 *       iDMA 32-bit supports it.
+	 */
+	u32 s = dw->pdata->is_idma32 ? 1 : 2;
 
 	/* Check if chan will be configured for slave transfers */
 	if (!is_slave_direction(sconfig->direction))
@@ -930,28 +972,39 @@ static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
 	dwc->direction = sconfig->direction;
 
-	convert_burst(&dwc->dma_sconfig.src_maxburst);
-	convert_burst(&dwc->dma_sconfig.dst_maxburst);
+	sc->src_maxburst = sc->src_maxburst > 1 ? fls(sc->src_maxburst) - s : 0;
+	sc->dst_maxburst = sc->dst_maxburst > 1 ? fls(sc->dst_maxburst) - s : 0;
 
 	return 0;
 }
 
-static int dwc_pause(struct dma_chan *chan)
+static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain)
 {
-	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
-	unsigned long		flags;
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	unsigned int		count = 20;	/* timeout iterations */
 	u32			cfglo;
 
-	spin_lock_irqsave(&dwc->lock, flags);
-
 	cfglo = channel_readl(dwc, CFG_LO);
+	if (dw->pdata->is_idma32) {
+		if (drain)
+			cfglo |= IDMA32C_CFGL_CH_DRAIN;
+		else
+			cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
+	}
 	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
 	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
 		udelay(2);
 
 	set_bit(DW_DMA_IS_PAUSED, &dwc->flags);
+}
 
+static int dwc_pause(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	unsigned long		flags;
+
+	spin_lock_irqsave(&dwc->lock, flags);
+	dwc_chan_pause(dwc, false);
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	return 0;
@@ -993,6 +1046,8 @@ static int dwc_terminate_all(struct dma_chan *chan)
 
 	clear_bit(DW_DMA_IS_SOFT_LLP, &dwc->flags);
 
+	dwc_chan_pause(dwc, true);
+
 	dwc_chan_disable(dw, dwc);
 
 	dwc_chan_resume(dwc);
@@ -1085,6 +1140,32 @@ static void dwc_issue_pending(struct dma_chan *chan)
 
 /*----------------------------------------------------------------------*/
 
+/*
+ * Program FIFO size of channels.
+ *
+ * By default the full FIFO (1024 bytes) is assigned to channel 0. Here we
+ * slice the FIFO into equal parts between the channels.
+ */
+static void idma32_fifo_partition(struct dw_dma *dw)
+{
+	u64 value = IDMA32C_FP_PSIZE_CH0(128) | IDMA32C_FP_PSIZE_CH1(128) |
+		    IDMA32C_FP_UPDATE;
+	u64 fifo_partition = 0;
+
+	if (!dw->pdata->is_idma32)
+		return;
+
+	/* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
+	fifo_partition |= value << 0;
+
+	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
+	fifo_partition |= value << 32;
+
+	/* Program FIFO Partition registers - 128 bytes for each channel */
+	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
+	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
+}
+
 static void dw_dma_off(struct dw_dma *dw)
 {
 	unsigned int i;
@@ -1504,8 +1585,16 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 	/* Force dma off, just in case */
 	dw_dma_off(dw);
 
+	idma32_fifo_partition(dw);
+
+	/* Device and instance ID for IRQ and DMA pool */
+	if (pdata->is_idma32)
+		snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", chip->id);
+	else
+		snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", chip->id);
+
 	/* Create a pool of consistent memory blocks for hardware descriptors */
-	dw->desc_pool = dmam_pool_create("dw_dmac_desc_pool", chip->dev,
+	dw->desc_pool = dmam_pool_create(dw->name, chip->dev,
 					 sizeof(struct dw_desc), 4, 0);
 	if (!dw->desc_pool) {
 		dev_err(chip->dev, "No memory for descriptors dma pool\n");
@@ -1516,7 +1605,7 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
 
 	err = request_irq(chip->irq, dw_dma_interrupt, IRQF_SHARED,
-			  "dw_dmac", dw);
+			  dw->name, dw);
 	if (err)
 		goto err_pdata;
 
@@ -1665,6 +1754,8 @@ int dw_dma_enable(struct dw_dma_chip *chip)
 {
 	struct dw_dma *dw = chip->dw;
 
+	idma32_fifo_partition(dw);
+
 	dw_dma_on(dw);
 	return 0;
 }
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 0ae6c3b1d34e66b16359eb50efe0041498f18f9d..7778ed705a1adf2c91956460603329318569307f 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -15,6 +15,18 @@
 
 #include "internal.h"
 
+static struct dw_dma_platform_data mrfld_pdata = {
+	.nr_channels = 8,
+	.is_private = true,
+	.is_memcpy = true,
+	.is_idma32 = true,
+	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+	.chan_priority = CHAN_PRIORITY_ASCENDING,
+	.block_size = 131071,
+	.nr_masters = 1,
+	.data_width = {4},
+};
+
 static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 {
 	const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
@@ -47,6 +59,7 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 		return -ENOMEM;
 
 	chip->dev = &pdev->dev;
+	chip->id = pdev->devfn;
 	chip->regs = pcim_iomap_table(pdev)[0];
 	chip->irq = pdev->irq;
 	chip->pdata = pdata;
@@ -95,14 +108,16 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = {
 };
 
 static const struct pci_device_id dw_pci_id_table[] = {
-	/* Medfield */
+	/* Medfield (GPDMA) */
 	{ PCI_VDEVICE(INTEL, 0x0827) },
-	{ PCI_VDEVICE(INTEL, 0x0830) },
 
 	/* BayTrail */
 	{ PCI_VDEVICE(INTEL, 0x0f06) },
 	{ PCI_VDEVICE(INTEL, 0x0f40) },
 
+	/* Merrifield iDMA 32-bit (GPDMA) */
+	{ PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&mrfld_pdata },
+
 	/* Braswell */
 	{ PCI_VDEVICE(INTEL, 0x2286) },
 	{ PCI_VDEVICE(INTEL, 0x22c0) },
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index b1655e40cfa24f7313e11c49c7394951cda68451..c639c60b825abfcc5c1743096f855ccc413fe834 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -202,6 +202,7 @@ static int dw_probe(struct platform_device *pdev)
 		pdata = dw_dma_parse_dt(pdev);
 
 	chip->dev = dev;
+	chip->id = pdev->id;
 	chip->pdata = pdata;
 
 	chip->clk = devm_clk_get(chip->dev, "hclk");
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 4e0128c627047741487123e8d51e38c010b6f914..32a328721c8872d2f94da638dbf429dd40fbfde5 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -3,15 +3,19 @@
  *
  * Copyright (C) 2005-2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
+ * Copyright (C) 2016 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 
+#include <linux/bitops.h>
 #include <linux/interrupt.h>
 #include <linux/dmaengine.h>
 
+#include <linux/io-64-nonatomic-hi-lo.h>
+
 #include "internal.h"
 
 #define DW_DMA_MAX_NR_REQUESTS	16
@@ -85,9 +89,9 @@ struct dw_dma_regs {
 	DW_REG(ID);
 	DW_REG(TEST);
 
-	/* reserved */
-	DW_REG(__reserved0);
-	DW_REG(__reserved1);
+	/* iDMA 32-bit support */
+	DW_REG(CLASS_PRIORITY0);
+	DW_REG(CLASS_PRIORITY1);
 
 	/* optional encoded params, 0x3c8..0x3f7 */
 	u32	__reserved;
@@ -99,6 +103,17 @@ struct dw_dma_regs {
 
 	/* top-level parameters */
 	u32	DW_PARAMS;
+
+	/* component ID */
+	u32	COMP_TYPE;
+	u32	COMP_VERSION;
+
+	/* iDMA 32-bit support */
+	DW_REG(FIFO_PARTITION0);
+	DW_REG(FIFO_PARTITION1);
+
+	DW_REG(SAI_ERR);
+	DW_REG(GLOBAL_CFG);
 };
 
 /*
@@ -170,8 +185,9 @@ enum dw_dma_msize {
 #define DWC_CTLL_LLP_S_EN	(1 << 28)	/* src block chain */
 
 /* Bitfields in CTL_HI */
-#define DWC_CTLH_DONE		0x00001000
-#define DWC_CTLH_BLOCK_TS_MASK	0x00000fff
+#define DWC_CTLH_BLOCK_TS_MASK	GENMASK(11, 0)
+#define DWC_CTLH_BLOCK_TS(x)	((x) & DWC_CTLH_BLOCK_TS_MASK)
+#define DWC_CTLH_DONE		(1 << 12)
 
 /* Bitfields in CFG_LO */
 #define DWC_CFGL_CH_PRIOR_MASK	(0x7 << 5)	/* priority mask */
@@ -214,6 +230,33 @@ enum dw_dma_msize {
 /* Bitfields in CFG */
 #define DW_CFG_DMA_EN		(1 << 0)
 
+/* iDMA 32-bit support */
+
+/* Bitfields in CTL_HI */
+#define IDMA32C_CTLH_BLOCK_TS_MASK	GENMASK(16, 0)
+#define IDMA32C_CTLH_BLOCK_TS(x)	((x) & IDMA32C_CTLH_BLOCK_TS_MASK)
+#define IDMA32C_CTLH_DONE		(1 << 17)
+
+/* Bitfields in CFG_LO */
+#define IDMA32C_CFGL_DST_BURST_ALIGN	(1 << 0)	/* dst burst align */
+#define IDMA32C_CFGL_SRC_BURST_ALIGN	(1 << 1)	/* src burst align */
+#define IDMA32C_CFGL_CH_DRAIN		(1 << 10)	/* drain FIFO */
+#define IDMA32C_CFGL_DST_OPT_BL		(1 << 20)	/* optimize dst burst length */
+#define IDMA32C_CFGL_SRC_OPT_BL		(1 << 21)	/* optimize src burst length */
+
+/* Bitfields in CFG_HI */
+#define IDMA32C_CFGH_SRC_PER(x)		((x) << 0)
+#define IDMA32C_CFGH_DST_PER(x)		((x) << 4)
+#define IDMA32C_CFGH_RD_ISSUE_THD(x)	((x) << 8)
+#define IDMA32C_CFGH_RW_ISSUE_THD(x)	((x) << 18)
+#define IDMA32C_CFGH_SRC_PER_EXT(x)	((x) << 28)	/* src peripheral extension */
+#define IDMA32C_CFGH_DST_PER_EXT(x)	((x) << 30)	/* dst peripheral extension */
+
+/* Bitfields in FIFO_PARTITION */
+#define IDMA32C_FP_PSIZE_CH0(x)		((x) << 0)
+#define IDMA32C_FP_PSIZE_CH1(x)		((x) << 13)
+#define IDMA32C_FP_UPDATE		(1 << 26)
+
 enum dw_dmac_flags {
 	DW_DMA_IS_CYCLIC = 0,
 	DW_DMA_IS_SOFT_LLP = 1,
@@ -270,6 +313,7 @@ static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
 
 struct dw_dma {
 	struct dma_device	dma;
+	char			name[20];
 	void __iomem		*regs;
 	struct dma_pool		*desc_pool;
 	struct tasklet_struct	tasklet;
@@ -293,6 +337,11 @@ static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
 #define dma_writel(dw, name, val) \
 	dma_writel_native((val), &(__dw_regs(dw)->name))
 
+#define idma32_readq(dw, name)				\
+	hi_lo_readq(&(__dw_regs(dw)->name))
+#define idma32_writeq(dw, name, val)			\
+	hi_lo_writeq((val), &(__dw_regs(dw)->name))
+
 #define channel_set_bit(dw, reg, mask) \
 	dma_writel(dw, reg, ((mask) << 8) | (mask))
 #define channel_clear_bit(dw, reg, mask) \
diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c
index dd184b50e5b40a508c1bdacee874b6464d2b1055..284627806b887cc2fa58616639be17a244c8a058 100644
--- a/drivers/dma/ipu/ipu_irq.c
+++ b/drivers/dma/ipu/ipu_irq.c
@@ -272,7 +272,7 @@ static void ipu_irq_handler(struct irq_desc *desc)
 	u32 status;
 	int i, line;
 
-	for (i = IPU_IRQ_NR_FN_BANKS; i < IPU_IRQ_NR_BANKS; i++) {
+	for (i = 0; i < IPU_IRQ_NR_BANKS; i++) {
 		struct ipu_irq_bank *bank = irq_bank + i;
 
 		raw_spin_lock(&bank_lock);
diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c
index 4c357d47546594c6bd0c9b1ca16e27c658a720cf..48b22d5c860260988f052c331f0c3050854b288c 100644
--- a/drivers/dma/sh/rcar-dmac.c
+++ b/drivers/dma/sh/rcar-dmac.c
@@ -1724,6 +1724,7 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 
 	dmac->dev = &pdev->dev;
 	platform_set_drvdata(pdev, dmac);
+	dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40));
 
 	ret = rcar_dmac_parse_of(&pdev->dev, dmac);
 	if (ret < 0)
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index 8684d11b29bba16650ff1bf154dc3ce0a16d1984..a6620b671d1d9b4b5b55de388743e1f7091db2c4 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -2809,12 +2809,14 @@ static void __init d40_chan_init(struct d40_base *base, struct dma_device *dma,
 
 static void d40_ops_init(struct d40_base *base, struct dma_device *dev)
 {
-	if (dma_has_cap(DMA_SLAVE, dev->cap_mask))
+	if (dma_has_cap(DMA_SLAVE, dev->cap_mask)) {
 		dev->device_prep_slave_sg = d40_prep_slave_sg;
+		dev->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+	}
 
 	if (dma_has_cap(DMA_MEMCPY, dev->cap_mask)) {
 		dev->device_prep_dma_memcpy = d40_prep_memcpy;
-
+		dev->directions = BIT(DMA_MEM_TO_MEM);
 		/*
 		 * This controller can only access address at even
 		 * 32bit boundaries, i.e. 2^2
@@ -2836,6 +2838,7 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev)
 	dev->device_pause = d40_pause;
 	dev->device_resume = d40_resume;
 	dev->device_terminate_all = d40_terminate_all;
+	dev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	dev->dev = base->dev;
 }
 
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 3056ce7f8c69d01c61fe3ab0eeff6ad299f538f7..49f86cabcfec1e04b6d63f4dfb77d6f4a74654a4 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -114,6 +114,7 @@
 #define STM32_DMA_MAX_CHANNELS		0x08
 #define STM32_DMA_MAX_REQUEST_ID	0x08
 #define STM32_DMA_MAX_DATA_PARAM	0x03
+#define STM32_DMA_MAX_BURST		16
 
 enum stm32_dma_width {
 	STM32_DMA_BYTE,
@@ -403,6 +404,13 @@ static int stm32_dma_terminate_all(struct dma_chan *c)
 	return 0;
 }
 
+static void stm32_dma_synchronize(struct dma_chan *c)
+{
+	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
+
+	vchan_synchronize(&chan->vchan);
+}
+
 static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
 {
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
@@ -421,7 +429,7 @@ static void stm32_dma_dump_reg(struct stm32_dma_chan *chan)
 	dev_dbg(chan2dev(chan), "SFCR:  0x%08x\n", sfcr);
 }
 
-static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
+static void stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 {
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
 	struct virt_dma_desc *vdesc;
@@ -432,12 +440,12 @@ static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 
 	ret = stm32_dma_disable_chan(chan);
 	if (ret < 0)
-		return ret;
+		return;
 
 	if (!chan->desc) {
 		vdesc = vchan_next_desc(&chan->vchan);
 		if (!vdesc)
-			return -EPERM;
+			return;
 
 		chan->desc = to_stm32_dma_desc(vdesc);
 		chan->next_sg = 0;
@@ -471,7 +479,7 @@ static int stm32_dma_start_transfer(struct stm32_dma_chan *chan)
 
 	chan->busy = true;
 
-	return 0;
+	dev_dbg(chan2dev(chan), "vchan %p: started\n", &chan->vchan);
 }
 
 static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
@@ -500,8 +508,6 @@ static void stm32_dma_configure_next_sg(struct stm32_dma_chan *chan)
 			dev_dbg(chan2dev(chan), "CT=0 <=> SM1AR: 0x%08x\n",
 				stm32_dma_read(dmadev, STM32_DMA_SM1AR(id)));
 		}
-
-		chan->next_sg++;
 	}
 }
 
@@ -510,6 +516,7 @@ static void stm32_dma_handle_chan_done(struct stm32_dma_chan *chan)
 	if (chan->desc) {
 		if (chan->desc->cyclic) {
 			vchan_cyclic_callback(&chan->desc->vdesc);
+			chan->next_sg++;
 			stm32_dma_configure_next_sg(chan);
 		} else {
 			chan->busy = false;
@@ -552,15 +559,13 @@ static void stm32_dma_issue_pending(struct dma_chan *c)
 {
 	struct stm32_dma_chan *chan = to_stm32_dma_chan(c);
 	unsigned long flags;
-	int ret;
 
 	spin_lock_irqsave(&chan->vchan.lock, flags);
-	if (!chan->busy) {
-		if (vchan_issue_pending(&chan->vchan) && !chan->desc) {
-			ret = stm32_dma_start_transfer(chan);
-			if ((!ret) && (chan->desc->cyclic))
-				stm32_dma_configure_next_sg(chan);
-		}
+	if (vchan_issue_pending(&chan->vchan) && !chan->desc && !chan->busy) {
+		dev_dbg(chan2dev(chan), "vchan %p: issued\n", &chan->vchan);
+		stm32_dma_start_transfer(chan);
+		if (chan->desc && chan->desc->cyclic) /* start may fail */
+			stm32_dma_configure_next_sg(chan);
 	}
 	spin_unlock_irqrestore(&chan->vchan.lock, flags);
 }
@@ -848,26 +853,40 @@ static struct dma_async_tx_descriptor *stm32_dma_prep_dma_memcpy(
 	return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags);
 }
 
+static u32 stm32_dma_get_remaining_bytes(struct stm32_dma_chan *chan)
+{
+	u32 dma_scr, width, ndtr;
+	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
+
+	dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	width = STM32_DMA_SCR_PSIZE_GET(dma_scr);
+	ndtr = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
+
+	return ndtr << width;
+}
+
 static size_t stm32_dma_desc_residue(struct stm32_dma_chan *chan,
 				     struct stm32_dma_desc *desc,
 				     u32 next_sg)
 {
-	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
-	u32 dma_scr, width, residue, count;
+	u32 residue = 0;
 	int i;
 
-	residue = 0;
+	/*
+	 * In cyclic mode, for the last period, residue = remaining bytes from
+	 * NDTR
+	 */
+	if (chan->desc->cyclic && next_sg == 0)
+		return stm32_dma_get_remaining_bytes(chan);
 
+	/*
+	 * For all other periods in cyclic mode, and in sg mode,
+	 * residue = remaining bytes from NDTR + remaining periods/sg to be
+	 * transferred
+	 */
 	for (i = next_sg; i < desc->num_sgs; i++)
 		residue += desc->sg_req[i].len;
-
-	if (next_sg != 0) {
-		dma_scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
-		width = STM32_DMA_SCR_PSIZE_GET(dma_scr);
-		count = stm32_dma_read(dmadev, STM32_DMA_SNDTR(chan->id));
-
-		residue += count << width;
-	}
+	residue += stm32_dma_get_remaining_bytes(chan);
 
 	return residue;
 }
@@ -964,27 +983,36 @@ static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
 					   struct of_dma *ofdma)
 {
 	struct stm32_dma_device *dmadev = ofdma->of_dma_data;
+	struct device *dev = dmadev->ddev.dev;
 	struct stm32_dma_cfg cfg;
 	struct stm32_dma_chan *chan;
 	struct dma_chan *c;
 
-	if (dma_spec->args_count < 4)
+	if (dma_spec->args_count < 4) {
+		dev_err(dev, "Bad number of cells\n");
 		return NULL;
+	}
 
 	cfg.channel_id = dma_spec->args[0];
 	cfg.request_line = dma_spec->args[1];
 	cfg.stream_config = dma_spec->args[2];
 	cfg.threshold = dma_spec->args[3];
 
-	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) || (cfg.request_line >=
-				STM32_DMA_MAX_REQUEST_ID))
+	if ((cfg.channel_id >= STM32_DMA_MAX_CHANNELS) ||
+	    (cfg.request_line >= STM32_DMA_MAX_REQUEST_ID)) {
+		dev_err(dev, "Bad channel and/or request id\n");
 		return NULL;
+	}
 
 	chan = &dmadev->chan[cfg.channel_id];
 
 	c = dma_get_slave_channel(&chan->vchan.chan);
-	if (c)
-		stm32_dma_set_config(chan, &cfg);
+	if (!c) {
+		dev_err(dev, "No more channel available\n");
+		return NULL;
+	}
+
+	stm32_dma_set_config(chan, &cfg);
 
 	return c;
 }
@@ -1048,6 +1076,7 @@ static int stm32_dma_probe(struct platform_device *pdev)
 	dd->device_prep_dma_cyclic = stm32_dma_prep_dma_cyclic;
 	dd->device_config = stm32_dma_slave_config;
 	dd->device_terminate_all = stm32_dma_terminate_all;
+	dd->device_synchronize = stm32_dma_synchronize;
 	dd->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |
 		BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |
 		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
@@ -1056,6 +1085,7 @@ static int stm32_dma_probe(struct platform_device *pdev)
 		BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
 	dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
 	dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+	dd->max_burst = STM32_DMA_MAX_BURST;
 	dd->dev = &pdev->dev;
 	INIT_LIST_HEAD(&dd->channels);
 
diff --git a/drivers/dma/zx296702_dma.c b/drivers/dma/zx_dma.c
similarity index 99%
rename from drivers/dma/zx296702_dma.c
rename to drivers/dma/zx_dma.c
index 380276d078b2c2cb852bfe8115bb00cb98c4c7ea..2bb695315300d9f9fab6c1b32f8ef85b6486ab05 100644
--- a/drivers/dma/zx296702_dma.c
+++ b/drivers/dma/zx_dma.c
@@ -26,7 +26,7 @@
 
 #define DRIVER_NAME		"zx-dma"
 #define DMA_ALIGN		4
-#define DMA_MAX_SIZE		(0x10000 - PAGE_SIZE)
+#define DMA_MAX_SIZE		(0x10000 - 512)
 #define LLI_BLOCK_SIZE		(4 * PAGE_SIZE)
 
 #define REG_ZX_SRC_ADDR			0x00
@@ -365,7 +365,8 @@ static enum dma_status zx_dma_tx_status(struct dma_chan *chan,
 
 		bytes = 0;
 		clli = zx_dma_get_curr_lli(p);
-		index = (clli - ds->desc_hw_lli) / sizeof(struct zx_desc_hw);
+		index = (clli - ds->desc_hw_lli) /
+				sizeof(struct zx_desc_hw) + 1;
 		for (; index < ds->desc_num; index++) {
 			bytes += ds->desc_hw[index].src_x;
 			/* end of lli */
@@ -812,6 +813,7 @@ static int zx_dma_probe(struct platform_device *op)
 	INIT_LIST_HEAD(&d->slave.channels);
 	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
 	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
 	dma_cap_set(DMA_PRIVATE, d->slave.cap_mask);
 	d->slave.dev = &op->dev;
 	d->slave.device_free_chan_resources = zx_dma_free_chan_resources;
diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h
index 388574ea38ed9d4aa1d644442672ad883feaaef3..28e3cf1465ab6d3034929c0e255be9f3830a49ee 100644
--- a/include/linux/async_tx.h
+++ b/include/linux/async_tx.h
@@ -87,7 +87,7 @@ struct async_submit_ctl {
 	void *scribble;
 };
 
-#ifdef CONFIG_DMA_ENGINE
+#if defined(CONFIG_DMA_ENGINE) && !defined(CONFIG_ASYNC_TX_CHANNEL_SWITCH)
 #define async_tx_issue_pending_all dma_issue_pending_all
 
 /**
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index ccfd0c3777df20d2bd9c6cd1518673643369c1ad..b63b25814d77df827131c608574fa86821040b8a 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -23,6 +23,7 @@ struct dw_dma;
 /**
  * struct dw_dma_chip - representation of DesignWare DMA controller hardware
  * @dev:		struct device of the DMA controller
+ * @id:			instance ID
  * @irq:		irq line
  * @regs:		memory mapped I/O space
  * @clk:		hclk clock
@@ -31,6 +32,7 @@ struct dw_dma;
  */
 struct dw_dma_chip {
 	struct device	*dev;
+	int		id;
 	int		irq;
 	void __iomem	*regs;
 	struct clk	*clk;
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index feee6ec6a13bbba6762618c21812754e3320684d..533680860865a2f6b64d0f73d3fb9040e097657b 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -894,6 +894,17 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memset(
 						    len, flags);
 }
 
+static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	if (!chan || !chan->device || !chan->device->device_prep_dma_memcpy)
+		return NULL;
+
+	return chan->device->device_prep_dma_memcpy(chan, dest, src,
+						    len, flags);
+}
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_sg(
 		struct dma_chan *chan,
 		struct scatterlist *dst_sg, unsigned int dst_nents,
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index e69e415d0d988701c73e20d97756561cab797738..896cb71a382cbf4aa8eff220638310895f0ee965 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -41,6 +41,7 @@ struct dw_dma_slave {
  * @is_private: The device channels should be marked as private and not for
  *	by the general purpose DMA channel allocator.
  * @is_memcpy: The device channels do support memory-to-memory transfers.
+ * @is_idma32: The type of the DMA controller is iDMA32
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
@@ -53,6 +54,7 @@ struct dw_dma_platform_data {
 	unsigned int	nr_channels;
 	bool		is_private;
 	bool		is_memcpy;
+	bool		is_idma32;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;