diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt
index aa005c1d10d95756921ebb946d54d682f72942d5..da6614c6379604bb209156cea87a4e3b27a7d1b8 100644
--- a/Documentation/devicetree/bindings/spi/sh-msiof.txt
+++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt
@@ -10,6 +10,7 @@ Required properties:
 			 "renesas,msiof-r8a7792" (R-Car V2H)
 			 "renesas,msiof-r8a7793" (R-Car M2-N)
 			 "renesas,msiof-r8a7794" (R-Car E2)
+			 "renesas,msiof-r8a7796" (R-Car M3-W)
 			 "renesas,msiof-sh73a0" (SH-Mobile AG5)
 - reg                  : A list of offsets and lengths of the register sets for
 			 the device.
diff --git a/Documentation/devicetree/bindings/spi/spi-armada-3700.txt b/Documentation/devicetree/bindings/spi/spi-armada-3700.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1564aa8c02cdb1dbe1265f5c7ff14ad466acc502
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-armada-3700.txt
@@ -0,0 +1,25 @@
+* Marvell Armada 3700 SPI Controller
+
+Required Properties:
+
+- compatible: should be "marvell,armada-3700-spi"
+- reg: physical base address of the controller and length of memory mapped
+       region.
+- interrupts: The interrupt number. The interrupt specifier format depends on
+	      the interrupt controller and of its driver.
+- clocks: Must contain the clock source, usually from the North Bridge clocks.
+- num-cs: The number of chip selects that is supported by this SPI Controller
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+
+Example:
+
+	spi0: spi@10600 {
+		compatible = "marvell,armada-3700-spi";
+		#address-cells = <1>;
+		#size-cells = <0>;
+		reg = <0x10600 0x5d>;
+		clocks = <&nb_perih_clk 7>;
+		interrupts = <GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>;
+		num-cs = <4>;
+	};
diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt
new file mode 100644
index 0000000000000000000000000000000000000000..225ace1d0c6511250727bb37e2ab862666ae1d79
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-lpspi.txt
@@ -0,0 +1,19 @@
+* Freescale Low Power SPI (LPSPI) for i.MX
+
+Required properties:
+- compatible :
+  - "fsl,imx7ulp-spi" for LPSPI compatible with the one integrated on i.MX7ULP soc
+- reg : address and length of the lpspi master registers
+- interrupt-parent : core interrupt controller
+- interrupts : lpspi interrupt
+- clocks : lpspi clock specifier
+
+Examples:
+
+lpspi2: lpspi@40290000 {
+	compatible = "fsl,imx7ulp-spi";
+	reg = <0x40290000 0x10000>;
+	interrupt-parent = <&intc>;
+	interrupts = <GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&clks IMX7ULP_CLK_LPSPI2>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-sun6i.txt b/Documentation/devicetree/bindings/spi/spi-sun6i.txt
index 21de73db6a05fc5c97df2375c1ce87117fc98c3a..2ec99b86b62297ab9793e143e137982ca6b3c11d 100644
--- a/Documentation/devicetree/bindings/spi/spi-sun6i.txt
+++ b/Documentation/devicetree/bindings/spi/spi-sun6i.txt
@@ -1,7 +1,7 @@
-Allwinner A31 SPI controller
+Allwinner A31/H3 SPI controller
 
 Required properties:
-- compatible: Should be "allwinner,sun6i-a31-spi".
+- compatible: Should be "allwinner,sun6i-a31-spi" or "allwinner,sun8i-h3-spi".
 - reg: Should contain register location and length.
 - interrupts: Should contain interrupt.
 - clocks: phandle to the clocks feeding the SPI controller. Two are
@@ -12,6 +12,11 @@ Required properties:
 - resets: phandle to the reset controller asserting this device in
           reset
 
+Optional properties:
+- dmas: DMA specifiers for rx and tx dma. See the DMA client binding,
+	Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request names should include "rx" and "tx" if present.
+
 Example:
 
 spi1: spi@01c69000 {
@@ -22,3 +27,19 @@ spi1: spi@01c69000 {
 	clock-names = "ahb", "mod";
 	resets = <&ahb1_rst 21>;
 };
+
+spi0: spi@01c68000 {
+	compatible = "allwinner,sun8i-h3-spi";
+	reg = <0x01c68000 0x1000>;
+	interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
+	clocks = <&ccu CLK_BUS_SPI0>, <&ccu CLK_SPI0>;
+	clock-names = "ahb", "mod";
+	dmas = <&dma 23>, <&dma 23>;
+	dma-names = "rx", "tx";
+	pinctrl-names = "default";
+	pinctrl-0 = <&spi0_pins>;
+	resets = <&ccu RST_BUS_SPI0>;
+	status = "disabled";
+	#address-cells = <1>;
+	#size-cells = <0>;
+};
diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig
index b7995474148c7eb91b2c2f87459b0c9bd446d3f2..ec4aa252d6e8c1c761a47246851ad2645382516e 100644
--- a/drivers/spi/Kconfig
+++ b/drivers/spi/Kconfig
@@ -67,6 +67,13 @@ config SPI_ATH79
 	  This enables support for the SPI controller present on the
 	  Atheros AR71XX/AR724X/AR913X SoCs.
 
+config SPI_ARMADA_3700
+	tristate "Marvell Armada 3700 SPI Controller"
+	depends on (ARCH_MVEBU && OF) || COMPILE_TEST
+	help
+	  This enables support for the SPI controller present on the
+	  Marvell Armada 3700 SoCs.
+
 config SPI_ATMEL
 	tristate "Atmel SPI Controller"
 	depends on HAS_DMA
@@ -264,6 +271,12 @@ config SPI_FALCON
 	  has only been tested with m25p80 type chips. The hardware has no
 	  support for other types of SPI peripherals.
 
+config SPI_FSL_LPSPI
+	tristate "Freescale i.MX LPSPI controller"
+	depends on ARCH_MXC || COMPILE_TEST
+	help
+	  This enables Freescale i.MX LPSPI controllers in master mode.
+
 config SPI_GPIO
 	tristate "GPIO-based bitbanging SPI Master"
 	depends on GPIOLIB || COMPILE_TEST
@@ -373,7 +386,6 @@ config SPI_FSL_DSPI
 config SPI_FSL_ESPI
 	tristate "Freescale eSPI controller"
 	depends on FSL_SOC
-	select SPI_FSL_LIB
 	help
 	  This enables using the Freescale eSPI controllers in master mode.
 	  From MPC8536, 85xx platform uses the controller, and all P10xx,
@@ -451,7 +463,8 @@ config SPI_ORION
 	tristate "Orion SPI master"
 	depends on PLAT_ORION || ARCH_MVEBU || COMPILE_TEST
 	help
-	  This enables using the SPI master controller on the Orion chips.
+	  This enables using the SPI master controller on the Orion
+	  and MVEBU chips.
 
 config SPI_PIC32
 	tristate "Microchip PIC32 series SPI"
@@ -553,7 +566,7 @@ config SPI_S3C24XX_FIQ
 
 config SPI_S3C64XX
 	tristate "Samsung S3C64XX series type SPI"
-	depends on (PLAT_SAMSUNG || ARCH_EXYNOS)
+	depends on (PLAT_SAMSUNG || ARCH_EXYNOS || COMPILE_TEST)
 	help
 	  SPI driver for Samsung S3C64XX and newer SoCs.
 
diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile
index aa939d95552134fc05b15f7ee0b142fdb4ba4253..7a6b64662c82c71106f3af94a61190ba1f98a218 100644
--- a/drivers/spi/Makefile
+++ b/drivers/spi/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_SPI_LOOPBACK_TEST)		+= spi-loopback-test.o
 
 # SPI master controller drivers (bus)
 obj-$(CONFIG_SPI_ALTERA)		+= spi-altera.o
+obj-$(CONFIG_SPI_ARMADA_3700)		+= spi-armada-3700.o
 obj-$(CONFIG_SPI_ATMEL)			+= spi-atmel.o
 obj-$(CONFIG_SPI_ATH79)			+= spi-ath79.o
 obj-$(CONFIG_SPI_AU1550)		+= spi-au1550.o
@@ -43,6 +44,7 @@ obj-$(CONFIG_SPI_FSL_CPM)		+= spi-fsl-cpm.o
 obj-$(CONFIG_SPI_FSL_DSPI)		+= spi-fsl-dspi.o
 obj-$(CONFIG_SPI_FSL_LIB)		+= spi-fsl-lib.o
 obj-$(CONFIG_SPI_FSL_ESPI)		+= spi-fsl-espi.o
+obj-$(CONFIG_SPI_FSL_LPSPI)		+= spi-fsl-lpspi.o
 obj-$(CONFIG_SPI_FSL_SPI)		+= spi-fsl-spi.o
 obj-$(CONFIG_SPI_GPIO)			+= spi-gpio.o
 obj-$(CONFIG_SPI_IMG_SPFI)		+= spi-img-spfi.o
diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c
new file mode 100644
index 0000000000000000000000000000000000000000..e89da0af45d2518ef26670f7bbd50b875692872b
--- /dev/null
+++ b/drivers/spi/spi-armada-3700.c
@@ -0,0 +1,923 @@
+/*
+ * Marvell Armada-3700 SPI controller driver
+ *
+ * Copyright (C) 2016 Marvell Ltd.
+ *
+ * Author: Wilson Ding <dingwei@marvell.com>
+ * Author: Romain Perier <romain.perier@free-electrons.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_device.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/spi/spi.h>
+
+#define DRIVER_NAME			"armada_3700_spi"
+
+#define A3700_SPI_TIMEOUT		10
+
+/* SPI Register Offest */
+#define A3700_SPI_IF_CTRL_REG		0x00
+#define A3700_SPI_IF_CFG_REG		0x04
+#define A3700_SPI_DATA_OUT_REG		0x08
+#define A3700_SPI_DATA_IN_REG		0x0C
+#define A3700_SPI_IF_INST_REG		0x10
+#define A3700_SPI_IF_ADDR_REG		0x14
+#define A3700_SPI_IF_RMODE_REG		0x18
+#define A3700_SPI_IF_HDR_CNT_REG	0x1C
+#define A3700_SPI_IF_DIN_CNT_REG	0x20
+#define A3700_SPI_IF_TIME_REG		0x24
+#define A3700_SPI_INT_STAT_REG		0x28
+#define A3700_SPI_INT_MASK_REG		0x2C
+
+/* A3700_SPI_IF_CTRL_REG */
+#define A3700_SPI_EN			BIT(16)
+#define A3700_SPI_ADDR_NOT_CONFIG	BIT(12)
+#define A3700_SPI_WFIFO_OVERFLOW	BIT(11)
+#define A3700_SPI_WFIFO_UNDERFLOW	BIT(10)
+#define A3700_SPI_RFIFO_OVERFLOW	BIT(9)
+#define A3700_SPI_RFIFO_UNDERFLOW	BIT(8)
+#define A3700_SPI_WFIFO_FULL		BIT(7)
+#define A3700_SPI_WFIFO_EMPTY		BIT(6)
+#define A3700_SPI_RFIFO_FULL		BIT(5)
+#define A3700_SPI_RFIFO_EMPTY		BIT(4)
+#define A3700_SPI_WFIFO_RDY		BIT(3)
+#define A3700_SPI_RFIFO_RDY		BIT(2)
+#define A3700_SPI_XFER_RDY		BIT(1)
+#define A3700_SPI_XFER_DONE		BIT(0)
+
+/* A3700_SPI_IF_CFG_REG */
+#define A3700_SPI_WFIFO_THRS		BIT(28)
+#define A3700_SPI_RFIFO_THRS		BIT(24)
+#define A3700_SPI_AUTO_CS		BIT(20)
+#define A3700_SPI_DMA_RD_EN		BIT(18)
+#define A3700_SPI_FIFO_MODE		BIT(17)
+#define A3700_SPI_SRST			BIT(16)
+#define A3700_SPI_XFER_START		BIT(15)
+#define A3700_SPI_XFER_STOP		BIT(14)
+#define A3700_SPI_INST_PIN		BIT(13)
+#define A3700_SPI_ADDR_PIN		BIT(12)
+#define A3700_SPI_DATA_PIN1		BIT(11)
+#define A3700_SPI_DATA_PIN0		BIT(10)
+#define A3700_SPI_FIFO_FLUSH		BIT(9)
+#define A3700_SPI_RW_EN			BIT(8)
+#define A3700_SPI_CLK_POL		BIT(7)
+#define A3700_SPI_CLK_PHA		BIT(6)
+#define A3700_SPI_BYTE_LEN		BIT(5)
+#define A3700_SPI_CLK_PRESCALE		BIT(0)
+#define A3700_SPI_CLK_PRESCALE_MASK	(0x1f)
+
+#define A3700_SPI_WFIFO_THRS_BIT	28
+#define A3700_SPI_RFIFO_THRS_BIT	24
+#define A3700_SPI_FIFO_THRS_MASK	0x7
+
+#define A3700_SPI_DATA_PIN_MASK		0x3
+
+/* A3700_SPI_IF_HDR_CNT_REG */
+#define A3700_SPI_DUMMY_CNT_BIT		12
+#define A3700_SPI_DUMMY_CNT_MASK	0x7
+#define A3700_SPI_RMODE_CNT_BIT		8
+#define A3700_SPI_RMODE_CNT_MASK	0x3
+#define A3700_SPI_ADDR_CNT_BIT		4
+#define A3700_SPI_ADDR_CNT_MASK		0x7
+#define A3700_SPI_INSTR_CNT_BIT		0
+#define A3700_SPI_INSTR_CNT_MASK	0x3
+
+/* A3700_SPI_IF_TIME_REG */
+#define A3700_SPI_CLK_CAPT_EDGE		BIT(7)
+
+/* Flags and macros for struct a3700_spi */
+#define A3700_INSTR_CNT			1
+#define A3700_ADDR_CNT			3
+#define A3700_DUMMY_CNT			1
+
+struct a3700_spi {
+	struct spi_master *master;
+	void __iomem *base;
+	struct clk *clk;
+	unsigned int irq;
+	unsigned int flags;
+	bool xmit_data;
+	const u8 *tx_buf;
+	u8 *rx_buf;
+	size_t buf_len;
+	u8 byte_len;
+	u32 wait_mask;
+	struct completion done;
+	u32 addr_cnt;
+	u32 instr_cnt;
+	size_t hdr_cnt;
+};
+
+static u32 spireg_read(struct a3700_spi *a3700_spi, u32 offset)
+{
+	return readl(a3700_spi->base + offset);
+}
+
+static void spireg_write(struct a3700_spi *a3700_spi, u32 offset, u32 data)
+{
+	writel(data, a3700_spi->base + offset);
+}
+
+static void a3700_spi_auto_cs_unset(struct a3700_spi *a3700_spi)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val &= ~A3700_SPI_AUTO_CS;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+}
+
+static void a3700_spi_activate_cs(struct a3700_spi *a3700_spi, unsigned int cs)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG);
+	val |= (A3700_SPI_EN << cs);
+	spireg_write(a3700_spi, A3700_SPI_IF_CTRL_REG, val);
+}
+
+static void a3700_spi_deactivate_cs(struct a3700_spi *a3700_spi,
+				    unsigned int cs)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG);
+	val &= ~(A3700_SPI_EN << cs);
+	spireg_write(a3700_spi, A3700_SPI_IF_CTRL_REG, val);
+}
+
+static int a3700_spi_pin_mode_set(struct a3700_spi *a3700_spi,
+				  unsigned int pin_mode)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val &= ~(A3700_SPI_INST_PIN | A3700_SPI_ADDR_PIN);
+	val &= ~(A3700_SPI_DATA_PIN0 | A3700_SPI_DATA_PIN1);
+
+	switch (pin_mode) {
+	case 1:
+		break;
+	case 2:
+		val |= A3700_SPI_DATA_PIN0;
+		break;
+	case 4:
+		val |= A3700_SPI_DATA_PIN1;
+		break;
+	default:
+		dev_err(&a3700_spi->master->dev, "wrong pin mode %u", pin_mode);
+		return -EINVAL;
+	}
+
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	return 0;
+}
+
+static void a3700_spi_fifo_mode_set(struct a3700_spi *a3700_spi)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val |= A3700_SPI_FIFO_MODE;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+}
+
+static void a3700_spi_mode_set(struct a3700_spi *a3700_spi,
+			       unsigned int mode_bits)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+
+	if (mode_bits & SPI_CPOL)
+		val |= A3700_SPI_CLK_POL;
+	else
+		val &= ~A3700_SPI_CLK_POL;
+
+	if (mode_bits & SPI_CPHA)
+		val |= A3700_SPI_CLK_PHA;
+	else
+		val &= ~A3700_SPI_CLK_PHA;
+
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+}
+
+static void a3700_spi_clock_set(struct a3700_spi *a3700_spi,
+				unsigned int speed_hz, u16 mode)
+{
+	u32 val;
+	u32 prescale;
+
+	prescale = DIV_ROUND_UP(clk_get_rate(a3700_spi->clk), speed_hz);
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val = val & ~A3700_SPI_CLK_PRESCALE_MASK;
+
+	val = val | (prescale & A3700_SPI_CLK_PRESCALE_MASK);
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	if (prescale <= 2) {
+		val = spireg_read(a3700_spi, A3700_SPI_IF_TIME_REG);
+		val |= A3700_SPI_CLK_CAPT_EDGE;
+		spireg_write(a3700_spi, A3700_SPI_IF_TIME_REG, val);
+	}
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val &= ~(A3700_SPI_CLK_POL | A3700_SPI_CLK_PHA);
+
+	if (mode & SPI_CPOL)
+		val |= A3700_SPI_CLK_POL;
+
+	if (mode & SPI_CPHA)
+		val |= A3700_SPI_CLK_PHA;
+
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+}
+
+static void a3700_spi_bytelen_set(struct a3700_spi *a3700_spi, unsigned int len)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	if (len == 4)
+		val |= A3700_SPI_BYTE_LEN;
+	else
+		val &= ~A3700_SPI_BYTE_LEN;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	a3700_spi->byte_len = len;
+}
+
+static int a3700_spi_fifo_flush(struct a3700_spi *a3700_spi)
+{
+	int timeout = A3700_SPI_TIMEOUT;
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val |= A3700_SPI_FIFO_FLUSH;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	while (--timeout) {
+		val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+		if (!(val & A3700_SPI_FIFO_FLUSH))
+			return 0;
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int a3700_spi_init(struct a3700_spi *a3700_spi)
+{
+	struct spi_master *master = a3700_spi->master;
+	u32 val;
+	int i, ret = 0;
+
+	/* Reset SPI unit */
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val |= A3700_SPI_SRST;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	udelay(A3700_SPI_TIMEOUT);
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val &= ~A3700_SPI_SRST;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	/* Disable AUTO_CS and deactivate all chip-selects */
+	a3700_spi_auto_cs_unset(a3700_spi);
+	for (i = 0; i < master->num_chipselect; i++)
+		a3700_spi_deactivate_cs(a3700_spi, i);
+
+	/* Enable FIFO mode */
+	a3700_spi_fifo_mode_set(a3700_spi);
+
+	/* Set SPI mode */
+	a3700_spi_mode_set(a3700_spi, master->mode_bits);
+
+	/* Reset counters */
+	spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, 0);
+	spireg_write(a3700_spi, A3700_SPI_IF_DIN_CNT_REG, 0);
+
+	/* Mask the interrupts and clear cause bits */
+	spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0);
+	spireg_write(a3700_spi, A3700_SPI_INT_STAT_REG, ~0U);
+
+	return ret;
+}
+
+static irqreturn_t a3700_spi_interrupt(int irq, void *dev_id)
+{
+	struct spi_master *master = dev_id;
+	struct a3700_spi *a3700_spi;
+	u32 cause;
+
+	a3700_spi = spi_master_get_devdata(master);
+
+	/* Get interrupt causes */
+	cause = spireg_read(a3700_spi, A3700_SPI_INT_STAT_REG);
+
+	if (!cause || !(a3700_spi->wait_mask & cause))
+		return IRQ_NONE;
+
+	/* mask and acknowledge the SPI interrupts */
+	spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0);
+	spireg_write(a3700_spi, A3700_SPI_INT_STAT_REG, cause);
+
+	/* Wake up the transfer */
+	if (a3700_spi->wait_mask & cause)
+		complete(&a3700_spi->done);
+
+	return IRQ_HANDLED;
+}
+
+static bool a3700_spi_wait_completion(struct spi_device *spi)
+{
+	struct a3700_spi *a3700_spi;
+	unsigned int timeout;
+	unsigned int ctrl_reg;
+	unsigned long timeout_jiffies;
+
+	a3700_spi = spi_master_get_devdata(spi->master);
+
+	/* SPI interrupt is edge-triggered, which means an interrupt will
+	 * be generated only when detecting a specific status bit changed
+	 * from '0' to '1'. So when we start waiting for a interrupt, we
+	 * need to check status bit in control reg first, if it is already 1,
+	 * then we do not need to wait for interrupt
+	 */
+	ctrl_reg = spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG);
+	if (a3700_spi->wait_mask & ctrl_reg)
+		return true;
+
+	reinit_completion(&a3700_spi->done);
+
+	spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG,
+		     a3700_spi->wait_mask);
+
+	timeout_jiffies = msecs_to_jiffies(A3700_SPI_TIMEOUT);
+	timeout = wait_for_completion_timeout(&a3700_spi->done,
+					      timeout_jiffies);
+
+	a3700_spi->wait_mask = 0;
+
+	if (timeout)
+		return true;
+
+	/* there might be the case that right after we checked the
+	 * status bits in this routine and before start to wait for
+	 * interrupt by wait_for_completion_timeout, the interrupt
+	 * happens, to avoid missing it we need to double check
+	 * status bits in control reg, if it is already 1, then
+	 * consider that we have the interrupt successfully and
+	 * return true.
+	 */
+	ctrl_reg = spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG);
+	if (a3700_spi->wait_mask & ctrl_reg)
+		return true;
+
+	spireg_write(a3700_spi, A3700_SPI_INT_MASK_REG, 0);
+
+	return true;
+}
+
+static bool a3700_spi_transfer_wait(struct spi_device *spi,
+				    unsigned int bit_mask)
+{
+	struct a3700_spi *a3700_spi;
+
+	a3700_spi = spi_master_get_devdata(spi->master);
+	a3700_spi->wait_mask = bit_mask;
+
+	return a3700_spi_wait_completion(spi);
+}
+
+static void a3700_spi_fifo_thres_set(struct a3700_spi *a3700_spi,
+				     unsigned int bytes)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val &= ~(A3700_SPI_FIFO_THRS_MASK << A3700_SPI_RFIFO_THRS_BIT);
+	val |= (bytes - 1) << A3700_SPI_RFIFO_THRS_BIT;
+	val &= ~(A3700_SPI_FIFO_THRS_MASK << A3700_SPI_WFIFO_THRS_BIT);
+	val |= (7 - bytes) << A3700_SPI_WFIFO_THRS_BIT;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+}
+
+static void a3700_spi_transfer_setup(struct spi_device *spi,
+				    struct spi_transfer *xfer)
+{
+	struct a3700_spi *a3700_spi;
+	unsigned int byte_len;
+
+	a3700_spi = spi_master_get_devdata(spi->master);
+
+	a3700_spi_clock_set(a3700_spi, xfer->speed_hz, spi->mode);
+
+	byte_len = xfer->bits_per_word >> 3;
+
+	a3700_spi_fifo_thres_set(a3700_spi, byte_len);
+}
+
+static void a3700_spi_set_cs(struct spi_device *spi, bool enable)
+{
+	struct a3700_spi *a3700_spi = spi_master_get_devdata(spi->master);
+
+	if (!enable)
+		a3700_spi_activate_cs(a3700_spi, spi->chip_select);
+	else
+		a3700_spi_deactivate_cs(a3700_spi, spi->chip_select);
+}
+
+static void a3700_spi_header_set(struct a3700_spi *a3700_spi)
+{
+	u32 instr_cnt = 0, addr_cnt = 0, dummy_cnt = 0;
+	u32 val = 0;
+
+	/* Clear the header registers */
+	spireg_write(a3700_spi, A3700_SPI_IF_INST_REG, 0);
+	spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, 0);
+	spireg_write(a3700_spi, A3700_SPI_IF_RMODE_REG, 0);
+
+	/* Set header counters */
+	if (a3700_spi->tx_buf) {
+		if (a3700_spi->buf_len <= a3700_spi->instr_cnt) {
+			instr_cnt = a3700_spi->buf_len;
+		} else if (a3700_spi->buf_len <= (a3700_spi->instr_cnt +
+						  a3700_spi->addr_cnt)) {
+			instr_cnt = a3700_spi->instr_cnt;
+			addr_cnt = a3700_spi->buf_len - instr_cnt;
+		} else if (a3700_spi->buf_len <= a3700_spi->hdr_cnt) {
+			instr_cnt = a3700_spi->instr_cnt;
+			addr_cnt = a3700_spi->addr_cnt;
+			/* Need to handle the normal write case with 1 byte
+			 * data
+			 */
+			if (!a3700_spi->tx_buf[instr_cnt + addr_cnt])
+				dummy_cnt = a3700_spi->buf_len - instr_cnt -
+					    addr_cnt;
+		}
+		val |= ((instr_cnt & A3700_SPI_INSTR_CNT_MASK)
+			<< A3700_SPI_INSTR_CNT_BIT);
+		val |= ((addr_cnt & A3700_SPI_ADDR_CNT_MASK)
+			<< A3700_SPI_ADDR_CNT_BIT);
+		val |= ((dummy_cnt & A3700_SPI_DUMMY_CNT_MASK)
+			<< A3700_SPI_DUMMY_CNT_BIT);
+	}
+	spireg_write(a3700_spi, A3700_SPI_IF_HDR_CNT_REG, val);
+
+	/* Update the buffer length to be transferred */
+	a3700_spi->buf_len -= (instr_cnt + addr_cnt + dummy_cnt);
+
+	/* Set Instruction */
+	val = 0;
+	while (instr_cnt--) {
+		val = (val << 8) | a3700_spi->tx_buf[0];
+		a3700_spi->tx_buf++;
+	}
+	spireg_write(a3700_spi, A3700_SPI_IF_INST_REG, val);
+
+	/* Set Address */
+	val = 0;
+	while (addr_cnt--) {
+		val = (val << 8) | a3700_spi->tx_buf[0];
+		a3700_spi->tx_buf++;
+	}
+	spireg_write(a3700_spi, A3700_SPI_IF_ADDR_REG, val);
+}
+
+static int a3700_is_wfifo_full(struct a3700_spi *a3700_spi)
+{
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG);
+	return (val & A3700_SPI_WFIFO_FULL);
+}
+
+static int a3700_spi_fifo_write(struct a3700_spi *a3700_spi)
+{
+	u32 val;
+	int i = 0;
+
+	while (!a3700_is_wfifo_full(a3700_spi) && a3700_spi->buf_len) {
+		val = 0;
+		if (a3700_spi->buf_len >= 4) {
+			val = cpu_to_le32(*(u32 *)a3700_spi->tx_buf);
+			spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG, val);
+
+			a3700_spi->buf_len -= 4;
+			a3700_spi->tx_buf += 4;
+		} else {
+			/*
+			 * If the remained buffer length is less than 4-bytes,
+			 * we should pad the write buffer with all ones. So that
+			 * it avoids overwrite the unexpected bytes following
+			 * the last one.
+			 */
+			val = GENMASK(31, 0);
+			while (a3700_spi->buf_len) {
+				val &= ~(0xff << (8 * i));
+				val |= *a3700_spi->tx_buf++ << (8 * i);
+				i++;
+				a3700_spi->buf_len--;
+
+				spireg_write(a3700_spi, A3700_SPI_DATA_OUT_REG,
+					     val);
+			}
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int a3700_is_rfifo_empty(struct a3700_spi *a3700_spi)
+{
+	u32 val = spireg_read(a3700_spi, A3700_SPI_IF_CTRL_REG);
+
+	return (val & A3700_SPI_RFIFO_EMPTY);
+}
+
+static int a3700_spi_fifo_read(struct a3700_spi *a3700_spi)
+{
+	u32 val;
+
+	while (!a3700_is_rfifo_empty(a3700_spi) && a3700_spi->buf_len) {
+		val = spireg_read(a3700_spi, A3700_SPI_DATA_IN_REG);
+		if (a3700_spi->buf_len >= 4) {
+			u32 data = le32_to_cpu(val);
+			memcpy(a3700_spi->rx_buf, &data, 4);
+
+			a3700_spi->buf_len -= 4;
+			a3700_spi->rx_buf += 4;
+		} else {
+			/*
+			 * When remain bytes is not larger than 4, we should
+			 * avoid memory overwriting and just write the left rx
+			 * buffer bytes.
+			 */
+			while (a3700_spi->buf_len) {
+				*a3700_spi->rx_buf = val & 0xff;
+				val >>= 8;
+
+				a3700_spi->buf_len--;
+				a3700_spi->rx_buf++;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static void a3700_spi_transfer_abort_fifo(struct a3700_spi *a3700_spi)
+{
+	int timeout = A3700_SPI_TIMEOUT;
+	u32 val;
+
+	val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+	val |= A3700_SPI_XFER_STOP;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+	while (--timeout) {
+		val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+		if (!(val & A3700_SPI_XFER_START))
+			break;
+		udelay(1);
+	}
+
+	a3700_spi_fifo_flush(a3700_spi);
+
+	val &= ~A3700_SPI_XFER_STOP;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+}
+
+static int a3700_spi_prepare_message(struct spi_master *master,
+				     struct spi_message *message)
+{
+	struct a3700_spi *a3700_spi = spi_master_get_devdata(master);
+	struct spi_device *spi = message->spi;
+	int ret;
+
+	ret = clk_enable(a3700_spi->clk);
+	if (ret) {
+		dev_err(&spi->dev, "failed to enable clk with error %d\n", ret);
+		return ret;
+	}
+
+	/* Flush the FIFOs */
+	ret = a3700_spi_fifo_flush(a3700_spi);
+	if (ret)
+		return ret;
+
+	a3700_spi_bytelen_set(a3700_spi, 4);
+
+	return 0;
+}
+
+static int a3700_spi_transfer_one(struct spi_master *master,
+				  struct spi_device *spi,
+				  struct spi_transfer *xfer)
+{
+	struct a3700_spi *a3700_spi = spi_master_get_devdata(master);
+	int ret = 0, timeout = A3700_SPI_TIMEOUT;
+	unsigned int nbits = 0;
+	u32 val;
+
+	a3700_spi_transfer_setup(spi, xfer);
+
+	a3700_spi->tx_buf  = xfer->tx_buf;
+	a3700_spi->rx_buf  = xfer->rx_buf;
+	a3700_spi->buf_len = xfer->len;
+
+	/* SPI transfer headers */
+	a3700_spi_header_set(a3700_spi);
+
+	if (xfer->tx_buf)
+		nbits = xfer->tx_nbits;
+	else if (xfer->rx_buf)
+		nbits = xfer->rx_nbits;
+
+	a3700_spi_pin_mode_set(a3700_spi, nbits);
+
+	if (xfer->rx_buf) {
+		/* Set read data length */
+		spireg_write(a3700_spi, A3700_SPI_IF_DIN_CNT_REG,
+			     a3700_spi->buf_len);
+		/* Start READ transfer */
+		val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+		val &= ~A3700_SPI_RW_EN;
+		val |= A3700_SPI_XFER_START;
+		spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+	} else if (xfer->tx_buf) {
+		/* Start Write transfer */
+		val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+		val |= (A3700_SPI_XFER_START | A3700_SPI_RW_EN);
+		spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+
+		/*
+		 * If there are data to be written to the SPI device, xmit_data
+		 * flag is set true; otherwise the instruction in SPI_INSTR does
+		 * not require data to be written to the SPI device, then
+		 * xmit_data flag is set false.
+		 */
+		a3700_spi->xmit_data = (a3700_spi->buf_len != 0);
+	}
+
+	while (a3700_spi->buf_len) {
+		if (a3700_spi->tx_buf) {
+			/* Wait wfifo ready */
+			if (!a3700_spi_transfer_wait(spi,
+						     A3700_SPI_WFIFO_RDY)) {
+				dev_err(&spi->dev,
+					"wait wfifo ready timed out\n");
+				ret = -ETIMEDOUT;
+				goto error;
+			}
+			/* Fill up the wfifo */
+			ret = a3700_spi_fifo_write(a3700_spi);
+			if (ret)
+				goto error;
+		} else if (a3700_spi->rx_buf) {
+			/* Wait rfifo ready */
+			if (!a3700_spi_transfer_wait(spi,
+						     A3700_SPI_RFIFO_RDY)) {
+				dev_err(&spi->dev,
+					"wait rfifo ready timed out\n");
+				ret = -ETIMEDOUT;
+				goto error;
+			}
+			/* Drain out the rfifo */
+			ret = a3700_spi_fifo_read(a3700_spi);
+			if (ret)
+				goto error;
+		}
+	}
+
+	/*
+	 * Stop a write transfer in fifo mode:
+	 *	- wait all the bytes in wfifo to be shifted out
+	 *	 - set XFER_STOP bit
+	 *	- wait XFER_START bit clear
+	 *	- clear XFER_STOP bit
+	 * Stop a read transfer in fifo mode:
+	 *	- the hardware is to reset the XFER_START bit
+	 *	   after the number of bytes indicated in DIN_CNT
+	 *	   register
+	 *	- just wait XFER_START bit clear
+	 */
+	if (a3700_spi->tx_buf) {
+		if (a3700_spi->xmit_data) {
+			/*
+			 * If there are data written to the SPI device, wait
+			 * until SPI_WFIFO_EMPTY is 1 to wait for all data to
+			 * transfer out of write FIFO.
+			 */
+			if (!a3700_spi_transfer_wait(spi,
+						     A3700_SPI_WFIFO_EMPTY)) {
+				dev_err(&spi->dev, "wait wfifo empty timed out\n");
+				return -ETIMEDOUT;
+			}
+		} else {
+			/*
+			 * If the instruction in SPI_INSTR does not require data
+			 * to be written to the SPI device, wait until SPI_RDY
+			 * is 1 for the SPI interface to be in idle.
+			 */
+			if (!a3700_spi_transfer_wait(spi, A3700_SPI_XFER_RDY)) {
+				dev_err(&spi->dev, "wait xfer ready timed out\n");
+				return -ETIMEDOUT;
+			}
+		}
+
+		val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+		val |= A3700_SPI_XFER_STOP;
+		spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+	}
+
+	while (--timeout) {
+		val = spireg_read(a3700_spi, A3700_SPI_IF_CFG_REG);
+		if (!(val & A3700_SPI_XFER_START))
+			break;
+		udelay(1);
+	}
+
+	if (timeout == 0) {
+		dev_err(&spi->dev, "wait transfer start clear timed out\n");
+		ret = -ETIMEDOUT;
+		goto error;
+	}
+
+	val &= ~A3700_SPI_XFER_STOP;
+	spireg_write(a3700_spi, A3700_SPI_IF_CFG_REG, val);
+	goto out;
+
+error:
+	a3700_spi_transfer_abort_fifo(a3700_spi);
+out:
+	spi_finalize_current_transfer(master);
+
+	return ret;
+}
+
+static int a3700_spi_unprepare_message(struct spi_master *master,
+				       struct spi_message *message)
+{
+	struct a3700_spi *a3700_spi = spi_master_get_devdata(master);
+
+	clk_disable(a3700_spi->clk);
+
+	return 0;
+}
+
+static const struct of_device_id a3700_spi_dt_ids[] = {
+	{ .compatible = "marvell,armada-3700-spi", .data = NULL },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, a3700_spi_dt_ids);
+
+static int a3700_spi_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *of_node = dev->of_node;
+	struct resource *res;
+	struct spi_master *master;
+	struct a3700_spi *spi;
+	u32 num_cs = 0;
+	int ret = 0;
+
+	master = spi_alloc_master(dev, sizeof(*spi));
+	if (!master) {
+		dev_err(dev, "master allocation failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (of_property_read_u32(of_node, "num-cs", &num_cs)) {
+		dev_err(dev, "could not find num-cs\n");
+		ret = -ENXIO;
+		goto error;
+	}
+
+	master->bus_num = pdev->id;
+	master->dev.of_node = of_node;
+	master->mode_bits = SPI_MODE_3;
+	master->num_chipselect = num_cs;
+	master->bits_per_word_mask = SPI_BPW_MASK(8) | SPI_BPW_MASK(32);
+	master->prepare_message =  a3700_spi_prepare_message;
+	master->transfer_one = a3700_spi_transfer_one;
+	master->unprepare_message = a3700_spi_unprepare_message;
+	master->set_cs = a3700_spi_set_cs;
+	master->flags = SPI_MASTER_HALF_DUPLEX;
+	master->mode_bits |= (SPI_RX_DUAL | SPI_RX_DUAL |
+			      SPI_RX_QUAD | SPI_TX_QUAD);
+
+	platform_set_drvdata(pdev, master);
+
+	spi = spi_master_get_devdata(master);
+	memset(spi, 0, sizeof(struct a3700_spi));
+
+	spi->master = master;
+	spi->instr_cnt = A3700_INSTR_CNT;
+	spi->addr_cnt = A3700_ADDR_CNT;
+	spi->hdr_cnt = A3700_INSTR_CNT + A3700_ADDR_CNT +
+		       A3700_DUMMY_CNT;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	spi->base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(spi->base)) {
+		ret = PTR_ERR(spi->base);
+		goto error;
+	}
+
+	spi->irq = platform_get_irq(pdev, 0);
+	if (spi->irq < 0) {
+		dev_err(dev, "could not get irq: %d\n", spi->irq);
+		ret = -ENXIO;
+		goto error;
+	}
+
+	init_completion(&spi->done);
+
+	spi->clk = devm_clk_get(dev, NULL);
+	if (IS_ERR(spi->clk)) {
+		dev_err(dev, "could not find clk: %ld\n", PTR_ERR(spi->clk));
+		goto error;
+	}
+
+	ret = clk_prepare(spi->clk);
+	if (ret) {
+		dev_err(dev, "could not prepare clk: %d\n", ret);
+		goto error;
+	}
+
+	ret = a3700_spi_init(spi);
+	if (ret)
+		goto error_clk;
+
+	ret = devm_request_irq(dev, spi->irq, a3700_spi_interrupt, 0,
+			       dev_name(dev), master);
+	if (ret) {
+		dev_err(dev, "could not request IRQ: %d\n", ret);
+		goto error_clk;
+	}
+
+	ret = devm_spi_register_master(dev, master);
+	if (ret) {
+		dev_err(dev, "Failed to register master\n");
+		goto error_clk;
+	}
+
+	return 0;
+
+error_clk:
+	clk_disable_unprepare(spi->clk);
+error:
+	spi_master_put(master);
+out:
+	return ret;
+}
+
+static int a3700_spi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct a3700_spi *spi = spi_master_get_devdata(master);
+
+	clk_unprepare(spi->clk);
+	spi_master_put(master);
+
+	return 0;
+}
+
+static struct platform_driver a3700_spi_driver = {
+	.driver = {
+		.name	= DRIVER_NAME,
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(a3700_spi_dt_ids),
+	},
+	.probe		= a3700_spi_probe,
+	.remove		= a3700_spi_remove,
+};
+
+module_platform_driver(a3700_spi_driver);
+
+MODULE_DESCRIPTION("Armada-3700 SPI driver");
+MODULE_AUTHOR("Wilson Ding <dingwei@marvell.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/spi/spi-ath79.c b/drivers/spi/spi-ath79.c
index 6165bf21d42750af1f765f2e79bc9a9a51ea734c..f369174fbd886fbbc09a13af5f9dbce66d498635 100644
--- a/drivers/spi/spi-ath79.c
+++ b/drivers/spi/spi-ath79.c
@@ -304,6 +304,7 @@ static const struct of_device_id ath79_spi_of_match[] = {
 	{ .compatible = "qca,ar7100-spi", },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, ath79_spi_of_match);
 
 static struct platform_driver ath79_spi_driver = {
 	.probe		= ath79_spi_probe,
diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c
index 8feac599e9ab4bfe886b8da89960f04ed32c1aba..0e7712bac3b6e1411e49890a5668c068371631f0 100644
--- a/drivers/spi/spi-atmel.c
+++ b/drivers/spi/spi-atmel.c
@@ -24,6 +24,7 @@
 
 #include <linux/io.h>
 #include <linux/gpio.h>
+#include <linux/of_gpio.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
 
@@ -264,17 +265,6 @@
 
 #define AUTOSUSPEND_TIMEOUT	2000
 
-struct atmel_spi_dma {
-	struct dma_chan			*chan_rx;
-	struct dma_chan			*chan_tx;
-	struct scatterlist		sgrx;
-	struct scatterlist		sgtx;
-	struct dma_async_tx_descriptor	*data_desc_rx;
-	struct dma_async_tx_descriptor	*data_desc_tx;
-
-	struct at_dma_slave	dma_slave;
-};
-
 struct atmel_spi_caps {
 	bool	is_spi2;
 	bool	has_wdrbt;
@@ -295,6 +285,7 @@ struct atmel_spi {
 	int			irq;
 	struct clk		*clk;
 	struct platform_device	*pdev;
+	unsigned long		spi_clk;
 
 	struct spi_transfer	*current_transfer;
 	int			current_remaining_bytes;
@@ -302,17 +293,11 @@ struct atmel_spi {
 
 	struct completion	xfer_completion;
 
-	/* scratch buffer */
-	void			*buffer;
-	dma_addr_t		buffer_dma;
-
 	struct atmel_spi_caps	caps;
 
 	bool			use_dma;
 	bool			use_pdc;
 	bool			use_cs_gpios;
-	/* dmaengine data */
-	struct atmel_spi_dma	dma;
 
 	bool			keep_cs;
 	bool			cs_active;
@@ -326,7 +311,7 @@ struct atmel_spi_device {
 	u32			csr;
 };
 
-#define BUFFER_SIZE		PAGE_SIZE
+#define SPI_MAX_DMA_XFER	65535 /* true for both PDC and DMA */
 #define INVALID_DMA_ADDRESS	0xffffffff
 
 /*
@@ -456,10 +441,20 @@ static inline bool atmel_spi_use_dma(struct atmel_spi *as,
 	return as->use_dma && xfer->len >= DMA_MIN_BYTES;
 }
 
+static bool atmel_spi_can_dma(struct spi_master *master,
+			      struct spi_device *spi,
+			      struct spi_transfer *xfer)
+{
+	struct atmel_spi *as = spi_master_get_devdata(master);
+
+	return atmel_spi_use_dma(as, xfer);
+}
+
 static int atmel_spi_dma_slave_config(struct atmel_spi *as,
 				struct dma_slave_config *slave_config,
 				u8 bits_per_word)
 {
+	struct spi_master *master = platform_get_drvdata(as->pdev);
 	int err = 0;
 
 	if (bits_per_word > 8) {
@@ -491,7 +486,7 @@ static int atmel_spi_dma_slave_config(struct atmel_spi *as,
 	 * path works the same whether FIFOs are available (and enabled) or not.
 	 */
 	slave_config->direction = DMA_MEM_TO_DEV;
-	if (dmaengine_slave_config(as->dma.chan_tx, slave_config)) {
+	if (dmaengine_slave_config(master->dma_tx, slave_config)) {
 		dev_err(&as->pdev->dev,
 			"failed to configure tx dma channel\n");
 		err = -EINVAL;
@@ -506,7 +501,7 @@ static int atmel_spi_dma_slave_config(struct atmel_spi *as,
 	 * enabled) or not.
 	 */
 	slave_config->direction = DMA_DEV_TO_MEM;
-	if (dmaengine_slave_config(as->dma.chan_rx, slave_config)) {
+	if (dmaengine_slave_config(master->dma_rx, slave_config)) {
 		dev_err(&as->pdev->dev,
 			"failed to configure rx dma channel\n");
 		err = -EINVAL;
@@ -515,7 +510,8 @@ static int atmel_spi_dma_slave_config(struct atmel_spi *as,
 	return err;
 }
 
-static int atmel_spi_configure_dma(struct atmel_spi *as)
+static int atmel_spi_configure_dma(struct spi_master *master,
+				   struct atmel_spi *as)
 {
 	struct dma_slave_config	slave_config;
 	struct device *dev = &as->pdev->dev;
@@ -525,26 +521,26 @@ static int atmel_spi_configure_dma(struct atmel_spi *as)
 	dma_cap_zero(mask);
 	dma_cap_set(DMA_SLAVE, mask);
 
-	as->dma.chan_tx = dma_request_slave_channel_reason(dev, "tx");
-	if (IS_ERR(as->dma.chan_tx)) {
-		err = PTR_ERR(as->dma.chan_tx);
+	master->dma_tx = dma_request_slave_channel_reason(dev, "tx");
+	if (IS_ERR(master->dma_tx)) {
+		err = PTR_ERR(master->dma_tx);
 		if (err == -EPROBE_DEFER) {
 			dev_warn(dev, "no DMA channel available at the moment\n");
-			return err;
+			goto error_clear;
 		}
 		dev_err(dev,
 			"DMA TX channel not available, SPI unable to use DMA\n");
 		err = -EBUSY;
-		goto error;
+		goto error_clear;
 	}
 
 	/*
 	 * No reason to check EPROBE_DEFER here since we have already requested
 	 * tx channel. If it fails here, it's for another reason.
 	 */
-	as->dma.chan_rx = dma_request_slave_channel(dev, "rx");
+	master->dma_rx = dma_request_slave_channel(dev, "rx");
 
-	if (!as->dma.chan_rx) {
+	if (!master->dma_rx) {
 		dev_err(dev,
 			"DMA RX channel not available, SPI unable to use DMA\n");
 		err = -EBUSY;
@@ -557,31 +553,38 @@ static int atmel_spi_configure_dma(struct atmel_spi *as)
 
 	dev_info(&as->pdev->dev,
 			"Using %s (tx) and %s (rx) for DMA transfers\n",
-			dma_chan_name(as->dma.chan_tx),
-			dma_chan_name(as->dma.chan_rx));
+			dma_chan_name(master->dma_tx),
+			dma_chan_name(master->dma_rx));
+
 	return 0;
 error:
-	if (as->dma.chan_rx)
-		dma_release_channel(as->dma.chan_rx);
-	if (!IS_ERR(as->dma.chan_tx))
-		dma_release_channel(as->dma.chan_tx);
+	if (master->dma_rx)
+		dma_release_channel(master->dma_rx);
+	if (!IS_ERR(master->dma_tx))
+		dma_release_channel(master->dma_tx);
+error_clear:
+	master->dma_tx = master->dma_rx = NULL;
 	return err;
 }
 
-static void atmel_spi_stop_dma(struct atmel_spi *as)
+static void atmel_spi_stop_dma(struct spi_master *master)
 {
-	if (as->dma.chan_rx)
-		dmaengine_terminate_all(as->dma.chan_rx);
-	if (as->dma.chan_tx)
-		dmaengine_terminate_all(as->dma.chan_tx);
+	if (master->dma_rx)
+		dmaengine_terminate_all(master->dma_rx);
+	if (master->dma_tx)
+		dmaengine_terminate_all(master->dma_tx);
 }
 
-static void atmel_spi_release_dma(struct atmel_spi *as)
+static void atmel_spi_release_dma(struct spi_master *master)
 {
-	if (as->dma.chan_rx)
-		dma_release_channel(as->dma.chan_rx);
-	if (as->dma.chan_tx)
-		dma_release_channel(as->dma.chan_tx);
+	if (master->dma_rx) {
+		dma_release_channel(master->dma_rx);
+		master->dma_rx = NULL;
+	}
+	if (master->dma_tx) {
+		dma_release_channel(master->dma_tx);
+		master->dma_tx = NULL;
+	}
 }
 
 /* This function is called by the DMA driver from tasklet context */
@@ -611,14 +614,10 @@ static void atmel_spi_next_xfer_single(struct spi_master *master,
 		cpu_relax();
 	}
 
-	if (xfer->tx_buf) {
-		if (xfer->bits_per_word > 8)
-			spi_writel(as, TDR, *(u16 *)(xfer->tx_buf + xfer_pos));
-		else
-			spi_writel(as, TDR, *(u8 *)(xfer->tx_buf + xfer_pos));
-	} else {
-		spi_writel(as, TDR, 0);
-	}
+	if (xfer->bits_per_word > 8)
+		spi_writel(as, TDR, *(u16 *)(xfer->tx_buf + xfer_pos));
+	else
+		spi_writel(as, TDR, *(u8 *)(xfer->tx_buf + xfer_pos));
 
 	dev_dbg(master->dev.parent,
 		"  start pio xfer %p: len %u tx %p rx %p bitpw %d\n",
@@ -665,17 +664,12 @@ static void atmel_spi_next_xfer_fifo(struct spi_master *master,
 
 	/* Fill TX FIFO */
 	while (num_data >= 2) {
-		if (xfer->tx_buf) {
-			if (xfer->bits_per_word > 8) {
-				td0 = *words++;
-				td1 = *words++;
-			} else {
-				td0 = *bytes++;
-				td1 = *bytes++;
-			}
+		if (xfer->bits_per_word > 8) {
+			td0 = *words++;
+			td1 = *words++;
 		} else {
-			td0 = 0;
-			td1 = 0;
+			td0 = *bytes++;
+			td1 = *bytes++;
 		}
 
 		spi_writel(as, TDR, (td1 << 16) | td0);
@@ -683,14 +677,10 @@ static void atmel_spi_next_xfer_fifo(struct spi_master *master,
 	}
 
 	if (num_data) {
-		if (xfer->tx_buf) {
-			if (xfer->bits_per_word > 8)
-				td0 = *words++;
-			else
-				td0 = *bytes++;
-		} else {
-			td0 = 0;
-		}
+		if (xfer->bits_per_word > 8)
+			td0 = *words++;
+		else
+			td0 = *bytes++;
 
 		spi_writew(as, TDR, td0);
 		num_data--;
@@ -730,13 +720,12 @@ static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 				u32 *plen)
 {
 	struct atmel_spi	*as = spi_master_get_devdata(master);
-	struct dma_chan		*rxchan = as->dma.chan_rx;
-	struct dma_chan		*txchan = as->dma.chan_tx;
+	struct dma_chan		*rxchan = master->dma_rx;
+	struct dma_chan		*txchan = master->dma_tx;
 	struct dma_async_tx_descriptor *rxdesc;
 	struct dma_async_tx_descriptor *txdesc;
 	struct dma_slave_config	slave_config;
 	dma_cookie_t		cookie;
-	u32	len = *plen;
 
 	dev_vdbg(master->dev.parent, "atmel_spi_next_xfer_dma_submit\n");
 
@@ -747,44 +736,22 @@ static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 	/* release lock for DMA operations */
 	atmel_spi_unlock(as);
 
-	/* prepare the RX dma transfer */
-	sg_init_table(&as->dma.sgrx, 1);
-	if (xfer->rx_buf) {
-		as->dma.sgrx.dma_address = xfer->rx_dma + xfer->len - *plen;
-	} else {
-		as->dma.sgrx.dma_address = as->buffer_dma;
-		if (len > BUFFER_SIZE)
-			len = BUFFER_SIZE;
-	}
-
-	/* prepare the TX dma transfer */
-	sg_init_table(&as->dma.sgtx, 1);
-	if (xfer->tx_buf) {
-		as->dma.sgtx.dma_address = xfer->tx_dma + xfer->len - *plen;
-	} else {
-		as->dma.sgtx.dma_address = as->buffer_dma;
-		if (len > BUFFER_SIZE)
-			len = BUFFER_SIZE;
-		memset(as->buffer, 0, len);
-	}
-
-	sg_dma_len(&as->dma.sgtx) = len;
-	sg_dma_len(&as->dma.sgrx) = len;
-
-	*plen = len;
+	*plen = xfer->len;
 
 	if (atmel_spi_dma_slave_config(as, &slave_config,
 				       xfer->bits_per_word))
 		goto err_exit;
 
 	/* Send both scatterlists */
-	rxdesc = dmaengine_prep_slave_sg(rxchan, &as->dma.sgrx, 1,
+	rxdesc = dmaengine_prep_slave_sg(rxchan,
+					 xfer->rx_sg.sgl, xfer->rx_sg.nents,
 					 DMA_FROM_DEVICE,
 					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!rxdesc)
 		goto err_dma;
 
-	txdesc = dmaengine_prep_slave_sg(txchan, &as->dma.sgtx, 1,
+	txdesc = dmaengine_prep_slave_sg(txchan,
+					 xfer->tx_sg.sgl, xfer->tx_sg.nents,
 					 DMA_TO_DEVICE,
 					 DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 	if (!txdesc)
@@ -818,7 +785,7 @@ static int atmel_spi_next_xfer_dma_submit(struct spi_master *master,
 
 err_dma:
 	spi_writel(as, IDR, SPI_BIT(OVRES));
-	atmel_spi_stop_dma(as);
+	atmel_spi_stop_dma(master);
 err_exit:
 	atmel_spi_lock(as);
 	return -ENOMEM;
@@ -830,30 +797,10 @@ static void atmel_spi_next_xfer_data(struct spi_master *master,
 				dma_addr_t *rx_dma,
 				u32 *plen)
 {
-	struct atmel_spi	*as = spi_master_get_devdata(master);
-	u32			len = *plen;
-
-	/* use scratch buffer only when rx or tx data is unspecified */
-	if (xfer->rx_buf)
-		*rx_dma = xfer->rx_dma + xfer->len - *plen;
-	else {
-		*rx_dma = as->buffer_dma;
-		if (len > BUFFER_SIZE)
-			len = BUFFER_SIZE;
-	}
-
-	if (xfer->tx_buf)
-		*tx_dma = xfer->tx_dma + xfer->len - *plen;
-	else {
-		*tx_dma = as->buffer_dma;
-		if (len > BUFFER_SIZE)
-			len = BUFFER_SIZE;
-		memset(as->buffer, 0, len);
-		dma_sync_single_for_device(&as->pdev->dev,
-				as->buffer_dma, len, DMA_TO_DEVICE);
-	}
-
-	*plen = len;
+	*rx_dma = xfer->rx_dma + xfer->len - *plen;
+	*tx_dma = xfer->tx_dma + xfer->len - *plen;
+	if (*plen > master->max_dma_len)
+		*plen = master->max_dma_len;
 }
 
 static int atmel_spi_set_xfer_speed(struct atmel_spi *as,
@@ -864,7 +811,7 @@ static int atmel_spi_set_xfer_speed(struct atmel_spi *as,
 	unsigned long		bus_hz;
 
 	/* v1 chips start out at half the peripheral bus speed. */
-	bus_hz = clk_get_rate(as->clk);
+	bus_hz = as->spi_clk;
 	if (!atmel_spi_is_v2(as))
 		bus_hz /= 2;
 
@@ -1025,16 +972,12 @@ atmel_spi_pump_single_data(struct atmel_spi *as, struct spi_transfer *xfer)
 	u16		*rxp16;
 	unsigned long	xfer_pos = xfer->len - as->current_remaining_bytes;
 
-	if (xfer->rx_buf) {
-		if (xfer->bits_per_word > 8) {
-			rxp16 = (u16 *)(((u8 *)xfer->rx_buf) + xfer_pos);
-			*rxp16 = spi_readl(as, RDR);
-		} else {
-			rxp = ((u8 *)xfer->rx_buf) + xfer_pos;
-			*rxp = spi_readl(as, RDR);
-		}
+	if (xfer->bits_per_word > 8) {
+		rxp16 = (u16 *)(((u8 *)xfer->rx_buf) + xfer_pos);
+		*rxp16 = spi_readl(as, RDR);
 	} else {
-		spi_readl(as, RDR);
+		rxp = ((u8 *)xfer->rx_buf) + xfer_pos;
+		*rxp = spi_readl(as, RDR);
 	}
 	if (xfer->bits_per_word > 8) {
 		if (as->current_remaining_bytes > 2)
@@ -1073,12 +1016,10 @@ atmel_spi_pump_fifo_data(struct atmel_spi *as, struct spi_transfer *xfer)
 	/* Read data */
 	while (num_data) {
 		rd = spi_readl(as, RDR);
-		if (xfer->rx_buf) {
-			if (xfer->bits_per_word > 8)
-				*words++ = rd;
-			else
-				*bytes++ = rd;
-		}
+		if (xfer->bits_per_word > 8)
+			*words++ = rd;
+		else
+			*bytes++ = rd;
 		num_data--;
 	}
 }
@@ -1204,7 +1145,6 @@ static int atmel_spi_setup(struct spi_device *spi)
 	u32			csr;
 	unsigned int		bits = spi->bits_per_word;
 	unsigned int		npcs_pin;
-	int			ret;
 
 	as = spi_master_get_devdata(spi->master);
 
@@ -1247,16 +1187,9 @@ static int atmel_spi_setup(struct spi_device *spi)
 		if (!asd)
 			return -ENOMEM;
 
-		if (as->use_cs_gpios) {
-			ret = gpio_request(npcs_pin, dev_name(&spi->dev));
-			if (ret) {
-				kfree(asd);
-				return ret;
-			}
-
+		if (as->use_cs_gpios)
 			gpio_direction_output(npcs_pin,
 					      !(spi->mode & SPI_CS_HIGH));
-		}
 
 		asd->npcs_pin = npcs_pin;
 		spi->controller_state = asd;
@@ -1307,7 +1240,7 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 	 * better fault reporting.
 	 */
 	if ((!msg->is_dma_mapped)
-		&& (atmel_spi_use_dma(as, xfer)	|| as->use_pdc)) {
+		&& as->use_pdc) {
 		if (atmel_spi_dma_map_xfer(as, xfer) < 0)
 			return -ENOMEM;
 	}
@@ -1380,11 +1313,11 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 			spi_readl(as, SR);
 
 		} else if (atmel_spi_use_dma(as, xfer)) {
-			atmel_spi_stop_dma(as);
+			atmel_spi_stop_dma(master);
 		}
 
 		if (!msg->is_dma_mapped
-			&& (atmel_spi_use_dma(as, xfer) || as->use_pdc))
+			&& as->use_pdc)
 			atmel_spi_dma_unmap_xfer(master, xfer);
 
 		return 0;
@@ -1395,7 +1328,7 @@ static int atmel_spi_one_transfer(struct spi_master *master,
 	}
 
 	if (!msg->is_dma_mapped
-		&& (atmel_spi_use_dma(as, xfer) || as->use_pdc))
+		&& as->use_pdc)
 		atmel_spi_dma_unmap_xfer(master, xfer);
 
 	if (xfer->delay_usecs)
@@ -1471,13 +1404,11 @@ static int atmel_spi_transfer_one_message(struct spi_master *master,
 static void atmel_spi_cleanup(struct spi_device *spi)
 {
 	struct atmel_spi_device	*asd = spi->controller_state;
-	unsigned		gpio = (unsigned long) spi->controller_data;
 
 	if (!asd)
 		return;
 
 	spi->controller_state = NULL;
-	gpio_free(gpio);
 	kfree(asd);
 }
 
@@ -1499,6 +1430,39 @@ static void atmel_get_caps(struct atmel_spi *as)
 }
 
 /*-------------------------------------------------------------------------*/
+static int atmel_spi_gpio_cs(struct platform_device *pdev)
+{
+	struct spi_master	*master = platform_get_drvdata(pdev);
+	struct atmel_spi	*as = spi_master_get_devdata(master);
+	struct device_node	*np = master->dev.of_node;
+	int			i;
+	int			ret = 0;
+	int			nb = 0;
+
+	if (!as->use_cs_gpios)
+		return 0;
+
+	if (!np)
+		return 0;
+
+	nb = of_gpio_named_count(np, "cs-gpios");
+	for (i = 0; i < nb; i++) {
+		int cs_gpio = of_get_named_gpio(pdev->dev.of_node,
+						"cs-gpios", i);
+
+		if (cs_gpio == -EPROBE_DEFER)
+			return cs_gpio;
+
+		if (gpio_is_valid(cs_gpio)) {
+			ret = devm_gpio_request(&pdev->dev, cs_gpio,
+						dev_name(&pdev->dev));
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
 
 static int atmel_spi_probe(struct platform_device *pdev)
 {
@@ -1537,29 +1501,23 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	master->bus_num = pdev->id;
 	master->num_chipselect = master->dev.of_node ? 0 : 4;
 	master->setup = atmel_spi_setup;
+	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
 	master->transfer_one_message = atmel_spi_transfer_one_message;
 	master->cleanup = atmel_spi_cleanup;
 	master->auto_runtime_pm = true;
+	master->max_dma_len = SPI_MAX_DMA_XFER;
+	master->can_dma = atmel_spi_can_dma;
 	platform_set_drvdata(pdev, master);
 
 	as = spi_master_get_devdata(master);
 
-	/*
-	 * Scratch buffer is used for throwaway rx and tx data.
-	 * It's coherent to minimize dcache pollution.
-	 */
-	as->buffer = dma_alloc_coherent(&pdev->dev, BUFFER_SIZE,
-					&as->buffer_dma, GFP_KERNEL);
-	if (!as->buffer)
-		goto out_free;
-
 	spin_lock_init(&as->lock);
 
 	as->pdev = pdev;
 	as->regs = devm_ioremap_resource(&pdev->dev, regs);
 	if (IS_ERR(as->regs)) {
 		ret = PTR_ERR(as->regs);
-		goto out_free_buffer;
+		goto out_unmap_regs;
 	}
 	as->phybase = regs->start;
 	as->irq = irq;
@@ -1577,14 +1535,19 @@ static int atmel_spi_probe(struct platform_device *pdev)
 		master->num_chipselect = 4;
 	}
 
+	ret = atmel_spi_gpio_cs(pdev);
+	if (ret)
+		goto out_unmap_regs;
+
 	as->use_dma = false;
 	as->use_pdc = false;
 	if (as->caps.has_dma_support) {
-		ret = atmel_spi_configure_dma(as);
-		if (ret == 0)
+		ret = atmel_spi_configure_dma(master, as);
+		if (ret == 0) {
 			as->use_dma = true;
-		else if (ret == -EPROBE_DEFER)
+		} else if (ret == -EPROBE_DEFER) {
 			return ret;
+		}
 	} else {
 		as->use_pdc = true;
 	}
@@ -1606,6 +1569,9 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	ret = clk_prepare_enable(clk);
 	if (ret)
 		goto out_free_irq;
+
+	as->spi_clk = clk_get_rate(clk);
+
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
 	if (as->caps.has_wdrbt) {
@@ -1626,10 +1592,6 @@ static int atmel_spi_probe(struct platform_device *pdev)
 		spi_writel(as, CR, SPI_BIT(FIFOEN));
 	}
 
-	/* go! */
-	dev_info(&pdev->dev, "Atmel SPI Controller at 0x%08lx (irq %d)\n",
-			(unsigned long)regs->start, irq);
-
 	pm_runtime_set_autosuspend_delay(&pdev->dev, AUTOSUSPEND_TIMEOUT);
 	pm_runtime_use_autosuspend(&pdev->dev);
 	pm_runtime_set_active(&pdev->dev);
@@ -1639,6 +1601,10 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_free_dma;
 
+	/* go! */
+	dev_info(&pdev->dev, "Atmel SPI Controller at 0x%08lx (irq %d)\n",
+			(unsigned long)regs->start, irq);
+
 	return 0;
 
 out_free_dma:
@@ -1646,16 +1612,13 @@ static int atmel_spi_probe(struct platform_device *pdev)
 	pm_runtime_set_suspended(&pdev->dev);
 
 	if (as->use_dma)
-		atmel_spi_release_dma(as);
+		atmel_spi_release_dma(master);
 
 	spi_writel(as, CR, SPI_BIT(SWRST));
 	spi_writel(as, CR, SPI_BIT(SWRST)); /* AT91SAM9263 Rev B workaround */
 	clk_disable_unprepare(clk);
 out_free_irq:
 out_unmap_regs:
-out_free_buffer:
-	dma_free_coherent(&pdev->dev, BUFFER_SIZE, as->buffer,
-			as->buffer_dma);
 out_free:
 	spi_master_put(master);
 	return ret;
@@ -1671,8 +1634,8 @@ static int atmel_spi_remove(struct platform_device *pdev)
 	/* reset the hardware and block queue progress */
 	spin_lock_irq(&as->lock);
 	if (as->use_dma) {
-		atmel_spi_stop_dma(as);
-		atmel_spi_release_dma(as);
+		atmel_spi_stop_dma(master);
+		atmel_spi_release_dma(master);
 	}
 
 	spi_writel(as, CR, SPI_BIT(SWRST));
@@ -1680,9 +1643,6 @@ static int atmel_spi_remove(struct platform_device *pdev)
 	spi_readl(as, SR);
 	spin_unlock_irq(&as->lock);
 
-	dma_free_coherent(&pdev->dev, BUFFER_SIZE, as->buffer,
-			as->buffer_dma);
-
 	clk_disable_unprepare(as->clk);
 
 	pm_runtime_put_noidle(&pdev->dev);
diff --git a/drivers/spi/spi-axi-spi-engine.c b/drivers/spi/spi-axi-spi-engine.c
index 2b1456e5e2219c19cf5ab83c2b2cd61d987e118f..319225d7e761b0066a062b017c6e6f2104447859 100644
--- a/drivers/spi/spi-axi-spi-engine.c
+++ b/drivers/spi/spi-axi-spi-engine.c
@@ -574,6 +574,7 @@ static const struct of_device_id spi_engine_match_table[] = {
 	{ .compatible = "adi,axi-spi-engine-1.00.a" },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, spi_engine_match_table);
 
 static struct platform_driver spi_engine_driver = {
 	.probe = spi_engine_probe,
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index 27960e46135de0d79ce0ee61946a0095ccf082f1..b715a26a91484fb695088459d9249b367b334fe1 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -502,6 +502,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
 	master->handle_err = dw_spi_handle_err;
 	master->max_speed_hz = dws->max_freq;
 	master->dev.of_node = dev->of_node;
+	master->flags = SPI_MASTER_GPIO_SS;
 
 	/* Basic HW init */
 	spi_hw_init(dev, dws);
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index a67b0ff6a362380e4f62382c12c557f98a127947..14c8e7ce1913b0992ef8d9765c983b86b19aa573 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -15,6 +15,8 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
@@ -40,6 +42,7 @@
 #define TRAN_STATE_WORD_ODD_NUM	0x04
 
 #define DSPI_FIFO_SIZE			4
+#define DSPI_DMA_BUFSIZE		(DSPI_FIFO_SIZE * 1024)
 
 #define SPI_MCR		0x00
 #define SPI_MCR_MASTER		(1 << 31)
@@ -72,6 +75,11 @@
 #define SPI_SR_TCFQF		0x80000000
 #define SPI_SR_CLEAR		0xdaad0000
 
+#define SPI_RSER_TFFFE		BIT(25)
+#define SPI_RSER_TFFFD		BIT(24)
+#define SPI_RSER_RFDFE		BIT(17)
+#define SPI_RSER_RFDFD		BIT(16)
+
 #define SPI_RSER		0x30
 #define SPI_RSER_EOQFE		0x10000000
 #define SPI_RSER_TCFQE		0x80000000
@@ -109,6 +117,8 @@
 
 #define SPI_TCR_TCNT_MAX	0x10000
 
+#define DMA_COMPLETION_TIMEOUT	msecs_to_jiffies(3000)
+
 struct chip_data {
 	u32 mcr_val;
 	u32 ctar_val;
@@ -118,6 +128,7 @@ struct chip_data {
 enum dspi_trans_mode {
 	DSPI_EOQ_MODE = 0,
 	DSPI_TCFQ_MODE,
+	DSPI_DMA_MODE,
 };
 
 struct fsl_dspi_devtype_data {
@@ -126,7 +137,7 @@ struct fsl_dspi_devtype_data {
 };
 
 static const struct fsl_dspi_devtype_data vf610_data = {
-	.trans_mode = DSPI_EOQ_MODE,
+	.trans_mode = DSPI_DMA_MODE,
 	.max_clock_factor = 2,
 };
 
@@ -140,6 +151,23 @@ static const struct fsl_dspi_devtype_data ls2085a_data = {
 	.max_clock_factor = 8,
 };
 
+struct fsl_dspi_dma {
+	/* Length of transfer in words of DSPI_FIFO_SIZE */
+	u32 curr_xfer_len;
+
+	u32 *tx_dma_buf;
+	struct dma_chan *chan_tx;
+	dma_addr_t tx_dma_phys;
+	struct completion cmd_tx_complete;
+	struct dma_async_tx_descriptor *tx_desc;
+
+	u32 *rx_dma_buf;
+	struct dma_chan *chan_rx;
+	dma_addr_t rx_dma_phys;
+	struct completion cmd_rx_complete;
+	struct dma_async_tx_descriptor *rx_desc;
+};
+
 struct fsl_dspi {
 	struct spi_master	*master;
 	struct platform_device	*pdev;
@@ -166,8 +194,11 @@ struct fsl_dspi {
 	u32			waitflags;
 
 	u32			spi_tcnt;
+	struct fsl_dspi_dma	*dma;
 };
 
+static u32 dspi_data_to_pushr(struct fsl_dspi *dspi, int tx_word);
+
 static inline int is_double_byte_mode(struct fsl_dspi *dspi)
 {
 	unsigned int val;
@@ -177,6 +208,255 @@ static inline int is_double_byte_mode(struct fsl_dspi *dspi)
 	return ((val & SPI_FRAME_BITS_MASK) == SPI_FRAME_BITS(8)) ? 0 : 1;
 }
 
+static void dspi_tx_dma_callback(void *arg)
+{
+	struct fsl_dspi *dspi = arg;
+	struct fsl_dspi_dma *dma = dspi->dma;
+
+	complete(&dma->cmd_tx_complete);
+}
+
+static void dspi_rx_dma_callback(void *arg)
+{
+	struct fsl_dspi *dspi = arg;
+	struct fsl_dspi_dma *dma = dspi->dma;
+	int rx_word;
+	int i;
+	u16 d;
+
+	rx_word = is_double_byte_mode(dspi);
+
+	if (!(dspi->dataflags & TRAN_STATE_RX_VOID)) {
+		for (i = 0; i < dma->curr_xfer_len; i++) {
+			d = dspi->dma->rx_dma_buf[i];
+			rx_word ? (*(u16 *)dspi->rx = d) :
+						(*(u8 *)dspi->rx = d);
+			dspi->rx += rx_word + 1;
+		}
+	}
+
+	complete(&dma->cmd_rx_complete);
+}
+
+static int dspi_next_xfer_dma_submit(struct fsl_dspi *dspi)
+{
+	struct fsl_dspi_dma *dma = dspi->dma;
+	struct device *dev = &dspi->pdev->dev;
+	int time_left;
+	int tx_word;
+	int i;
+
+	tx_word = is_double_byte_mode(dspi);
+
+	for (i = 0; i < dma->curr_xfer_len; i++) {
+		dspi->dma->tx_dma_buf[i] = dspi_data_to_pushr(dspi, tx_word);
+		if ((dspi->cs_change) && (!dspi->len))
+			dspi->dma->tx_dma_buf[i] &= ~SPI_PUSHR_CONT;
+	}
+
+	dma->tx_desc = dmaengine_prep_slave_single(dma->chan_tx,
+					dma->tx_dma_phys,
+					dma->curr_xfer_len *
+					DMA_SLAVE_BUSWIDTH_4_BYTES,
+					DMA_MEM_TO_DEV,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!dma->tx_desc) {
+		dev_err(dev, "Not able to get desc for DMA xfer\n");
+		return -EIO;
+	}
+
+	dma->tx_desc->callback = dspi_tx_dma_callback;
+	dma->tx_desc->callback_param = dspi;
+	if (dma_submit_error(dmaengine_submit(dma->tx_desc))) {
+		dev_err(dev, "DMA submit failed\n");
+		return -EINVAL;
+	}
+
+	dma->rx_desc = dmaengine_prep_slave_single(dma->chan_rx,
+					dma->rx_dma_phys,
+					dma->curr_xfer_len *
+					DMA_SLAVE_BUSWIDTH_4_BYTES,
+					DMA_DEV_TO_MEM,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+	if (!dma->rx_desc) {
+		dev_err(dev, "Not able to get desc for DMA xfer\n");
+		return -EIO;
+	}
+
+	dma->rx_desc->callback = dspi_rx_dma_callback;
+	dma->rx_desc->callback_param = dspi;
+	if (dma_submit_error(dmaengine_submit(dma->rx_desc))) {
+		dev_err(dev, "DMA submit failed\n");
+		return -EINVAL;
+	}
+
+	reinit_completion(&dspi->dma->cmd_rx_complete);
+	reinit_completion(&dspi->dma->cmd_tx_complete);
+
+	dma_async_issue_pending(dma->chan_rx);
+	dma_async_issue_pending(dma->chan_tx);
+
+	time_left = wait_for_completion_timeout(&dspi->dma->cmd_tx_complete,
+					DMA_COMPLETION_TIMEOUT);
+	if (time_left == 0) {
+		dev_err(dev, "DMA tx timeout\n");
+		dmaengine_terminate_all(dma->chan_tx);
+		dmaengine_terminate_all(dma->chan_rx);
+		return -ETIMEDOUT;
+	}
+
+	time_left = wait_for_completion_timeout(&dspi->dma->cmd_rx_complete,
+					DMA_COMPLETION_TIMEOUT);
+	if (time_left == 0) {
+		dev_err(dev, "DMA rx timeout\n");
+		dmaengine_terminate_all(dma->chan_tx);
+		dmaengine_terminate_all(dma->chan_rx);
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int dspi_dma_xfer(struct fsl_dspi *dspi)
+{
+	struct fsl_dspi_dma *dma = dspi->dma;
+	struct device *dev = &dspi->pdev->dev;
+	int curr_remaining_bytes;
+	int bytes_per_buffer;
+	int word = 1;
+	int ret = 0;
+
+	if (is_double_byte_mode(dspi))
+		word = 2;
+	curr_remaining_bytes = dspi->len;
+	bytes_per_buffer = DSPI_DMA_BUFSIZE / DSPI_FIFO_SIZE;
+	while (curr_remaining_bytes) {
+		/* Check if current transfer fits the DMA buffer */
+		dma->curr_xfer_len = curr_remaining_bytes / word;
+		if (dma->curr_xfer_len > bytes_per_buffer)
+			dma->curr_xfer_len = bytes_per_buffer;
+
+		ret = dspi_next_xfer_dma_submit(dspi);
+		if (ret) {
+			dev_err(dev, "DMA transfer failed\n");
+			goto exit;
+
+		} else {
+			curr_remaining_bytes -= dma->curr_xfer_len * word;
+			if (curr_remaining_bytes < 0)
+				curr_remaining_bytes = 0;
+		}
+	}
+
+exit:
+	return ret;
+}
+
+static int dspi_request_dma(struct fsl_dspi *dspi, phys_addr_t phy_addr)
+{
+	struct fsl_dspi_dma *dma;
+	struct dma_slave_config cfg;
+	struct device *dev = &dspi->pdev->dev;
+	int ret;
+
+	dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL);
+	if (!dma)
+		return -ENOMEM;
+
+	dma->chan_rx = dma_request_slave_channel(dev, "rx");
+	if (!dma->chan_rx) {
+		dev_err(dev, "rx dma channel not available\n");
+		ret = -ENODEV;
+		return ret;
+	}
+
+	dma->chan_tx = dma_request_slave_channel(dev, "tx");
+	if (!dma->chan_tx) {
+		dev_err(dev, "tx dma channel not available\n");
+		ret = -ENODEV;
+		goto err_tx_channel;
+	}
+
+	dma->tx_dma_buf = dma_alloc_coherent(dev, DSPI_DMA_BUFSIZE,
+					&dma->tx_dma_phys, GFP_KERNEL);
+	if (!dma->tx_dma_buf) {
+		ret = -ENOMEM;
+		goto err_tx_dma_buf;
+	}
+
+	dma->rx_dma_buf = dma_alloc_coherent(dev, DSPI_DMA_BUFSIZE,
+					&dma->rx_dma_phys, GFP_KERNEL);
+	if (!dma->rx_dma_buf) {
+		ret = -ENOMEM;
+		goto err_rx_dma_buf;
+	}
+
+	cfg.src_addr = phy_addr + SPI_POPR;
+	cfg.dst_addr = phy_addr + SPI_PUSHR;
+	cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+	cfg.src_maxburst = 1;
+	cfg.dst_maxburst = 1;
+
+	cfg.direction = DMA_DEV_TO_MEM;
+	ret = dmaengine_slave_config(dma->chan_rx, &cfg);
+	if (ret) {
+		dev_err(dev, "can't configure rx dma channel\n");
+		ret = -EINVAL;
+		goto err_slave_config;
+	}
+
+	cfg.direction = DMA_MEM_TO_DEV;
+	ret = dmaengine_slave_config(dma->chan_tx, &cfg);
+	if (ret) {
+		dev_err(dev, "can't configure tx dma channel\n");
+		ret = -EINVAL;
+		goto err_slave_config;
+	}
+
+	dspi->dma = dma;
+	init_completion(&dma->cmd_tx_complete);
+	init_completion(&dma->cmd_rx_complete);
+
+	return 0;
+
+err_slave_config:
+	dma_free_coherent(dev, DSPI_DMA_BUFSIZE,
+			dma->rx_dma_buf, dma->rx_dma_phys);
+err_rx_dma_buf:
+	dma_free_coherent(dev, DSPI_DMA_BUFSIZE,
+			dma->tx_dma_buf, dma->tx_dma_phys);
+err_tx_dma_buf:
+	dma_release_channel(dma->chan_tx);
+err_tx_channel:
+	dma_release_channel(dma->chan_rx);
+
+	devm_kfree(dev, dma);
+	dspi->dma = NULL;
+
+	return ret;
+}
+
+static void dspi_release_dma(struct fsl_dspi *dspi)
+{
+	struct fsl_dspi_dma *dma = dspi->dma;
+	struct device *dev = &dspi->pdev->dev;
+
+	if (dma) {
+		if (dma->chan_tx) {
+			dma_unmap_single(dev, dma->tx_dma_phys,
+					DSPI_DMA_BUFSIZE, DMA_TO_DEVICE);
+			dma_release_channel(dma->chan_tx);
+		}
+
+		if (dma->chan_rx) {
+			dma_unmap_single(dev, dma->rx_dma_phys,
+					DSPI_DMA_BUFSIZE, DMA_FROM_DEVICE);
+			dma_release_channel(dma->chan_rx);
+		}
+	}
+}
+
 static void hz_to_spi_baud(char *pbr, char *br, int speed_hz,
 		unsigned long clkrate)
 {
@@ -425,6 +705,12 @@ static int dspi_transfer_one_message(struct spi_master *master,
 			regmap_write(dspi->regmap, SPI_RSER, SPI_RSER_TCFQE);
 			dspi_tcfq_write(dspi);
 			break;
+		case DSPI_DMA_MODE:
+			regmap_write(dspi->regmap, SPI_RSER,
+				SPI_RSER_TFFFE | SPI_RSER_TFFFD |
+				SPI_RSER_RFDFE | SPI_RSER_RFDFD);
+			status = dspi_dma_xfer(dspi);
+			break;
 		default:
 			dev_err(&dspi->pdev->dev, "unsupported trans_mode %u\n",
 				trans_mode);
@@ -432,9 +718,13 @@ static int dspi_transfer_one_message(struct spi_master *master,
 			goto out;
 		}
 
-		if (wait_event_interruptible(dspi->waitq, dspi->waitflags))
-			dev_err(&dspi->pdev->dev, "wait transfer complete fail!\n");
-		dspi->waitflags = 0;
+		if (trans_mode != DSPI_DMA_MODE) {
+			if (wait_event_interruptible(dspi->waitq,
+						dspi->waitflags))
+				dev_err(&dspi->pdev->dev,
+					"wait transfer complete fail!\n");
+			dspi->waitflags = 0;
+		}
 
 		if (transfer->delay_usecs)
 			udelay(transfer->delay_usecs);
@@ -740,6 +1030,13 @@ static int dspi_probe(struct platform_device *pdev)
 	if (ret)
 		goto out_master_put;
 
+	if (dspi->devtype_data->trans_mode == DSPI_DMA_MODE) {
+		if (dspi_request_dma(dspi, res->start)) {
+			dev_err(&pdev->dev, "can't get dma channels\n");
+			goto out_clk_put;
+		}
+	}
+
 	master->max_speed_hz =
 		clk_get_rate(dspi->clk) / dspi->devtype_data->max_clock_factor;
 
@@ -768,6 +1065,7 @@ static int dspi_remove(struct platform_device *pdev)
 	struct fsl_dspi *dspi = spi_master_get_devdata(master);
 
 	/* Disconnect from the SPI framework */
+	dspi_release_dma(dspi);
 	clk_disable_unprepare(dspi->clk);
 	spi_unregister_master(dspi->master);
 
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 2c175b9495f7ee102a0d70a96f42fa730c019247..1d332e23f6ede6874e3a7c76329a9ab0ff67a751 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -23,8 +23,6 @@
 #include <linux/pm_runtime.h>
 #include <sysdev/fsl_soc.h>
 
-#include "spi-fsl-lib.h"
-
 /* eSPI Controller registers */
 #define ESPI_SPMODE	0x00	/* eSPI mode register */
 #define ESPI_SPIE	0x04	/* eSPI event register */
@@ -54,8 +52,11 @@
 #define CSMODE_AFT(x)		((x) << 8)
 #define CSMODE_CG(x)		((x) << 3)
 
+#define FSL_ESPI_FIFO_SIZE	32
+#define FSL_ESPI_RXTHR		15
+
 /* Default mode/csmode for eSPI controller */
-#define SPMODE_INIT_VAL (SPMODE_TXTHR(4) | SPMODE_RXTHR(3))
+#define SPMODE_INIT_VAL (SPMODE_TXTHR(4) | SPMODE_RXTHR(FSL_ESPI_RXTHR))
 #define CSMODE_INIT_VAL (CSMODE_POL_1 | CSMODE_BEF(0) \
 		| CSMODE_AFT(0) | CSMODE_CG(1))
 
@@ -90,219 +91,342 @@
 
 #define AUTOSUSPEND_TIMEOUT 2000
 
-static inline u32 fsl_espi_read_reg(struct mpc8xxx_spi *mspi, int offset)
+struct fsl_espi {
+	struct device *dev;
+	void __iomem *reg_base;
+
+	struct list_head *m_transfers;
+	struct spi_transfer *tx_t;
+	unsigned int tx_pos;
+	bool tx_done;
+	struct spi_transfer *rx_t;
+	unsigned int rx_pos;
+	bool rx_done;
+
+	bool swab;
+	unsigned int rxskip;
+
+	spinlock_t lock;
+
+	u32 spibrg;             /* SPIBRG input clock */
+
+	struct completion done;
+};
+
+struct fsl_espi_cs {
+	u32 hw_mode;
+};
+
+static inline u32 fsl_espi_read_reg(struct fsl_espi *espi, int offset)
 {
-	return ioread32be(mspi->reg_base + offset);
+	return ioread32be(espi->reg_base + offset);
 }
 
-static inline u8 fsl_espi_read_reg8(struct mpc8xxx_spi *mspi, int offset)
+static inline u16 fsl_espi_read_reg16(struct fsl_espi *espi, int offset)
 {
-	return ioread8(mspi->reg_base + offset);
+	return ioread16be(espi->reg_base + offset);
 }
 
-static inline void fsl_espi_write_reg(struct mpc8xxx_spi *mspi, int offset,
-				      u32 val)
+static inline u8 fsl_espi_read_reg8(struct fsl_espi *espi, int offset)
 {
-	iowrite32be(val, mspi->reg_base + offset);
+	return ioread8(espi->reg_base + offset);
 }
 
-static inline void fsl_espi_write_reg8(struct mpc8xxx_spi *mspi, int offset,
-				       u8 val)
+static inline void fsl_espi_write_reg(struct fsl_espi *espi, int offset,
+				      u32 val)
 {
-	iowrite8(val, mspi->reg_base + offset);
+	iowrite32be(val, espi->reg_base + offset);
 }
 
-static void fsl_espi_copy_to_buf(struct spi_message *m,
-				 struct mpc8xxx_spi *mspi)
+static inline void fsl_espi_write_reg16(struct fsl_espi *espi, int offset,
+					u16 val)
 {
-	struct spi_transfer *t;
-	u8 *buf = mspi->local_buf;
-
-	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if (t->tx_buf)
-			memcpy(buf, t->tx_buf, t->len);
-		else
-			memset(buf, 0, t->len);
-		buf += t->len;
-	}
+	iowrite16be(val, espi->reg_base + offset);
 }
 
-static void fsl_espi_copy_from_buf(struct spi_message *m,
-				   struct mpc8xxx_spi *mspi)
+static inline void fsl_espi_write_reg8(struct fsl_espi *espi, int offset,
+				       u8 val)
 {
-	struct spi_transfer *t;
-	u8 *buf = mspi->local_buf;
-
-	list_for_each_entry(t, &m->transfers, transfer_list) {
-		if (t->rx_buf)
-			memcpy(t->rx_buf, buf, t->len);
-		buf += t->len;
-	}
+	iowrite8(val, espi->reg_base + offset);
 }
 
 static int fsl_espi_check_message(struct spi_message *m)
 {
-	struct mpc8xxx_spi *mspi = spi_master_get_devdata(m->spi->master);
+	struct fsl_espi *espi = spi_master_get_devdata(m->spi->master);
 	struct spi_transfer *t, *first;
 
 	if (m->frame_length > SPCOM_TRANLEN_MAX) {
-		dev_err(mspi->dev, "message too long, size is %u bytes\n",
+		dev_err(espi->dev, "message too long, size is %u bytes\n",
 			m->frame_length);
 		return -EMSGSIZE;
 	}
 
 	first = list_first_entry(&m->transfers, struct spi_transfer,
 				 transfer_list);
+
 	list_for_each_entry(t, &m->transfers, transfer_list) {
 		if (first->bits_per_word != t->bits_per_word ||
 		    first->speed_hz != t->speed_hz) {
-			dev_err(mspi->dev, "bits_per_word/speed_hz should be the same for all transfers\n");
+			dev_err(espi->dev, "bits_per_word/speed_hz should be the same for all transfers\n");
 			return -EINVAL;
 		}
 	}
 
+	/* ESPI supports MSB-first transfers for word size 8 / 16 only */
+	if (!(m->spi->mode & SPI_LSB_FIRST) && first->bits_per_word != 8 &&
+	    first->bits_per_word != 16) {
+		dev_err(espi->dev,
+			"MSB-first transfer not supported for wordsize %u\n",
+			first->bits_per_word);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
-static void fsl_espi_change_mode(struct spi_device *spi)
+static unsigned int fsl_espi_check_rxskip_mode(struct spi_message *m)
 {
-	struct mpc8xxx_spi *mspi = spi_master_get_devdata(spi->master);
-	struct spi_mpc8xxx_cs *cs = spi->controller_state;
-	u32 tmp;
-	unsigned long flags;
-
-	/* Turn off IRQs locally to minimize time that SPI is disabled. */
-	local_irq_save(flags);
-
-	/* Turn off SPI unit prior changing mode */
-	tmp = fsl_espi_read_reg(mspi, ESPI_SPMODE);
-	fsl_espi_write_reg(mspi, ESPI_SPMODE, tmp & ~SPMODE_ENABLE);
-	fsl_espi_write_reg(mspi, ESPI_SPMODEx(spi->chip_select),
-			      cs->hw_mode);
-	fsl_espi_write_reg(mspi, ESPI_SPMODE, tmp);
-
-	local_irq_restore(flags);
+	struct spi_transfer *t;
+	unsigned int i = 0, rxskip = 0;
+
+	/*
+	 * prerequisites for ESPI rxskip mode:
+	 * - message has two transfers
+	 * - first transfer is a write and second is a read
+	 *
+	 * In addition the current low-level transfer mechanism requires
+	 * that the rxskip bytes fit into the TX FIFO. Else the transfer
+	 * would hang because after the first FSL_ESPI_FIFO_SIZE bytes
+	 * the TX FIFO isn't re-filled.
+	 */
+	list_for_each_entry(t, &m->transfers, transfer_list) {
+		if (i == 0) {
+			if (!t->tx_buf || t->rx_buf ||
+			    t->len > FSL_ESPI_FIFO_SIZE)
+				return 0;
+			rxskip = t->len;
+		} else if (i == 1) {
+			if (t->tx_buf || !t->rx_buf)
+				return 0;
+		}
+		i++;
+	}
+
+	return i == 2 ? rxskip : 0;
 }
 
-static u32 fsl_espi_tx_buf_lsb(struct mpc8xxx_spi *mpc8xxx_spi)
+static void fsl_espi_fill_tx_fifo(struct fsl_espi *espi, u32 events)
 {
-	u32 data;
-	u16 data_h;
-	u16 data_l;
-	const u32 *tx = mpc8xxx_spi->tx;
+	u32 tx_fifo_avail;
+	unsigned int tx_left;
+	const void *tx_buf;
+
+	/* if events is zero transfer has not started and tx fifo is empty */
+	tx_fifo_avail = events ? SPIE_TXCNT(events) :  FSL_ESPI_FIFO_SIZE;
+start:
+	tx_left = espi->tx_t->len - espi->tx_pos;
+	tx_buf = espi->tx_t->tx_buf;
+	while (tx_fifo_avail >= min(4U, tx_left) && tx_left) {
+		if (tx_left >= 4) {
+			if (!tx_buf)
+				fsl_espi_write_reg(espi, ESPI_SPITF, 0);
+			else if (espi->swab)
+				fsl_espi_write_reg(espi, ESPI_SPITF,
+					swahb32p(tx_buf + espi->tx_pos));
+			else
+				fsl_espi_write_reg(espi, ESPI_SPITF,
+					*(u32 *)(tx_buf + espi->tx_pos));
+			espi->tx_pos += 4;
+			tx_left -= 4;
+			tx_fifo_avail -= 4;
+		} else if (tx_left >= 2 && tx_buf && espi->swab) {
+			fsl_espi_write_reg16(espi, ESPI_SPITF,
+					swab16p(tx_buf + espi->tx_pos));
+			espi->tx_pos += 2;
+			tx_left -= 2;
+			tx_fifo_avail -= 2;
+		} else {
+			if (!tx_buf)
+				fsl_espi_write_reg8(espi, ESPI_SPITF, 0);
+			else
+				fsl_espi_write_reg8(espi, ESPI_SPITF,
+					*(u8 *)(tx_buf + espi->tx_pos));
+			espi->tx_pos += 1;
+			tx_left -= 1;
+			tx_fifo_avail -= 1;
+		}
+	}
 
-	if (!tx)
-		return 0;
+	if (!tx_left) {
+		/* Last transfer finished, in rxskip mode only one is needed */
+		if (list_is_last(&espi->tx_t->transfer_list,
+		    espi->m_transfers) || espi->rxskip) {
+			espi->tx_done = true;
+			return;
+		}
+		espi->tx_t = list_next_entry(espi->tx_t, transfer_list);
+		espi->tx_pos = 0;
+		/* continue with next transfer if tx fifo is not full */
+		if (tx_fifo_avail)
+			goto start;
+	}
+}
 
-	data = *tx++ << mpc8xxx_spi->tx_shift;
-	data_l = data & 0xffff;
-	data_h = (data >> 16) & 0xffff;
-	swab16s(&data_l);
-	swab16s(&data_h);
-	data = data_h | data_l;
+static void fsl_espi_read_rx_fifo(struct fsl_espi *espi, u32 events)
+{
+	u32 rx_fifo_avail = SPIE_RXCNT(events);
+	unsigned int rx_left;
+	void *rx_buf;
+
+start:
+	rx_left = espi->rx_t->len - espi->rx_pos;
+	rx_buf = espi->rx_t->rx_buf;
+	while (rx_fifo_avail >= min(4U, rx_left) && rx_left) {
+		if (rx_left >= 4) {
+			u32 val = fsl_espi_read_reg(espi, ESPI_SPIRF);
+
+			if (rx_buf && espi->swab)
+				*(u32 *)(rx_buf + espi->rx_pos) = swahb32(val);
+			else if (rx_buf)
+				*(u32 *)(rx_buf + espi->rx_pos) = val;
+			espi->rx_pos += 4;
+			rx_left -= 4;
+			rx_fifo_avail -= 4;
+		} else if (rx_left >= 2 && rx_buf && espi->swab) {
+			u16 val = fsl_espi_read_reg16(espi, ESPI_SPIRF);
+
+			*(u16 *)(rx_buf + espi->rx_pos) = swab16(val);
+			espi->rx_pos += 2;
+			rx_left -= 2;
+			rx_fifo_avail -= 2;
+		} else {
+			u8 val = fsl_espi_read_reg8(espi, ESPI_SPIRF);
+
+			if (rx_buf)
+				*(u8 *)(rx_buf + espi->rx_pos) = val;
+			espi->rx_pos += 1;
+			rx_left -= 1;
+			rx_fifo_avail -= 1;
+		}
+	}
 
-	mpc8xxx_spi->tx = tx;
-	return data;
+	if (!rx_left) {
+		if (list_is_last(&espi->rx_t->transfer_list,
+		    espi->m_transfers)) {
+			espi->rx_done = true;
+			return;
+		}
+		espi->rx_t = list_next_entry(espi->rx_t, transfer_list);
+		espi->rx_pos = 0;
+		/* continue with next transfer if rx fifo is not empty */
+		if (rx_fifo_avail)
+			goto start;
+	}
 }
 
 static void fsl_espi_setup_transfer(struct spi_device *spi,
 					struct spi_transfer *t)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	struct fsl_espi *espi = spi_master_get_devdata(spi->master);
 	int bits_per_word = t ? t->bits_per_word : spi->bits_per_word;
-	u32 hz = t ? t->speed_hz : spi->max_speed_hz;
-	u8 pm;
-	struct spi_mpc8xxx_cs *cs = spi->controller_state;
-
-	cs->rx_shift = 0;
-	cs->tx_shift = 0;
-	cs->get_rx = mpc8xxx_spi_rx_buf_u32;
-	cs->get_tx = mpc8xxx_spi_tx_buf_u32;
-	if (bits_per_word <= 8) {
-		cs->rx_shift = 8 - bits_per_word;
-	} else {
-		cs->rx_shift = 16 - bits_per_word;
-		if (spi->mode & SPI_LSB_FIRST)
-			cs->get_tx = fsl_espi_tx_buf_lsb;
-	}
-
-	mpc8xxx_spi->rx_shift = cs->rx_shift;
-	mpc8xxx_spi->tx_shift = cs->tx_shift;
-	mpc8xxx_spi->get_rx = cs->get_rx;
-	mpc8xxx_spi->get_tx = cs->get_tx;
+	u32 pm, hz = t ? t->speed_hz : spi->max_speed_hz;
+	struct fsl_espi_cs *cs = spi_get_ctldata(spi);
+	u32 hw_mode_old = cs->hw_mode;
 
 	/* mask out bits we are going to set */
 	cs->hw_mode &= ~(CSMODE_LEN(0xF) | CSMODE_DIV16 | CSMODE_PM(0xF));
 
 	cs->hw_mode |= CSMODE_LEN(bits_per_word - 1);
 
-	if ((mpc8xxx_spi->spibrg / hz) > 64) {
+	pm = DIV_ROUND_UP(espi->spibrg, hz * 4) - 1;
+
+	if (pm > 15) {
 		cs->hw_mode |= CSMODE_DIV16;
-		pm = DIV_ROUND_UP(mpc8xxx_spi->spibrg, hz * 16 * 4);
-
-		WARN_ONCE(pm > 33, "%s: Requested speed is too low: %d Hz. "
-			  "Will use %d Hz instead.\n", dev_name(&spi->dev),
-				hz, mpc8xxx_spi->spibrg / (4 * 16 * (32 + 1)));
-		if (pm > 33)
-			pm = 33;
-	} else {
-		pm = DIV_ROUND_UP(mpc8xxx_spi->spibrg, hz * 4);
+		pm = DIV_ROUND_UP(espi->spibrg, hz * 16 * 4) - 1;
 	}
-	if (pm)
-		pm--;
-	if (pm < 2)
-		pm = 2;
 
 	cs->hw_mode |= CSMODE_PM(pm);
 
-	fsl_espi_change_mode(spi);
+	/* don't write the mode register if the mode doesn't change */
+	if (cs->hw_mode != hw_mode_old)
+		fsl_espi_write_reg(espi, ESPI_SPMODEx(spi->chip_select),
+				   cs->hw_mode);
 }
 
 static int fsl_espi_bufs(struct spi_device *spi, struct spi_transfer *t)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(spi->master);
-	u32 word;
+	struct fsl_espi *espi = spi_master_get_devdata(spi->master);
+	unsigned int rx_len = t->len;
+	u32 mask, spcom;
 	int ret;
 
-	mpc8xxx_spi->len = t->len;
-	mpc8xxx_spi->count = roundup(t->len, 4) / 4;
-
-	mpc8xxx_spi->tx = t->tx_buf;
-	mpc8xxx_spi->rx = t->rx_buf;
-
-	reinit_completion(&mpc8xxx_spi->done);
+	reinit_completion(&espi->done);
 
 	/* Set SPCOM[CS] and SPCOM[TRANLEN] field */
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPCOM,
-		(SPCOM_CS(spi->chip_select) | SPCOM_TRANLEN(t->len - 1)));
+	spcom = SPCOM_CS(spi->chip_select);
+	spcom |= SPCOM_TRANLEN(t->len - 1);
+
+	/* configure RXSKIP mode */
+	if (espi->rxskip) {
+		spcom |= SPCOM_RXSKIP(espi->rxskip);
+		rx_len = t->len - espi->rxskip;
+		if (t->rx_nbits == SPI_NBITS_DUAL)
+			spcom |= SPCOM_DO;
+	}
+
+	fsl_espi_write_reg(espi, ESPI_SPCOM, spcom);
 
-	/* enable rx ints */
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, SPIM_RNE);
+	/* enable interrupts */
+	mask = SPIM_DON;
+	if (rx_len > FSL_ESPI_FIFO_SIZE)
+		mask |= SPIM_RXT;
+	fsl_espi_write_reg(espi, ESPI_SPIM, mask);
 
-	/* transmit word */
-	word = mpc8xxx_spi->get_tx(mpc8xxx_spi);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPITF, word);
+	/* Prevent filling the fifo from getting interrupted */
+	spin_lock_irq(&espi->lock);
+	fsl_espi_fill_tx_fifo(espi, 0);
+	spin_unlock_irq(&espi->lock);
 
 	/* Won't hang up forever, SPI bus sometimes got lost interrupts... */
-	ret = wait_for_completion_timeout(&mpc8xxx_spi->done, 2 * HZ);
+	ret = wait_for_completion_timeout(&espi->done, 2 * HZ);
 	if (ret == 0)
-		dev_err(mpc8xxx_spi->dev,
-			"Transaction hanging up (left %d bytes)\n",
-			mpc8xxx_spi->count);
+		dev_err(espi->dev, "Transfer timed out!\n");
 
 	/* disable rx ints */
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, 0);
+	fsl_espi_write_reg(espi, ESPI_SPIM, 0);
 
-	return mpc8xxx_spi->count > 0 ? -EMSGSIZE : 0;
+	return ret == 0 ? -ETIMEDOUT : 0;
 }
 
 static int fsl_espi_trans(struct spi_message *m, struct spi_transfer *trans)
 {
-	struct mpc8xxx_spi *mspi = spi_master_get_devdata(m->spi->master);
+	struct fsl_espi *espi = spi_master_get_devdata(m->spi->master);
 	struct spi_device *spi = m->spi;
 	int ret;
 
-	fsl_espi_copy_to_buf(m, mspi);
+	/* In case of LSB-first and bits_per_word > 8 byte-swap all words */
+	espi->swab = spi->mode & SPI_LSB_FIRST && trans->bits_per_word > 8;
+
+	espi->m_transfers = &m->transfers;
+	espi->tx_t = list_first_entry(&m->transfers, struct spi_transfer,
+				      transfer_list);
+	espi->tx_pos = 0;
+	espi->tx_done = false;
+	espi->rx_t = list_first_entry(&m->transfers, struct spi_transfer,
+				      transfer_list);
+	espi->rx_pos = 0;
+	espi->rx_done = false;
+
+	espi->rxskip = fsl_espi_check_rxskip_mode(m);
+	if (trans->rx_nbits == SPI_NBITS_DUAL && !espi->rxskip) {
+		dev_err(espi->dev, "Dual output mode requires RXSKIP mode!\n");
+		return -EINVAL;
+	}
+
+	/* In RXSKIP mode skip first transfer for reads */
+	if (espi->rxskip)
+		espi->rx_t = list_next_entry(espi->rx_t, transfer_list);
+
 	fsl_espi_setup_transfer(spi, trans);
 
 	ret = fsl_espi_bufs(spi, trans);
@@ -310,19 +434,13 @@ static int fsl_espi_trans(struct spi_message *m, struct spi_transfer *trans)
 	if (trans->delay_usecs)
 		udelay(trans->delay_usecs);
 
-	fsl_espi_setup_transfer(spi, NULL);
-
-	if (!ret)
-		fsl_espi_copy_from_buf(m, mspi);
-
 	return ret;
 }
 
 static int fsl_espi_do_one_msg(struct spi_master *master,
 			       struct spi_message *m)
 {
-	struct mpc8xxx_spi *mspi = spi_master_get_devdata(m->spi->master);
-	unsigned int delay_usecs = 0;
+	unsigned int delay_usecs = 0, rx_nbits = 0;
 	struct spi_transfer *t, trans = {};
 	int ret;
 
@@ -333,6 +451,8 @@ static int fsl_espi_do_one_msg(struct spi_master *master,
 	list_for_each_entry(t, &m->transfers, transfer_list) {
 		if (t->delay_usecs > delay_usecs)
 			delay_usecs = t->delay_usecs;
+		if (t->rx_nbits > rx_nbits)
+			rx_nbits = t->rx_nbits;
 	}
 
 	t = list_first_entry(&m->transfers, struct spi_transfer,
@@ -342,8 +462,7 @@ static int fsl_espi_do_one_msg(struct spi_master *master,
 	trans.speed_hz = t->speed_hz;
 	trans.bits_per_word = t->bits_per_word;
 	trans.delay_usecs = delay_usecs;
-	trans.tx_buf = mspi->local_buf;
-	trans.rx_buf = mspi->local_buf;
+	trans.rx_nbits = rx_nbits;
 
 	if (trans.len)
 		ret = fsl_espi_trans(m, &trans);
@@ -360,12 +479,9 @@ static int fsl_espi_do_one_msg(struct spi_master *master,
 
 static int fsl_espi_setup(struct spi_device *spi)
 {
-	struct mpc8xxx_spi *mpc8xxx_spi;
+	struct fsl_espi *espi;
 	u32 loop_mode;
-	struct spi_mpc8xxx_cs *cs = spi_get_ctldata(spi);
-
-	if (!spi->max_speed_hz)
-		return -EINVAL;
+	struct fsl_espi_cs *cs = spi_get_ctldata(spi);
 
 	if (!cs) {
 		cs = kzalloc(sizeof(*cs), GFP_KERNEL);
@@ -374,12 +490,11 @@ static int fsl_espi_setup(struct spi_device *spi)
 		spi_set_ctldata(spi, cs);
 	}
 
-	mpc8xxx_spi = spi_master_get_devdata(spi->master);
+	espi = spi_master_get_devdata(spi->master);
 
-	pm_runtime_get_sync(mpc8xxx_spi->dev);
+	pm_runtime_get_sync(espi->dev);
 
-	cs->hw_mode = fsl_espi_read_reg(mpc8xxx_spi,
-					   ESPI_SPMODEx(spi->chip_select));
+	cs->hw_mode = fsl_espi_read_reg(espi, ESPI_SPMODEx(spi->chip_select));
 	/* mask out bits we are going to set */
 	cs->hw_mode &= ~(CSMODE_CP_BEGIN_EDGECLK | CSMODE_CI_INACTIVEHIGH
 			 | CSMODE_REV);
@@ -392,115 +507,74 @@ static int fsl_espi_setup(struct spi_device *spi)
 		cs->hw_mode |= CSMODE_REV;
 
 	/* Handle the loop mode */
-	loop_mode = fsl_espi_read_reg(mpc8xxx_spi, ESPI_SPMODE);
+	loop_mode = fsl_espi_read_reg(espi, ESPI_SPMODE);
 	loop_mode &= ~SPMODE_LOOP;
 	if (spi->mode & SPI_LOOP)
 		loop_mode |= SPMODE_LOOP;
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, loop_mode);
+	fsl_espi_write_reg(espi, ESPI_SPMODE, loop_mode);
 
 	fsl_espi_setup_transfer(spi, NULL);
 
-	pm_runtime_mark_last_busy(mpc8xxx_spi->dev);
-	pm_runtime_put_autosuspend(mpc8xxx_spi->dev);
+	pm_runtime_mark_last_busy(espi->dev);
+	pm_runtime_put_autosuspend(espi->dev);
 
 	return 0;
 }
 
 static void fsl_espi_cleanup(struct spi_device *spi)
 {
-	struct spi_mpc8xxx_cs *cs = spi_get_ctldata(spi);
+	struct fsl_espi_cs *cs = spi_get_ctldata(spi);
 
 	kfree(cs);
 	spi_set_ctldata(spi, NULL);
 }
 
-static void fsl_espi_cpu_irq(struct mpc8xxx_spi *mspi, u32 events)
+static void fsl_espi_cpu_irq(struct fsl_espi *espi, u32 events)
 {
-	/* We need handle RX first */
-	if (events & SPIE_RNE) {
-		u32 rx_data, tmp;
-		u8 rx_data_8;
-		int rx_nr_bytes = 4;
-		int ret;
-
-		/* Spin until RX is done */
-		if (SPIE_RXCNT(events) < min(4, mspi->len)) {
-			ret = spin_event_timeout(
-				!(SPIE_RXCNT(events =
-				fsl_espi_read_reg(mspi, ESPI_SPIE)) <
-						min(4, mspi->len)),
-						10000, 0); /* 10 msec */
-			if (!ret)
-				dev_err(mspi->dev,
-					 "tired waiting for SPIE_RXCNT\n");
-		}
+	if (!espi->rx_done)
+		fsl_espi_read_rx_fifo(espi, events);
 
-		if (mspi->len >= 4) {
-			rx_data = fsl_espi_read_reg(mspi, ESPI_SPIRF);
-		} else if (mspi->len <= 0) {
-			dev_err(mspi->dev,
-				"unexpected RX(SPIE_RNE) interrupt occurred,\n"
-				"(local rxlen %d bytes, reg rxlen %d bytes)\n",
-				min(4, mspi->len), SPIE_RXCNT(events));
-			rx_nr_bytes = 0;
-		} else {
-			rx_nr_bytes = mspi->len;
-			tmp = mspi->len;
-			rx_data = 0;
-			while (tmp--) {
-				rx_data_8 = fsl_espi_read_reg8(mspi,
-							       ESPI_SPIRF);
-				rx_data |= (rx_data_8 << (tmp * 8));
-			}
-
-			rx_data <<= (4 - mspi->len) * 8;
-		}
+	if (!espi->tx_done)
+		fsl_espi_fill_tx_fifo(espi, events);
 
-		mspi->len -= rx_nr_bytes;
+	if (!espi->tx_done || !espi->rx_done)
+		return;
 
-		if (rx_nr_bytes && mspi->rx)
-			mspi->get_rx(rx_data, mspi);
-	}
+	/* we're done, but check for errors before returning */
+	events = fsl_espi_read_reg(espi, ESPI_SPIE);
 
-	if (!(events & SPIE_TNF)) {
-		int ret;
-
-		/* spin until TX is done */
-		ret = spin_event_timeout(((events = fsl_espi_read_reg(
-				mspi, ESPI_SPIE)) & SPIE_TNF), 1000, 0);
-		if (!ret) {
-			dev_err(mspi->dev, "tired waiting for SPIE_TNF\n");
-			complete(&mspi->done);
-			return;
-		}
-	}
+	if (!(events & SPIE_DON))
+		dev_err(espi->dev,
+			"Transfer done but SPIE_DON isn't set!\n");
 
-	mspi->count -= 1;
-	if (mspi->count) {
-		u32 word = mspi->get_tx(mspi);
+	if (SPIE_RXCNT(events) || SPIE_TXCNT(events) != FSL_ESPI_FIFO_SIZE)
+		dev_err(espi->dev, "Transfer done but rx/tx fifo's aren't empty!\n");
 
-		fsl_espi_write_reg(mspi, ESPI_SPITF, word);
-	} else {
-		complete(&mspi->done);
-	}
+	complete(&espi->done);
 }
 
 static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
 {
-	struct mpc8xxx_spi *mspi = context_data;
+	struct fsl_espi *espi = context_data;
 	u32 events;
 
+	spin_lock(&espi->lock);
+
 	/* Get interrupt events(tx/rx) */
-	events = fsl_espi_read_reg(mspi, ESPI_SPIE);
-	if (!events)
+	events = fsl_espi_read_reg(espi, ESPI_SPIE);
+	if (!events) {
+		spin_unlock(&espi->lock);
 		return IRQ_NONE;
+	}
 
-	dev_vdbg(mspi->dev, "%s: events %x\n", __func__, events);
+	dev_vdbg(espi->dev, "%s: events %x\n", __func__, events);
 
-	fsl_espi_cpu_irq(mspi, events);
+	fsl_espi_cpu_irq(espi, events);
 
 	/* Clear the events */
-	fsl_espi_write_reg(mspi, ESPI_SPIE, events);
+	fsl_espi_write_reg(espi, ESPI_SPIE, events);
+
+	spin_unlock(&espi->lock);
 
 	return IRQ_HANDLED;
 }
@@ -509,12 +583,12 @@ static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
 static int fsl_espi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
+	struct fsl_espi *espi = spi_master_get_devdata(master);
 	u32 regval;
 
-	regval = fsl_espi_read_reg(mpc8xxx_spi, ESPI_SPMODE);
+	regval = fsl_espi_read_reg(espi, ESPI_SPMODE);
 	regval &= ~SPMODE_ENABLE;
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
+	fsl_espi_write_reg(espi, ESPI_SPMODE, regval);
 
 	return 0;
 }
@@ -522,12 +596,12 @@ static int fsl_espi_runtime_suspend(struct device *dev)
 static int fsl_espi_runtime_resume(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
-	struct mpc8xxx_spi *mpc8xxx_spi = spi_master_get_devdata(master);
+	struct fsl_espi *espi = spi_master_get_devdata(master);
 	u32 regval;
 
-	regval = fsl_espi_read_reg(mpc8xxx_spi, ESPI_SPMODE);
+	regval = fsl_espi_read_reg(espi, ESPI_SPMODE);
 	regval |= SPMODE_ENABLE;
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
+	fsl_espi_write_reg(espi, ESPI_SPMODE, regval);
 
 	return 0;
 }
@@ -538,96 +612,105 @@ static size_t fsl_espi_max_message_size(struct spi_device *spi)
 	return SPCOM_TRANLEN_MAX;
 }
 
+static void fsl_espi_init_regs(struct device *dev, bool initial)
+{
+	struct spi_master *master = dev_get_drvdata(dev);
+	struct fsl_espi *espi = spi_master_get_devdata(master);
+	struct device_node *nc;
+	u32 csmode, cs, prop;
+	int ret;
+
+	/* SPI controller initializations */
+	fsl_espi_write_reg(espi, ESPI_SPMODE, 0);
+	fsl_espi_write_reg(espi, ESPI_SPIM, 0);
+	fsl_espi_write_reg(espi, ESPI_SPCOM, 0);
+	fsl_espi_write_reg(espi, ESPI_SPIE, 0xffffffff);
+
+	/* Init eSPI CS mode register */
+	for_each_available_child_of_node(master->dev.of_node, nc) {
+		/* get chip select */
+		ret = of_property_read_u32(nc, "reg", &cs);
+		if (ret || cs >= master->num_chipselect)
+			continue;
+
+		csmode = CSMODE_INIT_VAL;
+
+		/* check if CSBEF is set in device tree */
+		ret = of_property_read_u32(nc, "fsl,csbef", &prop);
+		if (!ret) {
+			csmode &= ~(CSMODE_BEF(0xf));
+			csmode |= CSMODE_BEF(prop);
+		}
+
+		/* check if CSAFT is set in device tree */
+		ret = of_property_read_u32(nc, "fsl,csaft", &prop);
+		if (!ret) {
+			csmode &= ~(CSMODE_AFT(0xf));
+			csmode |= CSMODE_AFT(prop);
+		}
+
+		fsl_espi_write_reg(espi, ESPI_SPMODEx(cs), csmode);
+
+		if (initial)
+			dev_info(dev, "cs=%u, init_csmode=0x%x\n", cs, csmode);
+	}
+
+	/* Enable SPI interface */
+	fsl_espi_write_reg(espi, ESPI_SPMODE, SPMODE_INIT_VAL | SPMODE_ENABLE);
+}
+
 static int fsl_espi_probe(struct device *dev, struct resource *mem,
-			  unsigned int irq)
+			  unsigned int irq, unsigned int num_cs)
 {
-	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
 	struct spi_master *master;
-	struct mpc8xxx_spi *mpc8xxx_spi;
-	struct device_node *nc;
-	const __be32 *prop;
-	u32 regval, csmode;
-	int i, len, ret;
+	struct fsl_espi *espi;
+	int ret;
 
-	master = spi_alloc_master(dev, sizeof(struct mpc8xxx_spi));
+	master = spi_alloc_master(dev, sizeof(struct fsl_espi));
 	if (!master)
 		return -ENOMEM;
 
 	dev_set_drvdata(dev, master);
 
-	mpc8xxx_spi_probe(dev, mem, irq);
-
+	master->mode_bits = SPI_RX_DUAL | SPI_CPOL | SPI_CPHA | SPI_CS_HIGH |
+			    SPI_LSB_FIRST | SPI_LOOP;
+	master->dev.of_node = dev->of_node;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(4, 16);
 	master->setup = fsl_espi_setup;
 	master->cleanup = fsl_espi_cleanup;
 	master->transfer_one_message = fsl_espi_do_one_msg;
 	master->auto_runtime_pm = true;
 	master->max_message_size = fsl_espi_max_message_size;
+	master->num_chipselect = num_cs;
 
-	mpc8xxx_spi = spi_master_get_devdata(master);
+	espi = spi_master_get_devdata(master);
+	spin_lock_init(&espi->lock);
 
-	mpc8xxx_spi->local_buf =
-		devm_kmalloc(dev, SPCOM_TRANLEN_MAX, GFP_KERNEL);
-	if (!mpc8xxx_spi->local_buf) {
-		ret = -ENOMEM;
+	espi->dev = dev;
+	espi->spibrg = fsl_get_sys_freq();
+	if (espi->spibrg == -1) {
+		dev_err(dev, "Can't get sys frequency!\n");
+		ret = -EINVAL;
 		goto err_probe;
 	}
+	/* determined by clock divider fields DIV16/PM in register SPMODEx */
+	master->min_speed_hz = DIV_ROUND_UP(espi->spibrg, 4 * 16 * 16);
+	master->max_speed_hz = DIV_ROUND_UP(espi->spibrg, 4);
 
-	mpc8xxx_spi->reg_base = devm_ioremap_resource(dev, mem);
-	if (IS_ERR(mpc8xxx_spi->reg_base)) {
-		ret = PTR_ERR(mpc8xxx_spi->reg_base);
+	init_completion(&espi->done);
+
+	espi->reg_base = devm_ioremap_resource(dev, mem);
+	if (IS_ERR(espi->reg_base)) {
+		ret = PTR_ERR(espi->reg_base);
 		goto err_probe;
 	}
 
 	/* Register for SPI Interrupt */
-	ret = devm_request_irq(dev, mpc8xxx_spi->irq, fsl_espi_irq,
-			  0, "fsl_espi", mpc8xxx_spi);
+	ret = devm_request_irq(dev, irq, fsl_espi_irq, 0, "fsl_espi", espi);
 	if (ret)
 		goto err_probe;
 
-	if (mpc8xxx_spi->flags & SPI_QE_CPU_MODE) {
-		mpc8xxx_spi->rx_shift = 16;
-		mpc8xxx_spi->tx_shift = 24;
-	}
-
-	/* SPI controller initializations */
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, 0);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, 0);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPCOM, 0);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIE, 0xffffffff);
-
-	/* Init eSPI CS mode register */
-	for_each_available_child_of_node(master->dev.of_node, nc) {
-		/* get chip select */
-		prop = of_get_property(nc, "reg", &len);
-		if (!prop || len < sizeof(*prop))
-			continue;
-		i = be32_to_cpup(prop);
-		if (i < 0 || i >= pdata->max_chipselect)
-			continue;
-
-		csmode = CSMODE_INIT_VAL;
-		/* check if CSBEF is set in device tree */
-		prop = of_get_property(nc, "fsl,csbef", &len);
-		if (prop && len >= sizeof(*prop)) {
-			csmode &= ~(CSMODE_BEF(0xf));
-			csmode |= CSMODE_BEF(be32_to_cpup(prop));
-		}
-		/* check if CSAFT is set in device tree */
-		prop = of_get_property(nc, "fsl,csaft", &len);
-		if (prop && len >= sizeof(*prop)) {
-			csmode &= ~(CSMODE_AFT(0xf));
-			csmode |= CSMODE_AFT(be32_to_cpup(prop));
-		}
-		fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODEx(i), csmode);
-
-		dev_info(dev, "cs=%d, init_csmode=0x%x\n", i, csmode);
-	}
-
-	/* Enable SPI interface */
-	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
-
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
+	fsl_espi_init_regs(dev, true);
 
 	pm_runtime_set_autosuspend_delay(dev, AUTOSUSPEND_TIMEOUT);
 	pm_runtime_use_autosuspend(dev);
@@ -639,8 +722,7 @@ static int fsl_espi_probe(struct device *dev, struct resource *mem,
 	if (ret < 0)
 		goto err_pm;
 
-	dev_info(dev, "at 0x%p (irq = %d)\n", mpc8xxx_spi->reg_base,
-		 mpc8xxx_spi->irq);
+	dev_info(dev, "at 0x%p (irq = %u)\n", espi->reg_base, irq);
 
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
@@ -659,20 +741,16 @@ static int fsl_espi_probe(struct device *dev, struct resource *mem,
 static int of_fsl_espi_get_chipselects(struct device *dev)
 {
 	struct device_node *np = dev->of_node;
-	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
-	const u32 *prop;
-	int len;
+	u32 num_cs;
+	int ret;
 
-	prop = of_get_property(np, "fsl,espi-num-chipselects", &len);
-	if (!prop || len < sizeof(*prop)) {
+	ret = of_property_read_u32(np, "fsl,espi-num-chipselects", &num_cs);
+	if (ret) {
 		dev_err(dev, "No 'fsl,espi-num-chipselects' property\n");
-		return -EINVAL;
+		return 0;
 	}
 
-	pdata->max_chipselect = *prop;
-	pdata->cs_control = NULL;
-
-	return 0;
+	return num_cs;
 }
 
 static int of_fsl_espi_probe(struct platform_device *ofdev)
@@ -680,16 +758,17 @@ static int of_fsl_espi_probe(struct platform_device *ofdev)
 	struct device *dev = &ofdev->dev;
 	struct device_node *np = ofdev->dev.of_node;
 	struct resource mem;
-	unsigned int irq;
+	unsigned int irq, num_cs;
 	int ret;
 
-	ret = of_mpc8xxx_spi_probe(ofdev);
-	if (ret)
-		return ret;
+	if (of_property_read_bool(np, "mode")) {
+		dev_err(dev, "mode property is not supported on ESPI!\n");
+		return -EINVAL;
+	}
 
-	ret = of_fsl_espi_get_chipselects(dev);
-	if (ret)
-		return ret;
+	num_cs = of_fsl_espi_get_chipselects(dev);
+	if (!num_cs)
+		return -EINVAL;
 
 	ret = of_address_to_resource(np, 0, &mem);
 	if (ret)
@@ -699,7 +778,7 @@ static int of_fsl_espi_probe(struct platform_device *ofdev)
 	if (!irq)
 		return -EINVAL;
 
-	return fsl_espi_probe(dev, &mem, irq);
+	return fsl_espi_probe(dev, &mem, irq, num_cs);
 }
 
 static int of_fsl_espi_remove(struct platform_device *dev)
@@ -721,38 +800,15 @@ static int of_fsl_espi_suspend(struct device *dev)
 		return ret;
 	}
 
-	ret = pm_runtime_force_suspend(dev);
-	if (ret < 0)
-		return ret;
-
-	return 0;
+	return pm_runtime_force_suspend(dev);
 }
 
 static int of_fsl_espi_resume(struct device *dev)
 {
-	struct fsl_spi_platform_data *pdata = dev_get_platdata(dev);
 	struct spi_master *master = dev_get_drvdata(dev);
-	struct mpc8xxx_spi *mpc8xxx_spi;
-	u32 regval;
-	int i, ret;
-
-	mpc8xxx_spi = spi_master_get_devdata(master);
-
-	/* SPI controller initializations */
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, 0);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIM, 0);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPCOM, 0);
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPIE, 0xffffffff);
-
-	/* Init eSPI CS mode register */
-	for (i = 0; i < pdata->max_chipselect; i++)
-		fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODEx(i),
-				      CSMODE_INIT_VAL);
-
-	/* Enable SPI interface */
-	regval = pdata->initial_spmode | SPMODE_INIT_VAL | SPMODE_ENABLE;
+	int ret;
 
-	fsl_espi_write_reg(mpc8xxx_spi, ESPI_SPMODE, regval);
+	fsl_espi_init_regs(dev, false);
 
 	ret = pm_runtime_force_resume(dev);
 	if (ret < 0)
diff --git a/drivers/spi/spi-fsl-lib.h b/drivers/spi/spi-fsl-lib.h
index 2925c8089fd93e91c5ea44461644d66ff2388703..f303f306b38e60c43ab73db676f5911b0394bae8 100644
--- a/drivers/spi/spi-fsl-lib.h
+++ b/drivers/spi/spi-fsl-lib.h
@@ -28,10 +28,6 @@ struct mpc8xxx_spi {
 	/* rx & tx bufs from the spi_transfer */
 	const void *tx;
 	void *rx;
-#if IS_ENABLED(CONFIG_SPI_FSL_ESPI)
-	int len;
-	u8 *local_buf;
-#endif
 
 	int subblock;
 	struct spi_pram __iomem *pram;
diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c
new file mode 100644
index 0000000000000000000000000000000000000000..52551f6d0c7ddf71aaf03ecc18c5caf2f804108b
--- /dev/null
+++ b/drivers/spi/spi-fsl-lpspi.c
@@ -0,0 +1,525 @@
+/*
+ * Freescale i.MX7ULP LPSPI driver
+ *
+ * Copyright 2016 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/spi/spi_bitbang.h>
+#include <linux/types.h>
+
+#define DRIVER_NAME "fsl_lpspi"
+
+/* i.MX7ULP LPSPI registers */
+#define IMX7ULP_VERID	0x0
+#define IMX7ULP_PARAM	0x4
+#define IMX7ULP_CR	0x10
+#define IMX7ULP_SR	0x14
+#define IMX7ULP_IER	0x18
+#define IMX7ULP_DER	0x1c
+#define IMX7ULP_CFGR0	0x20
+#define IMX7ULP_CFGR1	0x24
+#define IMX7ULP_DMR0	0x30
+#define IMX7ULP_DMR1	0x34
+#define IMX7ULP_CCR	0x40
+#define IMX7ULP_FCR	0x58
+#define IMX7ULP_FSR	0x5c
+#define IMX7ULP_TCR	0x60
+#define IMX7ULP_TDR	0x64
+#define IMX7ULP_RSR	0x70
+#define IMX7ULP_RDR	0x74
+
+/* General control register field define */
+#define CR_RRF		BIT(9)
+#define CR_RTF		BIT(8)
+#define CR_RST		BIT(1)
+#define CR_MEN		BIT(0)
+#define SR_TCF		BIT(10)
+#define SR_RDF		BIT(1)
+#define SR_TDF		BIT(0)
+#define IER_TCIE	BIT(10)
+#define IER_RDIE	BIT(1)
+#define IER_TDIE	BIT(0)
+#define CFGR1_PCSCFG	BIT(27)
+#define CFGR1_PCSPOL	BIT(8)
+#define CFGR1_NOSTALL	BIT(3)
+#define CFGR1_MASTER	BIT(0)
+#define RSR_RXEMPTY	BIT(1)
+#define TCR_CPOL	BIT(31)
+#define TCR_CPHA	BIT(30)
+#define TCR_CONT	BIT(21)
+#define TCR_CONTC	BIT(20)
+#define TCR_RXMSK	BIT(19)
+#define TCR_TXMSK	BIT(18)
+
+static int clkdivs[] = {1, 2, 4, 8, 16, 32, 64, 128};
+
+struct lpspi_config {
+	u8 bpw;
+	u8 chip_select;
+	u8 prescale;
+	u16 mode;
+	u32 speed_hz;
+};
+
+struct fsl_lpspi_data {
+	struct device *dev;
+	void __iomem *base;
+	struct clk *clk;
+
+	void *rx_buf;
+	const void *tx_buf;
+	void (*tx)(struct fsl_lpspi_data *);
+	void (*rx)(struct fsl_lpspi_data *);
+
+	u32 remain;
+	u8 txfifosize;
+	u8 rxfifosize;
+
+	struct lpspi_config config;
+	struct completion xfer_done;
+};
+
+static const struct of_device_id fsl_lpspi_dt_ids[] = {
+	{ .compatible = "fsl,imx7ulp-spi", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_lpspi_dt_ids);
+
+#define LPSPI_BUF_RX(type)						\
+static void fsl_lpspi_buf_rx_##type(struct fsl_lpspi_data *fsl_lpspi)	\
+{									\
+	unsigned int val = readl(fsl_lpspi->base + IMX7ULP_RDR);	\
+									\
+	if (fsl_lpspi->rx_buf) {					\
+		*(type *)fsl_lpspi->rx_buf = val;			\
+		fsl_lpspi->rx_buf += sizeof(type);                      \
+	}								\
+}
+
+#define LPSPI_BUF_TX(type)						\
+static void fsl_lpspi_buf_tx_##type(struct fsl_lpspi_data *fsl_lpspi)	\
+{									\
+	type val = 0;							\
+									\
+	if (fsl_lpspi->tx_buf) {					\
+		val = *(type *)fsl_lpspi->tx_buf;			\
+		fsl_lpspi->tx_buf += sizeof(type);			\
+	}								\
+									\
+	fsl_lpspi->remain -= sizeof(type);				\
+	writel(val, fsl_lpspi->base + IMX7ULP_TDR);			\
+}
+
+LPSPI_BUF_RX(u8)
+LPSPI_BUF_TX(u8)
+LPSPI_BUF_RX(u16)
+LPSPI_BUF_TX(u16)
+LPSPI_BUF_RX(u32)
+LPSPI_BUF_TX(u32)
+
+static void fsl_lpspi_intctrl(struct fsl_lpspi_data *fsl_lpspi,
+			      unsigned int enable)
+{
+	writel(enable, fsl_lpspi->base + IMX7ULP_IER);
+}
+
+static int lpspi_prepare_xfer_hardware(struct spi_master *master)
+{
+	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+
+	return clk_prepare_enable(fsl_lpspi->clk);
+}
+
+static int lpspi_unprepare_xfer_hardware(struct spi_master *master)
+{
+	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+
+	clk_disable_unprepare(fsl_lpspi->clk);
+
+	return 0;
+}
+
+static int fsl_lpspi_txfifo_empty(struct fsl_lpspi_data *fsl_lpspi)
+{
+	u32 txcnt;
+	unsigned long orig_jiffies = jiffies;
+
+	do {
+		txcnt = readl(fsl_lpspi->base + IMX7ULP_FSR) & 0xff;
+
+		if (time_after(jiffies, orig_jiffies + msecs_to_jiffies(500))) {
+			dev_dbg(fsl_lpspi->dev, "txfifo empty timeout\n");
+			return -ETIMEDOUT;
+		}
+		cond_resched();
+
+	} while (txcnt);
+
+	return 0;
+}
+
+static void fsl_lpspi_write_tx_fifo(struct fsl_lpspi_data *fsl_lpspi)
+{
+	u8 txfifo_cnt;
+
+	txfifo_cnt = readl(fsl_lpspi->base + IMX7ULP_FSR) & 0xff;
+
+	while (txfifo_cnt < fsl_lpspi->txfifosize) {
+		if (!fsl_lpspi->remain)
+			break;
+		fsl_lpspi->tx(fsl_lpspi);
+		txfifo_cnt++;
+	}
+
+	if (!fsl_lpspi->remain && (txfifo_cnt < fsl_lpspi->txfifosize))
+		writel(0, fsl_lpspi->base + IMX7ULP_TDR);
+	else
+		fsl_lpspi_intctrl(fsl_lpspi, IER_TDIE);
+}
+
+static void fsl_lpspi_read_rx_fifo(struct fsl_lpspi_data *fsl_lpspi)
+{
+	while (!(readl(fsl_lpspi->base + IMX7ULP_RSR) & RSR_RXEMPTY))
+		fsl_lpspi->rx(fsl_lpspi);
+}
+
+static void fsl_lpspi_set_cmd(struct fsl_lpspi_data *fsl_lpspi,
+			      bool is_first_xfer)
+{
+	u32 temp = 0;
+
+	temp |= fsl_lpspi->config.bpw - 1;
+	temp |= fsl_lpspi->config.prescale << 27;
+	temp |= (fsl_lpspi->config.mode & 0x3) << 30;
+	temp |= (fsl_lpspi->config.chip_select & 0x3) << 24;
+
+	/*
+	 * Set TCR_CONT will keep SS asserted after current transfer.
+	 * For the first transfer, clear TCR_CONTC to assert SS.
+	 * For subsequent transfer, set TCR_CONTC to keep SS asserted.
+	 */
+	temp |= TCR_CONT;
+	if (is_first_xfer)
+		temp &= ~TCR_CONTC;
+	else
+		temp |= TCR_CONTC;
+
+	writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
+
+	dev_dbg(fsl_lpspi->dev, "TCR=0x%x\n", temp);
+}
+
+static void fsl_lpspi_set_watermark(struct fsl_lpspi_data *fsl_lpspi)
+{
+	u32 temp;
+
+	temp = fsl_lpspi->txfifosize >> 1 | (fsl_lpspi->rxfifosize >> 1) << 16;
+
+	writel(temp, fsl_lpspi->base + IMX7ULP_FCR);
+
+	dev_dbg(fsl_lpspi->dev, "FCR=0x%x\n", temp);
+}
+
+static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi)
+{
+	struct lpspi_config config = fsl_lpspi->config;
+	unsigned int perclk_rate, scldiv;
+	u8 prescale;
+
+	perclk_rate = clk_get_rate(fsl_lpspi->clk);
+	for (prescale = 0; prescale < 8; prescale++) {
+		scldiv = perclk_rate /
+			 (clkdivs[prescale] * config.speed_hz) - 2;
+		if (scldiv < 256) {
+			fsl_lpspi->config.prescale = prescale;
+			break;
+		}
+	}
+
+	if (prescale == 8 && scldiv >= 256)
+		return -EINVAL;
+
+	writel(scldiv, fsl_lpspi->base + IMX7ULP_CCR);
+
+	dev_dbg(fsl_lpspi->dev, "perclk=%d, speed=%d, prescale =%d, scldiv=%d\n",
+		perclk_rate, config.speed_hz, prescale, scldiv);
+
+	return 0;
+}
+
+static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi)
+{
+	u32 temp;
+	int ret;
+
+	temp = CR_RST;
+	writel(temp, fsl_lpspi->base + IMX7ULP_CR);
+	writel(0, fsl_lpspi->base + IMX7ULP_CR);
+
+	ret = fsl_lpspi_set_bitrate(fsl_lpspi);
+	if (ret)
+		return ret;
+
+	fsl_lpspi_set_watermark(fsl_lpspi);
+
+	temp = CFGR1_PCSCFG | CFGR1_MASTER | CFGR1_NOSTALL;
+	if (fsl_lpspi->config.mode & SPI_CS_HIGH)
+		temp |= CFGR1_PCSPOL;
+	writel(temp, fsl_lpspi->base + IMX7ULP_CFGR1);
+
+	temp = readl(fsl_lpspi->base + IMX7ULP_CR);
+	temp |= CR_RRF | CR_RTF | CR_MEN;
+	writel(temp, fsl_lpspi->base + IMX7ULP_CR);
+
+	return 0;
+}
+
+static void fsl_lpspi_setup_transfer(struct spi_device *spi,
+				     struct spi_transfer *t)
+{
+	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(spi->master);
+
+	fsl_lpspi->config.mode = spi->mode;
+	fsl_lpspi->config.bpw = t ? t->bits_per_word : spi->bits_per_word;
+	fsl_lpspi->config.speed_hz = t ? t->speed_hz : spi->max_speed_hz;
+	fsl_lpspi->config.chip_select = spi->chip_select;
+
+	if (!fsl_lpspi->config.speed_hz)
+		fsl_lpspi->config.speed_hz = spi->max_speed_hz;
+	if (!fsl_lpspi->config.bpw)
+		fsl_lpspi->config.bpw = spi->bits_per_word;
+
+	/* Initialize the functions for transfer */
+	if (fsl_lpspi->config.bpw <= 8) {
+		fsl_lpspi->rx = fsl_lpspi_buf_rx_u8;
+		fsl_lpspi->tx = fsl_lpspi_buf_tx_u8;
+	} else if (fsl_lpspi->config.bpw <= 16) {
+		fsl_lpspi->rx = fsl_lpspi_buf_rx_u16;
+		fsl_lpspi->tx = fsl_lpspi_buf_tx_u16;
+	} else {
+		fsl_lpspi->rx = fsl_lpspi_buf_rx_u32;
+		fsl_lpspi->tx = fsl_lpspi_buf_tx_u32;
+	}
+
+	fsl_lpspi_config(fsl_lpspi);
+}
+
+static int fsl_lpspi_transfer_one(struct spi_master *master,
+				  struct spi_device *spi,
+				  struct spi_transfer *t)
+{
+	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	int ret;
+
+	fsl_lpspi->tx_buf = t->tx_buf;
+	fsl_lpspi->rx_buf = t->rx_buf;
+	fsl_lpspi->remain = t->len;
+
+	reinit_completion(&fsl_lpspi->xfer_done);
+	fsl_lpspi_write_tx_fifo(fsl_lpspi);
+
+	ret = wait_for_completion_timeout(&fsl_lpspi->xfer_done, HZ);
+	if (!ret) {
+		dev_dbg(fsl_lpspi->dev, "wait for completion timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	ret = fsl_lpspi_txfifo_empty(fsl_lpspi);
+	if (ret)
+		return ret;
+
+	fsl_lpspi_read_rx_fifo(fsl_lpspi);
+
+	return 0;
+}
+
+static int fsl_lpspi_transfer_one_msg(struct spi_master *master,
+				      struct spi_message *msg)
+{
+	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+	struct spi_device *spi = msg->spi;
+	struct spi_transfer *xfer;
+	bool is_first_xfer = true;
+	u32 temp;
+	int ret;
+
+	msg->status = 0;
+	msg->actual_length = 0;
+
+	list_for_each_entry(xfer, &msg->transfers, transfer_list) {
+		fsl_lpspi_setup_transfer(spi, xfer);
+		fsl_lpspi_set_cmd(fsl_lpspi, is_first_xfer);
+
+		is_first_xfer = false;
+
+		ret = fsl_lpspi_transfer_one(master, spi, xfer);
+		if (ret < 0)
+			goto complete;
+
+		msg->actual_length += xfer->len;
+	}
+
+complete:
+	/* de-assert SS, then finalize current message */
+	temp = readl(fsl_lpspi->base + IMX7ULP_TCR);
+	temp &= ~TCR_CONTC;
+	writel(temp, fsl_lpspi->base + IMX7ULP_TCR);
+
+	msg->status = ret;
+	spi_finalize_current_message(master);
+
+	return ret;
+}
+
+static irqreturn_t fsl_lpspi_isr(int irq, void *dev_id)
+{
+	struct fsl_lpspi_data *fsl_lpspi = dev_id;
+	u32 temp;
+
+	fsl_lpspi_intctrl(fsl_lpspi, 0);
+	temp = readl(fsl_lpspi->base + IMX7ULP_SR);
+
+	fsl_lpspi_read_rx_fifo(fsl_lpspi);
+
+	if (temp & SR_TDF) {
+		fsl_lpspi_write_tx_fifo(fsl_lpspi);
+
+		if (!fsl_lpspi->remain)
+			complete(&fsl_lpspi->xfer_done);
+
+		return IRQ_HANDLED;
+	}
+
+	return IRQ_NONE;
+}
+
+static int fsl_lpspi_probe(struct platform_device *pdev)
+{
+	struct fsl_lpspi_data *fsl_lpspi;
+	struct spi_master *master;
+	struct resource *res;
+	int ret, irq;
+	u32 temp;
+
+	master = spi_alloc_master(&pdev->dev, sizeof(struct fsl_lpspi_data));
+	if (!master)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, master);
+
+	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(8, 32);
+	master->bus_num = pdev->id;
+
+	fsl_lpspi = spi_master_get_devdata(master);
+	fsl_lpspi->dev = &pdev->dev;
+
+	master->transfer_one_message = fsl_lpspi_transfer_one_msg;
+	master->prepare_transfer_hardware = lpspi_prepare_xfer_hardware;
+	master->unprepare_transfer_hardware = lpspi_unprepare_xfer_hardware;
+	master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH;
+	master->flags = SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX;
+	master->dev.of_node = pdev->dev.of_node;
+	master->bus_num = pdev->id;
+
+	init_completion(&fsl_lpspi->xfer_done);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fsl_lpspi->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fsl_lpspi->base)) {
+		ret = PTR_ERR(fsl_lpspi->base);
+		goto out_master_put;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto out_master_put;
+	}
+
+	ret = devm_request_irq(&pdev->dev, irq, fsl_lpspi_isr, 0,
+			       dev_name(&pdev->dev), fsl_lpspi);
+	if (ret) {
+		dev_err(&pdev->dev, "can't get irq%d: %d\n", irq, ret);
+		goto out_master_put;
+	}
+
+	fsl_lpspi->clk = devm_clk_get(&pdev->dev, "ipg");
+	if (IS_ERR(fsl_lpspi->clk)) {
+		ret = PTR_ERR(fsl_lpspi->clk);
+		goto out_master_put;
+	}
+
+	ret = clk_prepare_enable(fsl_lpspi->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "can't enable lpspi clock, ret=%d\n", ret);
+		goto out_master_put;
+	}
+
+	temp = readl(fsl_lpspi->base + IMX7ULP_PARAM);
+	fsl_lpspi->txfifosize = 1 << (temp & 0x0f);
+	fsl_lpspi->rxfifosize = 1 << ((temp >> 8) & 0x0f);
+
+	clk_disable_unprepare(fsl_lpspi->clk);
+
+	ret = devm_spi_register_master(&pdev->dev, master);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "spi_register_master error.\n");
+		goto out_master_put;
+	}
+
+	return 0;
+
+out_master_put:
+	spi_master_put(master);
+
+	return ret;
+}
+
+static int fsl_lpspi_remove(struct platform_device *pdev)
+{
+	struct spi_master *master = platform_get_drvdata(pdev);
+	struct fsl_lpspi_data *fsl_lpspi = spi_master_get_devdata(master);
+
+	clk_disable_unprepare(fsl_lpspi->clk);
+
+	return 0;
+}
+
+static struct platform_driver fsl_lpspi_driver = {
+	.driver = {
+		   .name = DRIVER_NAME,
+		   .of_match_table = fsl_lpspi_dt_ids,
+		   },
+	.probe = fsl_lpspi_probe,
+	.remove = fsl_lpspi_remove,
+};
+module_platform_driver(fsl_lpspi_driver);
+
+MODULE_DESCRIPTION("LPSPI Master Controller driver");
+MODULE_AUTHOR("Gao Pan <pandy.gao@nxp.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c
index deb782f6556c119dc8dfe6760510e54df6c0d906..32ced64a5bb9a012e2edd0d415d8c477ebc0522a 100644
--- a/drivers/spi/spi-imx.c
+++ b/drivers/spi/spi-imx.c
@@ -173,15 +173,16 @@ static int mxc_clkdivs[] = {0, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192,
 
 /* MX21, MX27 */
 static unsigned int spi_imx_clkdiv_1(unsigned int fin,
-		unsigned int fspi, unsigned int max)
+		unsigned int fspi, unsigned int max, unsigned int *fres)
 {
 	int i;
 
 	for (i = 2; i < max; i++)
 		if (fspi * mxc_clkdivs[i] >= fin)
-			return i;
+			break;
 
-	return max;
+	*fres = fin / mxc_clkdivs[i];
+	return i;
 }
 
 /* MX1, MX31, MX35, MX51 CSPI */
@@ -442,6 +443,7 @@ static void mx51_ecspi_reset(struct spi_imx_data *spi_imx)
 #define MX31_CSPICTRL_ENABLE	(1 << 0)
 #define MX31_CSPICTRL_MASTER	(1 << 1)
 #define MX31_CSPICTRL_XCH	(1 << 2)
+#define MX31_CSPICTRL_SMC	(1 << 3)
 #define MX31_CSPICTRL_POL	(1 << 4)
 #define MX31_CSPICTRL_PHA	(1 << 5)
 #define MX31_CSPICTRL_SSCTL	(1 << 6)
@@ -452,6 +454,10 @@ static void mx51_ecspi_reset(struct spi_imx_data *spi_imx)
 #define MX35_CSPICTRL_CS_SHIFT	12
 #define MX31_CSPICTRL_DR_SHIFT	16
 
+#define MX31_CSPI_DMAREG	0x10
+#define MX31_DMAREG_RH_DEN	(1<<4)
+#define MX31_DMAREG_TH_DEN	(1<<1)
+
 #define MX31_CSPISTATUS		0x14
 #define MX31_STATUS_RR		(1 << 3)
 
@@ -511,6 +517,9 @@ static int mx31_config(struct spi_device *spi, struct spi_imx_config *config)
 			(is_imx35_cspi(spi_imx) ? MX35_CSPICTRL_CS_SHIFT :
 						  MX31_CSPICTRL_CS_SHIFT);
 
+	if (spi_imx->usedma)
+		reg |= MX31_CSPICTRL_SMC;
+
 	writel(reg, spi_imx->base + MXC_CSPICTRL);
 
 	reg = readl(spi_imx->base + MX31_CSPI_TESTREG);
@@ -520,6 +529,13 @@ static int mx31_config(struct spi_device *spi, struct spi_imx_config *config)
 		reg &= ~MX31_TEST_LBC;
 	writel(reg, spi_imx->base + MX31_CSPI_TESTREG);
 
+	if (spi_imx->usedma) {
+		/* configure DMA requests when RXFIFO is half full and
+		   when TXFIFO is half empty */
+		writel(MX31_DMAREG_RH_DEN | MX31_DMAREG_TH_DEN,
+			spi_imx->base + MX31_CSPI_DMAREG);
+	}
+
 	return 0;
 }
 
@@ -574,9 +590,12 @@ static int mx21_config(struct spi_device *spi, struct spi_imx_config *config)
 	struct spi_imx_data *spi_imx = spi_master_get_devdata(spi->master);
 	unsigned int reg = MX21_CSPICTRL_ENABLE | MX21_CSPICTRL_MASTER;
 	unsigned int max = is_imx27_cspi(spi_imx) ? 16 : 18;
+	unsigned int clk;
+
+	reg |= spi_imx_clkdiv_1(spi_imx->spi_clk, config->speed_hz, max, &clk)
+		<< MX21_CSPICTRL_DR_SHIFT;
+	spi_imx->spi_bus_clk = clk;
 
-	reg |= spi_imx_clkdiv_1(spi_imx->spi_clk, config->speed_hz, max) <<
-		MX21_CSPICTRL_DR_SHIFT;
 	reg |= config->bpw - 1;
 
 	if (spi->mode & SPI_CPHA)
@@ -1244,10 +1263,10 @@ static int spi_imx_probe(struct platform_device *pdev)
 
 	spi_imx->spi_clk = clk_get_rate(spi_imx->clk_per);
 	/*
-	 * Only validated on i.mx6 now, can remove the constrain if validated on
-	 * other chips.
+	 * Only validated on i.mx35 and i.mx6 now, can remove the constraint
+	 * if validated on other chips.
 	 */
-	if (is_imx51_ecspi(spi_imx)) {
+	if (is_imx35_cspi(spi_imx) || is_imx51_ecspi(spi_imx)) {
 		ret = spi_imx_sdma_init(&pdev->dev, spi_imx, master);
 		if (ret == -EPROBE_DEFER)
 			goto out_clk_put;
diff --git a/drivers/spi/spi-jcore.c b/drivers/spi/spi-jcore.c
index f8117b80fa22e7ac8d5108f403166e178858cdb5..cebfea5faa4be247e2b8e145d96c3925c6a32ec7 100644
--- a/drivers/spi/spi-jcore.c
+++ b/drivers/spi/spi-jcore.c
@@ -214,6 +214,7 @@ static const struct of_device_id jcore_spi_of_match[] = {
 	{ .compatible = "jcore,spi2" },
 	{},
 };
+MODULE_DEVICE_TABLE(of, jcore_spi_of_match);
 
 static struct platform_driver jcore_spi_driver = {
 	.probe = jcore_spi_probe,
diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c
index d5157b2222ce1d8837ed204598107f4968218aef..79800e991ccd537180866aa4671ab165a4f896f8 100644
--- a/drivers/spi/spi-omap2-mcspi.c
+++ b/drivers/spi/spi-omap2-mcspi.c
@@ -1386,20 +1386,13 @@ static int omap2_mcspi_probe(struct platform_device *pdev)
 	regs_offset = pdata->regs_offset;
 
 	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (r == NULL) {
-		status = -ENODEV;
-		goto free_master;
-	}
-
-	r->start += regs_offset;
-	r->end += regs_offset;
-	mcspi->phys = r->start;
-
 	mcspi->base = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(mcspi->base)) {
 		status = PTR_ERR(mcspi->base);
 		goto free_master;
 	}
+	mcspi->phys = r->start + regs_offset;
+	mcspi->base += regs_offset;
 
 	mcspi->dev = &pdev->dev;
 
diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index ded37025b445834d0367ef41cac7aa80b2c656cf..6b001c4a564086528e9db608f7250305656d5da7 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c
@@ -138,37 +138,62 @@ static int orion_spi_baudrate_set(struct spi_device *spi, unsigned int speed)
 	tclk_hz = clk_get_rate(orion_spi->clk);
 
 	if (devdata->typ == ARMADA_SPI) {
-		unsigned int clk, spr, sppr, sppr2, err;
-		unsigned int best_spr, best_sppr, best_err;
-
-		best_err = speed;
-		best_spr = 0;
-		best_sppr = 0;
-
-		/* Iterate over the valid range looking for best fit */
-		for (sppr = 0; sppr < 8; sppr++) {
-			sppr2 = 0x1 << sppr;
-
-			spr = tclk_hz / sppr2;
-			spr = DIV_ROUND_UP(spr, speed);
-			if ((spr == 0) || (spr > 15))
-				continue;
-
-			clk = tclk_hz / (spr * sppr2);
-			err = speed - clk;
-
-			if (err < best_err) {
-				best_spr = spr;
-				best_sppr = sppr;
-				best_err = err;
-			}
-		}
+		/*
+		 * Given the core_clk (tclk_hz) and the target rate (speed) we
+		 * determine the best values for SPR (in [0 .. 15]) and SPPR (in
+		 * [0..7]) such that
+		 *
+		 * 	core_clk / (SPR * 2 ** SPPR)
+		 *
+		 * is as big as possible but not bigger than speed.
+		 */
 
-		if ((best_sppr == 0) && (best_spr == 0))
-			return -EINVAL;
+		/* best integer divider: */
+		unsigned divider = DIV_ROUND_UP(tclk_hz, speed);
+		unsigned spr, sppr;
+
+		if (divider < 16) {
+			/* This is the easy case, divider is less than 16 */
+			spr = divider;
+			sppr = 0;
+
+		} else {
+			unsigned two_pow_sppr;
+			/*
+			 * Find the highest bit set in divider. This and the
+			 * three next bits define SPR (apart from rounding).
+			 * SPPR is then the number of zero bits that must be
+			 * appended:
+			 */
+			sppr = fls(divider) - 4;
+
+			/*
+			 * As SPR only has 4 bits, we have to round divider up
+			 * to the next multiple of 2 ** sppr.
+			 */
+			two_pow_sppr = 1 << sppr;
+			divider = (divider + two_pow_sppr - 1) & -two_pow_sppr;
+
+			/*
+			 * recalculate sppr as rounding up divider might have
+			 * increased it enough to change the position of the
+			 * highest set bit. In this case the bit that now
+			 * doesn't make it into SPR is 0, so there is no need to
+			 * round again.
+			 */
+			sppr = fls(divider) - 4;
+			spr = divider >> sppr;
+
+			/*
+			 * Now do range checking. SPR is constructed to have a
+			 * width of 4 bits, so this is fine for sure. So we
+			 * still need to check for sppr to fit into 3 bits:
+			 */
+			if (sppr > 7)
+				return -EINVAL;
+		}
 
-		prescale = ((best_sppr & 0x6) << 5) |
-			((best_sppr & 0x1) << 4) | best_spr;
+		prescale = ((sppr & 0x6) << 5) | ((sppr & 0x1) << 4) | spr;
 	} else {
 		/*
 		 * the supported rates are: 4,6,8...30
diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h
index ce31b8199bb3bf8f961eb169a910953800d367e2..2823a00a940591a631b73a945c0c1bff88c1617b 100644
--- a/drivers/spi/spi-pxa2xx.h
+++ b/drivers/spi/spi-pxa2xx.h
@@ -109,7 +109,6 @@ static  inline void pxa2xx_spi_write(const struct driver_data *drv_data,
 #define DONE_STATE ((void *)2)
 #define ERROR_STATE ((void *)-1)
 
-#define IS_DMA_ALIGNED(x)	IS_ALIGNED((unsigned long)(x), DMA_ALIGNMENT)
 #define DMA_ALIGNMENT		8
 
 static inline int pxa25x_ssp_comp(struct driver_data *drv_data)
diff --git a/drivers/spi/spi-rspi.c b/drivers/spi/spi-rspi.c
index a816f07e168e84b0192224dcaadb99cb377ac9ed..9daf500317376bd143cb56eb4b14738d11adbdca 100644
--- a/drivers/spi/spi-rspi.c
+++ b/drivers/spi/spi-rspi.c
@@ -413,7 +413,7 @@ static unsigned int qspi_set_send_trigger(struct rspi_data *rspi,
 	return n;
 }
 
-static void qspi_set_receive_trigger(struct rspi_data *rspi, unsigned int len)
+static int qspi_set_receive_trigger(struct rspi_data *rspi, unsigned int len)
 {
 	unsigned int n;
 
@@ -428,6 +428,7 @@ static void qspi_set_receive_trigger(struct rspi_data *rspi, unsigned int len)
 		qspi_update(rspi, SPBFCR_RXTRG_MASK,
 			     SPBFCR_RXTRG_1B, QSPI_SPBFCR);
 	}
+	return n;
 }
 
 #define set_config_register(spi, n) spi->ops->set_config_register(spi, n)
@@ -785,6 +786,9 @@ static int qspi_transfer_out_in(struct rspi_data *rspi,
 
 static int qspi_transfer_out(struct rspi_data *rspi, struct spi_transfer *xfer)
 {
+	const u8 *tx = xfer->tx_buf;
+	unsigned int n = xfer->len;
+	unsigned int i, len;
 	int ret;
 
 	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
@@ -793,9 +797,23 @@ static int qspi_transfer_out(struct rspi_data *rspi, struct spi_transfer *xfer)
 			return ret;
 	}
 
-	ret = rspi_pio_transfer(rspi, xfer->tx_buf, NULL, xfer->len);
-	if (ret < 0)
-		return ret;
+	while (n > 0) {
+		len = qspi_set_send_trigger(rspi, n);
+		if (len == QSPI_BUFFER_SIZE) {
+			ret = rspi_wait_for_tx_empty(rspi);
+			if (ret < 0) {
+				dev_err(&rspi->master->dev, "transmit timeout\n");
+				return ret;
+			}
+			for (i = 0; i < len; i++)
+				rspi_write_data(rspi, *tx++);
+		} else {
+			ret = rspi_pio_transfer(rspi, tx, NULL, n);
+			if (ret < 0)
+				return ret;
+		}
+		n -= len;
+	}
 
 	/* Wait for the last transmission */
 	rspi_wait_for_tx_empty(rspi);
@@ -805,13 +823,37 @@ static int qspi_transfer_out(struct rspi_data *rspi, struct spi_transfer *xfer)
 
 static int qspi_transfer_in(struct rspi_data *rspi, struct spi_transfer *xfer)
 {
+	u8 *rx = xfer->rx_buf;
+	unsigned int n = xfer->len;
+	unsigned int i, len;
+	int ret;
+
 	if (rspi->master->can_dma && __rspi_can_dma(rspi, xfer)) {
 		int ret = rspi_dma_transfer(rspi, NULL, &xfer->rx_sg);
 		if (ret != -EAGAIN)
 			return ret;
 	}
 
-	return rspi_pio_transfer(rspi, NULL, xfer->rx_buf, xfer->len);
+	while (n > 0) {
+		len = qspi_set_receive_trigger(rspi, n);
+		if (len == QSPI_BUFFER_SIZE) {
+			ret = rspi_wait_for_rx_full(rspi);
+			if (ret < 0) {
+				dev_err(&rspi->master->dev, "receive timeout\n");
+				return ret;
+			}
+			for (i = 0; i < len; i++)
+				*rx++ = rspi_read_data(rspi);
+		} else {
+			ret = rspi_pio_transfer(rspi, NULL, rx, n);
+			if (ret < 0)
+				return ret;
+			*rx++ = ret;
+		}
+		n -= len;
+	}
+
+	return 0;
 }
 
 static int qspi_transfer_one(struct spi_master *master, struct spi_device *spi,
diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 1de3a772eb7d23a8a90b9b77704033ac4945e364..0012ad02e5696d35b547a3a698682f51a2d02819 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c
@@ -980,6 +980,7 @@ static const struct of_device_id sh_msiof_match[] = {
 	{ .compatible = "renesas,msiof-r8a7792",   .data = &r8a779x_data },
 	{ .compatible = "renesas,msiof-r8a7793",   .data = &r8a779x_data },
 	{ .compatible = "renesas,msiof-r8a7794",   .data = &r8a779x_data },
+	{ .compatible = "renesas,msiof-r8a7796",   .data = &r8a779x_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, sh_msiof_match);
diff --git a/drivers/spi/spi-sun4i.c b/drivers/spi/spi-sun4i.c
index 4969dc10684a4c48135a12e46823be3afc7c5c56..c5cd635c28f388bec2cfd47b9a6c6c9dcec9e046 100644
--- a/drivers/spi/spi-sun4i.c
+++ b/drivers/spi/spi-sun4i.c
@@ -46,6 +46,8 @@
 #define SUN4I_CTL_TP				BIT(18)
 
 #define SUN4I_INT_CTL_REG		0x0c
+#define SUN4I_INT_CTL_RF_F34			BIT(4)
+#define SUN4I_INT_CTL_TF_E34			BIT(12)
 #define SUN4I_INT_CTL_TC			BIT(16)
 
 #define SUN4I_INT_STA_REG		0x10
@@ -61,11 +63,14 @@
 #define SUN4I_CLK_CTL_CDR1(div)			(((div) & SUN4I_CLK_CTL_CDR1_MASK) << 8)
 #define SUN4I_CLK_CTL_DRS			BIT(12)
 
+#define SUN4I_MAX_XFER_SIZE			0xffffff
+
 #define SUN4I_BURST_CNT_REG		0x20
-#define SUN4I_BURST_CNT(cnt)			((cnt) & 0xffffff)
+#define SUN4I_BURST_CNT(cnt)			((cnt) & SUN4I_MAX_XFER_SIZE)
 
 #define SUN4I_XMIT_CNT_REG		0x24
-#define SUN4I_XMIT_CNT(cnt)			((cnt) & 0xffffff)
+#define SUN4I_XMIT_CNT(cnt)			((cnt) & SUN4I_MAX_XFER_SIZE)
+
 
 #define SUN4I_FIFO_STA_REG		0x28
 #define SUN4I_FIFO_STA_RF_CNT_MASK		0x7f
@@ -96,6 +101,31 @@ static inline void sun4i_spi_write(struct sun4i_spi *sspi, u32 reg, u32 value)
 	writel(value, sspi->base_addr + reg);
 }
 
+static inline u32 sun4i_spi_get_tx_fifo_count(struct sun4i_spi *sspi)
+{
+	u32 reg = sun4i_spi_read(sspi, SUN4I_FIFO_STA_REG);
+
+	reg >>= SUN4I_FIFO_STA_TF_CNT_BITS;
+
+	return reg & SUN4I_FIFO_STA_TF_CNT_MASK;
+}
+
+static inline void sun4i_spi_enable_interrupt(struct sun4i_spi *sspi, u32 mask)
+{
+	u32 reg = sun4i_spi_read(sspi, SUN4I_INT_CTL_REG);
+
+	reg |= mask;
+	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, reg);
+}
+
+static inline void sun4i_spi_disable_interrupt(struct sun4i_spi *sspi, u32 mask)
+{
+	u32 reg = sun4i_spi_read(sspi, SUN4I_INT_CTL_REG);
+
+	reg &= ~mask;
+	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, reg);
+}
+
 static inline void sun4i_spi_drain_fifo(struct sun4i_spi *sspi, int len)
 {
 	u32 reg, cnt;
@@ -118,10 +148,13 @@ static inline void sun4i_spi_drain_fifo(struct sun4i_spi *sspi, int len)
 
 static inline void sun4i_spi_fill_fifo(struct sun4i_spi *sspi, int len)
 {
+	u32 cnt;
 	u8 byte;
 
-	if (len > sspi->len)
-		len = sspi->len;
+	/* See how much data we can fit */
+	cnt = SUN4I_FIFO_DEPTH - sun4i_spi_get_tx_fifo_count(sspi);
+
+	len = min3(len, (int)cnt, sspi->len);
 
 	while (len--) {
 		byte = sspi->tx_buf ? *sspi->tx_buf++ : 0;
@@ -184,10 +217,10 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	u32 reg;
 
 	/* We don't support transfer larger than the FIFO */
-	if (tfr->len > SUN4I_FIFO_DEPTH)
+	if (tfr->len > SUN4I_MAX_XFER_SIZE)
 		return -EMSGSIZE;
 
-	if (tfr->tx_buf && tfr->len >= SUN4I_FIFO_DEPTH)
+	if (tfr->tx_buf && tfr->len >= SUN4I_MAX_XFER_SIZE)
 		return -EMSGSIZE;
 
 	reinit_completion(&sspi->done);
@@ -286,7 +319,11 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 	sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH - 1);
 
 	/* Enable the interrupts */
-	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, SUN4I_INT_CTL_TC);
+	sun4i_spi_enable_interrupt(sspi, SUN4I_INT_CTL_TC |
+					 SUN4I_INT_CTL_RF_F34);
+	/* Only enable Tx FIFO interrupt if we really need it */
+	if (tx_len > SUN4I_FIFO_DEPTH)
+		sun4i_spi_enable_interrupt(sspi, SUN4I_INT_CTL_TF_E34);
 
 	/* Start the transfer */
 	reg = sun4i_spi_read(sspi, SUN4I_CTL_REG);
@@ -306,7 +343,6 @@ static int sun4i_spi_transfer_one(struct spi_master *master,
 		goto out;
 	}
 
-	sun4i_spi_drain_fifo(sspi, SUN4I_FIFO_DEPTH);
 
 out:
 	sun4i_spi_write(sspi, SUN4I_INT_CTL_REG, 0);
@@ -322,10 +358,33 @@ static irqreturn_t sun4i_spi_handler(int irq, void *dev_id)
 	/* Transfer complete */
 	if (status & SUN4I_INT_CTL_TC) {
 		sun4i_spi_write(sspi, SUN4I_INT_STA_REG, SUN4I_INT_CTL_TC);
+		sun4i_spi_drain_fifo(sspi, SUN4I_FIFO_DEPTH);
 		complete(&sspi->done);
 		return IRQ_HANDLED;
 	}
 
+	/* Receive FIFO 3/4 full */
+	if (status & SUN4I_INT_CTL_RF_F34) {
+		sun4i_spi_drain_fifo(sspi, SUN4I_FIFO_DEPTH);
+		/* Only clear the interrupt _after_ draining the FIFO */
+		sun4i_spi_write(sspi, SUN4I_INT_STA_REG, SUN4I_INT_CTL_RF_F34);
+		return IRQ_HANDLED;
+	}
+
+	/* Transmit FIFO 3/4 empty */
+	if (status & SUN4I_INT_CTL_TF_E34) {
+		sun4i_spi_fill_fifo(sspi, SUN4I_FIFO_DEPTH);
+
+		if (!sspi->len)
+			/* nothing left to transmit */
+			sun4i_spi_disable_interrupt(sspi, SUN4I_INT_CTL_TF_E34);
+
+		/* Only clear the interrupt _after_ re-seeding the FIFO */
+		sun4i_spi_write(sspi, SUN4I_INT_STA_REG, SUN4I_INT_CTL_TF_E34);
+
+		return IRQ_HANDLED;
+	}
+
 	return IRQ_NONE;
 }
 
diff --git a/drivers/spi/spi-sun6i.c b/drivers/spi/spi-sun6i.c
index 9918a57a6a6e643456d95dfaf53a6d85e428cbac..e3114832c485cdac7621662ed1d7291b493a76d8 100644
--- a/drivers/spi/spi-sun6i.c
+++ b/drivers/spi/spi-sun6i.c
@@ -17,6 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -24,6 +25,7 @@
 #include <linux/spi/spi.h>
 
 #define SUN6I_FIFO_DEPTH		128
+#define SUN8I_FIFO_DEPTH		64
 
 #define SUN6I_GBL_CTL_REG		0x04
 #define SUN6I_GBL_CTL_BUS_ENABLE		BIT(0)
@@ -90,6 +92,7 @@ struct sun6i_spi {
 	const u8		*tx_buf;
 	u8			*rx_buf;
 	int			len;
+	unsigned long		fifo_depth;
 };
 
 static inline u32 sun6i_spi_read(struct sun6i_spi *sspi, u32 reg)
@@ -155,7 +158,9 @@ static void sun6i_spi_set_cs(struct spi_device *spi, bool enable)
 
 static size_t sun6i_spi_max_transfer_size(struct spi_device *spi)
 {
-	return SUN6I_FIFO_DEPTH - 1;
+	struct sun6i_spi *sspi = spi_master_get_devdata(spi->master);
+
+	return sspi->fifo_depth - 1;
 }
 
 static int sun6i_spi_transfer_one(struct spi_master *master,
@@ -170,7 +175,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 	u32 reg;
 
 	/* We don't support transfer larger than the FIFO */
-	if (tfr->len > SUN6I_FIFO_DEPTH)
+	if (tfr->len > sspi->fifo_depth)
 		return -EINVAL;
 
 	reinit_completion(&sspi->done);
@@ -265,7 +270,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 			SUN6I_BURST_CTL_CNT_STC(tx_len));
 
 	/* Fill the TX FIFO */
-	sun6i_spi_fill_fifo(sspi, SUN6I_FIFO_DEPTH);
+	sun6i_spi_fill_fifo(sspi, sspi->fifo_depth);
 
 	/* Enable the interrupts */
 	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, SUN6I_INT_CTL_TC);
@@ -288,7 +293,7 @@ static int sun6i_spi_transfer_one(struct spi_master *master,
 		goto out;
 	}
 
-	sun6i_spi_drain_fifo(sspi, SUN6I_FIFO_DEPTH);
+	sun6i_spi_drain_fifo(sspi, sspi->fifo_depth);
 
 out:
 	sun6i_spi_write(sspi, SUN6I_INT_CTL_REG, 0);
@@ -398,6 +403,8 @@ static int sun6i_spi_probe(struct platform_device *pdev)
 	}
 
 	sspi->master = master;
+	sspi->fifo_depth = (unsigned long)of_device_get_match_data(&pdev->dev);
+
 	master->max_speed_hz = 100 * 1000 * 1000;
 	master->min_speed_hz = 3 * 1000;
 	master->set_cs = sun6i_spi_set_cs;
@@ -470,7 +477,8 @@ static int sun6i_spi_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id sun6i_spi_match[] = {
-	{ .compatible = "allwinner,sun6i-a31-spi", },
+	{ .compatible = "allwinner,sun6i-a31-spi", .data = (void *)SUN6I_FIFO_DEPTH },
+	{ .compatible = "allwinner,sun8i-h3-spi",  .data = (void *)SUN8I_FIFO_DEPTH },
 	{}
 };
 MODULE_DEVICE_TABLE(of, sun6i_spi_match);
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index caeac66a39777465ac1c9a62cf2b74a99dfb2e47..ec6fb09e2e1711f7adfbe079559a89f1a9cd343d 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -411,6 +411,7 @@ static int ti_qspi_dma_xfer(struct ti_qspi *qspi, dma_addr_t dma_dst,
 	tx->callback = ti_qspi_dma_callback;
 	tx->callback_param = qspi;
 	cookie = tx->tx_submit(tx);
+	reinit_completion(&qspi->transfer_complete);
 
 	ret = dma_submit_error(cookie);
 	if (ret) {
diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c
index c54ee667447101abd8957bdc2c976c174d137d4c..fcb991034c3d076bc032545addca93517017b126 100644
--- a/drivers/spi/spi-topcliff-pch.c
+++ b/drivers/spi/spi-topcliff-pch.c
@@ -1268,11 +1268,8 @@ static void pch_spi_free_resources(struct pch_spi_board_data *board_dat,
 static int pch_spi_get_resources(struct pch_spi_board_data *board_dat,
 				 struct pch_spi_data *data)
 {
-	int retval = 0;
-
 	dev_dbg(&board_dat->pdev->dev, "%s ENTRY\n", __func__);
 
-
 	/* reset PCH SPI h/w */
 	pch_spi_reset(data->master);
 	dev_dbg(&board_dat->pdev->dev,
@@ -1280,15 +1277,7 @@ static int pch_spi_get_resources(struct pch_spi_board_data *board_dat,
 
 	dev_dbg(&board_dat->pdev->dev, "%s data->irq_reg_sts=true\n", __func__);
 
-	if (retval != 0) {
-		dev_err(&board_dat->pdev->dev,
-			"%s FAIL:invoking pch_spi_free_resources\n", __func__);
-		pch_spi_free_resources(board_dat, data);
-	}
-
-	dev_dbg(&board_dat->pdev->dev, "%s Return=%d\n", __func__, retval);
-
-	return retval;
+	return 0;
 }
 
 static void pch_free_dma_buf(struct pch_spi_board_data *board_dat,
diff --git a/drivers/spi/spi-xlp.c b/drivers/spi/spi-xlp.c
index 4071a729eb2f2af5ffd8f81c9113c026a07d3e34..bea7a93a60468813d904f3d9457c8e8440055ec3 100644
--- a/drivers/spi/spi-xlp.c
+++ b/drivers/spi/spi-xlp.c
@@ -451,6 +451,7 @@ static const struct of_device_id xlp_spi_dt_id[] = {
 	{ .compatible = "netlogic,xlp832-spi" },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, xlp_spi_dt_id);
 
 static struct platform_driver xlp_spi_driver = {
 	.probe	= xlp_spi_probe,
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 838783c3fed0ae81626099b8e295dc12963a1360..656dd3e3220c5062b43b5a162a078f245f90630c 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -697,10 +697,15 @@ static void spi_set_cs(struct spi_device *spi, bool enable)
 	if (spi->mode & SPI_CS_HIGH)
 		enable = !enable;
 
-	if (gpio_is_valid(spi->cs_gpio))
+	if (gpio_is_valid(spi->cs_gpio)) {
 		gpio_set_value(spi->cs_gpio, !enable);
-	else if (spi->master->set_cs)
+		/* Some SPI masters need both GPIO CS & slave_select */
+		if ((spi->master->flags & SPI_MASTER_GPIO_SS) &&
+		    spi->master->set_cs)
+			spi->master->set_cs(spi, !enable);
+	} else if (spi->master->set_cs) {
 		spi->master->set_cs(spi, !enable);
+	}
 }
 
 #ifdef CONFIG_HAS_DMA
@@ -720,6 +725,7 @@ static int spi_map_buf(struct spi_master *master, struct device *dev,
 	int desc_len;
 	int sgs;
 	struct page *vm_page;
+	struct scatterlist *sg;
 	void *sg_buf;
 	size_t min;
 	int i, ret;
@@ -738,6 +744,7 @@ static int spi_map_buf(struct spi_master *master, struct device *dev,
 	if (ret != 0)
 		return ret;
 
+	sg = &sgt->sgl[0];
 	for (i = 0; i < sgs; i++) {
 
 		if (vmalloced_buf || kmap_buf) {
@@ -751,16 +758,17 @@ static int spi_map_buf(struct spi_master *master, struct device *dev,
 				sg_free_table(sgt);
 				return -ENOMEM;
 			}
-			sg_set_page(&sgt->sgl[i], vm_page,
+			sg_set_page(sg, vm_page,
 				    min, offset_in_page(buf));
 		} else {
 			min = min_t(size_t, len, desc_len);
 			sg_buf = buf;
-			sg_set_buf(&sgt->sgl[i], sg_buf, min);
+			sg_set_buf(sg, sg_buf, min);
 		}
 
 		buf += min;
 		len -= min;
+		sg = sg_next(sg);
 	}
 
 	ret = dma_map_sg(dev, sgt->sgl, sgt->nents, dir);
@@ -1034,8 +1042,14 @@ static int spi_transfer_one_message(struct spi_master *master,
 		if (msg->status != -EINPROGRESS)
 			goto out;
 
-		if (xfer->delay_usecs)
-			udelay(xfer->delay_usecs);
+		if (xfer->delay_usecs) {
+			u16 us = xfer->delay_usecs;
+
+			if (us <= 10)
+				udelay(us);
+			else
+				usleep_range(us, us + DIV_ROUND_UP(us, 10));
+		}
 
 		if (xfer->cs_change) {
 			if (list_is_last(&xfer->transfer_list,
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 2e05046f866bd01bf87edcdeff0d5b76d4d0aea7..9e2e099baf8ca5cc6510912a36d4ca03daeb8273 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -696,6 +696,7 @@ static struct class *spidev_class;
 static const struct of_device_id spidev_dt_ids[] = {
 	{ .compatible = "rohm,dh2228fv" },
 	{ .compatible = "lineartechnology,ltc2488" },
+	{ .compatible = "ge,achc" },
 	{},
 };
 MODULE_DEVICE_TABLE(of, spidev_dt_ids);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 4b743ac3539623889b6e2d8cc008df062e0f2b34..75c6bd0ac605b2024509985214cdc3d451c6ab53 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -442,6 +442,7 @@ struct spi_master {
 #define SPI_MASTER_NO_TX	BIT(2)		/* can't do buffer write */
 #define SPI_MASTER_MUST_RX      BIT(3)		/* requires rx */
 #define SPI_MASTER_MUST_TX      BIT(4)		/* requires tx */
+#define SPI_MASTER_GPIO_SS      BIT(5)		/* GPIO CS must select slave */
 
 	/*
 	 * on some hardware transfer / message size may be constrained
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index f046b77cfefe3056a72b82aa2194eb55e8b2fa93..816f119c9b7b1584a587077a55e28691418ff706 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -315,7 +315,7 @@ static void transfer_file(int fd, char *filename)
 		pabort("can't stat input file");
 
 	tx_fd = open(filename, O_RDONLY);
-	if (fd < 0)
+	if (tx_fd < 0)
 		pabort("can't open input file");
 
 	tx = malloc(sb.st_size);