Diffstat (limited to 'kernel/drivers/dma')
77 files changed, 13956 insertions, 6156 deletions
diff --git a/kernel/drivers/dma/Kconfig b/kernel/drivers/dma/Kconfig index bda2cb06d..e6cd1a320 100644 --- a/kernel/drivers/dma/Kconfig +++ b/kernel/drivers/dma/Kconfig @@ -33,27 +33,29 @@ if DMADEVICES comment "DMA Devices" -config INTEL_MIC_X100_DMA - tristate "Intel MIC X100 DMA Driver" - depends on 64BIT && X86 && INTEL_MIC_BUS - select DMA_ENGINE - help - This enables DMA support for the Intel Many Integrated Core - (MIC) family of PCIe form factor coprocessor X100 devices that - run a 64 bit Linux OS. This driver will be used by both MIC - host and card drivers. - - If you are building host kernel with a MIC device or a card - kernel for a MIC device, then say M (recommended) or Y, else - say N. If unsure say N. +#core +config ASYNC_TX_ENABLE_CHANNEL_SWITCH + bool - More information about the Intel MIC family as well as the Linux - OS and tools for MIC to use with this driver are available from - <http://software.intel.com/en-us/mic-developer>. +config ARCH_HAS_ASYNC_TX_FIND_CHANNEL + bool -config ASYNC_TX_ENABLE_CHANNEL_SWITCH +config DMA_ENGINE bool +config DMA_VIRTUAL_CHANNELS + tristate + +config DMA_ACPI + def_bool y + depends on ACPI + +config DMA_OF + def_bool y + depends on OF + select DMA_ENGINE + +#devices config AMBA_PL08X bool "ARM PrimeCell PL080 or PL081 support" depends on ARM_AMBA @@ -63,29 +65,15 @@ config AMBA_PL08X Platform has a PL08x DMAC device which can provide DMA engine support -config INTEL_IOATDMA - tristate "Intel I/OAT DMA support" - depends on PCI && X86 +config AMCC_PPC440SPE_ADMA + tristate "AMCC PPC440SPe ADMA support" + depends on 440SPe || 440SP select DMA_ENGINE select DMA_ENGINE_RAID - select DCA - help - Enable support for the Intel(R) I/OAT DMA engine present - in recent Intel Xeon chipsets. - - Say Y here if you have such a chipset. - - If unsure, say N. - -config INTEL_IOP_ADMA - tristate "Intel IOP ADMA support" - depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX - select DMA_ENGINE + select ARCH_HAS_ASYNC_TX_FIND_CHANNEL select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Enable support for the Intel(R) IOP Series RAID engines. - -source "drivers/dma/dw/Kconfig" + Enable support for the AMCC PPC440SPe RAID engines. config AT_HDMAC tristate "Atmel AHB DMA support" @@ -101,6 +89,89 @@ config AT_XDMAC help Support the Atmel XDMA controller. +config AXI_DMAC + tristate "Analog Devices AXI-DMAC DMA support" + depends on MICROBLAZE || NIOS2 || ARCH_ZYNQ || ARCH_SOCFPGA || COMPILE_TEST + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Enable support for the Analog Devices AXI-DMAC peripheral. This DMA + controller is often used in Analog Device's reference designs for FPGA + platforms. + +config COH901318 + bool "ST-Ericsson COH901318 DMA support" + select DMA_ENGINE + depends on ARCH_U300 + help + Enable support for ST-Ericsson COH 901 318 DMA. + +config DMA_BCM2835 + tristate "BCM2835 DMA engine support" + depends on ARCH_BCM2835 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4740 + tristate "JZ4740 DMA support" + depends on MACH_JZ4740 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + +config DMA_JZ4780 + tristate "JZ4780 DMA support" + depends on MACH_JZ4780 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + This selects support for the DMA controller in Ingenic JZ4780 SoCs. + If you have a board based on such a SoC and wish to use DMA for + devices which can use the DMA controller, say Y or M here. 
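Several of the entries above and below select the new DMA_VIRTUAL_CHANNELS symbol, i.e. the shared virt-dma helper built as virt-dma.o in the Makefile further down. As a rough, hypothetical sketch of what that pattern looks like in a driver (the foo_* names are placeholders and not part of this patch; struct virt_dma_chan, struct virt_dma_desc and vchan_init() come from drivers/dma/virt-dma.h), compare the real use later in this diff where amba-pl08x.c sets chan->vc.desc_free and calls vchan_init():

	#include <linux/slab.h>
	#include <linux/dmaengine.h>
	#include "virt-dma.h"

	struct foo_desc {
		struct virt_dma_desc vd;	/* embed the virt-dma descriptor */
		/* hardware LLI, lengths, ... */
	};

	struct foo_chan {
		struct virt_dma_chan vc;	/* embeds struct dma_chan */
	};

	static void foo_desc_free(struct virt_dma_desc *vd)
	{
		/* invoked by the virt-dma core to release a finished descriptor */
		kfree(container_of(vd, struct foo_desc, vd));
	}

	static void foo_chan_init(struct foo_chan *fc, struct dma_device *ddev)
	{
		fc->vc.desc_free = foo_desc_free;
		vchan_init(&fc->vc, ddev);	/* adds fc->vc.chan to ddev->channels */
	}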
+ +config DMA_OMAP + tristate "OMAP DMA support" + depends on ARCH_OMAP + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if SOC_DRA7XX + +config DMA_SA11X0 + tristate "SA-11x0 DMA support" + depends on ARCH_SA1100 + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the DMA engine found on Intel StrongARM SA-1100 and + SA-1110 SoCs. This DMA engine can only be used with on-chip + devices. + +config DMA_SUN4I + tristate "Allwinner A10 DMA SoCs support" + depends on MACH_SUN4I || MACH_SUN5I || MACH_SUN7I + default (MACH_SUN4I || MACH_SUN5I || MACH_SUN7I) + select DMA_ENGINE + select DMA_OF + select DMA_VIRTUAL_CHANNELS + help + Enable support for the DMA controller present in the sun4i, + sun5i and sun7i Allwinner ARM SoCs. + +config DMA_SUN6I + tristate "Allwinner A31 SoCs DMA support" + depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST + depends on RESET_CONTROLLER + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support for the DMA engine first found in Allwinner A31 SoCs. + +config EP93XX_DMA + bool "Cirrus Logic EP93xx DMA support" + depends on ARCH_EP93XX + select DMA_ENGINE + help + Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + config FSL_DMA tristate "Freescale Elo series DMA support" depends on FSL_SOC @@ -112,6 +183,16 @@ config FSL_DMA EloPlus is on mpc85xx and mpc86xx and Pxxx parts, and the Elo3 is on some Txxx and Bxxx parts. +config FSL_EDMA + tristate "Freescale eDMA engine support" + depends on OF + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + help + Support the Freescale eDMA engine with programmable channel + multiplexing capability for DMA request sources(slot). + This module can be found on Freescale Vybrid and LS-1 SoCs. + config FSL_RAID tristate "Freescale RAID engine Support" depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH @@ -123,139 +204,175 @@ config FSL_RAID the capability to offload memcpy, xor and pq computation for raid5/6. -source "drivers/dma/hsu/Kconfig" - -config MPC512X_DMA - tristate "Freescale MPC512x built-in DMA engine support" - depends on PPC_MPC512x || PPC_MPC831x +config IMG_MDC_DMA + tristate "IMG MDC support" + depends on MIPS || COMPILE_TEST + depends on MFD_SYSCON select DMA_ENGINE - ---help--- - Enable support for the Freescale MPC512x built-in DMA engine. - -source "drivers/dma/bestcomm/Kconfig" + select DMA_VIRTUAL_CHANNELS + help + Enable support for the IMG multi-threaded DMA controller (MDC). -config MV_XOR - bool "Marvell XOR engine support" - depends on PLAT_ORION +config IMX_DMA + tristate "i.MX DMA support" + depends on ARCH_MXC select DMA_ENGINE - select DMA_ENGINE_RAID - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - ---help--- - Enable support for the Marvell XOR engine. + help + Support the i.MX DMA engine. This engine is integrated into + Freescale i.MX1/21/27 chips. -config MX3_IPU - bool "MX3x Image Processing Unit support" +config IMX_SDMA + tristate "i.MX SDMA support" depends on ARCH_MXC select DMA_ENGINE - default y help - If you plan to use the Image Processing unit in the i.MX3x, say - Y here. If unsure, select Y. + Support the i.MX SDMA engine. This engine is integrated into + Freescale i.MX25/31/35/51/53/6 chips. -config MX3_IPU_IRQS - int "Number of dynamically mapped interrupts for IPU" - depends on MX3_IPU - range 2 137 - default 4 +config INTEL_IDMA64 + tristate "Intel integrated DMA 64-bit support" + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Out of 137 interrupt sources on i.MX31 IPU only very few are used. 
- To avoid bloating the irq_desc[] array we allocate a sufficient - number of IRQ slots and map them dynamically to specific sources. + Enable DMA support for Intel Low Power Subsystem such as found on + Intel Skylake PCH. -config TXX9_DMAC - tristate "Toshiba TXx9 SoC DMA support" - depends on MACH_TX49XX || MACH_TX39XX +config INTEL_IOATDMA + tristate "Intel I/OAT DMA support" + depends on PCI && X86_64 select DMA_ENGINE + select DMA_ENGINE_RAID + select DCA help - Support the TXx9 SoC internal DMA controller. This can be - integrated in chips such as the Toshiba TX4927/38/39. + Enable support for the Intel(R) I/OAT DMA engine present + in recent Intel Xeon chipsets. -config TEGRA20_APB_DMA - bool "NVIDIA Tegra20 APB DMA support" - depends on ARCH_TEGRA + Say Y here if you have such a chipset. + + If unsure, say N. + +config INTEL_IOP_ADMA + tristate "Intel IOP ADMA support" + depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX select DMA_ENGINE + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Support for the NVIDIA Tegra20 APB DMA controller driver. The - DMA controller is having multiple DMA channel which can be - configured for different peripherals like audio, UART, SPI, - I2C etc which is in APB bus. - This DMA controller transfers data from memory to peripheral fifo - or vice versa. It does not support memory to memory data transfer. + Enable support for the Intel(R) IOP Series RAID engines. -config S3C24XX_DMAC - tristate "Samsung S3C24XX DMA support" - depends on ARCH_S3C24XX +config INTEL_MIC_X100_DMA + tristate "Intel MIC X100 DMA Driver" + depends on 64BIT && X86 && INTEL_MIC_BUS select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS help - Support for the Samsung S3C24XX DMA controller driver. The - DMA controller is having multiple DMA channels which can be - configured for different peripherals like audio, UART, SPI. - The DMA controller can transfer data from memory to peripheral, - periphal to memory, periphal to periphal and memory to memory. + This enables DMA support for the Intel Many Integrated Core + (MIC) family of PCIe form factor coprocessor X100 devices that + run a 64 bit Linux OS. This driver will be used by both MIC + host and card drivers. -source "drivers/dma/sh/Kconfig" + If you are building host kernel with a MIC device or a card + kernel for a MIC device, then say M (recommended) or Y, else + say N. If unsure say N. -config COH901318 - bool "ST-Ericsson COH901318 DMA support" + More information about the Intel MIC family as well as the Linux + OS and tools for MIC to use with this driver are available from + <http://software.intel.com/en-us/mic-developer>. + +config K3_DMA + tristate "Hisilicon K3 DMA support" + depends on ARCH_HI3xxx select DMA_ENGINE - depends on ARCH_U300 + select DMA_VIRTUAL_CHANNELS help - Enable support for ST-Ericsson COH 901 318 DMA. + Support the DMA engine for Hisilicon K3 platform + devices. -config STE_DMA40 - bool "ST-Ericsson DMA40 support" - depends on ARCH_U8500 +config LPC18XX_DMAMUX + bool "NXP LPC18xx/43xx DMA MUX for PL080" + depends on ARCH_LPC18XX || COMPILE_TEST + depends on OF && AMBA_PL08X + select MFD_SYSCON + help + Enable support for DMA on NXP LPC18xx/43xx platforms + with PL080 and multiplexed DMA request lines. + +config MMP_PDMA + bool "MMP PDMA support" + depends on (ARCH_MMP || ARCH_PXA) select DMA_ENGINE help - Support for ST-Ericsson DMA40 controller + Support the MMP PDMA engine for PXA and MMP platform. 
-config AMCC_PPC440SPE_ADMA - tristate "AMCC PPC440SPe ADMA support" - depends on 440SPe || 440SP +config MMP_TDMA + bool "MMP Two-Channel DMA support" + depends on ARCH_MMP select DMA_ENGINE - select DMA_ENGINE_RAID - select ARCH_HAS_ASYNC_TX_FIND_CHANNEL - select ASYNC_TX_ENABLE_CHANNEL_SWITCH + select MMP_SRAM help - Enable support for the AMCC PPC440SPe RAID engines. + Support the MMP Two-Channel DMA engine. + This engine used for MMP Audio DMA and pxa910 SQU. + It needs sram driver under mach-mmp. -config TIMB_DMA - tristate "Timberdale FPGA DMA support" - depends on MFD_TIMBERDALE +config MOXART_DMA + tristate "MOXART DMA support" + depends on ARCH_MOXART select DMA_ENGINE + select DMA_OF + select DMA_VIRTUAL_CHANNELS help - Enable support for the Timberdale FPGA DMA engine. + Enable support for the MOXA ART SoC DMA controller. + + Say Y here if you enabled MMP ADMA, otherwise say N. -config SIRF_DMA - tristate "CSR SiRFprimaII/SiRFmarco DMA support" - depends on ARCH_SIRF +config MPC512X_DMA + tristate "Freescale MPC512x built-in DMA engine support" + depends on PPC_MPC512x || PPC_MPC831x + select DMA_ENGINE + ---help--- + Enable support for the Freescale MPC512x built-in DMA engine. + +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION + select DMA_ENGINE + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH + ---help--- + Enable support for the Marvell XOR engine. + +config MXS_DMA + bool "MXS DMA support" + depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q + select STMP_DEVICE select DMA_ENGINE help - Enable support for the CSR SiRFprimaII DMA engine. + Support the MXS DMA engine. This engine including APBH-DMA + and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. -config TI_EDMA - bool "TI EDMA support" - depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE +config MX3_IPU + bool "MX3x Image Processing Unit support" + depends on ARCH_MXC select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - select TI_PRIV_EDMA - default n + default y help - Enable support for the TI EDMA controller. This DMA - engine is found on TI DaVinci and AM33xx parts. + If you plan to use the Image Processing unit in the i.MX3x, say + Y here. If unsure, select Y. -config ARCH_HAS_ASYNC_TX_FIND_CHANNEL - bool +config MX3_IPU_IRQS + int "Number of dynamically mapped interrupts for IPU" + depends on MX3_IPU + range 2 137 + default 4 + help + Out of 137 interrupt sources on i.MX31 IPU only very few are used. + To avoid bloating the irq_desc[] array we allocate a sufficient + number of IRQ slots and map them dynamically to specific sources. -config PL330_DMA - tristate "DMA API Driver for PL330" +config NBPFAXI_DMA + tristate "Renesas Type-AXI NBPF DMA support" select DMA_ENGINE - depends on ARM_AMBA + depends on ARM || COMPILE_TEST help - Select if your platform has one or more PL330 DMACs. - You need to provide platform specific settings via - platform_data for a dma-pl330 device. + Support for "Type-AXI" NBPF DMA IPs from Renesas config PCH_DMA tristate "Intel EG20T PCH / LAPIS Semicon IOH(ML7213/ML7223/ML7831) DMA" @@ -271,71 +388,87 @@ config PCH_DMA ML7213/ML7223/ML7831 is companion chip for Intel Atom E6xx series. ML7213/ML7223/ML7831 is completely compatible for Intel EG20T PCH. -config IMX_SDMA - tristate "i.MX SDMA support" - depends on ARCH_MXC +config PL330_DMA + tristate "DMA API Driver for PL330" select DMA_ENGINE + depends on ARM_AMBA help - Support the i.MX SDMA engine. This engine is integrated into - Freescale i.MX25/31/35/51/53/6 chips. 
+ Select if your platform has one or more PL330 DMACs. + You need to provide platform specific settings via + platform_data for a dma-pl330 device. -config IMX_DMA - tristate "i.MX DMA support" - depends on ARCH_MXC +config PXA_DMA + bool "PXA DMA support" + depends on (ARCH_MMP || ARCH_PXA) select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS help - Support the i.MX DMA engine. This engine is integrated into - Freescale i.MX1/21/27 chips. + Support the DMA engine for PXA. It is also compatible with MMP PDMA + platform. The internal DMA IP of all PXA variants is supported, with + 16 to 32 channels for peripheral to memory or memory to memory + transfers. -config MXS_DMA - bool "MXS DMA support" - depends on SOC_IMX23 || SOC_IMX28 || SOC_IMX6Q - select STMP_DEVICE +config QCOM_BAM_DMA + tristate "QCOM BAM DMA support" + depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) + select DMA_ENGINE + select DMA_VIRTUAL_CHANNELS + ---help--- + Enable support for the QCOM BAM DMA controller. This controller + provides DMA capabilities for a variety of on-chip devices. + +config SIRF_DMA + tristate "CSR SiRFprimaII/SiRFmarco DMA support" + depends on ARCH_SIRF select DMA_ENGINE help - Support the MXS DMA engine. This engine including APBH-DMA - and APBX-DMA is integrated into Freescale i.MX23/28/MX6Q/MX6DL chips. + Enable support for the CSR SiRFprimaII DMA engine. -config EP93XX_DMA - bool "Cirrus Logic EP93xx DMA support" - depends on ARCH_EP93XX +config STE_DMA40 + bool "ST-Ericsson DMA40 support" + depends on ARCH_U8500 select DMA_ENGINE help - Enable support for the Cirrus Logic EP93xx M2P/M2M DMA controller. + Support for ST-Ericsson DMA40 controller -config DMA_SA11X0 - tristate "SA-11x0 DMA support" - depends on ARCH_SA1100 +config S3C24XX_DMAC + tristate "Samsung S3C24XX DMA support" + depends on ARCH_S3C24XX select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Support the DMA engine found on Intel StrongARM SA-1100 and - SA-1110 SoCs. This DMA engine can only be used with on-chip - devices. + Support for the Samsung S3C24XX DMA controller driver. The + DMA controller is having multiple DMA channels which can be + configured for different peripherals like audio, UART, SPI. + The DMA controller can transfer data from memory to peripheral, + periphal to memory, periphal to periphal and memory to memory. -config MMP_TDMA - bool "MMP Two-Channel DMA support" - depends on ARCH_MMP +config TXX9_DMAC + tristate "Toshiba TXx9 SoC DMA support" + depends on MACH_TX49XX || MACH_TX39XX select DMA_ENGINE - select MMP_SRAM help - Support the MMP Two-Channel DMA engine. - This engine used for MMP Audio DMA and pxa910 SQU. - It needs sram driver under mach-mmp. - - Say Y here if you enabled MMP ADMA, otherwise say N. + Support the TXx9 SoC internal DMA controller. This can be + integrated in chips such as the Toshiba TX4927/38/39. -config DMA_OMAP - tristate "OMAP DMA support" - depends on ARCH_OMAP +config TEGRA20_APB_DMA + bool "NVIDIA Tegra20 APB DMA support" + depends on ARCH_TEGRA select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + help + Support for the NVIDIA Tegra20 APB DMA controller driver. The + DMA controller is having multiple DMA channel which can be + configured for different peripherals like audio, UART, SPI, + I2C etc which is in APB bus. + This DMA controller transfers data from memory to peripheral fifo + or vice versa. It does not support memory to memory data transfer. 
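The help texts above (S3C24XX, Tegra20 APB, PXA, ...) all describe slave DMA for on-chip peripherals such as UART, SPI or audio. For orientation, this is a minimal sketch of how a peripheral driver typically consumes such a controller through the generic dmaengine slave API; the uart_* name, the "tx" channel name and the FIFO address are made up for illustration, and a real driver would request the channel once at probe time:

	#include <linux/dmaengine.h>
	#include <linux/dma-mapping.h>

	static int uart_start_tx_dma(struct device *dev, dma_addr_t buf, size_t len)
	{
		struct dma_slave_config cfg = {
			.direction	= DMA_MEM_TO_DEV,
			.dst_addr	= 0x70006040,		/* made-up FIFO address */
			.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
			.dst_maxburst	= 4,
		};
		struct dma_async_tx_descriptor *desc;
		struct dma_chan *chan;

		chan = dma_request_slave_channel(dev, "tx");	/* resolved via DT/ACPI */
		if (!chan)
			return -ENODEV;

		dmaengine_slave_config(chan, &cfg);
		desc = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV,
						   DMA_PREP_INTERRUPT);
		if (!desc) {
			dma_release_channel(chan);
			return -ENOMEM;
		}
		dmaengine_submit(desc);
		dma_async_issue_pending(chan);	/* actually kicks off the transfer */
		return 0;
	}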
-config DMA_BCM2835 - tristate "BCM2835 DMA engine support" - depends on ARCH_BCM2835 +config TIMB_DMA + tristate "Timberdale FPGA DMA support" + depends on MFD_TIMBERDALE select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + help + Enable support for the Timberdale FPGA DMA engine. config TI_CPPI41 tristate "AM33xx CPPI41 DMA support" @@ -345,56 +478,28 @@ config TI_CPPI41 The Communications Port Programming Interface (CPPI) 4.1 DMA engine is currently used by the USB driver on AM335x platforms. -config MMP_PDMA - bool "MMP PDMA support" - depends on (ARCH_MMP || ARCH_PXA) - select DMA_ENGINE - help - Support the MMP PDMA engine for PXA and MMP platform. - -config DMA_JZ4740 - tristate "JZ4740 DMA support" - depends on MACH_JZ4740 - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - -config DMA_JZ4780 - tristate "JZ4780 DMA support" - depends on MACH_JZ4780 - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - This selects support for the DMA controller in Ingenic JZ4780 SoCs. - If you have a board based on such a SoC and wish to use DMA for - devices which can use the DMA controller, say Y or M here. +config TI_DMA_CROSSBAR + bool -config K3_DMA - tristate "Hisilicon K3 DMA support" - depends on ARCH_HI3xxx +config TI_EDMA + bool "TI EDMA support" + depends on ARCH_DAVINCI || ARCH_OMAP || ARCH_KEYSTONE select DMA_ENGINE select DMA_VIRTUAL_CHANNELS + select TI_DMA_CROSSBAR if ARCH_OMAP + default n help - Support the DMA engine for Hisilicon K3 platform - devices. + Enable support for the TI EDMA controller. This DMA + engine is found on TI DaVinci and AM33xx parts. -config MOXART_DMA - tristate "MOXART DMA support" - depends on ARCH_MOXART - select DMA_ENGINE - select DMA_OF - select DMA_VIRTUAL_CHANNELS - help - Enable support for the MOXA ART SoC DMA controller. - -config FSL_EDMA - tristate "Freescale eDMA engine support" - depends on OF +config XGENE_DMA + tristate "APM X-Gene DMA support" + depends on ARCH_XGENE || COMPILE_TEST select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS + select DMA_ENGINE_RAID + select ASYNC_TX_ENABLE_CHANNEL_SWITCH help - Support the Freescale eDMA engine with programmable channel - multiplexing capability for DMA request sources(slot). - This module can be found on Freescale Vybrid and LS-1 SoCs. + Enable support for the APM X-Gene SoC DMA engine. config XILINX_VDMA tristate "Xilinx AXI VDMA Engine" @@ -410,55 +515,25 @@ config XILINX_VDMA channels, Memory Mapped to Stream (MM2S) and Stream to Memory Mapped (S2MM) for the data transfers. -config DMA_SUN6I - tristate "Allwinner A31 SoCs DMA support" - depends on MACH_SUN6I || MACH_SUN8I || COMPILE_TEST - depends on RESET_CONTROLLER - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - help - Support for the DMA engine first found in Allwinner A31 SoCs. - -config NBPFAXI_DMA - tristate "Renesas Type-AXI NBPF DMA support" - select DMA_ENGINE - depends on ARM || COMPILE_TEST - help - Support for "Type-AXI" NBPF DMA IPs from Renesas - -config IMG_MDC_DMA - tristate "IMG MDC support" - depends on MIPS || COMPILE_TEST - depends on MFD_SYSCON +config ZX_DMA + tristate "ZTE ZX296702 DMA support" + depends on ARCH_ZX select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help - Enable support for the IMG multi-threaded DMA controller (MDC). + Support the DMA engine for ZTE ZX296702 platform devices. 
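DMA_OF, introduced above as def_bool y, builds of-dma.o on every OF-enabled kernel; providers plug into device-tree channel lookup by registering a translation callback, exactly as amba-pl08x.c does further down in this diff with of_dma_controller_register()/pl08x_of_xlate(). A minimal sketch under assumed names (foo_*, a one-cell dma specifier carrying the request line; not part of this patch):

	#include <linux/dmaengine.h>
	#include <linux/of_dma.h>
	#include <linux/platform_device.h>

	struct foo_dmadev {			/* hypothetical driver state */
		struct dma_device ddev;
	};

	static struct dma_chan *foo_of_xlate(struct of_phandle_args *spec,
					     struct of_dma *ofdma)
	{
		struct foo_dmadev *fd = ofdma->of_dma_data;

		if (spec->args_count != 1)	/* expect one cell: the request line */
			return NULL;
		/* a real driver would record spec->args[0] in the channel here */
		return dma_get_any_slave_channel(&fd->ddev);
	}

	static int foo_register_of(struct platform_device *pdev, struct foo_dmadev *fd)
	{
		/* called after dma_async_device_register(&fd->ddev) */
		return of_dma_controller_register(pdev->dev.of_node,
						  foo_of_xlate, fd);
	}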
-config XGENE_DMA - tristate "APM X-Gene DMA support" - depends on ARCH_XGENE || COMPILE_TEST - select DMA_ENGINE - select DMA_ENGINE_RAID - select ASYNC_TX_ENABLE_CHANNEL_SWITCH - help - Enable support for the APM X-Gene SoC DMA engine. -config DMA_ENGINE - bool +# driver files +source "drivers/dma/bestcomm/Kconfig" -config DMA_VIRTUAL_CHANNELS - tristate +source "drivers/dma/dw/Kconfig" -config DMA_ACPI - def_bool y - depends on ACPI +source "drivers/dma/hsu/Kconfig" -config DMA_OF - def_bool y - depends on OF - select DMA_ENGINE +source "drivers/dma/sh/Kconfig" +# clients comment "DMA Clients" depends on DMA_ENGINE @@ -483,13 +558,4 @@ config DMATEST config DMA_ENGINE_RAID bool -config QCOM_BAM_DMA - tristate "QCOM BAM DMA support" - depends on ARCH_QCOM || (COMPILE_TEST && OF && ARM) - select DMA_ENGINE - select DMA_VIRTUAL_CHANNELS - ---help--- - Enable support for the QCOM BAM DMA controller. This controller - provides DMA capabilities for a variety of on-chip devices. - endif diff --git a/kernel/drivers/dma/Makefile b/kernel/drivers/dma/Makefile index 69f77d5ba..ef9c099bd 100644 --- a/kernel/drivers/dma/Makefile +++ b/kernel/drivers/dma/Makefile @@ -1,56 +1,69 @@ +#dmaengine debug flags subdir-ccflags-$(CONFIG_DMADEVICES_DEBUG) := -DDEBUG subdir-ccflags-$(CONFIG_DMADEVICES_VDEBUG) += -DVERBOSE_DEBUG +#core obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_DMA_VIRTUAL_CHANNELS) += virt-dma.o obj-$(CONFIG_DMA_ACPI) += acpi-dma.o obj-$(CONFIG_DMA_OF) += of-dma.o +#dmatest obj-$(CONFIG_DMATEST) += dmatest.o -obj-$(CONFIG_INTEL_IOATDMA) += ioat/ -obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o + +#devices +obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o +obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ +obj-$(CONFIG_AT_HDMAC) += at_hdmac.o +obj-$(CONFIG_AT_XDMAC) += at_xdmac.o +obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o +obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o +obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o +obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o +obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_DMA_OMAP) += omap-dma.o +obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o +obj-$(CONFIG_DMA_SUN4I) += sun4i-dma.o +obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o +obj-$(CONFIG_DW_DMAC_CORE) += dw/ +obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o obj-$(CONFIG_FSL_DMA) += fsldma.o +obj-$(CONFIG_FSL_EDMA) += fsl-edma.o +obj-$(CONFIG_FSL_RAID) += fsl_raid.o obj-$(CONFIG_HSU_DMA) += hsu/ +obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o +obj-$(CONFIG_IMX_DMA) += imx-dma.o +obj-$(CONFIG_IMX_SDMA) += imx-sdma.o +obj-$(CONFIG_INTEL_IDMA64) += idma64.o +obj-$(CONFIG_INTEL_IOATDMA) += ioat/ +obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o +obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o +obj-$(CONFIG_K3_DMA) += k3dma.o +obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o +obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o +obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o +obj-$(CONFIG_MOXART_DMA) += moxart-dma.o obj-$(CONFIG_MPC512X_DMA) += mpc512x_dma.o -obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ obj-$(CONFIG_MV_XOR) += mv_xor.o -obj-$(CONFIG_DW_DMAC_CORE) += dw/ -obj-$(CONFIG_AT_HDMAC) += at_hdmac.o -obj-$(CONFIG_AT_XDMAC) += at_xdmac.o +obj-$(CONFIG_MXS_DMA) += mxs-dma.o obj-$(CONFIG_MX3_IPU) += ipu/ -obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o +obj-$(CONFIG_PCH_DMA) += pch_dma.o +obj-$(CONFIG_PL330_DMA) += pl330.o +obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/ +obj-$(CONFIG_PXA_DMA) += pxa_dma.o +obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o obj-$(CONFIG_RENESAS_DMA) += sh/ -obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o 
-obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/ -obj-$(CONFIG_IMX_SDMA) += imx-sdma.o -obj-$(CONFIG_IMX_DMA) += imx-dma.o -obj-$(CONFIG_MXS_DMA) += mxs-dma.o -obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_SIRF_DMA) += sirf-dma.o -obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o -obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o -obj-$(CONFIG_PL330_DMA) += pl330.o -obj-$(CONFIG_PCH_DMA) += pch_dma.o -obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o -obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o -obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o -obj-$(CONFIG_MMP_TDMA) += mmp_tdma.o -obj-$(CONFIG_DMA_OMAP) += omap-dma.o -obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o -obj-$(CONFIG_MMP_PDMA) += mmp_pdma.o -obj-$(CONFIG_DMA_JZ4740) += dma-jz4740.o -obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o +obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o +obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o +obj-$(CONFIG_TIMB_DMA) += timb_dma.o obj-$(CONFIG_TI_CPPI41) += cppi41.o -obj-$(CONFIG_K3_DMA) += k3dma.o -obj-$(CONFIG_MOXART_DMA) += moxart-dma.o -obj-$(CONFIG_FSL_RAID) += fsl_raid.o -obj-$(CONFIG_FSL_EDMA) += fsl-edma.o -obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o -obj-y += xilinx/ -obj-$(CONFIG_INTEL_MIC_X100_DMA) += mic_x100_dma.o -obj-$(CONFIG_NBPFAXI_DMA) += nbpfaxi.o -obj-$(CONFIG_DMA_SUN6I) += sun6i-dma.o -obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o +obj-$(CONFIG_TI_DMA_CROSSBAR) += ti-dma-crossbar.o +obj-$(CONFIG_TI_EDMA) += edma.o obj-$(CONFIG_XGENE_DMA) += xgene-dma.o +obj-$(CONFIG_ZX_DMA) += zx296702_dma.o + +obj-y += xilinx/ diff --git a/kernel/drivers/dma/acpi-dma.c b/kernel/drivers/dma/acpi-dma.c index 5a635646e..16d0daa05 100644 --- a/kernel/drivers/dma/acpi-dma.c +++ b/kernel/drivers/dma/acpi-dma.c @@ -21,6 +21,7 @@ #include <linux/ioport.h> #include <linux/acpi.h> #include <linux/acpi_dma.h> +#include <linux/property.h> static LIST_HEAD(acpi_dma_list); static DEFINE_MUTEX(acpi_dma_lock); @@ -160,10 +161,8 @@ int acpi_dma_controller_register(struct device *dev, return -EINVAL; /* Check if the device was enumerated by ACPI */ - if (!ACPI_HANDLE(dev)) - return -EINVAL; - - if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev)) + adev = ACPI_COMPANION(dev); + if (!adev) return -EINVAL; adma = kzalloc(sizeof(*adma), GFP_KERNEL); @@ -358,10 +357,11 @@ struct dma_chan *acpi_dma_request_slave_chan_by_index(struct device *dev, int found; /* Check if the device was enumerated by ACPI */ - if (!dev || !ACPI_HANDLE(dev)) + if (!dev) return ERR_PTR(-ENODEV); - if (acpi_bus_get_device(ACPI_HANDLE(dev), &adev)) + adev = ACPI_COMPANION(dev); + if (!adev) return ERR_PTR(-ENODEV); memset(&pdata, 0, sizeof(pdata)); @@ -413,21 +413,29 @@ EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_index); * translate the names "tx" and "rx" here based on the most common case where * the first FixedDMA descriptor is TX and second is RX. * + * If the device has "dma-names" property the FixedDMA descriptor indices + * are retrieved based on those. Otherwise the function falls back using + * hardcoded indices. + * * Return: * Pointer to appropriate dma channel on success or an error pointer. 
*/ struct dma_chan *acpi_dma_request_slave_chan_by_name(struct device *dev, const char *name) { - size_t index; - - if (!strcmp(name, "tx")) - index = 0; - else if (!strcmp(name, "rx")) - index = 1; - else - return ERR_PTR(-ENODEV); + int index; + + index = device_property_match_string(dev, "dma-names", name); + if (index < 0) { + if (!strcmp(name, "tx")) + index = 0; + else if (!strcmp(name, "rx")) + index = 1; + else + return ERR_PTR(-ENODEV); + } + dev_dbg(dev, "found DMA channel \"%s\" at index %d\n", name, index); return acpi_dma_request_slave_chan_by_index(dev, index); } EXPORT_SYMBOL_GPL(acpi_dma_request_slave_chan_by_name); diff --git a/kernel/drivers/dma/amba-pl08x.c b/kernel/drivers/dma/amba-pl08x.c index 49d396ec0..9b42c0588 100644 --- a/kernel/drivers/dma/amba-pl08x.c +++ b/kernel/drivers/dma/amba-pl08x.c @@ -83,6 +83,8 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> #include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -474,7 +476,7 @@ static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x, u32 val = readl(ch->reg_config); val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK | - PL080_CONFIG_TC_IRQ_MASK); + PL080_CONFIG_TC_IRQ_MASK); writel(val, ch->reg_config); @@ -2030,10 +2032,188 @@ static inline void init_pl08x_debugfs(struct pl08x_driver_data *pl08x) } #endif +#ifdef CONFIG_OF +static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x, + u32 id) +{ + struct pl08x_dma_chan *chan; + + list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) { + if (chan->signal == id) + return &chan->vc.chan; + } + + return NULL; +} + +static struct dma_chan *pl08x_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pl08x_driver_data *pl08x = ofdma->of_dma_data; + struct pl08x_channel_data *data; + struct pl08x_dma_chan *chan; + struct dma_chan *dma_chan; + + if (!pl08x) + return NULL; + + if (dma_spec->args_count != 2) + return NULL; + + dma_chan = pl08x_find_chan_id(pl08x, dma_spec->args[0]); + if (dma_chan) + return dma_get_slave_channel(dma_chan); + + chan = devm_kzalloc(pl08x->slave.dev, sizeof(*chan) + sizeof(*data), + GFP_KERNEL); + if (!chan) + return NULL; + + data = (void *)&chan[1]; + data->bus_id = "(none)"; + data->periph_buses = dma_spec->args[1]; + + chan->cd = data; + chan->host = pl08x; + chan->slave = true; + chan->name = data->bus_id; + chan->state = PL08X_CHAN_IDLE; + chan->signal = dma_spec->args[0]; + chan->vc.desc_free = pl08x_desc_free; + + vchan_init(&chan->vc, &pl08x->slave); + + return dma_get_slave_channel(&chan->vc.chan); +} + +static int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + struct pl08x_platform_data *pd; + u32 cctl_memcpy = 0; + u32 val; + int ret; + + pd = devm_kzalloc(&adev->dev, sizeof(*pd), GFP_KERNEL); + if (!pd) + return -ENOMEM; + + /* Eligible bus masters for fetching LLIs */ + if (of_property_read_bool(np, "lli-bus-interface-ahb1")) + pd->lli_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "lli-bus-interface-ahb2")) + pd->lli_buses |= PL08X_AHB2; + if (!pd->lli_buses) { + dev_info(&adev->dev, "no bus masters for LLIs stated, assume all\n"); + pd->lli_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Eligible bus masters for memory access */ + if (of_property_read_bool(np, "mem-bus-interface-ahb1")) + pd->mem_buses |= PL08X_AHB1; + if (of_property_read_bool(np, "mem-bus-interface-ahb2")) + 
pd->mem_buses |= PL08X_AHB2; + if (!pd->mem_buses) { + dev_info(&adev->dev, "no bus masters for memory stated, assume all\n"); + pd->mem_buses |= PL08X_AHB1 | PL08X_AHB2; + } + + /* Parse the memcpy channel properties */ + ret = of_property_read_u32(np, "memcpy-burst-size", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy burst size specified, using 1 byte\n"); + val = 1; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n"); + /* Fall through */ + case 1: + cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 4: + cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 8: + cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 64: + cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 128: + cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + case 256: + cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT | + PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT; + break; + } + + ret = of_property_read_u32(np, "memcpy-bus-width", &val); + if (ret) { + dev_info(&adev->dev, "no memcpy bus width specified, using 8 bits\n"); + val = 8; + } + switch (val) { + default: + dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n"); + /* Fall through */ + case 8: + cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 16: + cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + case 32: + cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | + PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT; + break; + } + + /* This is currently the only thing making sense */ + cctl_memcpy |= PL080_CONTROL_PROT_SYS; + + /* Set up memcpy channel */ + pd->memcpy_channel.bus_id = "memcpy"; + pd->memcpy_channel.cctl_memcpy = cctl_memcpy; + /* Use the buses that can access memory, obviously */ + pd->memcpy_channel.periph_buses = pd->mem_buses; + + pl08x->pd = pd; + + return of_dma_controller_register(adev->dev.of_node, pl08x_of_xlate, + pl08x); +} +#else +static inline int pl08x_of_probe(struct amba_device *adev, + struct pl08x_driver_data *pl08x, + struct device_node *np) +{ + return -EINVAL; +} +#endif + static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) { struct pl08x_driver_data *pl08x; const struct vendor_data *vd = id->data; + struct device_node *np = adev->dev.of_node; u32 tsfr_size; int ret = 0; int i; @@ -2093,9 +2273,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id) /* Get the platform data */ pl08x->pd = dev_get_platdata(&adev->dev); if (!pl08x->pd) { - dev_err(&adev->dev, "no platform data supplied\n"); - ret = -EINVAL; - goto out_no_platdata; + if (np) { + ret = pl08x_of_probe(adev, pl08x, np); + if (ret) + goto out_no_platdata; + } else { + dev_err(&adev->dev, "no platform data supplied\n"); + ret = -EINVAL; + goto 
out_no_platdata; + } } /* Assign useful pointers to the driver state */ diff --git a/kernel/drivers/dma/at_hdmac.c b/kernel/drivers/dma/at_hdmac.c index 57b2141dd..53d22eb73 100644 --- a/kernel/drivers/dma/at_hdmac.c +++ b/kernel/drivers/dma/at_hdmac.c @@ -48,6 +48,8 @@ BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) |\ BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) +#define ATC_MAX_DSCR_TRIALS 10 + /* * Initial number of descriptors to allocate for each channel. This could * be increased during dma usage. @@ -247,6 +249,10 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) channel_writel(atchan, CTRLA, 0); channel_writel(atchan, CTRLB, 0); channel_writel(atchan, DSCR, first->txd.phys); + channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) | + ATC_SPIP_BOUNDARY(first->boundary)); + channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | + ATC_DPIP_BOUNDARY(first->boundary)); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); @@ -281,28 +287,19 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan, * * @current_len: the number of bytes left before reading CTRLA * @ctrla: the value of CTRLA - * @desc: the descriptor containing the transfer width - */ -static inline int atc_calc_bytes_left(int current_len, u32 ctrla, - struct at_desc *desc) -{ - return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width); -} - -/** - * atc_calc_bytes_left_from_reg - calculates the number of bytes left according - * to the current value of CTRLA. - * - * @current_len: the number of bytes left before reading CTRLA - * @atchan: the channel to read CTRLA for - * @desc: the descriptor containing the transfer width */ -static inline int atc_calc_bytes_left_from_reg(int current_len, - struct at_dma_chan *atchan, struct at_desc *desc) +static inline int atc_calc_bytes_left(int current_len, u32 ctrla) { - u32 ctrla = channel_readl(atchan, CTRLA); + u32 btsize = (ctrla & ATC_BTSIZE_MAX); + u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla); - return atc_calc_bytes_left(current_len, ctrla, desc); + /* + * According to the datasheet, when reading the Control A Register + * (ctrla), the Buffer Transfer Size (btsize) bitfield refers to the + * number of transfers completed on the Source Interface. + * So btsize is always a number of source width transfers. + */ + return current_len - (btsize << src_width); } /** @@ -316,7 +313,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; int ret; - u32 ctrla, dscr; + u32 ctrla, dscr, trials; /* * If the cookie doesn't match to the currently running transfer then @@ -342,15 +339,82 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) * the channel's DSCR register and compare it against the value * of the hardware linked list structure of each child * descriptor. + * + * The CTRLA register provides us with the amount of data + * already read from the source for the current child + * descriptor. So we can compute a more accurate residue by also + * removing the number of bytes corresponding to this amount of + * data. + * + * However, the DSCR and CTRLA registers cannot be read both + * atomically. Hence a race condition may occur: the first read + * register may refer to one child descriptor whereas the second + * read may refer to a later child descriptor in the list + * because of the DMA transfer progression inbetween the two + * reads. 
+ * + * One solution could have been to pause the DMA transfer, read + * the DSCR and CTRLA then resume the DMA transfer. Nonetheless, + * this approach presents some drawbacks: + * - If the DMA transfer is paused, RX overruns or TX underruns + * are more likey to occur depending on the system latency. + * Taking the USART driver as an example, it uses a cyclic DMA + * transfer to read data from the Receive Holding Register + * (RHR) to avoid RX overruns since the RHR is not protected + * by any FIFO on most Atmel SoCs. So pausing the DMA transfer + * to compute the residue would break the USART driver design. + * - The atc_pause() function masks interrupts but we'd rather + * avoid to do so for system latency purpose. + * + * Then we'd rather use another solution: the DSCR is read a + * first time, the CTRLA is read in turn, next the DSCR is read + * a second time. If the two consecutive read values of the DSCR + * are the same then we assume both refers to the very same + * child descriptor as well as the CTRLA value read inbetween + * does. For cyclic tranfers, the assumption is that a full loop + * is "not so fast". + * If the two DSCR values are different, we read again the CTRLA + * then the DSCR till two consecutive read values from DSCR are + * equal or till the maxium trials is reach. + * This algorithm is very unlikely not to find a stable value for + * DSCR. */ - ctrla = channel_readl(atchan, CTRLA); - rmb(); /* ensure CTRLA is read before DSCR */ dscr = channel_readl(atchan, DSCR); + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { + u32 new_dscr; + + rmb(); /* ensure DSCR is read after CTRLA */ + new_dscr = channel_readl(atchan, DSCR); + + /* + * If the DSCR register value has not changed inside the + * DMA controller since the previous read, we assume + * that both the dscr and ctrla values refers to the + * very same descriptor. + */ + if (likely(new_dscr == dscr)) + break; + + /* + * DSCR has changed inside the DMA controller, so the + * previouly read value of CTRLA may refer to an already + * processed descriptor hence could be outdated. + * We need to update ctrla to match the current + * descriptor. + */ + dscr = new_dscr; + rmb(); /* ensure DSCR is read before CTRLA */ + ctrla = channel_readl(atchan, CTRLA); + } + if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) + return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ if (desc_first->lli.dscr == dscr) - return atc_calc_bytes_left(ret, ctrla, desc_first); + return atc_calc_bytes_left(ret, ctrla); ret -= desc_first->len; list_for_each_entry(desc, &desc_first->tx_list, desc_node) { @@ -361,16 +425,14 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) } /* - * For the last descriptor in the chain we can calculate + * For the current descriptor in the chain we can calculate * the remaining bytes using the channel's register. - * Note that the transfer width of the first and last - * descriptor may differ. 
*/ - if (!desc->lli.dscr) - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc); + ret = atc_calc_bytes_left(ret, ctrla); } else { /* single transfer */ - ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first); + ctrla = channel_readl(atchan, CTRLA); + ret = atc_calc_bytes_left(ret, ctrla); } return ret; @@ -386,6 +448,7 @@ static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { struct dma_async_tx_descriptor *txd = &desc->txd; + struct at_dma *atdma = to_at_dma(atchan->chan_common.device); dev_vdbg(chan2dev(&atchan->chan_common), "descriptor %u complete\n", txd->cookie); @@ -394,6 +457,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; + } + /* move children to free_list */ list_splice_init(&desc->tx_list, &atchan->free_list); /* move myself to free_list */ @@ -635,6 +705,103 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) } /** + * atc_prep_dma_interleaved - prepare memory to memory interleaved operation + * @chan: the channel to prepare operation on + * @xt: Interleaved transfer template + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct data_chunk *first = xt->sgl; + struct at_desc *desc = NULL; + size_t xfer_count; + unsigned int dwidth; + u32 ctrla; + u32 ctrlb; + size_t len = 0; + int i; + + if (unlikely(!xt || xt->numf != 1 || !xt->frame_size)) + return NULL; + + dev_info(chan2dev(chan), + "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, + xt->frame_size, flags); + + /* + * The controller can only "skip" X bytes every Y bytes, so we + * need to make sure we are given a template that fit that + * description, ie a template with chunks that always have the + * same size, with the same ICGs. 
+ */ + for (i = 0; i < xt->frame_size; i++) { + struct data_chunk *chunk = xt->sgl + i; + + if ((chunk->size != xt->sgl->size) || + (dmaengine_get_dst_icg(xt, chunk) != dmaengine_get_dst_icg(xt, first)) || + (dmaengine_get_src_icg(xt, chunk) != dmaengine_get_src_icg(xt, first))) { + dev_err(chan2dev(chan), + "%s: the controller can transfer only identical chunks\n", + __func__); + return NULL; + } + + len += chunk->size; + } + + dwidth = atc_get_xfer_width(xt->src_start, + xt->dst_start, len); + + xfer_count = len >> dwidth; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__); + return NULL; + } + + ctrla = ATC_SRC_WIDTH(dwidth) | + ATC_DST_WIDTH(dwidth); + + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN + | ATC_SRC_ADDR_MODE_INCR + | ATC_DST_ADDR_MODE_INCR + | ATC_SRC_PIP + | ATC_DST_PIP + | ATC_FC_MEM2MEM; + + /* create the transfer */ + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), + "%s: couldn't allocate our descriptor\n", __func__); + return NULL; + } + + desc->lli.saddr = xt->src_start; + desc->lli.daddr = xt->dst_start; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->boundary = first->size >> dwidth; + desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1; + desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1; + + desc->txd.cookie = -EBUSY; + desc->total_len = desc->len = len; + + /* set end-of-link to the last link descriptor of list*/ + set_desc_eol(desc); + + desc->txd.flags = flags; /* client is in control of this ack */ + + return &desc->txd; +} + +/** * atc_prep_dma_memcpy - prepare a memcpy operation * @chan: the channel to prepare operation on * @dest: operation virtual destination address @@ -657,8 +824,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, u32 ctrla; u32 ctrlb; - dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d0x%x s0x%x l0x%zx f0x%lx\n", - dest, src, len, flags); + dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n", + &dest, &src, len, flags); if (unlikely(!len)) { dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n"); @@ -702,10 +869,6 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, first->txd.cookie = -EBUSY; first->total_len = len; - /* set transfer width for the calculation of the residue */ - first->tx_width = src_width; - prev->tx_width = src_width; - /* set end-of-link to the last link descriptor of list*/ set_desc_eol(desc); @@ -718,6 +881,187 @@ err_desc_get: return NULL; } +static struct at_desc *atc_create_memset_desc(struct dma_chan *chan, + dma_addr_t psrc, + dma_addr_t pdst, + size_t len) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc; + size_t xfer_count; + + u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2); + u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | + ATC_SRC_ADDR_MODE_FIXED | + ATC_DST_ADDR_MODE_INCR | + ATC_FC_MEM2MEM; + + xfer_count = len >> 2; + if (xfer_count > ATC_BTSIZE_MAX) { + dev_err(chan2dev(chan), "%s: buffer is too big\n", + __func__); + return NULL; + } + + desc = atc_desc_get(atchan); + if (!desc) { + dev_err(chan2dev(chan), "%s: can't get a descriptor\n", + __func__); + return NULL; + } + + desc->lli.saddr = psrc; + desc->lli.daddr = pdst; + desc->lli.ctrla = ctrla | xfer_count; + desc->lli.ctrlb = ctrlb; + + desc->txd.cookie = 0; + desc->len = len; + + return desc; +} + +/** + * atc_prep_dma_memset - prepare a memcpy operation + * @chan: the channel to prepare operation on + * @dest: operation 
virtual destination address + * @value: value to set memory buffer to + * @len: operation length + * @flags: tx descriptor status flags + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc; + void __iomem *vaddr; + dma_addr_t paddr; + + dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__, + &dest, value, len, flags); + + if (unlikely(!len)) { + dev_dbg(chan2dev(chan), "%s: length is zero!\n", __func__); + return NULL; + } + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_dbg(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + return NULL; + } + + vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, &paddr); + if (!vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + return NULL; + } + *(u32*)vaddr = value; + + desc = atc_create_memset_desc(chan, paddr, dest, len); + if (!desc) { + dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n", + __func__); + goto err_free_buffer; + } + + desc->memset_paddr = paddr; + desc->memset_vaddr = vaddr; + desc->memset_buffer = true; + + desc->txd.cookie = -EBUSY; + desc->total_len = len; + + /* set end-of-link on the descriptor */ + set_desc_eol(desc); + + desc->txd.flags = flags; + + return &desc->txd; + +err_free_buffer: + dma_pool_free(atdma->memset_pool, vaddr, paddr); + return NULL; +} + +static struct dma_async_tx_descriptor * +atc_prep_dma_memset_sg(struct dma_chan *chan, + struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma *atdma = to_at_dma(chan->device); + struct at_desc *desc = NULL, *first = NULL, *prev = NULL; + struct scatterlist *sg; + void __iomem *vaddr; + dma_addr_t paddr; + size_t total_len = 0; + int i; + + dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__, + value, sg_len, flags); + + if (unlikely(!sgl || !sg_len)) { + dev_dbg(chan2dev(chan), "%s: scatterlist is empty!\n", + __func__); + return NULL; + } + + vaddr = dma_pool_alloc(atdma->memset_pool, GFP_ATOMIC, &paddr); + if (!vaddr) { + dev_err(chan2dev(chan), "%s: couldn't allocate buffer\n", + __func__); + return NULL; + } + *(u32*)vaddr = value; + + for_each_sg(sgl, sg, sg_len, i) { + dma_addr_t dest = sg_dma_address(sg); + size_t len = sg_dma_len(sg); + + dev_vdbg(chan2dev(chan), "%s: d%pad, l0x%zx\n", + __func__, &dest, len); + + if (!is_dma_fill_aligned(chan->device, dest, 0, len)) { + dev_err(chan2dev(chan), "%s: buffer is not aligned\n", + __func__); + goto err_put_desc; + } + + desc = atc_create_memset_desc(chan, paddr, dest, len); + if (!desc) + goto err_put_desc; + + atc_desc_chain(&first, &prev, desc); + + total_len += len; + } + + /* + * Only set the buffer pointers on the last descriptor to + * avoid free'ing while we have our transfer still going + */ + desc->memset_paddr = paddr; + desc->memset_vaddr = vaddr; + desc->memset_buffer = true; + + first->txd.cookie = -EBUSY; + first->total_len = total_len; + + /* set end-of-link on the descriptor */ + set_desc_eol(desc); + + first->txd.flags = flags; + + return &first->txd; + +err_put_desc: + atc_desc_put(atchan, first); + return NULL; +} /** * atc_prep_slave_sg - prepare descriptors for a DMA_SLAVE transaction @@ -854,10 +1198,6 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, first->txd.cookie = -EBUSY; first->total_len = total_len; - /* 
set transfer width for the calculation of the residue */ - first->tx_width = reg_width; - prev->tx_width = reg_width; - /* first link descriptor of list is responsible of flags */ first->txd.flags = flags; /* client is in control of this ack */ @@ -975,12 +1315,6 @@ atc_prep_dma_sg(struct dma_chan *chan, desc->txd.cookie = 0; desc->len = len; - /* - * Although we only need the transfer width for the first and - * the last descriptor, its easier to set it to all descriptors. - */ - desc->tx_width = src_width; - atc_desc_chain(&first, &prev, desc); /* update the lengths and addresses for the next loop cycle */ @@ -1105,9 +1439,9 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, unsigned int periods = buf_len / period_len; unsigned int i; - dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n", + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@%pad - %d (%d/%d)\n", direction == DMA_MEM_TO_DEV ? "TO DEVICE" : "FROM DEVICE", - buf_addr, + &buf_addr, periods, buf_len, period_len); if (unlikely(!atslave || !buf_len || !period_len)) { @@ -1154,7 +1488,6 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, /* First descriptor of the chain embedds additional information */ first->txd.cookie = -EBUSY; first->total_len = buf_len; - first->tx_width = reg_width; return &first->txd; @@ -1609,7 +1942,11 @@ static int __init at_dma_probe(struct platform_device *pdev) /* setup platform data for each SoC */ dma_cap_set(DMA_MEMCPY, at91sam9rl_config.cap_mask); dma_cap_set(DMA_SG, at91sam9rl_config.cap_mask); + dma_cap_set(DMA_INTERLEAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_MEMCPY, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_MEMSET_SG, at91sam9g45_config.cap_mask); + dma_cap_set(DMA_PRIVATE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SLAVE, at91sam9g45_config.cap_mask); dma_cap_set(DMA_SG, at91sam9g45_config.cap_mask); @@ -1673,7 +2010,16 @@ static int __init at_dma_probe(struct platform_device *pdev) if (!atdma->dma_desc_pool) { dev_err(&pdev->dev, "No memory for descriptors dma pool\n"); err = -ENOMEM; - goto err_pool_create; + goto err_desc_pool_create; + } + + /* create a pool of consistent memory blocks for memset blocks */ + atdma->memset_pool = dma_pool_create("at_hdmac_memset_pool", + &pdev->dev, sizeof(int), 4, 0); + if (!atdma->memset_pool) { + dev_err(&pdev->dev, "No memory for memset dma pool\n"); + err = -ENOMEM; + goto err_memset_pool_create; } /* clear any pending interrupt */ @@ -1713,9 +2059,18 @@ static int __init at_dma_probe(struct platform_device *pdev) atdma->dma_common.dev = &pdev->dev; /* set prep routines based on capability */ + if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved; + if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) { + atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset; + atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg; + atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES; + } + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; /* controller can do slave DMA: can trigger cyclic transfers */ @@ -1736,8 +2091,9 @@ static int __init at_dma_probe(struct platform_device *pdev) dma_writel(atdma, EN, 
AT_DMA_ENABLE); - dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n", + dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s%s), %d channels\n", dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "", + dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "", dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", dma_has_cap(DMA_SG, atdma->dma_common.cap_mask) ? "sg-cpy " : "", plat_dat->nr_channels); @@ -1762,8 +2118,10 @@ static int __init at_dma_probe(struct platform_device *pdev) err_of_dma_controller_register: dma_async_device_unregister(&atdma->dma_common); + dma_pool_destroy(atdma->memset_pool); +err_memset_pool_create: dma_pool_destroy(atdma->dma_desc_pool); -err_pool_create: +err_desc_pool_create: free_irq(platform_get_irq(pdev, 0), atdma); err_irq: clk_disable_unprepare(atdma->clk); @@ -1788,6 +2146,7 @@ static int at_dma_remove(struct platform_device *pdev) at_dma_off(atdma); dma_async_device_unregister(&atdma->dma_common); + dma_pool_destroy(atdma->memset_pool); dma_pool_destroy(atdma->dma_desc_pool); free_irq(platform_get_irq(pdev, 0), atdma); diff --git a/kernel/drivers/dma/at_hdmac_regs.h b/kernel/drivers/dma/at_hdmac_regs.h index 2727ca560..7f58f0615 100644 --- a/kernel/drivers/dma/at_hdmac_regs.h +++ b/kernel/drivers/dma/at_hdmac_regs.h @@ -112,6 +112,7 @@ #define ATC_SRC_WIDTH_BYTE (0x0 << 24) #define ATC_SRC_WIDTH_HALFWORD (0x1 << 24) #define ATC_SRC_WIDTH_WORD (0x2 << 24) +#define ATC_REG_TO_SRC_WIDTH(r) (((r) >> 24) & 0x3) #define ATC_DST_WIDTH_MASK (0x3 << 28) /* Destination Single Transfer Size */ #define ATC_DST_WIDTH(x) ((x) << 28) #define ATC_DST_WIDTH_BYTE (0x0 << 28) @@ -182,7 +183,6 @@ struct at_lli { * @txd: support for the async_tx api * @desc_node: node on the channed descriptors list * @len: descriptor byte count - * @tx_width: transfer width * @total_len: total transaction byte count */ struct at_desc { @@ -194,8 +194,17 @@ struct at_desc { struct dma_async_tx_descriptor txd; struct list_head desc_node; size_t len; - u32 tx_width; size_t total_len; + + /* Interleaved data */ + size_t boundary; + size_t dst_hole; + size_t src_hole; + + /* Memset temporary buffer */ + bool memset_buffer; + dma_addr_t memset_paddr; + int *memset_vaddr; }; static inline struct at_desc * @@ -326,6 +335,7 @@ struct at_dma { u8 all_chan_mask; struct dma_pool *dma_desc_pool; + struct dma_pool *memset_pool; /* AT THE END channels table */ struct at_dma_chan chan[0]; }; @@ -375,9 +385,9 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {} static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli) { dev_crit(chan2dev(&atchan->chan_common), - " desc: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n", - lli->saddr, lli->daddr, - lli->ctrla, lli->ctrlb, lli->dscr); + " desc: s%pad d%pad ctrl0x%x:0x%x l0x%pad\n", + &lli->saddr, &lli->daddr, + lli->ctrla, lli->ctrlb, &lli->dscr); } diff --git a/kernel/drivers/dma/at_xdmac.c b/kernel/drivers/dma/at_xdmac.c index c89a7abb5..02f9aa4eb 100644 --- a/kernel/drivers/dma/at_xdmac.c +++ b/kernel/drivers/dma/at_xdmac.c @@ -156,7 +156,7 @@ #define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */ #define AT_XDMAC_CC_WRIP_DONE (0x0 << 23) #define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23) -#define AT_XDMAC_CC_PERID(i) (0x7f & (h) << 24) /* Channel Peripheral Identifier */ +#define AT_XDMAC_CC_PERID(i) (0x7f & (i) << 24) /* Channel Peripheral Identifier */ #define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */ #define AT_XDMAC_CSUS 0x30 /* Channel Source 
Microblock Stride */ #define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */ @@ -176,6 +176,7 @@ #define AT_XDMAC_MAX_CHAN 0x20 #define AT_XDMAC_MAX_CSIZE 16 /* 16 data */ #define AT_XDMAC_MAX_DWIDTH 8 /* 64 bits */ +#define AT_XDMAC_RESIDUE_MAX_RETRIES 5 #define AT_XDMAC_DMA_BUSWIDTHS\ (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\ @@ -235,6 +236,10 @@ struct at_xdmac_lld { dma_addr_t mbr_sa; /* Source Address Member */ dma_addr_t mbr_da; /* Destination Address Member */ u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; @@ -355,16 +360,19 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, * descriptor view 2 since some fields of the configuration register * depend on transfer size and src/dest addresses. */ - if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); - } else { - /* - * No need to write AT_XDMAC_CC reg, it will be done when the - * descriptor is fecthed. - */ + else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) + reg = AT_XDMAC_CNDC_NDVIEW_NDV3; + else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; - } + /* + * Even if the register will be updated from the configuration in the + * descriptor when using view 2 or higher, the PROT bit won't be set + * properly. This bit can be modified only by using the channel + * configuration register. + */ + at_xdmac_chan_write(atchan, AT_XDMAC_CC, first->lld.mbr_cfg); reg |= AT_XDMAC_CNDC_NDDUP | AT_XDMAC_CNDC_NDSUP @@ -448,6 +456,15 @@ static struct at_xdmac_desc *at_xdmac_alloc_desc(struct dma_chan *chan, return desc; } +void at_xdmac_init_used_desc(struct at_xdmac_desc *desc) +{ + memset(&desc->lld, 0, sizeof(desc->lld)); + INIT_LIST_HEAD(&desc->descs_list); + desc->direction = DMA_TRANS_NONE; + desc->xfer_size = 0; + desc->active_xfer = false; +} + /* Call must be protected by lock. 
*/ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) { @@ -459,12 +476,39 @@ static struct at_xdmac_desc *at_xdmac_get_desc(struct at_xdmac_chan *atchan) desc = list_first_entry(&atchan->free_descs_list, struct at_xdmac_desc, desc_node); list_del(&desc->desc_node); - desc->active_xfer = false; + at_xdmac_init_used_desc(desc); } return desc; } +static void at_xdmac_queue_desc(struct dma_chan *chan, + struct at_xdmac_desc *prev, + struct at_xdmac_desc *desc) +{ + if (!prev || !desc) + return; + + prev->lld.mbr_nda = desc->tx_dma_desc.phys; + prev->lld.mbr_ubc |= AT_XDMAC_MBR_UBC_NDE; + + dev_dbg(chan2dev(chan), "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", + __func__, prev, &prev->lld.mbr_nda); +} + +static inline void at_xdmac_increment_block_count(struct dma_chan *chan, + struct at_xdmac_desc *desc) +{ + if (!desc) + return; + + desc->lld.mbr_bc++; + + dev_dbg(chan2dev(chan), + "%s: incrementing the block count of the desc 0x%p\n", + __func__, desc); +} + static struct dma_chan *at_xdmac_xlate(struct of_phandle_args *dma_spec, struct of_dma *of_dma) { @@ -591,12 +635,12 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, enum dma_transfer_direction direction, unsigned long flags, void *context) { - struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); - struct at_xdmac_desc *first = NULL, *prev = NULL; - struct scatterlist *sg; - int i; - unsigned int xfer_size = 0; - unsigned long irqflags; + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *first = NULL, *prev = NULL; + struct scatterlist *sg; + int i; + unsigned int xfer_size = 0; + unsigned long irqflags; struct dma_async_tx_descriptor *ret = NULL; if (!sgl) @@ -655,7 +699,6 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 /* next descriptor view */ | AT_XDMAC_MBR_UBC_NDEN /* next descriptor dst parameter update */ | AT_XDMAC_MBR_UBC_NSEN /* next descriptor src parameter update */ - | (i == sg_len - 1 ? 0 : AT_XDMAC_MBR_UBC_NDE) /* descriptor fetch */ | (len >> fixed_dwidth); /* microblock length */ desc->lld.mbr_cfg = (atchan->cfg & ~AT_XDMAC_CC_DWIDTH_MASK) | AT_XDMAC_CC_DWIDTH(fixed_dwidth); @@ -664,12 +707,8 @@ at_xdmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -749,7 +788,6 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV1 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | AT_XDMAC_MBR_UBC_NDE | period_len >> at_xdmac_get_dwidth(desc->lld.mbr_cfg); dev_dbg(chan2dev(chan), @@ -757,12 +795,8 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc); /* Chain lld. 
*/ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -773,10 +807,7 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, list_add_tail(&desc->desc_node, &first->descs_list); } - prev->lld.mbr_nda = first->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=%pad\n", - __func__, prev, &prev->lld.mbr_nda); + at_xdmac_queue_desc(chan, prev, first); first->tx_dma_desc.flags = flags; first->xfer_size = buf_len; first->direction = direction; @@ -784,6 +815,217 @@ at_xdmac_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, return &first->tx_dma_desc; } +static inline u32 at_xdmac_align_width(struct dma_chan *chan, dma_addr_t addr) +{ + u32 width; + + /* + * Check address alignment to select the greater data width we + * can use. + * + * Some XDMAC implementations don't provide dword transfer, in + * this case selecting dword has the same behavior as + * selecting word transfers. + */ + if (!(addr & 7)) { + width = AT_XDMAC_CC_DWIDTH_DWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); + } else if (!(addr & 3)) { + width = AT_XDMAC_CC_DWIDTH_WORD; + dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); + } else if (!(addr & 1)) { + width = AT_XDMAC_CC_DWIDTH_HALFWORD; + dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); + } else { + width = AT_XDMAC_CC_DWIDTH_BYTE; + dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); + } + + return width; +} + +static struct at_xdmac_desc * +at_xdmac_interleaved_queue_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + struct at_xdmac_desc *prev, + dma_addr_t src, dma_addr_t dst, + struct dma_interleaved_template *xt, + struct data_chunk *chunk) +{ + struct at_xdmac_desc *desc; + u32 dwidth; + unsigned long flags; + size_t ublen; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. 
+ */ + u32 chan_cc = AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, src | dst | chunk->size); + if (chunk->size >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_dbg(chan2dev(chan), + "%s: chunk too big (%d, max size %lu)...\n", + __func__, chunk->size, + AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth); + return NULL; + } + + if (prev) + dev_dbg(chan2dev(chan), + "Adding items at the end of desc 0x%p\n", prev); + + if (xt->src_inc) { + if (xt->src_sgl) + chan_cc |= AT_XDMAC_CC_SAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_SAM_INCREMENTED_AM; + } + + if (xt->dst_inc) { + if (xt->dst_sgl) + chan_cc |= AT_XDMAC_CC_DAM_UBS_AM; + else + chan_cc |= AT_XDMAC_CC_DAM_INCREMENTED_AM; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = chunk->size >> dwidth; + + desc->lld.mbr_sa = src; + desc->lld.mbr_da = dst; + desc->lld.mbr_sus = dmaengine_get_src_icg(xt, chunk); + desc->lld.mbr_dus = dmaengine_get_dst_icg(xt, chunk); + + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_sa=%pad, mbr_da=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, + desc->lld.mbr_ubc, desc->lld.mbr_cfg); + + /* Chain lld. */ + if (prev) + at_xdmac_queue_desc(chan, prev, desc); + + return desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_interleaved(struct dma_chan *chan, + struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *prev = NULL, *first = NULL; + dma_addr_t dst_addr, src_addr; + size_t src_skip = 0, dst_skip = 0, len = 0; + struct data_chunk *chunk; + int i; + + if (!xt || !xt->numf || (xt->dir != DMA_MEM_TO_MEM)) + return NULL; + + /* + * TODO: Handle the case where we have to repeat a chain of + * descriptors... + */ + if ((xt->numf > 1) && (xt->frame_size > 1)) + return NULL; + + dev_dbg(chan2dev(chan), "%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n", + __func__, &xt->src_start, &xt->dst_start, xt->numf, + xt->frame_size, flags); + + src_addr = xt->src_start; + dst_addr = xt->dst_start; + + if (xt->numf > 1) { + first = at_xdmac_interleaved_queue_desc(chan, atchan, + NULL, + src_addr, dst_addr, + xt, xt->sgl); + + /* Length of the block is (BLEN+1) microblocks. 
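
(Illustrative aside, not part of the patch: the interleaved path above consumes whatever dma_interleaved_template the client hands in. A minimal client-side sketch of a single-chunk, multi-frame request follows; it assumes the caller already holds a channel whose capability mask includes DMA_INTERLEAVE, and the helper name queue_2d_copy and its parameters are invented for illustration.)

	#include <linux/dmaengine.h>
	#include <linux/slab.h>

	/* Hypothetical helper: copy "lines" rows of line_len bytes from a packed
	 * source into a destination whose rows are separated by line_gap bytes. */
	static int queue_2d_copy(struct dma_chan *chan, dma_addr_t src, dma_addr_t dst,
				 size_t line_len, size_t line_gap, size_t lines)
	{
		struct dma_interleaved_template *xt;
		struct dma_async_tx_descriptor *tx;
		dma_cookie_t cookie;
		int ret = 0;

		/* One data_chunk per frame; numf repeats the frame "lines" times. */
		xt = kzalloc(sizeof(*xt) + sizeof(struct data_chunk), GFP_KERNEL);
		if (!xt)
			return -ENOMEM;

		xt->src_start = src;
		xt->dst_start = dst;
		xt->dir = DMA_MEM_TO_MEM;
		xt->src_inc = true;
		xt->dst_inc = true;
		xt->src_sgl = false;		/* source rows are contiguous */
		xt->dst_sgl = true;		/* destination rows have a gap */
		xt->numf = lines;
		xt->frame_size = 1;
		xt->sgl[0].size = line_len;
		xt->sgl[0].icg = line_gap;

		tx = dmaengine_prep_interleaved_dma(chan, xt,
						    DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
		if (!tx) {
			ret = -EINVAL;
			goto out;
		}

		cookie = dmaengine_submit(tx);
		dma_async_issue_pending(chan);
		ret = dma_submit_error(cookie);
	out:
		kfree(xt);
		return ret;
	}

With numf > 1 and frame_size == 1 such a template maps onto the block-count handling above; a template with frame_size > 1 and numf == 1 instead walks the per-chunk loop further down.
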
*/ + for (i = 0; i < xt->numf - 1; i++) + at_xdmac_increment_block_count(chan, first); + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, first, first); + list_add_tail(&first->desc_node, &first->descs_list); + } else { + for (i = 0; i < xt->frame_size; i++) { + size_t src_icg = 0, dst_icg = 0; + struct at_xdmac_desc *desc; + + chunk = xt->sgl + i; + + dst_icg = dmaengine_get_dst_icg(xt, chunk); + src_icg = dmaengine_get_src_icg(xt, chunk); + + src_skip = chunk->size + src_icg; + dst_skip = chunk->size + dst_icg; + + dev_dbg(chan2dev(chan), + "%s: chunk size=%d, src icg=%d, dst icg=%d\n", + __func__, chunk->size, src_icg, dst_icg); + + desc = at_xdmac_interleaved_queue_desc(chan, atchan, + prev, + src_addr, dst_addr, + xt, chunk); + if (!desc) { + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + return NULL; + } + + if (!first) + first = desc; + + dev_dbg(chan2dev(chan), "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + list_add_tail(&desc->desc_node, &first->descs_list); + + if (xt->src_sgl) + src_addr += src_skip; + + if (xt->dst_sgl) + dst_addr += dst_skip; + + len += chunk->size; + prev = desc; + } + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static struct dma_async_tx_descriptor * at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long flags) @@ -815,24 +1057,7 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(!len)) return NULL; - /* - * Check address alignment to select the greater data width we can use. - * Some XDMAC implementations don't provide dword transfer, in this - * case selecting dword has the same behavior as selecting word transfers. - */ - if (!((src_addr | dst_addr) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, src_addr | dst_addr); /* Prepare descriptors. */ while (remaining_size) { @@ -862,19 +1087,9 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dev_dbg(chan2dev(chan), "%s: xfer_size=%zu\n", __func__, xfer_size); /* Check remaining length and change data width if needed. 
*/ - if (!((src_addr | dst_addr | xfer_size) & 7)) { - dwidth = AT_XDMAC_CC_DWIDTH_DWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: double word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 3)) { - dwidth = AT_XDMAC_CC_DWIDTH_WORD; - dev_dbg(chan2dev(chan), "%s: dwidth: word\n", __func__); - } else if (!((src_addr | dst_addr | xfer_size) & 1)) { - dwidth = AT_XDMAC_CC_DWIDTH_HALFWORD; - dev_dbg(chan2dev(chan), "%s: dwidth: half word\n", __func__); - } else if ((src_addr | dst_addr | xfer_size) & 1) { - dwidth = AT_XDMAC_CC_DWIDTH_BYTE; - dev_dbg(chan2dev(chan), "%s: dwidth: byte\n", __func__); - } + dwidth = at_xdmac_align_width(chan, + src_addr | dst_addr | xfer_size); + chan_cc &= ~AT_XDMAC_CC_DWIDTH_MASK; chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); ublen = xfer_size >> dwidth; @@ -885,7 +1100,6 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV2 | AT_XDMAC_MBR_UBC_NDEN | AT_XDMAC_MBR_UBC_NSEN - | (remaining_size ? AT_XDMAC_MBR_UBC_NDE : 0) | ublen; desc->lld.mbr_cfg = chan_cc; @@ -894,12 +1108,8 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, __func__, &desc->lld.mbr_sa, &desc->lld.mbr_da, desc->lld.mbr_ubc, desc->lld.mbr_cfg); /* Chain lld. */ - if (prev) { - prev->lld.mbr_nda = desc->tx_dma_desc.phys; - dev_dbg(chan2dev(chan), - "%s: chain lld: prev=0x%p, mbr_nda=0x%08x\n", - __func__, prev, prev->lld.mbr_nda); - } + if (prev) + at_xdmac_queue_desc(chan, prev, desc); prev = desc; if (!first) @@ -916,6 +1126,255 @@ at_xdmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, return &first->tx_dma_desc; } +static struct at_xdmac_desc *at_xdmac_memset_create_desc(struct dma_chan *chan, + struct at_xdmac_chan *atchan, + dma_addr_t dst_addr, + size_t len, + int value) +{ + struct at_xdmac_desc *desc; + unsigned long flags; + size_t ublen; + u32 dwidth; + /* + * WARNING: The channel configuration is set here since there is no + * dmaengine_slave_config call in this case. Moreover we don't know the + * direction, it involves we can't dynamically set the source and dest + * interface so we have to use the same one. Only interface 0 allows EBI + * access. Hopefully we can access DDR through both ports (at least on + * SAMA5D4x), so we can use the same interface for source and dest, + * that solves the fact we don't know the direction. 
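
(Illustrative aside, not part of the patch: once the controller advertises DMA_MEMSET, a client can fill a DMA-mapped buffer through the device_prep_dma_memset hook registered below. A minimal sketch, assuming the caller already owns a suitable channel and a mapped buffer; the helper name dma_fill_zero is invented for illustration.)

	#include <linux/dmaengine.h>

	static int dma_fill_zero(struct dma_chan *chan, dma_addr_t buf, size_t len)
	{
		struct dma_async_tx_descriptor *tx;
		dma_cookie_t cookie;

		if (!dma_has_cap(DMA_MEMSET, chan->device->cap_mask))
			return -ENODEV;

		/* value is the fill pattern; 0 clears the buffer */
		tx = chan->device->device_prep_dma_memset(chan, buf, 0, len,
							   DMA_CTRL_ACK |
							   DMA_PREP_INTERRUPT);
		if (!tx)
			return -EINVAL;

		cookie = dmaengine_submit(tx);
		dma_async_issue_pending(chan);

		return dma_submit_error(cookie);
	}
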
+ */ + u32 chan_cc = AT_XDMAC_CC_DAM_UBS_AM + | AT_XDMAC_CC_SAM_INCREMENTED_AM + | AT_XDMAC_CC_DIF(0) + | AT_XDMAC_CC_SIF(0) + | AT_XDMAC_CC_MBSIZE_SIXTEEN + | AT_XDMAC_CC_MEMSET_HW_MODE + | AT_XDMAC_CC_TYPE_MEM_TRAN; + + dwidth = at_xdmac_align_width(chan, dst_addr); + + if (len >= (AT_XDMAC_MBR_UBC_UBLEN_MAX << dwidth)) { + dev_err(chan2dev(chan), + "%s: Transfer too large, aborting...\n", + __func__); + return NULL; + } + + spin_lock_irqsave(&atchan->lock, flags); + desc = at_xdmac_get_desc(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); + if (!desc) { + dev_err(chan2dev(chan), "can't get descriptor\n"); + return NULL; + } + + chan_cc |= AT_XDMAC_CC_DWIDTH(dwidth); + + ublen = len >> dwidth; + + desc->lld.mbr_da = dst_addr; + desc->lld.mbr_ds = value; + desc->lld.mbr_ubc = AT_XDMAC_MBR_UBC_NDV3 + | AT_XDMAC_MBR_UBC_NDEN + | AT_XDMAC_MBR_UBC_NSEN + | ublen; + desc->lld.mbr_cfg = chan_cc; + + dev_dbg(chan2dev(chan), + "%s: lld: mbr_da=%pad, mbr_ds=%pad, mbr_ubc=0x%08x, mbr_cfg=0x%08x\n", + __func__, &desc->lld.mbr_da, &desc->lld.mbr_ds, desc->lld.mbr_ubc, + desc->lld.mbr_cfg); + + return desc; +} + +struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc; + + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &dest, len, value, flags); + + if (unlikely(!len)) + return NULL; + + desc = at_xdmac_memset_create_desc(chan, atchan, dest, len, value); + list_add_tail(&desc->desc_node, &desc->descs_list); + + desc->tx_dma_desc.cookie = -EBUSY; + desc->tx_dma_desc.flags = flags; + desc->xfer_size = len; + + return &desc->tx_dma_desc; +} + +static struct dma_async_tx_descriptor * +at_xdmac_prep_dma_memset_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, int value, + unsigned long flags) +{ + struct at_xdmac_chan *atchan = to_at_xdmac_chan(chan); + struct at_xdmac_desc *desc, *pdesc = NULL, + *ppdesc = NULL, *first = NULL; + struct scatterlist *sg, *psg = NULL, *ppsg = NULL; + size_t stride = 0, pstride = 0, len = 0; + int i; + + if (!sgl) + return NULL; + + dev_dbg(chan2dev(chan), "%s: sg_len=%d, value=0x%x, flags=0x%lx\n", + __func__, sg_len, value, flags); + + /* Prepare descriptors. */ + for_each_sg(sgl, sg, sg_len, i) { + dev_dbg(chan2dev(chan), "%s: dest=%pad, len=%d, pattern=0x%x, flags=0x%lx\n", + __func__, &sg_dma_address(sg), sg_dma_len(sg), + value, flags); + desc = at_xdmac_memset_create_desc(chan, atchan, + sg_dma_address(sg), + sg_dma_len(sg), + value); + if (!desc && first) + list_splice_init(&first->descs_list, + &atchan->free_descs_list); + + if (!first) + first = desc; + + /* Update our strides */ + pstride = stride; + if (psg) + stride = sg_dma_address(sg) - + (sg_dma_address(psg) + sg_dma_len(psg)); + + /* + * The scatterlist API gives us only the address and + * length of each elements. + * + * Unfortunately, we don't have the stride, which we + * will need to compute. + * + * That make us end up in a situation like this one: + * len stride len stride len + * +-------+ +-------+ +-------+ + * | N-2 | | N-1 | | N | + * +-------+ +-------+ +-------+ + * + * We need all these three elements (N-2, N-1 and N) + * to actually take the decision on whether we need to + * queue N-1 or reuse N-2. + * + * We will only consider N if it is the last element. 
+ */ + if (ppdesc && pdesc) { + if ((stride == pstride) && + (sg_dma_len(ppsg) == sg_dma_len(psg))) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, pdesc, ppdesc); + + /* + * Increment the block count of the + * N-2 descriptor + */ + at_xdmac_increment_block_count(chan, ppdesc); + ppdesc->lld.mbr_dus = stride; + + /* + * Put back the N-1 descriptor in the + * free descriptor list + */ + list_add_tail(&pdesc->desc_node, + &atchan->free_descs_list); + + /* + * Make our N-1 descriptor pointer + * point to the N-2 since they were + * actually merged. + */ + pdesc = ppdesc; + + /* + * Rule out the case where we don't have + * pstride computed yet (our second sg + * element) + * + * We also want to catch the case where there + * would be a negative stride, + */ + } else if (pstride || + sg_dma_address(sg) < sg_dma_address(psg)) { + /* + * Queue the N-1 descriptor after the + * N-2 + */ + at_xdmac_queue_desc(chan, ppdesc, pdesc); + + /* + * Add the N-1 descriptor to the list + * of the descriptors used for this + * transfer + */ + list_add_tail(&desc->desc_node, + &first->descs_list); + dev_dbg(chan2dev(chan), + "%s: add desc 0x%p to descs_list 0x%p\n", + __func__, desc, first); + } + } + + /* + * If we are the last element, just see if we have the + * same size than the previous element. + * + * If so, we can merge it with the previous descriptor + * since we don't care about the stride anymore. + */ + if ((i == (sg_len - 1)) && + sg_dma_len(psg) == sg_dma_len(sg)) { + dev_dbg(chan2dev(chan), + "%s: desc 0x%p can be merged with desc 0x%p\n", + __func__, desc, pdesc); + + /* + * Increment the block count of the N-1 + * descriptor + */ + at_xdmac_increment_block_count(chan, pdesc); + pdesc->lld.mbr_dus = stride; + + /* + * Put back the N descriptor in the free + * descriptor list + */ + list_add_tail(&desc->desc_node, + &atchan->free_descs_list); + } + + /* Update our descriptors */ + ppdesc = pdesc; + pdesc = desc; + + /* Update our scatter pointers */ + ppsg = psg; + psg = sg; + + len += sg_dma_len(sg); + } + + first->tx_dma_desc.cookie = -EBUSY; + first->tx_dma_desc.flags = flags; + first->xfer_size = len; + + return &first->tx_dma_desc; +} + static enum dma_status at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct dma_tx_state *txstate) @@ -925,8 +1384,8 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, struct at_xdmac_desc *desc, *_desc; struct list_head *descs_list; enum dma_status ret; - int residue; - u32 cur_nda, mask, value; + int residue, retry; + u32 cur_nda, check_nda, cur_ubc, mask, value; u8 dwidth = 0; unsigned long flags; @@ -963,7 +1422,42 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, cpu_relax(); } + /* + * When processing the residue, we need to read two registers but we + * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where + * we stand in the descriptor list and AT_XDMAC_CUBC is used + * to know how many data are remaining for the current descriptor. + * Since the dma channel is not paused to not loose data, between the + * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of + * descriptor. + * For that reason, after reading AT_XDMAC_CUBC, we check if we are + * still using the same descriptor by reading a second time + * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to + * read again AT_XDMAC_CUBC. + * Memory barriers are used to ensure the read order of the registers. 
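
(Illustrative aside, not part of the patch: the retry loop below exists so that the residue reported through dmaengine_tx_status() stays meaningful without pausing the channel. From the client side it is consumed as in this minimal sketch; the helper name bytes_left is invented for illustration.)

	#include <linux/dmaengine.h>

	static u32 bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
	{
		struct dma_tx_state state;
		enum dma_status status;

		status = dmaengine_tx_status(chan, cookie, &state);
		if (status == DMA_COMPLETE)
			return 0;

		/* residue: bytes still to be transferred for this cookie */
		return state.residue;
	}
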
+ * A max number of retries is set because unlikely it can never ends if + * we are transferring a lot of data with small buffers. + */ cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) { + rmb(); + check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc; + + if (likely(cur_nda == check_nda)) + break; + + cur_nda = check_nda; + rmb(); + cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC); + } + + if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) { + ret = DMA_ERROR; + goto spin_unlock; + } + /* * Remove size of all microblocks already transferred and the current * one. Then add the remaining size to transfer of the current @@ -976,7 +1470,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie, if ((desc->lld.mbr_nda & 0xfffffffc) == cur_nda) break; } - residue += at_xdmac_chan_read(atchan, AT_XDMAC_CUBC) << dwidth; + residue += cur_ubc << dwidth; dma_set_residue(txstate, residue); @@ -1230,6 +1724,7 @@ static int at_xdmac_device_terminate_all(struct dma_chan *chan) list_for_each_entry_safe(desc, _desc, &atchan->xfers_list, xfer_node) at_xdmac_remove_xfer(atchan, desc); + clear_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status); clear_bit(AT_XDMAC_CHAN_IS_CYCLIC, &atchan->status); spin_unlock_irqrestore(&atchan->lock, flags); @@ -1362,6 +1857,8 @@ static int atmel_xdmac_resume(struct device *dev) atchan = to_at_xdmac_chan(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CC, atchan->save_cc); if (at_xdmac_chan_is_cyclic(atchan)) { + if (at_xdmac_chan_is_paused(atchan)) + at_xdmac_device_resume(chan); at_xdmac_chan_write(atchan, AT_XDMAC_CNDA, atchan->save_cnda); at_xdmac_chan_write(atchan, AT_XDMAC_CNDC, atchan->save_cndc); at_xdmac_chan_write(atchan, AT_XDMAC_CIE, atchan->save_cim); @@ -1446,7 +1943,10 @@ static int at_xdmac_probe(struct platform_device *pdev) } dma_cap_set(DMA_CYCLIC, atxdmac->dma.cap_mask); + dma_cap_set(DMA_INTERLEAVE, atxdmac->dma.cap_mask); dma_cap_set(DMA_MEMCPY, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET, atxdmac->dma.cap_mask); + dma_cap_set(DMA_MEMSET_SG, atxdmac->dma.cap_mask); dma_cap_set(DMA_SLAVE, atxdmac->dma.cap_mask); /* * Without DMA_PRIVATE the driver is not able to allocate more than @@ -1459,7 +1959,10 @@ static int at_xdmac_probe(struct platform_device *pdev) atxdmac->dma.device_tx_status = at_xdmac_tx_status; atxdmac->dma.device_issue_pending = at_xdmac_issue_pending; atxdmac->dma.device_prep_dma_cyclic = at_xdmac_prep_dma_cyclic; + atxdmac->dma.device_prep_interleaved_dma = at_xdmac_prep_interleaved; atxdmac->dma.device_prep_dma_memcpy = at_xdmac_prep_dma_memcpy; + atxdmac->dma.device_prep_dma_memset = at_xdmac_prep_dma_memset; + atxdmac->dma.device_prep_dma_memset_sg = at_xdmac_prep_dma_memset_sg; atxdmac->dma.device_prep_slave_sg = at_xdmac_prep_slave_sg; atxdmac->dma.device_config = at_xdmac_device_config; atxdmac->dma.device_pause = at_xdmac_device_pause; diff --git a/kernel/drivers/dma/bcm2835-dma.c b/kernel/drivers/dma/bcm2835-dma.c index c92d6a70c..996c4b00d 100644 --- a/kernel/drivers/dma/bcm2835-dma.c +++ b/kernel/drivers/dma/bcm2835-dma.c @@ -31,6 +31,7 @@ */ #include <linux/dmaengine.h> #include <linux/dma-mapping.h> +#include <linux/dmapool.h> #include <linux/err.h> #include <linux/init.h> #include <linux/interrupt.h> @@ -62,6 +63,11 @@ struct bcm2835_dma_cb { uint32_t pad[2]; }; +struct bcm2835_cb_entry { + struct bcm2835_dma_cb *cb; + dma_addr_t paddr; +}; + struct 
bcm2835_chan { struct virt_dma_chan vc; struct list_head node; @@ -72,18 +78,18 @@ struct bcm2835_chan { int ch; struct bcm2835_desc *desc; + struct dma_pool *cb_pool; void __iomem *chan_base; int irq_number; }; struct bcm2835_desc { + struct bcm2835_chan *c; struct virt_dma_desc vd; enum dma_transfer_direction dir; - unsigned int control_block_size; - struct bcm2835_dma_cb *control_block_base; - dma_addr_t control_block_base_phys; + struct bcm2835_cb_entry *cb_list; unsigned int frames; size_t size; @@ -143,10 +149,13 @@ static inline struct bcm2835_desc *to_bcm2835_dma_desc( static void bcm2835_dma_desc_free(struct virt_dma_desc *vd) { struct bcm2835_desc *desc = container_of(vd, struct bcm2835_desc, vd); - dma_free_coherent(desc->vd.tx.chan->device->dev, - desc->control_block_size, - desc->control_block_base, - desc->control_block_base_phys); + int i; + + for (i = 0; i < desc->frames; i++) + dma_pool_free(desc->c->cb_pool, desc->cb_list[i].cb, + desc->cb_list[i].paddr); + + kfree(desc->cb_list); kfree(desc); } @@ -199,7 +208,7 @@ static void bcm2835_dma_start_desc(struct bcm2835_chan *c) c->desc = d = to_bcm2835_dma_desc(&vd->tx); - writel(d->control_block_base_phys, c->chan_base + BCM2835_DMA_ADDR); + writel(d->cb_list[0].paddr, c->chan_base + BCM2835_DMA_ADDR); writel(BCM2835_DMA_ACTIVE, c->chan_base + BCM2835_DMA_CS); } @@ -232,9 +241,16 @@ static irqreturn_t bcm2835_dma_callback(int irq, void *data) static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan) { struct bcm2835_chan *c = to_bcm2835_dma_chan(chan); + struct device *dev = c->vc.chan.device->dev; + + dev_dbg(dev, "Allocating DMA channel %d\n", c->ch); - dev_dbg(c->vc.chan.device->dev, - "Allocating DMA channel %d\n", c->ch); + c->cb_pool = dma_pool_create(dev_name(dev), dev, + sizeof(struct bcm2835_dma_cb), 0, 0); + if (!c->cb_pool) { + dev_err(dev, "unable to allocate descriptor pool\n"); + return -ENOMEM; + } return request_irq(c->irq_number, bcm2835_dma_callback, 0, "DMA IRQ", c); @@ -246,6 +262,7 @@ static void bcm2835_dma_free_chan_resources(struct dma_chan *chan) vchan_free_chan_resources(&c->vc); free_irq(c->irq_number, c); + dma_pool_destroy(c->cb_pool); dev_dbg(c->vc.chan.device->dev, "Freeing DMA channel %u\n", c->ch); } @@ -261,8 +278,7 @@ static size_t bcm2835_dma_desc_size_pos(struct bcm2835_desc *d, dma_addr_t addr) size_t size; for (size = i = 0; i < d->frames; i++) { - struct bcm2835_dma_cb *control_block = - &d->control_block_base[i]; + struct bcm2835_dma_cb *control_block = d->cb_list[i].cb; size_t this_size = control_block->length; dma_addr_t dma; @@ -343,6 +359,7 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( dma_addr_t dev_addr; unsigned int es, sync_type; unsigned int frame; + int i; /* Grab configuration */ if (!is_slave_direction(direction)) { @@ -374,27 +391,31 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( if (!d) return NULL; + d->c = c; d->dir = direction; d->frames = buf_len / period_len; - /* Allocate memory for control blocks */ - d->control_block_size = d->frames * sizeof(struct bcm2835_dma_cb); - d->control_block_base = dma_zalloc_coherent(chan->device->dev, - d->control_block_size, &d->control_block_base_phys, - GFP_NOWAIT); - - if (!d->control_block_base) { + d->cb_list = kcalloc(d->frames, sizeof(*d->cb_list), GFP_KERNEL); + if (!d->cb_list) { kfree(d); return NULL; } + /* Allocate memory for control blocks */ + for (i = 0; i < d->frames; i++) { + struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; + + cb_entry->cb = 
dma_pool_zalloc(c->cb_pool, GFP_ATOMIC, + &cb_entry->paddr); + if (!cb_entry->cb) + goto error_cb; + } /* * Iterate over all frames, create a control block * for each frame and link them together. */ for (frame = 0; frame < d->frames; frame++) { - struct bcm2835_dma_cb *control_block = - &d->control_block_base[frame]; + struct bcm2835_dma_cb *control_block = d->cb_list[frame].cb; /* Setup adresses */ if (d->dir == DMA_DEV_TO_MEM) { @@ -428,12 +449,21 @@ static struct dma_async_tx_descriptor *bcm2835_dma_prep_dma_cyclic( * This DMA engine driver currently only supports cyclic DMA. * Therefore, wrap around at number of frames. */ - control_block->next = d->control_block_base_phys + - sizeof(struct bcm2835_dma_cb) - * ((frame + 1) % d->frames); + control_block->next = d->cb_list[((frame + 1) % d->frames)].paddr; } return vchan_tx_prep(&c->vc, &d->vd, flags); +error_cb: + i--; + for (; i >= 0; i--) { + struct bcm2835_cb_entry *cb_entry = &d->cb_list[i]; + + dma_pool_free(c->cb_pool, cb_entry->cb, cb_entry->paddr); + } + + kfree(d->cb_list); + kfree(d); + return NULL; } static int bcm2835_dma_slave_config(struct dma_chan *chan, diff --git a/kernel/drivers/dma/coh901318.c b/kernel/drivers/dma/coh901318.c index fd22dd369..c340ca9bd 100644 --- a/kernel/drivers/dma/coh901318.c +++ b/kernel/drivers/dma/coh901318.c @@ -2730,7 +2730,7 @@ static int __init coh901318_probe(struct platform_device *pdev) * This controller can only access address at even 32bit boundaries, * i.e. 2^2 */ - base->dma_memcpy.copy_align = 2; + base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES; err = dma_async_device_register(&base->dma_memcpy); if (err) diff --git a/kernel/drivers/dma/dma-axi-dmac.c b/kernel/drivers/dma/dma-axi-dmac.c new file mode 100644 index 000000000..5b2395e7e --- /dev/null +++ b/kernel/drivers/dma/dma-axi-dmac.c @@ -0,0 +1,691 @@ +/* + * Driver for the Analog Devices AXI-DMAC core + * + * Copyright 2013-2015 Analog Devices Inc. + * Author: Lars-Peter Clausen <lars@metafoo.de> + * + * Licensed under the GPL-2. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include <dt-bindings/dma/axi-dmac.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +/* + * The AXI-DMAC is a soft IP core that is used in FPGA designs. The core has + * various instantiation parameters which decided the exact feature set support + * by the core. + * + * Each channel of the core has a source interface and a destination interface. + * The number of channels and the type of the channel interfaces is selected at + * configuration time. A interface can either be a connected to a central memory + * interconnect, which allows access to system memory, or it can be connected to + * a dedicated bus which is directly connected to a data port on a peripheral. + * Given that those are configuration options of the core that are selected when + * it is instantiated this means that they can not be changed by software at + * runtime. By extension this means that each channel is uni-directional. It can + * either be device to memory or memory to device, but not both. 
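
(Illustrative aside, not part of the patch: a device-to-memory channel of this core can be driven as a cyclic ring buffer, for example to stream samples from a converter into memory. A minimal client sketch, assuming the channel, the buffer and its DMA address already exist; the helper name start_capture is invented for illustration.)

	#include <linux/dmaengine.h>

	static int start_capture(struct dma_chan *chan, dma_addr_t buf,
				 size_t buf_len, size_t period_len,
				 dma_async_tx_callback period_cb, void *cb_arg)
	{
		struct dma_async_tx_descriptor *tx;

		/* The device side has no address to program, only the memory side. */
		tx = dmaengine_prep_dma_cyclic(chan, buf, buf_len, period_len,
					       DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT);
		if (!tx)
			return -EINVAL;

		tx->callback = period_cb;	/* called once per completed period */
		tx->callback_param = cb_arg;

		dmaengine_submit(tx);
		dma_async_issue_pending(chan);

		return 0;
	}

Leaving the callback NULL lets this driver switch the core into hardware cyclic mode further down, so no end-of-transfer interrupts are taken at all.
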
Also since the + * device side is a dedicated data bus only connected to a single peripheral + * there is no address than can or needs to be configured for the device side. + */ + +#define AXI_DMAC_REG_IRQ_MASK 0x80 +#define AXI_DMAC_REG_IRQ_PENDING 0x84 +#define AXI_DMAC_REG_IRQ_SOURCE 0x88 + +#define AXI_DMAC_REG_CTRL 0x400 +#define AXI_DMAC_REG_TRANSFER_ID 0x404 +#define AXI_DMAC_REG_START_TRANSFER 0x408 +#define AXI_DMAC_REG_FLAGS 0x40c +#define AXI_DMAC_REG_DEST_ADDRESS 0x410 +#define AXI_DMAC_REG_SRC_ADDRESS 0x414 +#define AXI_DMAC_REG_X_LENGTH 0x418 +#define AXI_DMAC_REG_Y_LENGTH 0x41c +#define AXI_DMAC_REG_DEST_STRIDE 0x420 +#define AXI_DMAC_REG_SRC_STRIDE 0x424 +#define AXI_DMAC_REG_TRANSFER_DONE 0x428 +#define AXI_DMAC_REG_ACTIVE_TRANSFER_ID 0x42c +#define AXI_DMAC_REG_STATUS 0x430 +#define AXI_DMAC_REG_CURRENT_SRC_ADDR 0x434 +#define AXI_DMAC_REG_CURRENT_DEST_ADDR 0x438 + +#define AXI_DMAC_CTRL_ENABLE BIT(0) +#define AXI_DMAC_CTRL_PAUSE BIT(1) + +#define AXI_DMAC_IRQ_SOT BIT(0) +#define AXI_DMAC_IRQ_EOT BIT(1) + +#define AXI_DMAC_FLAG_CYCLIC BIT(0) + +struct axi_dmac_sg { + dma_addr_t src_addr; + dma_addr_t dest_addr; + unsigned int x_len; + unsigned int y_len; + unsigned int dest_stride; + unsigned int src_stride; + unsigned int id; +}; + +struct axi_dmac_desc { + struct virt_dma_desc vdesc; + bool cyclic; + + unsigned int num_submitted; + unsigned int num_completed; + unsigned int num_sgs; + struct axi_dmac_sg sg[]; +}; + +struct axi_dmac_chan { + struct virt_dma_chan vchan; + + struct axi_dmac_desc *next_desc; + struct list_head active_descs; + enum dma_transfer_direction direction; + + unsigned int src_width; + unsigned int dest_width; + unsigned int src_type; + unsigned int dest_type; + + unsigned int max_length; + unsigned int align_mask; + + bool hw_cyclic; + bool hw_2d; +}; + +struct axi_dmac { + void __iomem *base; + int irq; + + struct clk *clk; + + struct dma_device dma_dev; + struct axi_dmac_chan chan; + + struct device_dma_parameters dma_parms; +}; + +static struct axi_dmac *chan_to_axi_dmac(struct axi_dmac_chan *chan) +{ + return container_of(chan->vchan.chan.device, struct axi_dmac, + dma_dev); +} + +static struct axi_dmac_chan *to_axi_dmac_chan(struct dma_chan *c) +{ + return container_of(c, struct axi_dmac_chan, vchan.chan); +} + +static struct axi_dmac_desc *to_axi_dmac_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct axi_dmac_desc, vdesc); +} + +static void axi_dmac_write(struct axi_dmac *axi_dmac, unsigned int reg, + unsigned int val) +{ + writel(val, axi_dmac->base + reg); +} + +static int axi_dmac_read(struct axi_dmac *axi_dmac, unsigned int reg) +{ + return readl(axi_dmac->base + reg); +} + +static int axi_dmac_src_is_mem(struct axi_dmac_chan *chan) +{ + return chan->src_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static int axi_dmac_dest_is_mem(struct axi_dmac_chan *chan) +{ + return chan->dest_type == AXI_DMAC_BUS_TYPE_AXI_MM; +} + +static bool axi_dmac_check_len(struct axi_dmac_chan *chan, unsigned int len) +{ + if (len == 0 || len > chan->max_length) + return false; + if ((len & chan->align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static bool axi_dmac_check_addr(struct axi_dmac_chan *chan, dma_addr_t addr) +{ + if ((addr & chan->align_mask) != 0) /* Not aligned */ + return false; + return true; +} + +static void axi_dmac_start_transfer(struct axi_dmac_chan *chan) +{ + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + struct virt_dma_desc *vdesc; + struct axi_dmac_desc *desc; + struct axi_dmac_sg *sg; + 
unsigned int flags = 0; + unsigned int val; + + val = axi_dmac_read(dmac, AXI_DMAC_REG_START_TRANSFER); + if (val) /* Queue is full, wait for the next SOT IRQ */ + return; + + desc = chan->next_desc; + + if (!desc) { + vdesc = vchan_next_desc(&chan->vchan); + if (!vdesc) + return; + list_move_tail(&vdesc->node, &chan->active_descs); + desc = to_axi_dmac_desc(vdesc); + } + sg = &desc->sg[desc->num_submitted]; + + desc->num_submitted++; + if (desc->num_submitted == desc->num_sgs) + chan->next_desc = NULL; + else + chan->next_desc = desc; + + sg->id = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_ID); + + if (axi_dmac_dest_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_ADDRESS, sg->dest_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_DEST_STRIDE, sg->dest_stride); + } + + if (axi_dmac_src_is_mem(chan)) { + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_ADDRESS, sg->src_addr); + axi_dmac_write(dmac, AXI_DMAC_REG_SRC_STRIDE, sg->src_stride); + } + + /* + * If the hardware supports cyclic transfers and there is no callback to + * call, enable hw cyclic mode to avoid unnecessary interrupts. + */ + if (chan->hw_cyclic && desc->cyclic && !desc->vdesc.tx.callback) + flags |= AXI_DMAC_FLAG_CYCLIC; + + axi_dmac_write(dmac, AXI_DMAC_REG_X_LENGTH, sg->x_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_Y_LENGTH, sg->y_len - 1); + axi_dmac_write(dmac, AXI_DMAC_REG_FLAGS, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_START_TRANSFER, 1); +} + +static struct axi_dmac_desc *axi_dmac_active_desc(struct axi_dmac_chan *chan) +{ + return list_first_entry_or_null(&chan->active_descs, + struct axi_dmac_desc, vdesc.node); +} + +static void axi_dmac_transfer_done(struct axi_dmac_chan *chan, + unsigned int completed_transfers) +{ + struct axi_dmac_desc *active; + struct axi_dmac_sg *sg; + + active = axi_dmac_active_desc(chan); + if (!active) + return; + + if (active->cyclic) { + vchan_cyclic_callback(&active->vdesc); + } else { + do { + sg = &active->sg[active->num_completed]; + if (!(BIT(sg->id) & completed_transfers)) + break; + active->num_completed++; + if (active->num_completed == active->num_sgs) { + list_del(&active->vdesc.node); + vchan_cookie_complete(&active->vdesc); + active = axi_dmac_active_desc(chan); + } + } while (active); + } +} + +static irqreturn_t axi_dmac_interrupt_handler(int irq, void *devid) +{ + struct axi_dmac *dmac = devid; + unsigned int pending; + + pending = axi_dmac_read(dmac, AXI_DMAC_REG_IRQ_PENDING); + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_PENDING, pending); + + spin_lock(&dmac->chan.vchan.lock); + /* One or more transfers have finished */ + if (pending & AXI_DMAC_IRQ_EOT) { + unsigned int completed; + + completed = axi_dmac_read(dmac, AXI_DMAC_REG_TRANSFER_DONE); + axi_dmac_transfer_done(&dmac->chan, completed); + } + /* Space has become available in the descriptor queue */ + if (pending & AXI_DMAC_IRQ_SOT) + axi_dmac_start_transfer(&dmac->chan); + spin_unlock(&dmac->chan.vchan.lock); + + return IRQ_HANDLED; +} + +static int axi_dmac_terminate_all(struct dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + LIST_HEAD(head); + + spin_lock_irqsave(&chan->vchan.lock, flags); + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, 0); + chan->next_desc = NULL; + vchan_get_all_descriptors(&chan->vchan, &head); + list_splice_tail_init(&chan->active_descs, &head); + spin_unlock_irqrestore(&chan->vchan.lock, flags); + + vchan_dma_desc_free_list(&chan->vchan, &head); + + return 0; +} + +static void axi_dmac_issue_pending(struct 
dma_chan *c) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac *dmac = chan_to_axi_dmac(chan); + unsigned long flags; + + axi_dmac_write(dmac, AXI_DMAC_REG_CTRL, AXI_DMAC_CTRL_ENABLE); + + spin_lock_irqsave(&chan->vchan.lock, flags); + if (vchan_issue_pending(&chan->vchan)) + axi_dmac_start_transfer(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); +} + +static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs) +{ + struct axi_dmac_desc *desc; + + desc = kzalloc(sizeof(struct axi_dmac_desc) + + sizeof(struct axi_dmac_sg) * num_sgs, GFP_NOWAIT); + if (!desc) + return NULL; + + desc->num_sgs = num_sgs; + + return desc; +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_slave_sg( + struct dma_chan *c, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + struct scatterlist *sg; + unsigned int i; + + if (direction != chan->direction) + return NULL; + + desc = axi_dmac_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + if (!axi_dmac_check_addr(chan, sg_dma_address(sg)) || + !axi_dmac_check_len(chan, sg_dma_len(sg))) { + kfree(desc); + return NULL; + } + + if (direction == DMA_DEV_TO_MEM) + desc->sg[i].dest_addr = sg_dma_address(sg); + else + desc->sg[i].src_addr = sg_dma_address(sg); + desc->sg[i].x_len = sg_dma_len(sg); + desc->sg[i].y_len = 1; + } + + desc->cyclic = false; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_dma_cyclic( + struct dma_chan *c, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction direction, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + unsigned int num_periods, i; + + if (direction != chan->direction) + return NULL; + + if (!axi_dmac_check_len(chan, buf_len) || + !axi_dmac_check_addr(chan, buf_addr)) + return NULL; + + if (period_len == 0 || buf_len % period_len) + return NULL; + + num_periods = buf_len / period_len; + + desc = axi_dmac_alloc_desc(num_periods); + if (!desc) + return NULL; + + for (i = 0; i < num_periods; i++) { + if (direction == DMA_DEV_TO_MEM) + desc->sg[i].dest_addr = buf_addr; + else + desc->sg[i].src_addr = buf_addr; + desc->sg[i].x_len = period_len; + desc->sg[i].y_len = 1; + buf_addr += period_len; + } + + desc->cyclic = true; + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( + struct dma_chan *c, struct dma_interleaved_template *xt, + unsigned long flags) +{ + struct axi_dmac_chan *chan = to_axi_dmac_chan(c); + struct axi_dmac_desc *desc; + size_t dst_icg, src_icg; + + if (xt->frame_size != 1) + return NULL; + + if (xt->dir != chan->direction) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + if (!xt->src_inc || !axi_dmac_check_addr(chan, xt->src_start)) + return NULL; + } + + if (axi_dmac_dest_is_mem(chan)) { + if (!xt->dst_inc || !axi_dmac_check_addr(chan, xt->dst_start)) + return NULL; + } + + dst_icg = dmaengine_get_dst_icg(xt, &xt->sgl[0]); + src_icg = dmaengine_get_src_icg(xt, &xt->sgl[0]); + + if (chan->hw_2d) { + if (!axi_dmac_check_len(chan, xt->sgl[0].size) || + !axi_dmac_check_len(chan, xt->numf)) + return NULL; + if (xt->sgl[0].size + dst_icg > chan->max_length || + xt->sgl[0].size + src_icg > chan->max_length) + return NULL; + } else { 
+ if (dst_icg != 0 || src_icg != 0) + return NULL; + if (chan->max_length / xt->sgl[0].size < xt->numf) + return NULL; + if (!axi_dmac_check_len(chan, xt->sgl[0].size * xt->numf)) + return NULL; + } + + desc = axi_dmac_alloc_desc(1); + if (!desc) + return NULL; + + if (axi_dmac_src_is_mem(chan)) { + desc->sg[0].src_addr = xt->src_start; + desc->sg[0].src_stride = xt->sgl[0].size + src_icg; + } + + if (axi_dmac_dest_is_mem(chan)) { + desc->sg[0].dest_addr = xt->dst_start; + desc->sg[0].dest_stride = xt->sgl[0].size + dst_icg; + } + + if (chan->hw_2d) { + desc->sg[0].x_len = xt->sgl[0].size; + desc->sg[0].y_len = xt->numf; + } else { + desc->sg[0].x_len = xt->sgl[0].size * xt->numf; + desc->sg[0].y_len = 1; + } + + return vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); +} + +static void axi_dmac_free_chan_resources(struct dma_chan *c) +{ + vchan_free_chan_resources(to_virt_chan(c)); +} + +static void axi_dmac_desc_free(struct virt_dma_desc *vdesc) +{ + kfree(container_of(vdesc, struct axi_dmac_desc, vdesc)); +} + +/* + * The configuration stored in the devicetree matches the configuration + * parameters of the peripheral instance and allows the driver to know which + * features are implemented and how it should behave. + */ +static int axi_dmac_parse_chan_dt(struct device_node *of_chan, + struct axi_dmac_chan *chan) +{ + u32 val; + int ret; + + ret = of_property_read_u32(of_chan, "reg", &val); + if (ret) + return ret; + + /* We only support 1 channel for now */ + if (val != 0) + return -EINVAL; + + ret = of_property_read_u32(of_chan, "adi,source-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->src_type = val; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-type", &val); + if (ret) + return ret; + if (val > AXI_DMAC_BUS_TYPE_FIFO) + return -EINVAL; + chan->dest_type = val; + + ret = of_property_read_u32(of_chan, "adi,source-bus-width", &val); + if (ret) + return ret; + chan->src_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,destination-bus-width", &val); + if (ret) + return ret; + chan->dest_width = val / 8; + + ret = of_property_read_u32(of_chan, "adi,length-width", &val); + if (ret) + return ret; + + if (val >= 32) + chan->max_length = UINT_MAX; + else + chan->max_length = (1ULL << val) - 1; + + chan->align_mask = max(chan->dest_width, chan->src_width) - 1; + + if (axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_MEM; + else if (!axi_dmac_dest_is_mem(chan) && axi_dmac_src_is_mem(chan)) + chan->direction = DMA_MEM_TO_DEV; + else if (axi_dmac_dest_is_mem(chan) && !axi_dmac_src_is_mem(chan)) + chan->direction = DMA_DEV_TO_MEM; + else + chan->direction = DMA_DEV_TO_DEV; + + chan->hw_cyclic = of_property_read_bool(of_chan, "adi,cyclic"); + chan->hw_2d = of_property_read_bool(of_chan, "adi,2d"); + + return 0; +} + +static int axi_dmac_probe(struct platform_device *pdev) +{ + struct device_node *of_channels, *of_chan; + struct dma_device *dma_dev; + struct axi_dmac *dmac; + struct resource *res; + int ret; + + dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL); + if (!dmac) + return -ENOMEM; + + dmac->irq = platform_get_irq(pdev, 0); + if (dmac->irq <= 0) + return -EINVAL; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dmac->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dmac->base)) + return PTR_ERR(dmac->base); + + dmac->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(dmac->clk)) + return PTR_ERR(dmac->clk); + + 
INIT_LIST_HEAD(&dmac->chan.active_descs); + + of_channels = of_get_child_by_name(pdev->dev.of_node, "adi,channels"); + if (of_channels == NULL) + return -ENODEV; + + for_each_child_of_node(of_channels, of_chan) { + ret = axi_dmac_parse_chan_dt(of_chan, &dmac->chan); + if (ret) { + of_node_put(of_chan); + of_node_put(of_channels); + return -EINVAL; + } + } + of_node_put(of_channels); + + pdev->dev.dma_parms = &dmac->dma_parms; + dma_set_max_seg_size(&pdev->dev, dmac->chan.max_length); + + dma_dev = &dmac->dma_dev; + dma_cap_set(DMA_SLAVE, dma_dev->cap_mask); + dma_cap_set(DMA_CYCLIC, dma_dev->cap_mask); + dma_dev->device_free_chan_resources = axi_dmac_free_chan_resources; + dma_dev->device_tx_status = dma_cookie_status; + dma_dev->device_issue_pending = axi_dmac_issue_pending; + dma_dev->device_prep_slave_sg = axi_dmac_prep_slave_sg; + dma_dev->device_prep_dma_cyclic = axi_dmac_prep_dma_cyclic; + dma_dev->device_prep_interleaved_dma = axi_dmac_prep_interleaved; + dma_dev->device_terminate_all = axi_dmac_terminate_all; + dma_dev->dev = &pdev->dev; + dma_dev->chancnt = 1; + dma_dev->src_addr_widths = BIT(dmac->chan.src_width); + dma_dev->dst_addr_widths = BIT(dmac->chan.dest_width); + dma_dev->directions = BIT(dmac->chan.direction); + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + INIT_LIST_HEAD(&dma_dev->channels); + + dmac->chan.vchan.desc_free = axi_dmac_desc_free; + vchan_init(&dmac->chan.vchan, dma_dev); + + ret = clk_prepare_enable(dmac->clk); + if (ret < 0) + return ret; + + axi_dmac_write(dmac, AXI_DMAC_REG_IRQ_MASK, 0x00); + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_clk_disable; + + ret = of_dma_controller_register(pdev->dev.of_node, + of_dma_xlate_by_chan_id, dma_dev); + if (ret) + goto err_unregister_device; + + ret = request_irq(dmac->irq, axi_dmac_interrupt_handler, 0, + dev_name(&pdev->dev), dmac); + if (ret) + goto err_unregister_of; + + platform_set_drvdata(pdev, dmac); + + return 0; + +err_unregister_of: + of_dma_controller_free(pdev->dev.of_node); +err_unregister_device: + dma_async_device_unregister(&dmac->dma_dev); +err_clk_disable: + clk_disable_unprepare(dmac->clk); + + return ret; +} + +static int axi_dmac_remove(struct platform_device *pdev) +{ + struct axi_dmac *dmac = platform_get_drvdata(pdev); + + of_dma_controller_free(pdev->dev.of_node); + free_irq(dmac->irq, dmac); + tasklet_kill(&dmac->chan.vchan.task); + dma_async_device_unregister(&dmac->dma_dev); + clk_disable_unprepare(dmac->clk); + + return 0; +} + +static const struct of_device_id axi_dmac_of_match_table[] = { + { .compatible = "adi,axi-dmac-1.00.a" }, + { }, +}; + +static struct platform_driver axi_dmac_driver = { + .driver = { + .name = "dma-axi-dmac", + .of_match_table = axi_dmac_of_match_table, + }, + .probe = axi_dmac_probe, + .remove = axi_dmac_remove, +}; +module_platform_driver(axi_dmac_driver); + +MODULE_AUTHOR("Lars-Peter Clausen <lars@metafoo.de>"); +MODULE_DESCRIPTION("DMA controller driver for the AXI-DMAC controller"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/dma/dma-jz4780.c b/kernel/drivers/dma/dma-jz4780.c index 26d2f0e09..dade7c47f 100644 --- a/kernel/drivers/dma/dma-jz4780.c +++ b/kernel/drivers/dma/dma-jz4780.c @@ -145,7 +145,8 @@ struct jz4780_dma_dev { struct jz4780_dma_chan chan[JZ_DMA_NR_CHANNELS]; }; -struct jz4780_dma_data { +struct jz4780_dma_filter_data { + struct device_node *of_node; uint32_t transfer_type; int channel; }; @@ -214,11 +215,25 @@ static void jz4780_dma_desc_free(struct virt_dma_desc *vdesc) 
kfree(desc); } -static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) +static uint32_t jz4780_dma_transfer_size(unsigned long val, uint32_t *shift) { - *ord = ffs(val) - 1; + int ord = ffs(val) - 1; - switch (*ord) { + /* + * 8 byte transfer sizes unsupported so fall back on 4. If it's larger + * than the maximum, just limit it. It is perfectly safe to fall back + * in this way since we won't exceed the maximum burst size supported + * by the device, the only effect is reduced efficiency. This is better + * than refusing to perform the request at all. + */ + if (ord == 3) + ord = 2; + else if (ord > 7) + ord = 7; + + *shift = ord; + + switch (ord) { case 0: return JZ_DMA_SIZE_1_BYTE; case 1: @@ -231,20 +246,17 @@ static uint32_t jz4780_dma_transfer_size(unsigned long val, int *ord) return JZ_DMA_SIZE_32_BYTE; case 6: return JZ_DMA_SIZE_64_BYTE; - case 7: - return JZ_DMA_SIZE_128_BYTE; default: - return -EINVAL; + return JZ_DMA_SIZE_128_BYTE; } } -static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, +static int jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, struct jz4780_dma_hwdesc *desc, dma_addr_t addr, size_t len, enum dma_transfer_direction direction) { struct dma_slave_config *config = &jzchan->config; uint32_t width, maxburst, tsz; - int ord; if (direction == DMA_MEM_TO_DEV) { desc->dcm = JZ_DMA_DCM_SAI; @@ -271,8 +283,8 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, * divisible by the transfer size, and we must not use more than the * maximum burst specified by the user. */ - tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), &ord); - jzchan->transfer_shift = ord; + tsz = jz4780_dma_transfer_size(addr | len | (width * maxburst), + &jzchan->transfer_shift); switch (width) { case DMA_SLAVE_BUSWIDTH_1_BYTE: @@ -289,12 +301,14 @@ static uint32_t jz4780_dma_setup_hwdesc(struct jz4780_dma_chan *jzchan, desc->dcm |= width << JZ_DMA_DCM_SP_SHIFT; desc->dcm |= width << JZ_DMA_DCM_DP_SHIFT; - desc->dtc = len >> ord; + desc->dtc = len >> jzchan->transfer_shift; + return 0; } static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( struct dma_chan *chan, struct scatterlist *sgl, unsigned int sg_len, - enum dma_transfer_direction direction, unsigned long flags) + enum dma_transfer_direction direction, unsigned long flags, + void *context) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; @@ -307,12 +321,11 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_slave_sg( for (i = 0; i < sg_len; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], - sg_dma_address(&sgl[i]), - sg_dma_len(&sgl[i]), - direction); + sg_dma_address(&sgl[i]), + sg_dma_len(&sgl[i]), + direction); if (err < 0) - return ERR_PTR(err); - + return NULL; desc->desc[i].dcm |= JZ_DMA_DCM_TIE; @@ -354,9 +367,9 @@ static struct dma_async_tx_descriptor *jz4780_dma_prep_dma_cyclic( for (i = 0; i < periods; i++) { err = jz4780_dma_setup_hwdesc(jzchan, &desc->desc[i], buf_addr, - period_len, direction); + period_len, direction); if (err < 0) - return ERR_PTR(err); + return NULL; buf_addr += period_len; @@ -390,15 +403,13 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_desc *desc; uint32_t tsz; - int ord; desc = jz4780_dma_desc_alloc(jzchan, 1, DMA_MEMCPY); if (!desc) return NULL; - tsz = jz4780_dma_transfer_size(dest | src | len, &ord); - if (tsz < 0) - return ERR_PTR(tsz); + tsz = 
jz4780_dma_transfer_size(dest | src | len, + &jzchan->transfer_shift); desc->desc[0].dsa = src; desc->desc[0].dta = dest; @@ -407,7 +418,7 @@ struct dma_async_tx_descriptor *jz4780_dma_prep_dma_memcpy( tsz << JZ_DMA_DCM_TSZ_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_SP_SHIFT | JZ_DMA_WIDTH_32_BIT << JZ_DMA_DCM_DP_SHIFT; - desc->desc[0].dtc = len >> ord; + desc->desc[0].dtc = len >> jzchan->transfer_shift; return vchan_tx_prep(&jzchan->vchan, &desc->vdesc, flags); } @@ -484,8 +495,9 @@ static void jz4780_dma_issue_pending(struct dma_chan *chan) spin_unlock_irqrestore(&jzchan->vchan.lock, flags); } -static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) +static int jz4780_dma_terminate_all(struct dma_chan *chan) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); unsigned long flags; LIST_HEAD(head); @@ -507,9 +519,11 @@ static int jz4780_dma_terminate_all(struct jz4780_dma_chan *jzchan) return 0; } -static int jz4780_dma_slave_config(struct jz4780_dma_chan *jzchan, - const struct dma_slave_config *config) +static int jz4780_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) { + struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); + if ((config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES) || (config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)) return -EINVAL; @@ -567,8 +581,8 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, txstate->residue = 0; if (vdesc && jzchan->desc && vdesc == &jzchan->desc->vdesc - && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) - status = DMA_ERROR; + && jzchan->desc->status & (JZ_DMA_DCS_AR | JZ_DMA_DCS_HLT)) + status = DMA_ERROR; spin_unlock_irqrestore(&jzchan->vchan.lock, flags); return status; @@ -671,7 +685,10 @@ static bool jz4780_dma_filter_fn(struct dma_chan *chan, void *param) { struct jz4780_dma_chan *jzchan = to_jz4780_dma_chan(chan); struct jz4780_dma_dev *jzdma = jz4780_dma_chan_parent(jzchan); - struct jz4780_dma_data *data = param; + struct jz4780_dma_filter_data *data = param; + + if (jzdma->dma_device.dev->of_node != data->of_node) + return false; if (data->channel > -1) { if (data->channel != jzchan->id) @@ -690,11 +707,12 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, { struct jz4780_dma_dev *jzdma = ofdma->of_dma_data; dma_cap_mask_t mask = jzdma->dma_device.cap_mask; - struct jz4780_dma_data data; + struct jz4780_dma_filter_data data; if (dma_spec->args_count != 2) return NULL; + data.of_node = ofdma->of_node; data.transfer_type = dma_spec->args[0]; data.channel = dma_spec->args[1]; @@ -713,9 +731,14 @@ static struct dma_chan *jz4780_of_dma_xlate(struct of_phandle_args *dma_spec, data.channel); return NULL; } - } - return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + jzdma->chan[data.channel].transfer_type = data.transfer_type; + + return dma_get_slave_channel( + &jzdma->chan[data.channel].vchan.chan); + } else { + return dma_request_channel(mask, jz4780_dma_filter_fn, &data); + } } static int jz4780_dma_probe(struct platform_device *pdev) @@ -743,23 +766,26 @@ static int jz4780_dma_probe(struct platform_device *pdev) if (IS_ERR(jzdma->base)) return PTR_ERR(jzdma->base); - jzdma->irq = platform_get_irq(pdev, 0); - if (jzdma->irq < 0) { + ret = platform_get_irq(pdev, 0); + if (ret < 0) { dev_err(dev, "failed to get IRQ: %d\n", ret); - return jzdma->irq; + return ret; } - ret = devm_request_irq(dev, jzdma->irq, jz4780_dma_irq_handler, 0, - dev_name(dev), 
jzdma); + jzdma->irq = ret; + + ret = request_irq(jzdma->irq, jz4780_dma_irq_handler, 0, dev_name(dev), + jzdma); if (ret) { dev_err(dev, "failed to request IRQ %u!\n", jzdma->irq); - return -EINVAL; + return ret; } jzdma->clk = devm_clk_get(dev, NULL); if (IS_ERR(jzdma->clk)) { dev_err(dev, "failed to get clock\n"); - return PTR_ERR(jzdma->clk); + ret = PTR_ERR(jzdma->clk); + goto err_free_irq; } clk_prepare_enable(jzdma->clk); @@ -775,13 +801,13 @@ static int jz4780_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_CYCLIC, dd->cap_mask); dd->dev = dev; - dd->copy_align = 2; /* 2^2 = 4 byte alignment */ + dd->copy_align = DMAENGINE_ALIGN_4_BYTES; dd->device_alloc_chan_resources = jz4780_dma_alloc_chan_resources; dd->device_free_chan_resources = jz4780_dma_free_chan_resources; dd->device_prep_slave_sg = jz4780_dma_prep_slave_sg; dd->device_prep_dma_cyclic = jz4780_dma_prep_dma_cyclic; dd->device_prep_dma_memcpy = jz4780_dma_prep_dma_memcpy; - dd->device_config = jz4780_dma_slave_config; + dd->device_config = jz4780_dma_config; dd->device_terminate_all = jz4780_dma_terminate_all; dd->device_tx_status = jz4780_dma_tx_status; dd->device_issue_pending = jz4780_dma_issue_pending; @@ -790,7 +816,6 @@ static int jz4780_dma_probe(struct platform_device *pdev) dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; - /* * Enable DMA controller, mark all channels as not programmable. * Also set the FMSC bit - it increases MSC performance, so it makes @@ -832,15 +857,24 @@ err_unregister_dev: err_disable_clk: clk_disable_unprepare(jzdma->clk); + +err_free_irq: + free_irq(jzdma->irq, jzdma); return ret; } static int jz4780_dma_remove(struct platform_device *pdev) { struct jz4780_dma_dev *jzdma = platform_get_drvdata(pdev); + int i; of_dma_controller_free(pdev->dev.of_node); - devm_free_irq(&pdev->dev, jzdma->irq, jzdma); + + free_irq(jzdma->irq, jzdma); + + for (i = 0; i < JZ_DMA_NR_CHANNELS; i++) + tasklet_kill(&jzdma->chan[i].vchan.task); + dma_async_device_unregister(&jzdma->dma_device); return 0; } diff --git a/kernel/drivers/dma/dmaengine.c b/kernel/drivers/dma/dmaengine.c index 3ddfd1f6c..3ecec1445 100644 --- a/kernel/drivers/dma/dmaengine.c +++ b/kernel/drivers/dma/dmaengine.c @@ -267,6 +267,13 @@ static void dma_chan_put(struct dma_chan *chan) /* This channel is not in use anymore, free it */ if (!chan->client_count && chan->device->device_free_chan_resources) chan->device->device_free_chan_resources(chan); + + /* If the channel is used via a DMA request router, free the mapping */ + if (chan->router && chan->router->route_free) { + chan->router->route_free(chan->router->dev, chan->route_data); + chan->router = NULL; + chan->route_data = NULL; + } } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -536,7 +543,7 @@ static struct dma_chan *private_candidate(const dma_cap_mask_t *mask, } /** - * dma_request_slave_channel - try to get specific channel exclusively + * dma_get_slave_channel - try to get specific channel exclusively * @chan: target channel */ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) @@ -547,10 +554,18 @@ struct dma_chan *dma_get_slave_channel(struct dma_chan *chan) mutex_lock(&dma_list_mutex); if (chan->client_count == 0) { + struct dma_device *device = chan->device; + + dma_cap_set(DMA_PRIVATE, device->cap_mask); + device->privatecnt++; err = dma_chan_get(chan); - if (err) + if (err) { pr_debug("%s: failed to get %s: (%d)\n", __func__, dma_chan_name(chan), err); + chan 
= NULL; + if (--device->privatecnt == 0) + dma_cap_clear(DMA_PRIVATE, device->cap_mask); + } } else chan = NULL; @@ -648,7 +663,7 @@ struct dma_chan *__dma_request_channel(const dma_cap_mask_t *mask, EXPORT_SYMBOL_GPL(__dma_request_channel); /** - * dma_request_slave_channel - try to allocate an exclusive slave channel + * dma_request_slave_channel_reason - try to allocate an exclusive slave channel * @dev: pointer to client device structure * @name: slave channel name * @@ -682,6 +697,10 @@ struct dma_chan *dma_request_slave_channel(struct device *dev, struct dma_chan *ch = dma_request_slave_channel_reason(dev, name); if (IS_ERR(ch)) return NULL; + + dma_cap_set(DMA_PRIVATE, ch->device->cap_mask); + ch->device->privatecnt++; + return ch; } EXPORT_SYMBOL_GPL(dma_request_slave_channel); @@ -836,6 +855,8 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_pq); BUG_ON(dma_has_cap(DMA_PQ_VAL, device->cap_mask) && !device->device_prep_dma_pq_val); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); BUG_ON(dma_has_cap(DMA_SG, device->cap_mask) && @@ -1053,11 +1074,9 @@ static void dmaengine_destroy_unmap_pool(void) for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) { struct dmaengine_unmap_pool *p = &unmap_pool[i]; - if (p->pool) - mempool_destroy(p->pool); + mempool_destroy(p->pool); p->pool = NULL; - if (p->cache) - kmem_cache_destroy(p->cache); + kmem_cache_destroy(p->cache); p->cache = NULL; } } diff --git a/kernel/drivers/dma/dmatest.c b/kernel/drivers/dma/dmatest.c index 220ee4963..b8576fd6b 100644 --- a/kernel/drivers/dma/dmatest.c +++ b/kernel/drivers/dma/dmatest.c @@ -120,7 +120,7 @@ static struct dmatest_info { static int dmatest_run_set(const char *val, const struct kernel_param *kp); static int dmatest_run_get(char *val, const struct kernel_param *kp); -static struct kernel_param_ops run_ops = { +static const struct kernel_param_ops run_ops = { .set = dmatest_run_set, .get = dmatest_run_get, }; @@ -195,7 +195,7 @@ static int dmatest_wait_get(char *val, const struct kernel_param *kp) return param_get_bool(val, kp); } -static struct kernel_param_ops wait_ops = { +static const struct kernel_param_ops wait_ops = { .get = dmatest_wait_get, .set = param_set_bool, }; diff --git a/kernel/drivers/dma/dw/Kconfig b/kernel/drivers/dma/dw/Kconfig index 36e02f0f6..e00c9b022 100644 --- a/kernel/drivers/dma/dw/Kconfig +++ b/kernel/drivers/dma/dw/Kconfig @@ -6,6 +6,9 @@ config DW_DMAC_CORE tristate select DMA_ENGINE +config DW_DMAC_BIG_ENDIAN_IO + bool + config DW_DMAC tristate "Synopsys DesignWare AHB DMA platform driver" select DW_DMAC_CORE @@ -23,6 +26,3 @@ config DW_DMAC_PCI Support the Synopsys DesignWare AHB DMA controller on the platfroms that enumerate it as a PCI device. For example, Intel Medfield has integrated this GPDMA controller. 
- -config DW_DMAC_BIG_ENDIAN_IO - bool diff --git a/kernel/drivers/dma/dw/core.c b/kernel/drivers/dma/dw/core.c index 1022c2e1a..4f099ea29 100644 --- a/kernel/drivers/dma/dw/core.c +++ b/kernel/drivers/dma/dw/core.c @@ -163,7 +163,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc) /*----------------------------------------------------------------------*/ -static inline unsigned int dwc_fast_fls(unsigned long long v) +static inline unsigned int dwc_fast_ffs(unsigned long long v) { /* * We can be a lot more clever here, but this should take care @@ -536,16 +536,17 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); /* Called with dwc->lock held and all DMAC interrupts disabled */ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, - u32 status_err, u32 status_xfer) + u32 status_block, u32 status_err, u32 status_xfer) { unsigned long flags; - if (dwc->mask) { + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; dev_vdbg(chan2dev(&dwc->chan), "new cyclic period llp 0x%08x\n", channel_readl(dwc, LLP)); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; @@ -577,6 +578,7 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, channel_writel(dwc, CTL_LO, 0); channel_writel(dwc, CTL_HI, 0); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -585,6 +587,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, spin_unlock_irqrestore(&dwc->lock, flags); } + + /* Re-enable interrupts */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); } /* ------------------------------------------------------------------------- */ @@ -593,10 +598,12 @@ static void dw_dma_tasklet(unsigned long data) { struct dw_dma *dw = (struct dw_dma *)data; struct dw_dma_chan *dwc; + u32 status_block; u32 status_xfer; u32 status_err; int i; + status_block = dma_readl(dw, RAW.BLOCK); status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); @@ -605,16 +612,15 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) - dwc_handle_cyclic(dw, dwc, status_err, status_xfer); + dwc_handle_cyclic(dw, dwc, status_block, status_err, + status_xfer); else if (status_err & (1 << i)) dwc_handle_error(dw, dwc); else if (status_xfer & (1 << i)) dwc_scan_descriptors(dw, dwc); } - /* - * Re-enable interrupts. - */ + /* Re-enable interrupts */ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); } @@ -635,6 +641,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) * softirq handler. 
*/ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); status = dma_readl(dw, STATUS_INT); @@ -645,6 +652,7 @@ static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) /* Try to recover */ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); @@ -712,7 +720,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, dw->data_width[dwc->dst_master]); src_width = dst_width = min_t(unsigned int, data_width, - dwc_fast_fls(src | dest | len)); + dwc_fast_ffs(src | dest | len)); ctllo = DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(dst_width) @@ -791,7 +799,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, switch (direction) { case DMA_MEM_TO_DEV: - reg_width = __fls(sconfig->dst_addr_width); + reg_width = __ffs(sconfig->dst_addr_width); reg = sconfig->dst_addr; ctllo = (DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_DST_WIDTH(reg_width) @@ -811,7 +819,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, len = sg_dma_len(sg); mem_width = min_t(unsigned int, - data_width, dwc_fast_fls(mem | len)); + data_width, dwc_fast_ffs(mem | len)); slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); @@ -848,7 +856,7 @@ slave_sg_todev_fill_desc: } break; case DMA_DEV_TO_MEM: - reg_width = __fls(sconfig->src_addr_width); + reg_width = __ffs(sconfig->src_addr_width); reg = sconfig->src_addr; ctllo = (DWC_DEFAULT_CTLLO(chan) | DWC_CTLL_SRC_WIDTH(reg_width) @@ -868,7 +876,7 @@ slave_sg_todev_fill_desc: len = sg_dma_len(sg); mem_width = min_t(unsigned int, - data_width, dwc_fast_fls(mem | len)); + data_width, dwc_fast_ffs(mem | len)); slave_sg_fromdev_fill_desc: desc = dwc_desc_get(dwc); @@ -1111,6 +1119,7 @@ static void dw_dma_off(struct dw_dma *dw) dma_writel(dw, CFG, 0); channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); @@ -1216,6 +1225,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) /* Disable interrupts */ channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1245,7 +1255,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - struct dw_dma *dw = to_dw_dma(dwc->chan.device); + struct dw_dma *dw = to_dw_dma(chan->device); unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { @@ -1255,25 +1265,10 @@ int dw_dma_cyclic_start(struct dma_chan *chan) spin_lock_irqsave(&dwc->lock, flags); - /* Assert channel is idle */ - if (dma_readl(dw, CH_EN) & dwc->mask) { - dev_err(chan2dev(&dwc->chan), - "%s: BUG: Attempted to start non-idle channel\n", - __func__); - dwc_dump_chan_regs(dwc); - spin_unlock_irqrestore(&dwc->lock, flags); - return -EBUSY; - } - - dma_writel(dw, CLEAR.ERROR, dwc->mask); - dma_writel(dw, CLEAR.XFER, dwc->mask); + /* Enable interrupts to perform cyclic transfer */ + channel_set_bit(dw, MASK.BLOCK, dwc->mask); - /* Setup DMAC channel registers */ - 
channel_writel(dwc, LLP, dwc->cdesc->desc[0]->txd.phys); - channel_writel(dwc, CTL_LO, DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); - channel_writel(dwc, CTL_HI, 0); - - channel_set_bit(dw, CH_EN, dwc->mask); + dwc_dostart(dwc, dwc->cdesc->desc[0]); spin_unlock_irqrestore(&dwc->lock, flags); @@ -1479,6 +1474,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dwc_chan_disable(dw, dwc); + dma_writel(dw, CLEAR.BLOCK, dwc->mask); dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); @@ -1499,9 +1495,8 @@ EXPORT_SYMBOL(dw_dma_cyclic_free); int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) { struct dw_dma *dw; - bool autocfg; + bool autocfg = false; unsigned int dw_params; - unsigned int nr_channels; unsigned int max_blk_size = 0; int err; int i; @@ -1515,33 +1510,42 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) pm_runtime_get_sync(chip->dev); - dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); - autocfg = dw_params >> DW_PARAMS_EN & 0x1; + if (!pdata) { + dw_params = dma_read_byaddr(chip->regs, DW_PARAMS); + dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); - dev_dbg(chip->dev, "DW_PARAMS: 0x%08x\n", dw_params); + autocfg = dw_params >> DW_PARAMS_EN & 1; + if (!autocfg) { + err = -EINVAL; + goto err_pdata; + } - if (!pdata && autocfg) { pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) { err = -ENOMEM; goto err_pdata; } + /* Get hardware configuration parameters */ + pdata->nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 7) + 1; + pdata->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; + for (i = 0; i < pdata->nr_masters; i++) { + pdata->data_width[i] = + (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; + } + max_blk_size = dma_readl(dw, MAX_BLK_SIZE); + /* Fill platform data with the default values */ pdata->is_private = true; + pdata->is_memcpy = true; pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING; pdata->chan_priority = CHAN_PRIORITY_ASCENDING; - } else if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { + } else if (pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) { err = -EINVAL; goto err_pdata; } - if (autocfg) - nr_channels = (dw_params >> DW_PARAMS_NR_CHAN & 0x7) + 1; - else - nr_channels = pdata->nr_channels; - - dw->chan = devm_kcalloc(chip->dev, nr_channels, sizeof(*dw->chan), + dw->chan = devm_kcalloc(chip->dev, pdata->nr_channels, sizeof(*dw->chan), GFP_KERNEL); if (!dw->chan) { err = -ENOMEM; @@ -1549,29 +1553,16 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) } /* Get hardware configuration parameters */ - if (autocfg) { - max_blk_size = dma_readl(dw, MAX_BLK_SIZE); - - dw->nr_masters = (dw_params >> DW_PARAMS_NR_MASTER & 3) + 1; - for (i = 0; i < dw->nr_masters; i++) { - dw->data_width[i] = - (dw_params >> DW_PARAMS_DATA_WIDTH(i) & 3) + 2; - } - } else { - dw->nr_masters = pdata->nr_masters; - for (i = 0; i < dw->nr_masters; i++) - dw->data_width[i] = pdata->data_width[i]; - } + dw->nr_masters = pdata->nr_masters; + for (i = 0; i < dw->nr_masters; i++) + dw->data_width[i] = pdata->data_width[i]; /* Calculate all channel mask before DMA setup */ - dw->all_chan_mask = (1 << nr_channels) - 1; + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; /* Force dma off, just in case */ dw_dma_off(dw); - /* Disable BLOCK interrupts as well */ - channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); - /* Create a pool of consistent memory blocks for hardware descriptors */ dw->desc_pool = 
dmam_pool_create("dw_dmac_desc_pool", chip->dev, sizeof(struct dw_desc), 4, 0); @@ -1589,9 +1580,8 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_pdata; INIT_LIST_HEAD(&dw->dma.channels); - for (i = 0; i < nr_channels; i++) { + for (i = 0; i < pdata->nr_channels; i++) { struct dw_dma_chan *dwc = &dw->chan[i]; - int r = nr_channels - i - 1; dwc->chan.device = &dw->dma; dma_cookie_init(&dwc->chan); @@ -1603,7 +1593,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* 7 is highest priority & 0 is lowest. */ if (pdata->chan_priority == CHAN_PRIORITY_ASCENDING) - dwc->priority = r; + dwc->priority = pdata->nr_channels - i - 1; else dwc->priority = i; @@ -1622,6 +1612,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) /* Hardware configuration */ if (autocfg) { unsigned int dwc_params; + unsigned int r = DW_DMA_MAX_NR_CHANNELS - i - 1; void __iomem *addr = chip->regs + r * sizeof(u32); dwc_params = dma_read_byaddr(addr, DWC_PARAMS); @@ -1656,10 +1647,13 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask); dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask); - dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + /* Set capabilities */ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); if (pdata->is_private) dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask); + if (pdata->is_memcpy) + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dw->dma.dev = chip->dev; dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; dw->dma.device_free_chan_resources = dwc_free_chan_resources; @@ -1687,7 +1681,7 @@ int dw_dma_probe(struct dw_dma_chip *chip, struct dw_dma_platform_data *pdata) goto err_dma_register; dev_info(chip->dev, "DesignWare DMA Controller, %d channels\n", - nr_channels); + pdata->nr_channels); pm_runtime_put_sync_suspend(chip->dev); @@ -1746,4 +1740,4 @@ EXPORT_SYMBOL_GPL(dw_dma_enable); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver"); MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); -MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>"); +MODULE_AUTHOR("Viresh Kumar <vireshk@kernel.org>"); diff --git a/kernel/drivers/dma/dw/pci.c b/kernel/drivers/dma/dw/pci.c index b144706b3..4c30fdd09 100644 --- a/kernel/drivers/dma/dw/pci.c +++ b/kernel/drivers/dma/dw/pci.c @@ -15,12 +15,6 @@ #include "internal.h" -static struct dw_dma_platform_data dw_pci_pdata = { - .is_private = 1, - .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, - .chan_priority = CHAN_PRIORITY_ASCENDING, -}; - static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid) { struct dw_dma_chip *chip; @@ -101,19 +95,19 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = { static const struct pci_device_id dw_pci_id_table[] = { /* Medfield */ - { PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_pdata }, - { PCI_VDEVICE(INTEL, 0x0830), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x0827) }, + { PCI_VDEVICE(INTEL, 0x0830) }, /* BayTrail */ - { PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_pdata }, - { PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x0f06) }, + { PCI_VDEVICE(INTEL, 0x0f40) }, /* Braswell */ - { PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_pci_pdata }, - { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x2286) }, + { PCI_VDEVICE(INTEL, 0x22c0) }, /* Haswell */ - { PCI_VDEVICE(INTEL, 0x9c60), 
(kernel_ulong_t)&dw_pci_pdata }, + { PCI_VDEVICE(INTEL, 0x9c60) }, { } }; MODULE_DEVICE_TABLE(pci, dw_pci_id_table); diff --git a/kernel/drivers/dma/dw/platform.c b/kernel/drivers/dma/dw/platform.c index b2c3ae071..68a481575 100644 --- a/kernel/drivers/dma/dw/platform.c +++ b/kernel/drivers/dma/dw/platform.c @@ -155,6 +155,7 @@ static int dw_probe(struct platform_device *pdev) struct dw_dma_chip *chip; struct device *dev = &pdev->dev; struct resource *mem; + const struct acpi_device_id *id; struct dw_dma_platform_data *pdata; int err; @@ -178,6 +179,11 @@ static int dw_probe(struct platform_device *pdev) pdata = dev_get_platdata(dev); if (!pdata) pdata = dw_dma_parse_dt(pdev); + if (!pdata && has_acpi_companion(dev)) { + id = acpi_match_device(dev->driver->acpi_match_table, dev); + if (id) + pdata = (struct dw_dma_platform_data *)id->driver_data; + } chip->dev = dev; @@ -246,8 +252,17 @@ MODULE_DEVICE_TABLE(of, dw_dma_of_id_table); #endif #ifdef CONFIG_ACPI +static struct dw_dma_platform_data dw_dma_acpi_pdata = { + .nr_channels = 8, + .is_private = true, + .chan_allocation_order = CHAN_ALLOCATION_ASCENDING, + .chan_priority = CHAN_PRIORITY_ASCENDING, + .block_size = 4095, + .nr_masters = 2, +}; + static const struct acpi_device_id dw_dma_acpi_id_table[] = { - { "INTL9C60", 0 }, + { "INTL9C60", (kernel_ulong_t)&dw_dma_acpi_pdata }, { } }; MODULE_DEVICE_TABLE(acpi, dw_dma_acpi_id_table); diff --git a/kernel/drivers/dma/edma.c b/kernel/drivers/dma/edma.c index bf09db7ca..16fe773fb 100644 --- a/kernel/drivers/dma/edma.c +++ b/kernel/drivers/dma/edma.c @@ -25,28 +25,93 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/of.h> +#include <linux/of_dma.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> #include <linux/platform_data/edma.h> #include "dmaengine.h" #include "virt-dma.h" -/* - * This will go away when the private EDMA API is folded - * into this driver and the platform device(s) are - * instantiated in the arch code. We can only get away - * with this simplification because DA8XX may not be built - * in the same kernel image with other DaVinci parts. This - * avoids having to sprinkle dmaengine driver platform devices - * and data throughout all the existing board files. 
- */ -#ifdef CONFIG_ARCH_DAVINCI_DA8XX -#define EDMA_CTLRS 2 -#define EDMA_CHANS 32 -#else -#define EDMA_CTLRS 1 -#define EDMA_CHANS 64 -#endif /* CONFIG_ARCH_DAVINCI_DA8XX */ +/* Offsets matching "struct edmacc_param" */ +#define PARM_OPT 0x00 +#define PARM_SRC 0x04 +#define PARM_A_B_CNT 0x08 +#define PARM_DST 0x0c +#define PARM_SRC_DST_BIDX 0x10 +#define PARM_LINK_BCNTRLD 0x14 +#define PARM_SRC_DST_CIDX 0x18 +#define PARM_CCNT 0x1c + +#define PARM_SIZE 0x20 + +/* Offsets for EDMA CC global channel registers and their shadows */ +#define SH_ER 0x00 /* 64 bits */ +#define SH_ECR 0x08 /* 64 bits */ +#define SH_ESR 0x10 /* 64 bits */ +#define SH_CER 0x18 /* 64 bits */ +#define SH_EER 0x20 /* 64 bits */ +#define SH_EECR 0x28 /* 64 bits */ +#define SH_EESR 0x30 /* 64 bits */ +#define SH_SER 0x38 /* 64 bits */ +#define SH_SECR 0x40 /* 64 bits */ +#define SH_IER 0x50 /* 64 bits */ +#define SH_IECR 0x58 /* 64 bits */ +#define SH_IESR 0x60 /* 64 bits */ +#define SH_IPR 0x68 /* 64 bits */ +#define SH_ICR 0x70 /* 64 bits */ +#define SH_IEVAL 0x78 +#define SH_QER 0x80 +#define SH_QEER 0x84 +#define SH_QEECR 0x88 +#define SH_QEESR 0x8c +#define SH_QSER 0x90 +#define SH_QSECR 0x94 +#define SH_SIZE 0x200 + +/* Offsets for EDMA CC global registers */ +#define EDMA_REV 0x0000 +#define EDMA_CCCFG 0x0004 +#define EDMA_QCHMAP 0x0200 /* 8 registers */ +#define EDMA_DMAQNUM 0x0240 /* 8 registers (4 on OMAP-L1xx) */ +#define EDMA_QDMAQNUM 0x0260 +#define EDMA_QUETCMAP 0x0280 +#define EDMA_QUEPRI 0x0284 +#define EDMA_EMR 0x0300 /* 64 bits */ +#define EDMA_EMCR 0x0308 /* 64 bits */ +#define EDMA_QEMR 0x0310 +#define EDMA_QEMCR 0x0314 +#define EDMA_CCERR 0x0318 +#define EDMA_CCERRCLR 0x031c +#define EDMA_EEVAL 0x0320 +#define EDMA_DRAE 0x0340 /* 4 x 64 bits*/ +#define EDMA_QRAE 0x0380 /* 4 registers */ +#define EDMA_QUEEVTENTRY 0x0400 /* 2 x 16 registers */ +#define EDMA_QSTAT 0x0600 /* 2 registers */ +#define EDMA_QWMTHRA 0x0620 +#define EDMA_QWMTHRB 0x0624 +#define EDMA_CCSTAT 0x0640 + +#define EDMA_M 0x1000 /* global channel registers */ +#define EDMA_ECR 0x1008 +#define EDMA_ECRH 0x100C +#define EDMA_SHADOW0 0x2000 /* 4 shadow regions */ +#define EDMA_PARM 0x4000 /* PaRAM entries */ + +#define PARM_OFFSET(param_no) (EDMA_PARM + ((param_no) << 5)) + +#define EDMA_DCHMAP 0x0100 /* 64 registers */ + +/* CCCFG register */ +#define GET_NUM_DMACH(x) (x & 0x7) /* bits 0-2 */ +#define GET_NUM_QDMACH(x) ((x & 0x70) >> 4) /* bits 4-6 */ +#define GET_NUM_PAENTRY(x) ((x & 0x7000) >> 12) /* bits 12-14 */ +#define GET_NUM_EVQUE(x) ((x & 0x70000) >> 16) /* bits 16-18 */ +#define GET_NUM_REGN(x) ((x & 0x300000) >> 20) /* bits 20-21 */ +#define CHMAP_EXIST BIT(24) /* * Max of 20 segments per channel to conserve PaRAM slots @@ -59,6 +124,37 @@ #define EDMA_MAX_SLOTS MAX_NR_SG #define EDMA_DESCRIPTORS 16 +#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ +#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ +#define EDMA_CONT_PARAMS_ANY 1001 +#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 +#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 + +/* PaRAM slots are laid out like this */ +struct edmacc_param { + u32 opt; + u32 src; + u32 a_b_cnt; + u32 dst; + u32 src_dst_bidx; + u32 link_bcntrld; + u32 src_dst_cidx; + u32 ccnt; +} __packed; + +/* fields in edmacc_param.opt */ +#define SAM BIT(0) +#define DAM BIT(1) +#define SYNCDIM BIT(2) +#define STATIC BIT(3) +#define EDMA_FWID (0x07 << 8) +#define TCCMODE BIT(11) +#define EDMA_TCC(t) ((t) << 12) +#define TCINTEN BIT(20) +#define ITCINTEN BIT(21) +#define TCCHEN BIT(22) +#define 
ITCCHEN BIT(23) + struct edma_pset { u32 len; dma_addr_t addr; @@ -105,26 +201,524 @@ struct edma_desc { struct edma_cc; +struct edma_tc { + struct device_node *node; + u16 id; +}; + struct edma_chan { struct virt_dma_chan vchan; struct list_head node; struct edma_desc *edesc; struct edma_cc *ecc; + struct edma_tc *tc; int ch_num; bool alloced; + bool hw_triggered; int slot[EDMA_MAX_SLOTS]; int missed; struct dma_slave_config cfg; }; struct edma_cc { - int ctlr; + struct device *dev; + struct edma_soc_info *info; + void __iomem *base; + int id; + bool legacy_mode; + + /* eDMA3 resource information */ + unsigned num_channels; + unsigned num_qchannels; + unsigned num_region; + unsigned num_slots; + unsigned num_tc; + bool chmap_exist; + enum dma_event_q default_queue; + + /* + * The slot_inuse bit for each PaRAM slot is clear unless the slot is + * in use by Linux or if it is allocated to be used by DSP. + */ + unsigned long *slot_inuse; + struct dma_device dma_slave; - struct edma_chan slave_chans[EDMA_CHANS]; - int num_slave_chans; + struct dma_device *dma_memcpy; + struct edma_chan *slave_chans; + struct edma_tc *tc_list; int dummy_slot; }; +/* dummy param set used to (re)initialize parameter RAM slots */ +static const struct edmacc_param dummy_paramset = { + .link_bcntrld = 0xffff, + .ccnt = 1, +}; + +#define EDMA_BINDING_LEGACY 0 +#define EDMA_BINDING_TPCC 1 +static const struct of_device_id edma_of_ids[] = { + { + .compatible = "ti,edma3", + .data = (void *)EDMA_BINDING_LEGACY, + }, + { + .compatible = "ti,edma3-tpcc", + .data = (void *)EDMA_BINDING_TPCC, + }, + {} +}; + +static const struct of_device_id edma_tptc_of_ids[] = { + { .compatible = "ti,edma3-tptc", }, + {} +}; + +static inline unsigned int edma_read(struct edma_cc *ecc, int offset) +{ + return (unsigned int)__raw_readl(ecc->base + offset); +} + +static inline void edma_write(struct edma_cc *ecc, int offset, int val) +{ + __raw_writel(val, ecc->base + offset); +} + +static inline void edma_modify(struct edma_cc *ecc, int offset, unsigned and, + unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + val |= or; + edma_write(ecc, offset, val); +} + +static inline void edma_and(struct edma_cc *ecc, int offset, unsigned and) +{ + unsigned val = edma_read(ecc, offset); + + val &= and; + edma_write(ecc, offset, val); +} + +static inline void edma_or(struct edma_cc *ecc, int offset, unsigned or) +{ + unsigned val = edma_read(ecc, offset); + + val |= or; + edma_write(ecc, offset, val); +} + +static inline unsigned int edma_read_array(struct edma_cc *ecc, int offset, + int i) +{ + return edma_read(ecc, offset + (i << 2)); +} + +static inline void edma_write_array(struct edma_cc *ecc, int offset, int i, + unsigned val) +{ + edma_write(ecc, offset + (i << 2), val); +} + +static inline void edma_modify_array(struct edma_cc *ecc, int offset, int i, + unsigned and, unsigned or) +{ + edma_modify(ecc, offset + (i << 2), and, or); +} + +static inline void edma_or_array(struct edma_cc *ecc, int offset, int i, + unsigned or) +{ + edma_or(ecc, offset + (i << 2), or); +} + +static inline void edma_or_array2(struct edma_cc *ecc, int offset, int i, int j, + unsigned or) +{ + edma_or(ecc, offset + ((i * 2 + j) << 2), or); +} + +static inline void edma_write_array2(struct edma_cc *ecc, int offset, int i, + int j, unsigned val) +{ + edma_write(ecc, offset + ((i * 2 + j) << 2), val); +} + +static inline unsigned int edma_shadow0_read(struct edma_cc *ecc, int offset) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset); +} + +static 
inline unsigned int edma_shadow0_read_array(struct edma_cc *ecc, + int offset, int i) +{ + return edma_read(ecc, EDMA_SHADOW0 + offset + (i << 2)); +} + +static inline void edma_shadow0_write(struct edma_cc *ecc, int offset, + unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset, val); +} + +static inline void edma_shadow0_write_array(struct edma_cc *ecc, int offset, + int i, unsigned val) +{ + edma_write(ecc, EDMA_SHADOW0 + offset + (i << 2), val); +} + +static inline unsigned int edma_param_read(struct edma_cc *ecc, int offset, + int param_no) +{ + return edma_read(ecc, EDMA_PARM + offset + (param_no << 5)); +} + +static inline void edma_param_write(struct edma_cc *ecc, int offset, + int param_no, unsigned val) +{ + edma_write(ecc, EDMA_PARM + offset + (param_no << 5), val); +} + +static inline void edma_param_modify(struct edma_cc *ecc, int offset, + int param_no, unsigned and, unsigned or) +{ + edma_modify(ecc, EDMA_PARM + offset + (param_no << 5), and, or); +} + +static inline void edma_param_and(struct edma_cc *ecc, int offset, int param_no, + unsigned and) +{ + edma_and(ecc, EDMA_PARM + offset + (param_no << 5), and); +} + +static inline void edma_param_or(struct edma_cc *ecc, int offset, int param_no, + unsigned or) +{ + edma_or(ecc, EDMA_PARM + offset + (param_no << 5), or); +} + +static inline void set_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + set_bit(offset + (len - 1), p); +} + +static inline void clear_bits(int offset, int len, unsigned long *p) +{ + for (; len > 0; len--) + clear_bit(offset + (len - 1), p); +} + +static void edma_assign_priority_to_queue(struct edma_cc *ecc, int queue_no, + int priority) +{ + int bit = queue_no * 4; + + edma_modify(ecc, EDMA_QUEPRI, ~(0x7 << bit), ((priority & 0x7) << bit)); +} + +static void edma_set_chmap(struct edma_chan *echan, int slot) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + if (ecc->chmap_exist) { + slot = EDMA_CHAN_SLOT(slot); + edma_write_array(ecc, EDMA_DCHMAP, channel, (slot << 5)); + } +} + +static void edma_setup_interrupt(struct edma_chan *echan, bool enable) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + if (enable) { + edma_shadow0_write_array(ecc, SH_ICR, channel >> 5, + BIT(channel & 0x1f)); + edma_shadow0_write_array(ecc, SH_IESR, channel >> 5, + BIT(channel & 0x1f)); + } else { + edma_shadow0_write_array(ecc, SH_IECR, channel >> 5, + BIT(channel & 0x1f)); + } +} + +/* + * paRAM slot management functions + */ +static void edma_write_slot(struct edma_cc *ecc, unsigned slot, + const struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_toio(ecc->base + PARM_OFFSET(slot), param, PARM_SIZE); +} + +static void edma_read_slot(struct edma_cc *ecc, unsigned slot, + struct edmacc_param *param) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + memcpy_fromio(param, ecc->base + PARM_OFFSET(slot), PARM_SIZE); +} + +/** + * edma_alloc_slot - allocate DMA parameter RAM + * @ecc: pointer to edma_cc struct + * @slot: specific slot to allocate; negative for "any unused slot" + * + * This allocates a parameter RAM slot, initializing it to hold a + * dummy transfer. Slots allocated using this routine have not been + * mapped to a hardware DMA channel, and will normally be used by + * linking to them from a slot associated with a DMA channel. 
+ * + * Normal use is to pass EDMA_SLOT_ANY as the @slot, but specific + * slots may be allocated on behalf of DSP firmware. + * + * Returns the number of the slot, else negative errno. + */ +static int edma_alloc_slot(struct edma_cc *ecc, int slot) +{ + if (slot > 0) { + slot = EDMA_CHAN_SLOT(slot); + /* Requesting entry paRAM slot for a HW triggered channel. */ + if (ecc->chmap_exist && slot < ecc->num_channels) + slot = EDMA_SLOT_ANY; + } + + if (slot < 0) { + if (ecc->chmap_exist) + slot = 0; + else + slot = ecc->num_channels; + for (;;) { + slot = find_next_zero_bit(ecc->slot_inuse, + ecc->num_slots, + slot); + if (slot == ecc->num_slots) + return -ENOMEM; + if (!test_and_set_bit(slot, ecc->slot_inuse)) + break; + } + } else if (slot >= ecc->num_slots) { + return -EINVAL; + } else if (test_and_set_bit(slot, ecc->slot_inuse)) { + return -EBUSY; + } + + edma_write_slot(ecc, slot, &dummy_paramset); + + return EDMA_CTLR_CHAN(ecc->id, slot); +} + +static void edma_free_slot(struct edma_cc *ecc, unsigned slot) +{ + slot = EDMA_CHAN_SLOT(slot); + if (slot >= ecc->num_slots) + return; + + edma_write_slot(ecc, slot, &dummy_paramset); + clear_bit(slot, ecc->slot_inuse); +} + +/** + * edma_link - link one parameter RAM slot to another + * @ecc: pointer to edma_cc struct + * @from: parameter RAM slot originating the link + * @to: parameter RAM slot which is the link target + * + * The originating slot should not be part of any active DMA transfer. + */ +static void edma_link(struct edma_cc *ecc, unsigned from, unsigned to) +{ + if (unlikely(EDMA_CTLR(from) != EDMA_CTLR(to))) + dev_warn(ecc->dev, "Ignoring eDMA instance for linking\n"); + + from = EDMA_CHAN_SLOT(from); + to = EDMA_CHAN_SLOT(to); + if (from >= ecc->num_slots || to >= ecc->num_slots) + return; + + edma_param_modify(ecc, PARM_LINK_BCNTRLD, from, 0xffff0000, + PARM_OFFSET(to)); +} + +/** + * edma_get_position - returns the current transfer point + * @ecc: pointer to edma_cc struct + * @slot: parameter RAM slot being examined + * @dst: true selects the dest position, false the source + * + * Returns the position of the current active slot + */ +static dma_addr_t edma_get_position(struct edma_cc *ecc, unsigned slot, + bool dst) +{ + u32 offs; + + slot = EDMA_CHAN_SLOT(slot); + offs = PARM_OFFSET(slot); + offs += dst ? PARM_DST : PARM_SRC; + + return edma_read(ecc, offs); +} + +/* + * Channels with event associations will be triggered by their hardware + * events, and channels without such associations will be triggered by + * software. (At this writing there is no interface for using software + * triggers except with channels that don't support hardware triggers.) 
+ */ +static void edma_start(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + if (!echan->hw_triggered) { + /* EDMA channels without event association */ + dev_dbg(ecc->dev, "ESR%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_ESR, j)); + edma_shadow0_write_array(ecc, SH_ESR, j, mask); + } else { + /* EDMA channel with event association */ + dev_dbg(ecc->dev, "ER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_ER, j)); + /* Clear any pending event or error */ + edma_write_array(ecc, EDMA_ECR, j, mask); + edma_write_array(ecc, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_shadow0_write_array(ecc, SH_EESR, j, mask); + dev_dbg(ecc->dev, "EER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_EER, j)); + } +} + +static void edma_stop(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_EECR, j, mask); + edma_shadow0_write_array(ecc, SH_ECR, j, mask); + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_write_array(ecc, EDMA_EMCR, j, mask); + + /* clear possibly pending completion interrupt */ + edma_shadow0_write_array(ecc, SH_ICR, j, mask); + + dev_dbg(ecc->dev, "EER%d %08x\n", j, + edma_shadow0_read_array(ecc, SH_EER, j)); + + /* REVISIT: consider guarding against inappropriate event + * chaining by overwriting with dummy_paramset. + */ +} + +/* + * Temporarily disable EDMA hardware events on the specified channel, + * preventing them from triggering new transfers + */ +static void edma_pause(struct edma_chan *echan) +{ + int channel = EDMA_CHAN_SLOT(echan->ch_num); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(echan->ecc, SH_EECR, channel >> 5, mask); +} + +/* Re-enable EDMA hardware events on the specified channel. 
*/ +static void edma_resume(struct edma_chan *echan) +{ + int channel = EDMA_CHAN_SLOT(echan->ch_num); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(echan->ecc, SH_EESR, channel >> 5, mask); +} + +static void edma_trigger_channel(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + unsigned int mask = BIT(channel & 0x1f); + + edma_shadow0_write_array(ecc, SH_ESR, (channel >> 5), mask); + + dev_dbg(ecc->dev, "ESR%d %08x\n", (channel >> 5), + edma_shadow0_read_array(ecc, SH_ESR, (channel >> 5))); +} + +static void edma_clean_channel(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int j = (channel >> 5); + unsigned int mask = BIT(channel & 0x1f); + + dev_dbg(ecc->dev, "EMR%d %08x\n", j, edma_read_array(ecc, EDMA_EMR, j)); + edma_shadow0_write_array(ecc, SH_ECR, j, mask); + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, mask); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, mask); + edma_write(ecc, EDMA_CCERRCLR, BIT(16) | BIT(1) | BIT(0)); +} + +/* Move channel to a specific event queue */ +static void edma_assign_channel_eventq(struct edma_chan *echan, + enum dma_event_q eventq_no) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + int bit = (channel & 0x7) * 4; + + /* default to low priority queue */ + if (eventq_no == EVENTQ_DEFAULT) + eventq_no = ecc->default_queue; + if (eventq_no >= ecc->num_tc) + return; + + eventq_no &= 7; + edma_modify_array(ecc, EDMA_DMAQNUM, (channel >> 3), ~(0x7 << bit), + eventq_no << bit); +} + +static int edma_alloc_channel(struct edma_chan *echan, + enum dma_event_q eventq_no) +{ + struct edma_cc *ecc = echan->ecc; + int channel = EDMA_CHAN_SLOT(echan->ch_num); + + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, channel >> 5, BIT(channel & 0x1f)); + + /* ensure no events are pending */ + edma_stop(echan); + + edma_setup_interrupt(echan, true); + + edma_assign_channel_eventq(echan, eventq_no); + + return 0; +} + +static void edma_free_channel(struct edma_chan *echan) +{ + /* ensure no events are pending */ + edma_stop(echan); + /* REVISIT should probably take out of shadow region 0 */ + edma_setup_interrupt(echan, false); +} + static inline struct edma_cc *to_edma_cc(struct dma_device *d) { return container_of(d, struct edma_cc, dma_slave); @@ -135,8 +729,7 @@ static inline struct edma_chan *to_edma_chan(struct dma_chan *c) return container_of(c, struct edma_chan, vchan.chan); } -static inline struct edma_desc -*to_edma_desc(struct dma_async_tx_descriptor *tx) +static inline struct edma_desc *to_edma_desc(struct dma_async_tx_descriptor *tx) { return container_of(tx, struct edma_desc, vdesc.tx); } @@ -149,20 +742,17 @@ static void edma_desc_free(struct virt_dma_desc *vdesc) /* Dispatch a queued descriptor to the controller (caller holds lock) */ static void edma_execute(struct edma_chan *echan) { + struct edma_cc *ecc = echan->ecc; struct virt_dma_desc *vdesc; struct edma_desc *edesc; struct device *dev = echan->vchan.chan.device->dev; int i, j, left, nslots; - /* If either we processed all psets or we're still not started */ - if (!echan->edesc || - echan->edesc->pset_nr == echan->edesc->processed) { - /* Get next vdesc */ + if (!echan->edesc) { + /* Setup is needed for the first transfer */ vdesc = vchan_next_desc(&echan->vchan); - if (!vdesc) { - echan->edesc = NULL; + if (!vdesc) return; - } 
list_del(&vdesc->node); echan->edesc = to_edma_desc(&vdesc->tx); } @@ -177,32 +767,32 @@ static void edma_execute(struct edma_chan *echan) /* Write descriptor PaRAM set(s) */ for (i = 0; i < nslots; i++) { j = i + edesc->processed; - edma_write_slot(echan->slot[i], &edesc->pset[j].param); + edma_write_slot(ecc, echan->slot[i], &edesc->pset[j].param); edesc->sg_len += edesc->pset[j].len; - dev_vdbg(echan->vchan.chan.device->dev, - "\n pset[%d]:\n" - " chnum\t%d\n" - " slot\t%d\n" - " opt\t%08x\n" - " src\t%08x\n" - " dst\t%08x\n" - " abcnt\t%08x\n" - " ccnt\t%08x\n" - " bidx\t%08x\n" - " cidx\t%08x\n" - " lkrld\t%08x\n", - j, echan->ch_num, echan->slot[i], - edesc->pset[j].param.opt, - edesc->pset[j].param.src, - edesc->pset[j].param.dst, - edesc->pset[j].param.a_b_cnt, - edesc->pset[j].param.ccnt, - edesc->pset[j].param.src_dst_bidx, - edesc->pset[j].param.src_dst_cidx, - edesc->pset[j].param.link_bcntrld); + dev_vdbg(dev, + "\n pset[%d]:\n" + " chnum\t%d\n" + " slot\t%d\n" + " opt\t%08x\n" + " src\t%08x\n" + " dst\t%08x\n" + " abcnt\t%08x\n" + " ccnt\t%08x\n" + " bidx\t%08x\n" + " cidx\t%08x\n" + " lkrld\t%08x\n", + j, echan->ch_num, echan->slot[i], + edesc->pset[j].param.opt, + edesc->pset[j].param.src, + edesc->pset[j].param.dst, + edesc->pset[j].param.a_b_cnt, + edesc->pset[j].param.ccnt, + edesc->pset[j].param.src_dst_bidx, + edesc->pset[j].param.src_dst_cidx, + edesc->pset[j].param.link_bcntrld); /* Link to the previous slot if not the last set */ if (i != (nslots - 1)) - edma_link(echan->slot[i], echan->slot[i+1]); + edma_link(ecc, echan->slot[i], echan->slot[i + 1]); } edesc->processed += nslots; @@ -214,34 +804,32 @@ static void edma_execute(struct edma_chan *echan) */ if (edesc->processed == edesc->pset_nr) { if (edesc->cyclic) - edma_link(echan->slot[nslots-1], echan->slot[1]); + edma_link(ecc, echan->slot[nslots - 1], echan->slot[1]); else - edma_link(echan->slot[nslots-1], + edma_link(ecc, echan->slot[nslots - 1], echan->ecc->dummy_slot); } - if (edesc->processed <= MAX_NR_SG) { + if (echan->missed) { + /* + * This happens due to setup times between intermediate + * transfers in long SG lists which have to be broken up into + * transfers of MAX_NR_SG + */ + dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); + edma_clean_channel(echan); + edma_stop(echan); + edma_start(echan); + edma_trigger_channel(echan); + echan->missed = 0; + } else if (edesc->processed <= MAX_NR_SG) { dev_dbg(dev, "first transfer starting on channel %d\n", echan->ch_num); - edma_start(echan->ch_num); + edma_start(echan); } else { dev_dbg(dev, "chan: %d: completed %d elements, resuming\n", echan->ch_num, edesc->processed); - edma_resume(echan->ch_num); - } - - /* - * This happens due to setup times between intermediate transfers - * in long SG lists which have to be broken up into transfers of - * MAX_NR_SG - */ - if (echan->missed) { - dev_dbg(dev, "missed event on channel %d\n", echan->ch_num); - edma_clean_channel(echan->ch_num); - edma_stop(echan->ch_num); - edma_start(echan->ch_num); - edma_trigger_channel(echan->ch_num); - echan->missed = 0; + edma_resume(echan); } } @@ -259,20 +847,16 @@ static int edma_terminate_all(struct dma_chan *chan) * echan->edesc is NULL and exit.) 
*/ if (echan->edesc) { - int cyclic = echan->edesc->cyclic; - + edma_stop(echan); + /* Move the cyclic channel back to default queue */ + if (!echan->tc && echan->edesc->cyclic) + edma_assign_channel_eventq(echan, EVENTQ_DEFAULT); /* * free the running request descriptor * since it is not in any of the vdesc lists */ edma_desc_free(&echan->edesc->vdesc); - echan->edesc = NULL; - edma_stop(echan->ch_num); - /* Move the cyclic channel back to default queue */ - if (cyclic) - edma_assign_channel_eventq(echan->ch_num, - EVENTQ_DEFAULT); } vchan_get_all_descriptors(&echan->vchan, &head); @@ -300,11 +884,10 @@ static int edma_dma_pause(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - /* Pause/Resume only allowed with cyclic mode */ - if (!echan->edesc || !echan->edesc->cyclic) + if (!echan->edesc) return -EINVAL; - edma_pause(echan->ch_num); + edma_pause(echan); return 0; } @@ -312,11 +895,7 @@ static int edma_dma_resume(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - /* Pause/Resume only allowed with cyclic mode */ - if (!echan->edesc->cyclic) - return -EINVAL; - - edma_resume(echan->ch_num); + edma_resume(echan); return 0; } @@ -332,19 +911,17 @@ static int edma_dma_resume(struct dma_chan *chan) * @direction: Direction of the transfer */ static int edma_config_pset(struct dma_chan *chan, struct edma_pset *epset, - dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, - enum dma_slave_buswidth dev_width, unsigned int dma_length, - enum dma_transfer_direction direction) + dma_addr_t src_addr, dma_addr_t dst_addr, u32 burst, + unsigned int acnt, unsigned int dma_length, + enum dma_transfer_direction direction) { struct edma_chan *echan = to_edma_chan(chan); struct device *dev = chan->device->dev; struct edmacc_param *param = &epset->param; - int acnt, bcnt, ccnt, cidx; + int bcnt, ccnt, cidx; int src_bidx, dst_bidx, src_cidx, dst_cidx; int absync; - acnt = dev_width; - /* src/dst_maxburst == 0 is the same case as src/dst_maxburst == 1 */ if (!burst) burst = 1; @@ -480,8 +1057,8 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( return NULL; } - edesc = kzalloc(sizeof(*edesc) + sg_len * - sizeof(edesc->pset[0]), GFP_ATOMIC); + edesc = kzalloc(sizeof(*edesc) + sg_len * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__); return NULL; @@ -498,8 +1075,7 @@ static struct dma_async_tx_descriptor *edma_prep_slave_sg( for (i = 0; i < nslots; i++) { if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(EDMA_CTLR(echan->ch_num), - EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -546,36 +1122,98 @@ static struct dma_async_tx_descriptor *edma_prep_dma_memcpy( struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, size_t len, unsigned long tx_flags) { - int ret; + int ret, nslots; struct edma_desc *edesc; struct device *dev = chan->device->dev; struct edma_chan *echan = to_edma_chan(chan); + unsigned int width, pset_len; if (unlikely(!echan || !len)) return NULL; - edesc = kzalloc(sizeof(*edesc) + sizeof(edesc->pset[0]), GFP_ATOMIC); + if (len < SZ_64K) { + /* + * Transfer size less than 64K can be handled with one paRAM + * slot and with one burst. + * ACNT = length + */ + width = len; + pset_len = len; + nslots = 1; + } else { + /* + * Transfer size bigger than 64K will be handled with maximum of + * two paRAM slots. + * slot1: (full_length / 32767) times 32767 bytes bursts. 
+ * ACNT = 32767, length1: (full_length / 32767) * 32767 + * slot2: the remaining amount of data after slot1. + * ACNT = full_length - length1, length2 = ACNT + * + * When the full_length is multibple of 32767 one slot can be + * used to complete the transfer. + */ + width = SZ_32K - 1; + pset_len = rounddown(len, width); + /* One slot is enough for lengths multiple of (SZ_32K -1) */ + if (unlikely(pset_len == len)) + nslots = 1; + else + nslots = 2; + } + + edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_dbg(dev, "Failed to allocate a descriptor\n"); return NULL; } - edesc->pset_nr = 1; + edesc->pset_nr = nslots; + edesc->residue = edesc->residue_stat = len; + edesc->direction = DMA_MEM_TO_MEM; + edesc->echan = echan; ret = edma_config_pset(chan, &edesc->pset[0], src, dest, 1, - DMA_SLAVE_BUSWIDTH_4_BYTES, len, DMA_MEM_TO_MEM); - if (ret < 0) + width, pset_len, DMA_MEM_TO_MEM); + if (ret < 0) { + kfree(edesc); return NULL; + } edesc->absync = ret; - /* - * Enable intermediate transfer chaining to re-trigger channel - * on completion of every TR, and enable transfer-completion - * interrupt on completion of the whole transfer. - */ edesc->pset[0].param.opt |= ITCCHEN; - edesc->pset[0].param.opt |= TCINTEN; + if (nslots == 1) { + /* Enable transfer complete interrupt */ + edesc->pset[0].param.opt |= TCINTEN; + } else { + /* Enable transfer complete chaining for the first slot */ + edesc->pset[0].param.opt |= TCCHEN; + + if (echan->slot[1] < 0) { + echan->slot[1] = edma_alloc_slot(echan->ecc, + EDMA_SLOT_ANY); + if (echan->slot[1] < 0) { + kfree(edesc); + dev_err(dev, "%s: Failed to allocate slot\n", + __func__); + return NULL; + } + } + dest += pset_len; + src += pset_len; + pset_len = width = len % (SZ_32K - 1); + + ret = edma_config_pset(chan, &edesc->pset[1], src, dest, 1, + width, pset_len, DMA_MEM_TO_MEM); + if (ret < 0) { + kfree(edesc); + return NULL; + } + + edesc->pset[1].param.opt |= ITCCHEN; + edesc->pset[1].param.opt |= TCINTEN; + } return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } @@ -634,8 +1272,8 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( if (nslots > MAX_NR_SG) return NULL; - edesc = kzalloc(sizeof(*edesc) + nslots * - sizeof(edesc->pset[0]), GFP_ATOMIC); + edesc = kzalloc(sizeof(*edesc) + nslots * sizeof(edesc->pset[0]), + GFP_ATOMIC); if (!edesc) { dev_err(dev, "%s: Failed to allocate a descriptor\n", __func__); return NULL; @@ -654,8 +1292,7 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( /* Allocate a PaRAM slot, if needed */ if (echan->slot[i] < 0) { echan->slot[i] = - edma_alloc_slot(EDMA_CTLR(echan->ch_num), - EDMA_SLOT_ANY); + edma_alloc_slot(echan->ecc, EDMA_SLOT_ANY); if (echan->slot[i] < 0) { kfree(edesc); dev_err(dev, "%s: Failed to allocate slot\n", @@ -716,128 +1353,281 @@ static struct dma_async_tx_descriptor *edma_prep_dma_cyclic( } /* Place the cyclic channel to highest priority queue */ - edma_assign_channel_eventq(echan->ch_num, EVENTQ_0); + if (!echan->tc) + edma_assign_channel_eventq(echan, EVENTQ_0); return vchan_tx_prep(&echan->vchan, &edesc->vdesc, tx_flags); } -static void edma_callback(unsigned ch_num, u16 ch_status, void *data) +static void edma_completion_handler(struct edma_chan *echan) { - struct edma_chan *echan = data; struct device *dev = echan->vchan.chan.device->dev; - struct edma_desc *edesc; - struct edmacc_param p; + struct edma_desc *edesc = echan->edesc; - edesc = echan->edesc; + if (!edesc) + return; - /* Pause the channel for 
non-cyclic */ - if (!edesc || (edesc && !edesc->cyclic)) - edma_pause(echan->ch_num); - - switch (ch_status) { - case EDMA_DMA_COMPLETE: - spin_lock(&echan->vchan.lock); - - if (edesc) { - if (edesc->cyclic) { - vchan_cyclic_callback(&edesc->vdesc); - } else if (edesc->processed == edesc->pset_nr) { - dev_dbg(dev, "Transfer complete, stopping channel %d\n", ch_num); - edesc->residue = 0; - edma_stop(echan->ch_num); - vchan_cookie_complete(&edesc->vdesc); - edma_execute(echan); - } else { - dev_dbg(dev, "Intermediate transfer complete on channel %d\n", ch_num); - - /* Update statistics for tx_status */ - edesc->residue -= edesc->sg_len; - edesc->residue_stat = edesc->residue; - edesc->processed_stat = edesc->processed; - - edma_execute(echan); - } + spin_lock(&echan->vchan.lock); + if (edesc->cyclic) { + vchan_cyclic_callback(&edesc->vdesc); + spin_unlock(&echan->vchan.lock); + return; + } else if (edesc->processed == edesc->pset_nr) { + edesc->residue = 0; + edma_stop(echan); + vchan_cookie_complete(&edesc->vdesc); + echan->edesc = NULL; + + dev_dbg(dev, "Transfer completed on channel %d\n", + echan->ch_num); + } else { + dev_dbg(dev, "Sub transfer completed on channel %d\n", + echan->ch_num); + + edma_pause(echan); + + /* Update statistics for tx_status */ + edesc->residue -= edesc->sg_len; + edesc->residue_stat = edesc->residue; + edesc->processed_stat = edesc->processed; + } + edma_execute(echan); + + spin_unlock(&echan->vchan.lock); +} + +/* eDMA interrupt handler */ +static irqreturn_t dma_irq_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int ctlr; + u32 sh_ier; + u32 sh_ipr; + u32 bank; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_vdbg(ecc->dev, "dma_irq_handler\n"); + + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 0); + if (!sh_ipr) { + sh_ipr = edma_shadow0_read_array(ecc, SH_IPR, 1); + if (!sh_ipr) + return IRQ_NONE; + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 1); + bank = 1; + } else { + sh_ier = edma_shadow0_read_array(ecc, SH_IER, 0); + bank = 0; + } + + do { + u32 slot; + u32 channel; + + slot = __ffs(sh_ipr); + sh_ipr &= ~(BIT(slot)); + + if (sh_ier & BIT(slot)) { + channel = (bank << 5) | slot; + /* Clear the corresponding IPR bits */ + edma_shadow0_write_array(ecc, SH_ICR, bank, BIT(slot)); + edma_completion_handler(&ecc->slave_chans[channel]); } + } while (sh_ipr); - spin_unlock(&echan->vchan.lock); + edma_shadow0_write(ecc, SH_IEVAL, 1); + return IRQ_HANDLED; +} + +static void edma_error_handler(struct edma_chan *echan) +{ + struct edma_cc *ecc = echan->ecc; + struct device *dev = echan->vchan.chan.device->dev; + struct edmacc_param p; - break; - case EDMA_DMA_CC_ERROR: - spin_lock(&echan->vchan.lock); + if (!echan->edesc) + return; - edma_read_slot(EDMA_CHAN_SLOT(echan->slot[0]), &p); + spin_lock(&echan->vchan.lock); + edma_read_slot(ecc, echan->slot[0], &p); + /* + * Issue later based on missed flag which will be sure + * to happen as: + * (1) we finished transmitting an intermediate slot and + * edma_execute is coming up. + * (2) or we finished current transfer and issue will + * call edma_execute. + * + * Important note: issuing can be dangerous here and + * lead to some nasty recursion when we are in a NULL + * slot. So we avoid doing so and set the missed flag. 
+ */ + if (p.a_b_cnt == 0 && p.ccnt == 0) { + dev_dbg(dev, "Error on null slot, setting miss\n"); + echan->missed = 1; + } else { /* - * Issue later based on missed flag which will be sure - * to happen as: - * (1) we finished transmitting an intermediate slot and - * edma_execute is coming up. - * (2) or we finished current transfer and issue will - * call edma_execute. - * - * Important note: issuing can be dangerous here and - * lead to some nasty recursion when we are in a NULL - * slot. So we avoid doing so and set the missed flag. + * The slot is already programmed but the event got + * missed, so its safe to issue it here. */ - if (p.a_b_cnt == 0 && p.ccnt == 0) { - dev_dbg(dev, "Error occurred, looks like slot is null, just setting miss\n"); - echan->missed = 1; - } else { - /* - * The slot is already programmed but the event got - * missed, so its safe to issue it here. - */ - dev_dbg(dev, "Error occurred but slot is non-null, TRIGGERING\n"); - edma_clean_channel(echan->ch_num); - edma_stop(echan->ch_num); - edma_start(echan->ch_num); - edma_trigger_channel(echan->ch_num); + dev_dbg(dev, "Missed event, TRIGGERING\n"); + edma_clean_channel(echan); + edma_stop(echan); + edma_start(echan); + edma_trigger_channel(echan); + } + spin_unlock(&echan->vchan.lock); +} + +static inline bool edma_error_pending(struct edma_cc *ecc) +{ + if (edma_read_array(ecc, EDMA_EMR, 0) || + edma_read_array(ecc, EDMA_EMR, 1) || + edma_read(ecc, EDMA_QEMR) || edma_read(ecc, EDMA_CCERR)) + return true; + + return false; +} + +/* eDMA error interrupt handler */ +static irqreturn_t dma_ccerr_handler(int irq, void *data) +{ + struct edma_cc *ecc = data; + int i, j; + int ctlr; + unsigned int cnt = 0; + unsigned int val; + + ctlr = ecc->id; + if (ctlr < 0) + return IRQ_NONE; + + dev_vdbg(ecc->dev, "dma_ccerr_handler\n"); + + if (!edma_error_pending(ecc)) + return IRQ_NONE; + + while (1) { + /* Event missed register(s) */ + for (j = 0; j < 2; j++) { + unsigned long emr; + + val = edma_read_array(ecc, EDMA_EMR, j); + if (!val) + continue; + + dev_dbg(ecc->dev, "EMR%d 0x%08x\n", j, val); + emr = val; + for (i = find_next_bit(&emr, 32, 0); i < 32; + i = find_next_bit(&emr, 32, i + 1)) { + int k = (j << 5) + i; + + /* Clear the corresponding EMR bits */ + edma_write_array(ecc, EDMA_EMCR, j, BIT(i)); + /* Clear any SER */ + edma_shadow0_write_array(ecc, SH_SECR, j, + BIT(i)); + edma_error_handler(&ecc->slave_chans[k]); + } } - spin_unlock(&echan->vchan.lock); + val = edma_read(ecc, EDMA_QEMR); + if (val) { + dev_dbg(ecc->dev, "QEMR 0x%02x\n", val); + /* Not reported, just clear the interrupt reason. */ + edma_write(ecc, EDMA_QEMCR, val); + edma_shadow0_write(ecc, SH_QSECR, val); + } + + val = edma_read(ecc, EDMA_CCERR); + if (val) { + dev_warn(ecc->dev, "CCERR 0x%08x\n", val); + /* Not reported, just clear the interrupt reason. 
*/ + edma_write(ecc, EDMA_CCERRCLR, val); + } - break; - default: - break; + if (!edma_error_pending(ecc)) + break; + cnt++; + if (cnt > 10) + break; } + edma_write(ecc, EDMA_EEVAL, 1); + return IRQ_HANDLED; +} + +static void edma_tc_set_pm_state(struct edma_tc *tc, bool enable) +{ + struct platform_device *tc_pdev; + int ret; + + if (!IS_ENABLED(CONFIG_OF) || !tc) + return; + + tc_pdev = of_find_device_by_node(tc->node); + if (!tc_pdev) { + pr_err("%s: TPTC device is not found\n", __func__); + return; + } + if (!pm_runtime_enabled(&tc_pdev->dev)) + pm_runtime_enable(&tc_pdev->dev); + + if (enable) + ret = pm_runtime_get_sync(&tc_pdev->dev); + else + ret = pm_runtime_put_sync(&tc_pdev->dev); + + if (ret < 0) + pr_err("%s: pm_runtime_%s_sync() failed for %s\n", __func__, + enable ? "get" : "put", dev_name(&tc_pdev->dev)); } /* Alloc channel resources */ static int edma_alloc_chan_resources(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - struct device *dev = chan->device->dev; + struct edma_cc *ecc = echan->ecc; + struct device *dev = ecc->dev; + enum dma_event_q eventq_no = EVENTQ_DEFAULT; int ret; - int a_ch_num; - LIST_HEAD(descs); - - a_ch_num = edma_alloc_channel(echan->ch_num, edma_callback, - echan, EVENTQ_DEFAULT); - if (a_ch_num < 0) { - ret = -ENODEV; - goto err_no_chan; + if (echan->tc) { + eventq_no = echan->tc->id; + } else if (ecc->tc_list) { + /* memcpy channel */ + echan->tc = &ecc->tc_list[ecc->info->default_queue]; + eventq_no = echan->tc->id; } - if (a_ch_num != echan->ch_num) { - dev_err(dev, "failed to allocate requested channel %u:%u\n", - EDMA_CTLR(echan->ch_num), + ret = edma_alloc_channel(echan, eventq_no); + if (ret) + return ret; + + echan->slot[0] = edma_alloc_slot(ecc, echan->ch_num); + if (echan->slot[0] < 0) { + dev_err(dev, "Entry slot allocation failed for channel %u\n", EDMA_CHAN_SLOT(echan->ch_num)); - ret = -ENODEV; - goto err_wrong_chan; + goto err_slot; } + /* Set up channel -> slot mapping for the entry slot */ + edma_set_chmap(echan, echan->slot[0]); echan->alloced = true; - echan->slot[0] = echan->ch_num; - dev_dbg(dev, "allocated channel %d for %u:%u\n", echan->ch_num, - EDMA_CTLR(echan->ch_num), EDMA_CHAN_SLOT(echan->ch_num)); + dev_dbg(dev, "Got eDMA channel %d for virt channel %d (%s trigger)\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id, + echan->hw_triggered ? 
"HW" : "SW"); + + edma_tc_set_pm_state(echan->tc, true); return 0; -err_wrong_chan: - edma_free_channel(a_ch_num); -err_no_chan: +err_slot: + edma_free_channel(echan); return ret; } @@ -845,29 +1635,37 @@ err_no_chan: static void edma_free_chan_resources(struct dma_chan *chan) { struct edma_chan *echan = to_edma_chan(chan); - struct device *dev = chan->device->dev; + struct device *dev = echan->ecc->dev; int i; /* Terminate transfers */ - edma_stop(echan->ch_num); + edma_stop(echan); vchan_free_chan_resources(&echan->vchan); /* Free EDMA PaRAM slots */ - for (i = 1; i < EDMA_MAX_SLOTS; i++) { + for (i = 0; i < EDMA_MAX_SLOTS; i++) { if (echan->slot[i] >= 0) { - edma_free_slot(echan->slot[i]); + edma_free_slot(echan->ecc, echan->slot[i]); echan->slot[i] = -1; } } + /* Set entry slot to the dummy slot */ + edma_set_chmap(echan, echan->ecc->dummy_slot); + /* Free EDMA channel */ if (echan->alloced) { - edma_free_channel(echan->ch_num); + edma_free_channel(echan); echan->alloced = false; } - dev_dbg(dev, "freeing channel for %u\n", echan->ch_num); + edma_tc_set_pm_state(echan->tc, false); + echan->tc = NULL; + echan->hw_triggered = false; + + dev_dbg(dev, "Free eDMA channel %d for virt channel %d\n", + EDMA_CHAN_SLOT(echan->ch_num), chan->chan_id); } /* Send pending descriptor to hardware */ @@ -893,7 +1691,7 @@ static u32 edma_residue(struct edma_desc *edesc) * We always read the dst/src position from the first RamPar * pset. That's the one which is active now. */ - pos = edma_get_position(edesc->echan->slot[0], dst); + pos = edma_get_position(edesc->echan->ecc, edesc->echan->slot[0], dst); /* * Cyclic is simple. Just subtract pset[0].addr from pos. @@ -954,19 +1752,99 @@ static enum dma_status edma_tx_status(struct dma_chan *chan, return ret; } -static void __init edma_chan_init(struct edma_cc *ecc, - struct dma_device *dma, - struct edma_chan *echans) +static bool edma_is_memcpy_channel(int ch_num, s32 *memcpy_channels) +{ + if (!memcpy_channels) + return false; + while (*memcpy_channels != -1) { + if (*memcpy_channels == ch_num) + return true; + memcpy_channels++; + } + return false; +} + +#define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) + +static void edma_dma_init(struct edma_cc *ecc, bool legacy_mode) { + struct dma_device *s_ddev = &ecc->dma_slave; + struct dma_device *m_ddev = NULL; + s32 *memcpy_channels = ecc->info->memcpy_channels; int i, j; - for (i = 0; i < EDMA_CHANS; i++) { - struct edma_chan *echan = &echans[i]; - echan->ch_num = EDMA_CTLR_CHAN(ecc->ctlr, i); + dma_cap_zero(s_ddev->cap_mask); + dma_cap_set(DMA_SLAVE, s_ddev->cap_mask); + dma_cap_set(DMA_CYCLIC, s_ddev->cap_mask); + if (ecc->legacy_mode && !memcpy_channels) { + dev_warn(ecc->dev, + "Legacy memcpy is enabled, things might not work\n"); + + dma_cap_set(DMA_MEMCPY, s_ddev->cap_mask); + s_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + s_ddev->directions = BIT(DMA_MEM_TO_MEM); + } + + s_ddev->device_prep_slave_sg = edma_prep_slave_sg; + s_ddev->device_prep_dma_cyclic = edma_prep_dma_cyclic; + s_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + s_ddev->device_free_chan_resources = edma_free_chan_resources; + s_ddev->device_issue_pending = edma_issue_pending; + s_ddev->device_tx_status = edma_tx_status; + s_ddev->device_config = edma_slave_config; + s_ddev->device_pause = edma_dma_pause; + s_ddev->device_resume = edma_dma_resume; + s_ddev->device_terminate_all = 
edma_terminate_all; + + s_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + s_ddev->directions |= (BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV)); + s_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + s_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&s_ddev->channels); + + if (memcpy_channels) { + m_ddev = devm_kzalloc(ecc->dev, sizeof(*m_ddev), GFP_KERNEL); + ecc->dma_memcpy = m_ddev; + + dma_cap_zero(m_ddev->cap_mask); + dma_cap_set(DMA_MEMCPY, m_ddev->cap_mask); + + m_ddev->device_prep_dma_memcpy = edma_prep_dma_memcpy; + m_ddev->device_alloc_chan_resources = edma_alloc_chan_resources; + m_ddev->device_free_chan_resources = edma_free_chan_resources; + m_ddev->device_issue_pending = edma_issue_pending; + m_ddev->device_tx_status = edma_tx_status; + m_ddev->device_config = edma_slave_config; + m_ddev->device_pause = edma_dma_pause; + m_ddev->device_resume = edma_dma_resume; + m_ddev->device_terminate_all = edma_terminate_all; + + m_ddev->src_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->dst_addr_widths = EDMA_DMA_BUSWIDTHS; + m_ddev->directions = BIT(DMA_MEM_TO_MEM); + m_ddev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + m_ddev->dev = ecc->dev; + INIT_LIST_HEAD(&m_ddev->channels); + } else if (!ecc->legacy_mode) { + dev_info(ecc->dev, "memcpy is disabled\n"); + } + + for (i = 0; i < ecc->num_channels; i++) { + struct edma_chan *echan = &ecc->slave_chans[i]; + echan->ch_num = EDMA_CTLR_CHAN(ecc->id, i); echan->ecc = ecc; echan->vchan.desc_free = edma_desc_free; - vchan_init(&echan->vchan, dma); + if (m_ddev && edma_is_memcpy_channel(i, memcpy_channels)) + vchan_init(&echan->vchan, m_ddev); + else + vchan_init(&echan->vchan, s_ddev); INIT_LIST_HEAD(&echan->node); for (j = 0; j < EDMA_MAX_SLOTS; j++) @@ -974,85 +1852,493 @@ static void __init edma_chan_init(struct edma_cc *ecc, } } -#define EDMA_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ - BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ - BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ - BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) - -static void edma_dma_init(struct edma_cc *ecc, struct dma_device *dma, - struct device *dev) +static int edma_setup_from_hw(struct device *dev, struct edma_soc_info *pdata, + struct edma_cc *ecc) { - dma->device_prep_slave_sg = edma_prep_slave_sg; - dma->device_prep_dma_cyclic = edma_prep_dma_cyclic; - dma->device_prep_dma_memcpy = edma_prep_dma_memcpy; - dma->device_alloc_chan_resources = edma_alloc_chan_resources; - dma->device_free_chan_resources = edma_free_chan_resources; - dma->device_issue_pending = edma_issue_pending; - dma->device_tx_status = edma_tx_status; - dma->device_config = edma_slave_config; - dma->device_pause = edma_dma_pause; - dma->device_resume = edma_dma_resume; - dma->device_terminate_all = edma_terminate_all; + int i; + u32 value, cccfg; + s8 (*queue_priority_map)[2]; + + /* Decode the eDMA3 configuration from CCCFG register */ + cccfg = edma_read(ecc, EDMA_CCCFG); - dma->src_addr_widths = EDMA_DMA_BUSWIDTHS; - dma->dst_addr_widths = EDMA_DMA_BUSWIDTHS; - dma->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); - dma->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + value = GET_NUM_REGN(cccfg); + ecc->num_region = BIT(value); - dma->dev = dev; + value = GET_NUM_DMACH(cccfg); + ecc->num_channels = BIT(value + 1); + + value = GET_NUM_QDMACH(cccfg); + ecc->num_qchannels = value * 2; + + value = GET_NUM_PAENTRY(cccfg); + ecc->num_slots = BIT(value + 4); + + value = GET_NUM_EVQUE(cccfg); + ecc->num_tc = value + 1; + + ecc->chmap_exist = (cccfg & 
CHMAP_EXIST) ? true : false; + + dev_dbg(dev, "eDMA3 CC HW configuration (cccfg: 0x%08x):\n", cccfg); + dev_dbg(dev, "num_region: %u\n", ecc->num_region); + dev_dbg(dev, "num_channels: %u\n", ecc->num_channels); + dev_dbg(dev, "num_qchannels: %u\n", ecc->num_qchannels); + dev_dbg(dev, "num_slots: %u\n", ecc->num_slots); + dev_dbg(dev, "num_tc: %u\n", ecc->num_tc); + dev_dbg(dev, "chmap_exist: %s\n", ecc->chmap_exist ? "yes" : "no"); + + /* Nothing need to be done if queue priority is provided */ + if (pdata->queue_priority_mapping) + return 0; /* - * code using dma memcpy must make sure alignment of - * length is at dma->copy_align boundary. + * Configure TC/queue priority as follows: + * Q0 - priority 0 + * Q1 - priority 1 + * Q2 - priority 2 + * ... + * The meaning of priority numbers: 0 highest priority, 7 lowest + * priority. So Q0 is the highest priority queue and the last queue has + * the lowest priority. */ - dma->copy_align = DMA_SLAVE_BUSWIDTH_4_BYTES; + queue_priority_map = devm_kcalloc(dev, ecc->num_tc + 1, sizeof(s8), + GFP_KERNEL); + if (!queue_priority_map) + return -ENOMEM; + + for (i = 0; i < ecc->num_tc; i++) { + queue_priority_map[i][0] = i; + queue_priority_map[i][1] = i; + } + queue_priority_map[i][0] = -1; + queue_priority_map[i][1] = -1; + + pdata->queue_priority_mapping = queue_priority_map; + /* Default queue has the lowest priority */ + pdata->default_queue = i - 1; - INIT_LIST_HEAD(&dma->channels); + return 0; } +#if IS_ENABLED(CONFIG_OF) +static int edma_xbar_event_map(struct device *dev, struct edma_soc_info *pdata, + size_t sz) +{ + const char pname[] = "ti,edma-xbar-event-map"; + struct resource res; + void __iomem *xbar; + s16 (*xbar_chans)[2]; + size_t nelm = sz / sizeof(s16); + u32 shift, offset, mux; + int ret, i; + + xbar_chans = devm_kcalloc(dev, nelm + 2, sizeof(s16), GFP_KERNEL); + if (!xbar_chans) + return -ENOMEM; + + ret = of_address_to_resource(dev->of_node, 1, &res); + if (ret) + return -ENOMEM; + + xbar = devm_ioremap(dev, res.start, resource_size(&res)); + if (!xbar) + return -ENOMEM; + + ret = of_property_read_u16_array(dev->of_node, pname, (u16 *)xbar_chans, + nelm); + if (ret) + return -EIO; + + /* Invalidate last entry for the other user of this mess */ + nelm >>= 1; + xbar_chans[nelm][0] = -1; + xbar_chans[nelm][1] = -1; + + for (i = 0; i < nelm; i++) { + shift = (xbar_chans[i][1] & 0x03) << 3; + offset = xbar_chans[i][1] & 0xfffffffc; + mux = readl(xbar + offset); + mux &= ~(0xff << shift); + mux |= xbar_chans[i][0] << shift; + writel(mux, (xbar + offset)); + } + + pdata->xbar_chans = (const s16 (*)[2]) xbar_chans; + return 0; +} + +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) +{ + struct edma_soc_info *info; + struct property *prop; + size_t sz; + int ret; + + info = devm_kzalloc(dev, sizeof(struct edma_soc_info), GFP_KERNEL); + if (!info) + return ERR_PTR(-ENOMEM); + + if (legacy_mode) { + prop = of_find_property(dev->of_node, "ti,edma-xbar-event-map", + &sz); + if (prop) { + ret = edma_xbar_event_map(dev, info, sz); + if (ret) + return ERR_PTR(ret); + } + return info; + } + + /* Get the list of channels allocated to be used for memcpy */ + prop = of_find_property(dev->of_node, "ti,edma-memcpy-channels", &sz); + if (prop) { + const char pname[] = "ti,edma-memcpy-channels"; + size_t nelm = sz / sizeof(s32); + s32 *memcpy_ch; + + memcpy_ch = devm_kcalloc(dev, nelm + 1, sizeof(s32), + GFP_KERNEL); + if (!memcpy_ch) + return ERR_PTR(-ENOMEM); + + ret = of_property_read_u32_array(dev->of_node, 
pname, + (u32 *)memcpy_ch, nelm); + if (ret) + return ERR_PTR(ret); + + memcpy_ch[nelm] = -1; + info->memcpy_channels = memcpy_ch; + } + + prop = of_find_property(dev->of_node, "ti,edma-reserved-slot-ranges", + &sz); + if (prop) { + const char pname[] = "ti,edma-reserved-slot-ranges"; + u32 (*tmp)[2]; + s16 (*rsv_slots)[2]; + size_t nelm = sz / sizeof(*tmp); + struct edma_rsv_info *rsv_info; + int i; + + if (!nelm) + return info; + + tmp = kcalloc(nelm, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ERR_PTR(-ENOMEM); + + rsv_info = devm_kzalloc(dev, sizeof(*rsv_info), GFP_KERNEL); + if (!rsv_info) { + kfree(tmp); + return ERR_PTR(-ENOMEM); + } + + rsv_slots = devm_kcalloc(dev, nelm + 1, sizeof(*rsv_slots), + GFP_KERNEL); + if (!rsv_slots) { + kfree(tmp); + return ERR_PTR(-ENOMEM); + } + + ret = of_property_read_u32_array(dev->of_node, pname, + (u32 *)tmp, nelm * 2); + if (ret) { + kfree(tmp); + return ERR_PTR(ret); + } + + for (i = 0; i < nelm; i++) { + rsv_slots[i][0] = tmp[i][0]; + rsv_slots[i][1] = tmp[i][1]; + } + rsv_slots[nelm][0] = -1; + rsv_slots[nelm][1] = -1; + + info->rsv = rsv_info; + info->rsv->rsv_slots = (const s16 (*)[2])rsv_slots; + + kfree(tmp); + } + + return info; +} + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct edma_cc *ecc = ofdma->of_dma_data; + struct dma_chan *chan = NULL; + struct edma_chan *echan; + int i; + + if (!ecc || dma_spec->args_count < 1) + return NULL; + + for (i = 0; i < ecc->num_channels; i++) { + echan = &ecc->slave_chans[i]; + if (echan->ch_num == dma_spec->args[0]) { + chan = &echan->vchan.chan; + break; + } + } + + if (!chan) + return NULL; + + if (echan->ecc->legacy_mode && dma_spec->args_count == 1) + goto out; + + if (!echan->ecc->legacy_mode && dma_spec->args_count == 2 && + dma_spec->args[1] < echan->ecc->num_tc) { + echan->tc = &echan->ecc->tc_list[dma_spec->args[1]]; + goto out; + } + + return NULL; +out: + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + return dma_get_slave_channel(chan); +} +#else +static struct edma_soc_info *edma_setup_info_from_dt(struct device *dev, + bool legacy_mode) +{ + return ERR_PTR(-EINVAL); +} + +static struct dma_chan *of_edma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + return NULL; +} +#endif + static int edma_probe(struct platform_device *pdev) { - struct edma_cc *ecc; + struct edma_soc_info *info = pdev->dev.platform_data; + s8 (*queue_priority_mapping)[2]; + int i, off, ln; + const s16 (*rsv_slots)[2]; + const s16 (*xbar_chans)[2]; + int irq; + char *irq_name; + struct resource *mem; + struct device_node *node = pdev->dev.of_node; + struct device *dev = &pdev->dev; + struct edma_cc *ecc; + bool legacy_mode = true; int ret; - ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (node) { + const struct of_device_id *match; + + match = of_match_node(edma_of_ids, node); + if (match && (u32)match->data == EDMA_BINDING_TPCC) + legacy_mode = false; + + info = edma_setup_info_from_dt(dev, legacy_mode); + if (IS_ERR(info)) { + dev_err(dev, "failed to get DT data\n"); + return PTR_ERR(info); + } + } + + if (!info) + return -ENODEV; + + pm_runtime_enable(dev); + ret = pm_runtime_get_sync(dev); + if (ret < 0) { + dev_err(dev, "pm_runtime_get_sync() failed\n"); + return ret; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret) return ret; - ecc = devm_kzalloc(&pdev->dev, sizeof(*ecc), GFP_KERNEL); + ecc = devm_kzalloc(dev, sizeof(*ecc), GFP_KERNEL); if 
(!ecc) { - dev_err(&pdev->dev, "Can't allocate controller\n"); + dev_err(dev, "Can't allocate controller\n"); return -ENOMEM; } - ecc->ctlr = pdev->id; - ecc->dummy_slot = edma_alloc_slot(ecc->ctlr, EDMA_SLOT_ANY); + ecc->dev = dev; + ecc->id = pdev->id; + ecc->legacy_mode = legacy_mode; + /* When booting with DT the pdev->id is -1 */ + if (ecc->id < 0) + ecc->id = 0; + + mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "edma3_cc"); + if (!mem) { + dev_dbg(dev, "mem resource not found, using index 0\n"); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + dev_err(dev, "no mem resource?\n"); + return -ENODEV; + } + } + ecc->base = devm_ioremap_resource(dev, mem); + if (IS_ERR(ecc->base)) + return PTR_ERR(ecc->base); + + platform_set_drvdata(pdev, ecc); + + /* Get eDMA3 configuration from IP */ + ret = edma_setup_from_hw(dev, info, ecc); + if (ret) + return ret; + + /* Allocate memory based on the information we got from the IP */ + ecc->slave_chans = devm_kcalloc(dev, ecc->num_channels, + sizeof(*ecc->slave_chans), GFP_KERNEL); + if (!ecc->slave_chans) + return -ENOMEM; + + ecc->slot_inuse = devm_kcalloc(dev, BITS_TO_LONGS(ecc->num_slots), + sizeof(unsigned long), GFP_KERNEL); + if (!ecc->slot_inuse) + return -ENOMEM; + + ecc->default_queue = info->default_queue; + + for (i = 0; i < ecc->num_slots; i++) + edma_write_slot(ecc, i, &dummy_paramset); + + if (info->rsv) { + /* Set the reserved slots in inuse list */ + rsv_slots = info->rsv->rsv_slots; + if (rsv_slots) { + for (i = 0; rsv_slots[i][0] != -1; i++) { + off = rsv_slots[i][0]; + ln = rsv_slots[i][1]; + set_bits(off, ln, ecc->slot_inuse); + } + } + } + + /* Clear the xbar mapped channels in unused list */ + xbar_chans = info->xbar_chans; + if (xbar_chans) { + for (i = 0; xbar_chans[i][1] != -1; i++) { + off = xbar_chans[i][1]; + } + } + + irq = platform_get_irq_byname(pdev, "edma3_ccint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 0); + + if (irq >= 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_irq_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCINT (%d) failed --> %d\n", irq, ret); + return ret; + } + } + + irq = platform_get_irq_byname(pdev, "edma3_ccerrint"); + if (irq < 0 && node) + irq = irq_of_parse_and_map(node, 2); + + if (irq >= 0) { + irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_ccerrint", + dev_name(dev)); + ret = devm_request_irq(dev, irq, dma_ccerr_handler, 0, irq_name, + ecc); + if (ret) { + dev_err(dev, "CCERRINT (%d) failed --> %d\n", irq, ret); + return ret; + } + } + + ecc->dummy_slot = edma_alloc_slot(ecc, EDMA_SLOT_ANY); if (ecc->dummy_slot < 0) { - dev_err(&pdev->dev, "Can't allocate PaRAM dummy slot\n"); + dev_err(dev, "Can't allocate PaRAM dummy slot\n"); return ecc->dummy_slot; } - dma_cap_zero(ecc->dma_slave.cap_mask); - dma_cap_set(DMA_SLAVE, ecc->dma_slave.cap_mask); - dma_cap_set(DMA_CYCLIC, ecc->dma_slave.cap_mask); - dma_cap_set(DMA_MEMCPY, ecc->dma_slave.cap_mask); + queue_priority_mapping = info->queue_priority_mapping; + + if (!ecc->legacy_mode) { + int lowest_priority = 0; + struct of_phandle_args tc_args; + + ecc->tc_list = devm_kcalloc(dev, ecc->num_tc, + sizeof(*ecc->tc_list), GFP_KERNEL); + if (!ecc->tc_list) + return -ENOMEM; + + for (i = 0;; i++) { + ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", + 1, i, &tc_args); + if (ret || i == ecc->num_tc) + break; + + ecc->tc_list[i].node = tc_args.np; + ecc->tc_list[i].id = i; + queue_priority_mapping[i][1] = 
tc_args.args[0]; + if (queue_priority_mapping[i][1] > lowest_priority) { + lowest_priority = queue_priority_mapping[i][1]; + info->default_queue = i; + } + } + } + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); - edma_dma_init(ecc, &ecc->dma_slave, &pdev->dev); + for (i = 0; i < ecc->num_region; i++) { + edma_write_array2(ecc, EDMA_DRAE, i, 0, 0x0); + edma_write_array2(ecc, EDMA_DRAE, i, 1, 0x0); + edma_write_array(ecc, EDMA_QRAE, i, 0x0); + } + ecc->info = info; - edma_chan_init(ecc, &ecc->dma_slave, ecc->slave_chans); + /* Init the dma device and channels */ + edma_dma_init(ecc, legacy_mode); + + for (i = 0; i < ecc->num_channels; i++) { + /* Assign all channels to the default queue */ + edma_assign_channel_eventq(&ecc->slave_chans[i], + info->default_queue); + /* Set entry slot to the dummy slot */ + edma_set_chmap(&ecc->slave_chans[i], ecc->dummy_slot); + } ret = dma_async_device_register(&ecc->dma_slave); - if (ret) + if (ret) { + dev_err(dev, "slave ddev registration failed (%d)\n", ret); goto err_reg1; + } - platform_set_drvdata(pdev, ecc); + if (ecc->dma_memcpy) { + ret = dma_async_device_register(ecc->dma_memcpy); + if (ret) { + dev_err(dev, "memcpy ddev registration failed (%d)\n", + ret); + dma_async_device_unregister(&ecc->dma_slave); + goto err_reg1; + } + } - dev_info(&pdev->dev, "TI EDMA DMA engine driver\n"); + if (node) + of_dma_controller_register(node, of_edma_xlate, ecc); + + dev_info(dev, "TI EDMA DMA engine driver\n"); return 0; err_reg1: - edma_free_slot(ecc->dummy_slot); + edma_free_slot(ecc, ecc->dummy_slot); return ret; } @@ -1061,33 +2347,112 @@ static int edma_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; struct edma_cc *ecc = dev_get_drvdata(dev); + if (dev->of_node) + of_dma_controller_free(dev->of_node); dma_async_device_unregister(&ecc->dma_slave); - edma_free_slot(ecc->dummy_slot); + if (ecc->dma_memcpy) + dma_async_device_unregister(ecc->dma_memcpy); + edma_free_slot(ecc, ecc->dummy_slot); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int edma_pm_suspend(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) { + edma_setup_interrupt(&echan[i], false); + edma_tc_set_pm_state(echan[i].tc, false); + } + } return 0; } +static int edma_pm_resume(struct device *dev) +{ + struct edma_cc *ecc = dev_get_drvdata(dev); + struct edma_chan *echan = ecc->slave_chans; + int i; + s8 (*queue_priority_mapping)[2]; + + queue_priority_mapping = ecc->info->queue_priority_mapping; + + /* Event queue priority mapping */ + for (i = 0; queue_priority_mapping[i][0] != -1; i++) + edma_assign_priority_to_queue(ecc, queue_priority_mapping[i][0], + queue_priority_mapping[i][1]); + + for (i = 0; i < ecc->num_channels; i++) { + if (echan[i].alloced) { + /* ensure access through shadow region 0 */ + edma_or_array2(ecc, EDMA_DRAE, 0, i >> 5, + BIT(i & 0x1f)); + + edma_setup_interrupt(&echan[i], true); + + /* Set up channel -> slot mapping for the entry slot */ + edma_set_chmap(&echan[i], echan[i].slot[0]); + + edma_tc_set_pm_state(echan[i].tc, true); + } + } + + return 0; +} +#endif + +static const struct dev_pm_ops edma_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(edma_pm_suspend, edma_pm_resume) +}; + static struct platform_driver edma_driver = { .probe = edma_probe, .remove = 
edma_remove, .driver = { - .name = "edma-dma-engine", + .name = "edma", + .pm = &edma_pm_ops, + .of_match_table = edma_of_ids, + }, +}; + +static struct platform_driver edma_tptc_driver = { + .driver = { + .name = "edma3-tptc", + .of_match_table = edma_tptc_of_ids, }, }; bool edma_filter_fn(struct dma_chan *chan, void *param) { + bool match = false; + if (chan->device->dev->driver == &edma_driver.driver) { struct edma_chan *echan = to_edma_chan(chan); unsigned ch_req = *(unsigned *)param; - return ch_req == echan->ch_num; + if (ch_req == echan->ch_num) { + /* The channel is going to be used as HW synchronized */ + echan->hw_triggered = true; + match = true; + } } - return false; + return match; } EXPORT_SYMBOL(edma_filter_fn); static int edma_init(void) { + int ret; + + ret = platform_driver_register(&edma_tptc_driver); + if (ret) + return ret; + return platform_driver_register(&edma_driver); } subsys_initcall(edma_init); @@ -1095,6 +2460,7 @@ subsys_initcall(edma_init); static void __exit edma_exit(void) { platform_driver_unregister(&edma_driver); + platform_driver_unregister(&edma_tptc_driver); } module_exit(edma_exit); diff --git a/kernel/drivers/dma/ep93xx_dma.c b/kernel/drivers/dma/ep93xx_dma.c index 24e5290fa..57ff46284 100644 --- a/kernel/drivers/dma/ep93xx_dma.c +++ b/kernel/drivers/dma/ep93xx_dma.c @@ -1364,7 +1364,7 @@ static int __init ep93xx_dma_probe(struct platform_device *pdev) return ret; } -static struct platform_device_id ep93xx_dma_driver_ids[] = { +static const struct platform_device_id ep93xx_dma_driver_ids[] = { { "ep93xx-dma-m2p", 0 }, { "ep93xx-dma-m2m", 1 }, { }, diff --git a/kernel/drivers/dma/fsl-edma.c b/kernel/drivers/dma/fsl-edma.c index 09e2842d1..915eec3cc 100644 --- a/kernel/drivers/dma/fsl-edma.c +++ b/kernel/drivers/dma/fsl-edma.c @@ -881,10 +881,6 @@ static int fsl_edma_probe(struct platform_device *pdev) } - ret = fsl_edma_irq_init(pdev, fsl_edma); - if (ret) - return ret; - fsl_edma->big_endian = of_property_read_bool(np, "big-endian"); INIT_LIST_HEAD(&fsl_edma->dma_dev.channels); @@ -900,6 +896,11 @@ static int fsl_edma_probe(struct platform_device *pdev) fsl_edma_chan_mux(fsl_chan, 0, false); } + edma_writel(fsl_edma, ~0, fsl_edma->membase + EDMA_INTR); + ret = fsl_edma_irq_init(pdev, fsl_edma); + if (ret) + return ret; + dma_cap_set(DMA_PRIVATE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_SLAVE, fsl_edma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, fsl_edma->dma_dev.cap_mask); diff --git a/kernel/drivers/dma/fsldma.c b/kernel/drivers/dma/fsldma.c index 300f821f1..2209f75fd 100644 --- a/kernel/drivers/dma/fsldma.c +++ b/kernel/drivers/dma/fsldma.c @@ -1512,6 +1512,7 @@ static const struct of_device_id fsldma_of_ids[] = { { .compatible = "fsl,elo-dma", }, {} }; +MODULE_DEVICE_TABLE(of, fsldma_of_ids); static struct platform_driver fsldma_of_driver = { .driver = { diff --git a/kernel/drivers/dma/hsu/Kconfig b/kernel/drivers/dma/hsu/Kconfig index 2810dca70..c70841731 100644 --- a/kernel/drivers/dma/hsu/Kconfig +++ b/kernel/drivers/dma/hsu/Kconfig @@ -5,10 +5,5 @@ config HSU_DMA select DMA_VIRTUAL_CHANNELS config HSU_DMA_PCI - tristate "High Speed UART DMA PCI driver" - depends on PCI - select HSU_DMA - help - Support the High Speed UART DMA on the platfroms that - enumerate it as a PCI device. For example, Intel Medfield - has integrated this HSU DMA controller. 
+ tristate + depends on HSU_DMA && PCI diff --git a/kernel/drivers/dma/hsu/hsu.c b/kernel/drivers/dma/hsu/hsu.c index f42f71e37..823ad728a 100644 --- a/kernel/drivers/dma/hsu/hsu.c +++ b/kernel/drivers/dma/hsu/hsu.c @@ -99,21 +99,13 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) static void hsu_dma_stop_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_chan_disable(hsuc); hsu_chan_writel(hsuc, HSU_CH_DCR, 0); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_channel(struct hsu_dma_chan *hsuc) { - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); hsu_dma_chan_start(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); } static void hsu_dma_start_transfer(struct hsu_dma_chan *hsuc) @@ -139,9 +131,9 @@ static u32 hsu_dma_chan_get_sr(struct hsu_dma_chan *hsuc) unsigned long flags; u32 sr; - spin_lock_irqsave(&hsuc->lock, flags); + spin_lock_irqsave(&hsuc->vchan.lock, flags); sr = hsu_chan_readl(hsuc, HSU_CH_SR); - spin_unlock_irqrestore(&hsuc->lock, flags); + spin_unlock_irqrestore(&hsuc->vchan.lock, flags); return sr; } @@ -154,7 +146,7 @@ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr) u32 sr; /* Sanity check */ - if (nr >= chip->pdata->nr_channels) + if (nr >= chip->hsu->nr_channels) return IRQ_NONE; hsuc = &chip->hsu->chan[nr]; @@ -273,14 +265,11 @@ static size_t hsu_dma_active_desc_size(struct hsu_dma_chan *hsuc) struct hsu_dma_desc *desc = hsuc->desc; size_t bytes = hsu_dma_desc_size(desc); int i; - unsigned long flags; - spin_lock_irqsave(&hsuc->lock, flags); i = desc->active % HSU_DMA_CHAN_NR_DESC; do { bytes += hsu_chan_readl(hsuc, HSU_CH_DxTSR(i)); } while (--i >= 0); - spin_unlock_irqrestore(&hsuc->lock, flags); return bytes; } @@ -327,24 +316,6 @@ static int hsu_dma_slave_config(struct dma_chan *chan, return 0; } -static void hsu_dma_chan_deactivate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_disable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - -static void hsu_dma_chan_activate(struct hsu_dma_chan *hsuc) -{ - unsigned long flags; - - spin_lock_irqsave(&hsuc->lock, flags); - hsu_chan_enable(hsuc); - spin_unlock_irqrestore(&hsuc->lock, flags); -} - static int hsu_dma_pause(struct dma_chan *chan) { struct hsu_dma_chan *hsuc = to_hsu_dma_chan(chan); @@ -352,7 +323,7 @@ static int hsu_dma_pause(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_IN_PROGRESS) { - hsu_dma_chan_deactivate(hsuc); + hsu_chan_disable(hsuc); hsuc->desc->status = DMA_PAUSED; } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -368,7 +339,7 @@ static int hsu_dma_resume(struct dma_chan *chan) spin_lock_irqsave(&hsuc->vchan.lock, flags); if (hsuc->desc && hsuc->desc->status == DMA_PAUSED) { hsuc->desc->status = DMA_IN_PROGRESS; - hsu_dma_chan_activate(hsuc); + hsu_chan_enable(hsuc); } spin_unlock_irqrestore(&hsuc->vchan.lock, flags); @@ -404,7 +375,6 @@ static void hsu_dma_free_chan_resources(struct dma_chan *chan) int hsu_dma_probe(struct hsu_dma_chip *chip) { struct hsu_dma *hsu; - struct hsu_dma_platform_data *pdata = chip->pdata; void __iomem *addr = chip->regs + chip->offset; unsigned short i; int ret; @@ -415,25 +385,16 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) chip->hsu = hsu; - if (!pdata) { - pdata = devm_kzalloc(chip->dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return -ENOMEM; - - chip->pdata = pdata; + /* Calculate nr_channels 
from the IO space length */ + hsu->nr_channels = (chip->length - chip->offset) / HSU_DMA_CHAN_LENGTH; - /* Guess nr_channels from the IO space length */ - pdata->nr_channels = (chip->length - chip->offset) / - HSU_DMA_CHAN_LENGTH; - } - - hsu->chan = devm_kcalloc(chip->dev, pdata->nr_channels, + hsu->chan = devm_kcalloc(chip->dev, hsu->nr_channels, sizeof(*hsu->chan), GFP_KERNEL); if (!hsu->chan) return -ENOMEM; INIT_LIST_HEAD(&hsu->dma.channels); - for (i = 0; i < pdata->nr_channels; i++) { + for (i = 0; i < hsu->nr_channels; i++) { struct hsu_dma_chan *hsuc = &hsu->chan[i]; hsuc->vchan.desc_free = hsu_dma_desc_free; @@ -441,8 +402,6 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) hsuc->direction = (i & 0x1) ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV; hsuc->reg = addr + i * HSU_DMA_CHAN_LENGTH; - - spin_lock_init(&hsuc->lock); } dma_cap_set(DMA_SLAVE, hsu->dma.cap_mask); @@ -471,7 +430,7 @@ int hsu_dma_probe(struct hsu_dma_chip *chip) if (ret) return ret; - dev_info(chip->dev, "Found HSU DMA, %d channels\n", pdata->nr_channels); + dev_info(chip->dev, "Found HSU DMA, %d channels\n", hsu->nr_channels); return 0; } EXPORT_SYMBOL_GPL(hsu_dma_probe); @@ -483,7 +442,7 @@ int hsu_dma_remove(struct hsu_dma_chip *chip) dma_async_device_unregister(&hsu->dma); - for (i = 0; i < chip->pdata->nr_channels; i++) { + for (i = 0; i < hsu->nr_channels; i++) { struct hsu_dma_chan *hsuc = &hsu->chan[i]; tasklet_kill(&hsuc->vchan.task); diff --git a/kernel/drivers/dma/hsu/hsu.h b/kernel/drivers/dma/hsu/hsu.h index 0275233cf..f06579c6d 100644 --- a/kernel/drivers/dma/hsu/hsu.h +++ b/kernel/drivers/dma/hsu/hsu.h @@ -78,7 +78,6 @@ struct hsu_dma_chan { struct virt_dma_chan vchan; void __iomem *reg; - spinlock_t lock; /* hardware configuration */ enum dma_transfer_direction direction; @@ -108,6 +107,7 @@ struct hsu_dma { /* channels */ struct hsu_dma_chan *chan; + unsigned short nr_channels; }; static inline struct hsu_dma *to_hsu_dma(struct dma_device *ddev) diff --git a/kernel/drivers/dma/hsu/pci.c b/kernel/drivers/dma/hsu/pci.c index 77879e6dd..e2db76bd5 100644 --- a/kernel/drivers/dma/hsu/pci.c +++ b/kernel/drivers/dma/hsu/pci.c @@ -31,7 +31,7 @@ static irqreturn_t hsu_pci_irq(int irq, void *dev) irqreturn_t ret = IRQ_NONE; dmaisr = readl(chip->regs + HSU_PCI_DMAISR); - for (i = 0; i < chip->pdata->nr_channels; i++) { + for (i = 0; i < chip->hsu->nr_channels; i++) { if (dmaisr & 0x1) ret |= hsu_dma_irq(chip, i); dmaisr >>= 1; diff --git a/kernel/drivers/dma/idma64.c b/kernel/drivers/dma/idma64.c new file mode 100644 index 000000000..7d56b47e4 --- /dev/null +++ b/kernel/drivers/dma/idma64.c @@ -0,0 +1,712 @@ +/* + * Core driver for the Intel integrated DMA 64-bit + * + * Copyright (C) 2015 Intel Corporation + * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "idma64.h" + +/* Platform driver name */ +#define DRV_NAME "idma64" + +/* For now we support only two channels */ +#define IDMA64_NR_CHAN 2 + +/* ---------------------------------------------------------------------- */ + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_off(struct idma64 *idma64) +{ + unsigned short count = 100; + + dma_writel(idma64, CFG, 0); + + channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(BLOCK), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(SRC_TRAN), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(DST_TRAN), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + do { + cpu_relax(); + } while (dma_readl(idma64, CFG) & IDMA64_CFG_DMA_EN && --count); +} + +static void idma64_on(struct idma64 *idma64) +{ + dma_writel(idma64, CFG, IDMA64_CFG_DMA_EN); +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_chan_init(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + u32 cfghi = IDMA64C_CFGH_SRC_PER(1) | IDMA64C_CFGH_DST_PER(0); + u32 cfglo = 0; + + /* Set default burst alignment */ + cfglo |= IDMA64C_CFGL_DST_BURST_ALIGN | IDMA64C_CFGL_SRC_BURST_ALIGN; + + channel_writel(idma64c, CFG_LO, cfglo); + channel_writel(idma64c, CFG_HI, cfghi); + + /* Enable interrupts */ + channel_set_bit(idma64, MASK(XFER), idma64c->mask); + channel_set_bit(idma64, MASK(ERROR), idma64c->mask); + + /* + * Enforce the controller to be turned on. + * + * The iDMA is turned off in ->probe() and loses context during system + * suspend / resume cycle. That's why we have to enable it each time we + * use it. 
+ */ + idma64_on(idma64); +} + +static void idma64_chan_stop(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + channel_clear_bit(idma64, CH_EN, idma64c->mask); +} + +static void idma64_chan_start(struct idma64 *idma64, struct idma64_chan *idma64c) +{ + struct idma64_desc *desc = idma64c->desc; + struct idma64_hw_desc *hw = &desc->hw[0]; + + channel_writeq(idma64c, SAR, 0); + channel_writeq(idma64c, DAR, 0); + + channel_writel(idma64c, CTL_HI, IDMA64C_CTLH_BLOCK_TS(~0UL)); + channel_writel(idma64c, CTL_LO, IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN); + + channel_writeq(idma64c, LLP, hw->llp); + + channel_set_bit(idma64, CH_EN, idma64c->mask); +} + +static void idma64_stop_transfer(struct idma64_chan *idma64c) +{ + struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device); + + idma64_chan_stop(idma64, idma64c); +} + +static void idma64_start_transfer(struct idma64_chan *idma64c) +{ + struct idma64 *idma64 = to_idma64(idma64c->vchan.chan.device); + struct virt_dma_desc *vdesc; + + /* Get the next descriptor */ + vdesc = vchan_next_desc(&idma64c->vchan); + if (!vdesc) { + idma64c->desc = NULL; + return; + } + + list_del(&vdesc->node); + idma64c->desc = to_idma64_desc(vdesc); + + /* Configure the channel */ + idma64_chan_init(idma64, idma64c); + + /* Start the channel with a new descriptor */ + idma64_chan_start(idma64, idma64c); +} + +/* ---------------------------------------------------------------------- */ + +static void idma64_chan_irq(struct idma64 *idma64, unsigned short c, + u32 status_err, u32 status_xfer) +{ + struct idma64_chan *idma64c = &idma64->chan[c]; + struct idma64_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + desc = idma64c->desc; + if (desc) { + if (status_err & (1 << c)) { + dma_writel(idma64, CLEAR(ERROR), idma64c->mask); + desc->status = DMA_ERROR; + } else if (status_xfer & (1 << c)) { + dma_writel(idma64, CLEAR(XFER), idma64c->mask); + desc->status = DMA_COMPLETE; + vchan_cookie_complete(&desc->vdesc); + idma64_start_transfer(idma64c); + } + + /* idma64_start_transfer() updates idma64c->desc */ + if (idma64c->desc == NULL || desc->status == DMA_ERROR) + idma64_stop_transfer(idma64c); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); +} + +static irqreturn_t idma64_irq(int irq, void *dev) +{ + struct idma64 *idma64 = dev; + u32 status = dma_readl(idma64, STATUS_INT); + u32 status_xfer; + u32 status_err; + unsigned short i; + + dev_vdbg(idma64->dma.dev, "%s: status=%#x\n", __func__, status); + + /* Check if we have any interrupt from the DMA controller */ + if (!status) + return IRQ_NONE; + + /* Disable interrupts */ + channel_clear_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_clear_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + status_xfer = dma_readl(idma64, RAW(XFER)); + status_err = dma_readl(idma64, RAW(ERROR)); + + for (i = 0; i < idma64->dma.chancnt; i++) + idma64_chan_irq(idma64, i, status_err, status_xfer); + + /* Re-enable interrupts */ + channel_set_bit(idma64, MASK(XFER), idma64->all_chan_mask); + channel_set_bit(idma64, MASK(ERROR), idma64->all_chan_mask); + + return IRQ_HANDLED; +} + +/* ---------------------------------------------------------------------- */ + +static struct idma64_desc *idma64_alloc_desc(unsigned int ndesc) +{ + struct idma64_desc *desc; + + desc = kzalloc(sizeof(*desc), GFP_NOWAIT); + if (!desc) + return NULL; + + desc->hw = kcalloc(ndesc, sizeof(*desc->hw), GFP_NOWAIT); + if (!desc->hw) { + kfree(desc); + return NULL; + } + + return desc; +} + +static 
void idma64_desc_free(struct idma64_chan *idma64c, + struct idma64_desc *desc) +{ + struct idma64_hw_desc *hw; + + if (desc->ndesc) { + unsigned int i = desc->ndesc; + + do { + hw = &desc->hw[--i]; + dma_pool_free(idma64c->pool, hw->lli, hw->llp); + } while (i); + } + + kfree(desc->hw); + kfree(desc); +} + +static void idma64_vdesc_free(struct virt_dma_desc *vdesc) +{ + struct idma64_chan *idma64c = to_idma64_chan(vdesc->tx.chan); + + idma64_desc_free(idma64c, to_idma64_desc(vdesc)); +} + +static u64 idma64_hw_desc_fill(struct idma64_hw_desc *hw, + struct dma_slave_config *config, + enum dma_transfer_direction direction, u64 llp) +{ + struct idma64_lli *lli = hw->lli; + u64 sar, dar; + u32 ctlhi = IDMA64C_CTLH_BLOCK_TS(hw->len); + u32 ctllo = IDMA64C_CTLL_LLP_S_EN | IDMA64C_CTLL_LLP_D_EN; + u32 src_width, dst_width; + + if (direction == DMA_MEM_TO_DEV) { + sar = hw->phys; + dar = config->dst_addr; + ctllo |= IDMA64C_CTLL_DST_FIX | IDMA64C_CTLL_SRC_INC | + IDMA64C_CTLL_FC_M2P; + src_width = __ffs(sar | hw->len | 4); + dst_width = __ffs(config->dst_addr_width); + } else { /* DMA_DEV_TO_MEM */ + sar = config->src_addr; + dar = hw->phys; + ctllo |= IDMA64C_CTLL_DST_INC | IDMA64C_CTLL_SRC_FIX | + IDMA64C_CTLL_FC_P2M; + src_width = __ffs(config->src_addr_width); + dst_width = __ffs(dar | hw->len | 4); + } + + lli->sar = sar; + lli->dar = dar; + + lli->ctlhi = ctlhi; + lli->ctllo = ctllo | + IDMA64C_CTLL_SRC_MSIZE(config->src_maxburst) | + IDMA64C_CTLL_DST_MSIZE(config->dst_maxburst) | + IDMA64C_CTLL_DST_WIDTH(dst_width) | + IDMA64C_CTLL_SRC_WIDTH(src_width); + + lli->llp = llp; + return hw->llp; +} + +static void idma64_desc_fill(struct idma64_chan *idma64c, + struct idma64_desc *desc) +{ + struct dma_slave_config *config = &idma64c->config; + struct idma64_hw_desc *hw = &desc->hw[desc->ndesc - 1]; + struct idma64_lli *lli = hw->lli; + u64 llp = 0; + unsigned int i = desc->ndesc; + + /* Fill the hardware descriptors and link them to a list */ + do { + hw = &desc->hw[--i]; + llp = idma64_hw_desc_fill(hw, config, desc->direction, llp); + desc->length += hw->len; + } while (i); + + /* Trigger interrupt after last block */ + lli->ctllo |= IDMA64C_CTLL_INT_EN; +} + +static struct dma_async_tx_descriptor *idma64_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction direction, + unsigned long flags, void *context) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + struct idma64_desc *desc; + struct scatterlist *sg; + unsigned int i; + + desc = idma64_alloc_desc(sg_len); + if (!desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + struct idma64_hw_desc *hw = &desc->hw[i]; + + /* Allocate DMA capable memory for hardware descriptor */ + hw->lli = dma_pool_alloc(idma64c->pool, GFP_NOWAIT, &hw->llp); + if (!hw->lli) { + desc->ndesc = i; + idma64_desc_free(idma64c, desc); + return NULL; + } + + hw->phys = sg_dma_address(sg); + hw->len = sg_dma_len(sg); + } + + desc->ndesc = sg_len; + desc->direction = direction; + desc->status = DMA_IN_PROGRESS; + + idma64_desc_fill(idma64c, desc); + return vchan_tx_prep(&idma64c->vchan, &desc->vdesc, flags); +} + +static void idma64_issue_pending(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (vchan_issue_pending(&idma64c->vchan) && !idma64c->desc) + idma64_start_transfer(idma64c); + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); +} + +static size_t idma64_active_desc_size(struct 
idma64_chan *idma64c) +{ + struct idma64_desc *desc = idma64c->desc; + struct idma64_hw_desc *hw; + size_t bytes = desc->length; + u64 llp = channel_readq(idma64c, LLP); + u32 ctlhi = channel_readl(idma64c, CTL_HI); + unsigned int i = 0; + + do { + hw = &desc->hw[i]; + if (hw->llp == llp) + break; + bytes -= hw->len; + } while (++i < desc->ndesc); + + if (!i) + return bytes; + + /* The current chunk is not fully transferred yet */ + bytes += desc->hw[--i].len; + + return bytes - IDMA64C_CTLH_BLOCK_TS(ctlhi); +} + +static enum dma_status idma64_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, struct dma_tx_state *state) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + struct virt_dma_desc *vdesc; + enum dma_status status; + size_t bytes; + unsigned long flags; + + status = dma_cookie_status(chan, cookie, state); + if (status == DMA_COMPLETE) + return status; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + vdesc = vchan_find_desc(&idma64c->vchan, cookie); + if (idma64c->desc && cookie == idma64c->desc->vdesc.tx.cookie) { + bytes = idma64_active_desc_size(idma64c); + dma_set_residue(state, bytes); + status = idma64c->desc->status; + } else if (vdesc) { + bytes = to_idma64_desc(vdesc)->length; + dma_set_residue(state, bytes); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return status; +} + +static void convert_burst(u32 *maxburst) +{ + if (*maxburst) + *maxburst = __fls(*maxburst); + else + *maxburst = 0; +} + +static int idma64_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + /* Check if chan will be configured for slave transfers */ + if (!is_slave_direction(config->direction)) + return -EINVAL; + + memcpy(&idma64c->config, config, sizeof(idma64c->config)); + + convert_burst(&idma64c->config.src_maxburst); + convert_burst(&idma64c->config.dst_maxburst); + + return 0; +} + +static void idma64_chan_deactivate(struct idma64_chan *idma64c, bool drain) +{ + unsigned short count = 100; + u32 cfglo; + + cfglo = channel_readl(idma64c, CFG_LO); + if (drain) + cfglo |= IDMA64C_CFGL_CH_DRAIN; + else + cfglo &= ~IDMA64C_CFGL_CH_DRAIN; + + channel_writel(idma64c, CFG_LO, cfglo | IDMA64C_CFGL_CH_SUSP); + do { + udelay(1); + cfglo = channel_readl(idma64c, CFG_LO); + } while (!(cfglo & IDMA64C_CFGL_FIFO_EMPTY) && --count); +} + +static void idma64_chan_activate(struct idma64_chan *idma64c) +{ + u32 cfglo; + + cfglo = channel_readl(idma64c, CFG_LO); + channel_writel(idma64c, CFG_LO, cfglo & ~IDMA64C_CFGL_CH_SUSP); +} + +static int idma64_pause(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (idma64c->desc && idma64c->desc->status == DMA_IN_PROGRESS) { + idma64_chan_deactivate(idma64c, false); + idma64c->desc->status = DMA_PAUSED; + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return 0; +} + +static int idma64_resume(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + + spin_lock_irqsave(&idma64c->vchan.lock, flags); + if (idma64c->desc && idma64c->desc->status == DMA_PAUSED) { + idma64c->desc->status = DMA_IN_PROGRESS; + idma64_chan_activate(idma64c); + } + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + return 0; +} + +static int idma64_terminate_all(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + unsigned long flags; + LIST_HEAD(head); + + 
spin_lock_irqsave(&idma64c->vchan.lock, flags); + idma64_chan_deactivate(idma64c, true); + idma64_stop_transfer(idma64c); + if (idma64c->desc) { + idma64_vdesc_free(&idma64c->desc->vdesc); + idma64c->desc = NULL; + } + vchan_get_all_descriptors(&idma64c->vchan, &head); + spin_unlock_irqrestore(&idma64c->vchan.lock, flags); + + vchan_dma_desc_free_list(&idma64c->vchan, &head); + return 0; +} + +static int idma64_alloc_chan_resources(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + /* Create a pool of consistent memory blocks for hardware descriptors */ + idma64c->pool = dma_pool_create(dev_name(chan2dev(chan)), + chan->device->dev, + sizeof(struct idma64_lli), 8, 0); + if (!idma64c->pool) { + dev_err(chan2dev(chan), "No memory for descriptors\n"); + return -ENOMEM; + } + + return 0; +} + +static void idma64_free_chan_resources(struct dma_chan *chan) +{ + struct idma64_chan *idma64c = to_idma64_chan(chan); + + vchan_free_chan_resources(to_virt_chan(chan)); + dma_pool_destroy(idma64c->pool); + idma64c->pool = NULL; +} + +/* ---------------------------------------------------------------------- */ + +#define IDMA64_BUSWIDTHS \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) + +static int idma64_probe(struct idma64_chip *chip) +{ + struct idma64 *idma64; + unsigned short nr_chan = IDMA64_NR_CHAN; + unsigned short i; + int ret; + + idma64 = devm_kzalloc(chip->dev, sizeof(*idma64), GFP_KERNEL); + if (!idma64) + return -ENOMEM; + + idma64->regs = chip->regs; + chip->idma64 = idma64; + + idma64->chan = devm_kcalloc(chip->dev, nr_chan, sizeof(*idma64->chan), + GFP_KERNEL); + if (!idma64->chan) + return -ENOMEM; + + idma64->all_chan_mask = (1 << nr_chan) - 1; + + /* Turn off iDMA controller */ + idma64_off(idma64); + + ret = devm_request_irq(chip->dev, chip->irq, idma64_irq, IRQF_SHARED, + dev_name(chip->dev), idma64); + if (ret) + return ret; + + INIT_LIST_HEAD(&idma64->dma.channels); + for (i = 0; i < nr_chan; i++) { + struct idma64_chan *idma64c = &idma64->chan[i]; + + idma64c->vchan.desc_free = idma64_vdesc_free; + vchan_init(&idma64c->vchan, &idma64->dma); + + idma64c->regs = idma64->regs + i * IDMA64_CH_LENGTH; + idma64c->mask = BIT(i); + } + + dma_cap_set(DMA_SLAVE, idma64->dma.cap_mask); + dma_cap_set(DMA_PRIVATE, idma64->dma.cap_mask); + + idma64->dma.device_alloc_chan_resources = idma64_alloc_chan_resources; + idma64->dma.device_free_chan_resources = idma64_free_chan_resources; + + idma64->dma.device_prep_slave_sg = idma64_prep_slave_sg; + + idma64->dma.device_issue_pending = idma64_issue_pending; + idma64->dma.device_tx_status = idma64_tx_status; + + idma64->dma.device_config = idma64_slave_config; + idma64->dma.device_pause = idma64_pause; + idma64->dma.device_resume = idma64_resume; + idma64->dma.device_terminate_all = idma64_terminate_all; + + idma64->dma.src_addr_widths = IDMA64_BUSWIDTHS; + idma64->dma.dst_addr_widths = IDMA64_BUSWIDTHS; + idma64->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); + idma64->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + idma64->dma.dev = chip->dev; + + ret = dma_async_device_register(&idma64->dma); + if (ret) + return ret; + + dev_info(chip->dev, "Found Intel integrated DMA 64-bit\n"); + return 0; +} + +static int idma64_remove(struct idma64_chip *chip) +{ + struct idma64 *idma64 = chip->idma64; + unsigned short i; + + dma_async_device_unregister(&idma64->dma); + + /* + * Explicitly call devm_request_irq() to avoid the side effects 
with + * the scheduled tasklets. + */ + devm_free_irq(chip->dev, chip->irq, idma64); + + for (i = 0; i < idma64->dma.chancnt; i++) { + struct idma64_chan *idma64c = &idma64->chan[i]; + + tasklet_kill(&idma64c->vchan.task); + } + + return 0; +} + +/* ---------------------------------------------------------------------- */ + +static int idma64_platform_probe(struct platform_device *pdev) +{ + struct idma64_chip *chip; + struct device *dev = &pdev->dev; + struct resource *mem; + int ret; + + chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + chip->irq = platform_get_irq(pdev, 0); + if (chip->irq < 0) + return chip->irq; + + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + chip->regs = devm_ioremap_resource(dev, mem); + if (IS_ERR(chip->regs)) + return PTR_ERR(chip->regs); + + ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) + return ret; + + chip->dev = dev; + + ret = idma64_probe(chip); + if (ret) + return ret; + + platform_set_drvdata(pdev, chip); + return 0; +} + +static int idma64_platform_remove(struct platform_device *pdev) +{ + struct idma64_chip *chip = platform_get_drvdata(pdev); + + return idma64_remove(chip); +} + +#ifdef CONFIG_PM_SLEEP + +static int idma64_pm_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct idma64_chip *chip = platform_get_drvdata(pdev); + + idma64_off(chip->idma64); + return 0; +} + +static int idma64_pm_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct idma64_chip *chip = platform_get_drvdata(pdev); + + idma64_on(chip->idma64); + return 0; +} + +#endif /* CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops idma64_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(idma64_pm_suspend, idma64_pm_resume) +}; + +static struct platform_driver idma64_platform_driver = { + .probe = idma64_platform_probe, + .remove = idma64_platform_remove, + .driver = { + .name = DRV_NAME, + .pm = &idma64_dev_pm_ops, + }, +}; + +module_platform_driver(idma64_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("iDMA64 core driver"); +MODULE_AUTHOR("Andy Shevchenko <andriy.shevchenko@linux.intel.com>"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/kernel/drivers/dma/idma64.h b/kernel/drivers/dma/idma64.h new file mode 100644 index 000000000..f6aeff0af --- /dev/null +++ b/kernel/drivers/dma/idma64.h @@ -0,0 +1,229 @@ +/* + * Driver for the Intel integrated DMA 64-bit + * + * Copyright (C) 2015 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __DMA_IDMA64_H__ +#define __DMA_IDMA64_H__ + +#include <linux/device.h> +#include <linux/io.h> +#include <linux/spinlock.h> +#include <linux/types.h> + +#include <asm-generic/io-64-nonatomic-lo-hi.h> + +#include "virt-dma.h" + +/* Channel registers */ + +#define IDMA64_CH_SAR 0x00 /* Source Address Register */ +#define IDMA64_CH_DAR 0x08 /* Destination Address Register */ +#define IDMA64_CH_LLP 0x10 /* Linked List Pointer */ +#define IDMA64_CH_CTL_LO 0x18 /* Control Register Low */ +#define IDMA64_CH_CTL_HI 0x1c /* Control Register High */ +#define IDMA64_CH_SSTAT 0x20 +#define IDMA64_CH_DSTAT 0x28 +#define IDMA64_CH_SSTATAR 0x30 +#define IDMA64_CH_DSTATAR 0x38 +#define IDMA64_CH_CFG_LO 0x40 /* Configuration Register Low */ +#define IDMA64_CH_CFG_HI 0x44 /* Configuration Register High */ +#define IDMA64_CH_SGR 0x48 +#define IDMA64_CH_DSR 0x50 + +#define IDMA64_CH_LENGTH 0x58 + +/* Bitfields in CTL_LO */ +#define IDMA64C_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +#define IDMA64C_CTLL_DST_WIDTH(x) ((x) << 1) /* bytes per element */ +#define IDMA64C_CTLL_SRC_WIDTH(x) ((x) << 4) +#define IDMA64C_CTLL_DST_INC (0 << 8) /* DAR update/not */ +#define IDMA64C_CTLL_DST_FIX (1 << 8) +#define IDMA64C_CTLL_SRC_INC (0 << 10) /* SAR update/not */ +#define IDMA64C_CTLL_SRC_FIX (1 << 10) +#define IDMA64C_CTLL_DST_MSIZE(x) ((x) << 11) /* burst, #elements */ +#define IDMA64C_CTLL_SRC_MSIZE(x) ((x) << 14) +#define IDMA64C_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define IDMA64C_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define IDMA64C_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define IDMA64C_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define IDMA64C_CTLH_BLOCK_TS(x) ((x) & ((1 << 17) - 1)) +#define IDMA64C_CTLH_DONE (1 << 17) + +/* Bitfields in CFG_LO */ +#define IDMA64C_CFGL_DST_BURST_ALIGN (1 << 0) /* dst burst align */ +#define IDMA64C_CFGL_SRC_BURST_ALIGN (1 << 1) /* src burst align */ +#define IDMA64C_CFGL_CH_SUSP (1 << 8) +#define IDMA64C_CFGL_FIFO_EMPTY (1 << 9) +#define IDMA64C_CFGL_CH_DRAIN (1 << 10) /* drain FIFO */ +#define IDMA64C_CFGL_DST_OPT_BL (1 << 20) /* optimize dst burst length */ +#define IDMA64C_CFGL_SRC_OPT_BL (1 << 21) /* optimize src burst length */ + +/* Bitfields in CFG_HI */ +#define IDMA64C_CFGH_SRC_PER(x) ((x) << 0) /* src peripheral */ +#define IDMA64C_CFGH_DST_PER(x) ((x) << 4) /* dst peripheral */ +#define IDMA64C_CFGH_RD_ISSUE_THD(x) ((x) << 8) +#define IDMA64C_CFGH_RW_ISSUE_THD(x) ((x) << 18) + +/* Interrupt registers */ + +#define IDMA64_INT_XFER 0x00 +#define IDMA64_INT_BLOCK 0x08 +#define IDMA64_INT_SRC_TRAN 0x10 +#define IDMA64_INT_DST_TRAN 0x18 +#define IDMA64_INT_ERROR 0x20 + +#define IDMA64_RAW(x) (0x2c0 + IDMA64_INT_##x) /* r */ +#define IDMA64_STATUS(x) (0x2e8 + IDMA64_INT_##x) /* r (raw & mask) */ +#define IDMA64_MASK(x) (0x310 + IDMA64_INT_##x) /* rw (set = irq enabled) */ +#define IDMA64_CLEAR(x) (0x338 + IDMA64_INT_##x) /* w (ack, affects "raw") */ + +/* Common registers */ + +#define IDMA64_STATUS_INT 0x360 /* r */ +#define IDMA64_CFG 0x398 +#define IDMA64_CH_EN 0x3a0 + +/* Bitfields in CFG */ +#define IDMA64_CFG_DMA_EN (1 << 0) + +/* Hardware descriptor for Linked LIst transfers */ +struct idma64_lli { + u64 sar; + u64 dar; + u64 llp; + u32 ctllo; + u32 ctlhi; + u32 sstat; + u32 dstat; +}; + +struct idma64_hw_desc { + struct idma64_lli *lli; + dma_addr_t llp; + dma_addr_t phys; + unsigned int len; +}; + +struct idma64_desc { + struct virt_dma_desc vdesc; + enum dma_transfer_direction direction; + struct 
idma64_hw_desc *hw; + unsigned int ndesc; + size_t length; + enum dma_status status; +}; + +static inline struct idma64_desc *to_idma64_desc(struct virt_dma_desc *vdesc) +{ + return container_of(vdesc, struct idma64_desc, vdesc); +} + +struct idma64_chan { + struct virt_dma_chan vchan; + + void __iomem *regs; + + /* hardware configuration */ + enum dma_transfer_direction direction; + unsigned int mask; + struct dma_slave_config config; + + void *pool; + struct idma64_desc *desc; +}; + +static inline struct idma64_chan *to_idma64_chan(struct dma_chan *chan) +{ + return container_of(chan, struct idma64_chan, vchan.chan); +} + +#define channel_set_bit(idma64, reg, mask) \ + dma_writel(idma64, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(idma64, reg, mask) \ + dma_writel(idma64, reg, ((mask) << 8) | 0) + +static inline u32 idma64c_readl(struct idma64_chan *idma64c, int offset) +{ + return readl(idma64c->regs + offset); +} + +static inline void idma64c_writel(struct idma64_chan *idma64c, int offset, + u32 value) +{ + writel(value, idma64c->regs + offset); +} + +#define channel_readl(idma64c, reg) \ + idma64c_readl(idma64c, IDMA64_CH_##reg) +#define channel_writel(idma64c, reg, value) \ + idma64c_writel(idma64c, IDMA64_CH_##reg, (value)) + +static inline u64 idma64c_readq(struct idma64_chan *idma64c, int offset) +{ + return lo_hi_readq(idma64c->regs + offset); +} + +static inline void idma64c_writeq(struct idma64_chan *idma64c, int offset, + u64 value) +{ + lo_hi_writeq(value, idma64c->regs + offset); +} + +#define channel_readq(idma64c, reg) \ + idma64c_readq(idma64c, IDMA64_CH_##reg) +#define channel_writeq(idma64c, reg, value) \ + idma64c_writeq(idma64c, IDMA64_CH_##reg, (value)) + +struct idma64 { + struct dma_device dma; + + void __iomem *regs; + + /* channels */ + unsigned short all_chan_mask; + struct idma64_chan *chan; +}; + +static inline struct idma64 *to_idma64(struct dma_device *ddev) +{ + return container_of(ddev, struct idma64, dma); +} + +static inline u32 idma64_readl(struct idma64 *idma64, int offset) +{ + return readl(idma64->regs + offset); +} + +static inline void idma64_writel(struct idma64 *idma64, int offset, u32 value) +{ + writel(value, idma64->regs + offset); +} + +#define dma_readl(idma64, reg) \ + idma64_readl(idma64, IDMA64_##reg) +#define dma_writel(idma64, reg, value) \ + idma64_writel(idma64, IDMA64_##reg, (value)) + +/** + * struct idma64_chip - representation of iDMA 64-bit controller hardware + * @dev: struct device of the DMA controller + * @irq: irq line + * @regs: memory mapped I/O space + * @idma64: struct idma64 that is filed by idma64_probe() + */ +struct idma64_chip { + struct device *dev; + int irq; + void __iomem *regs; + struct idma64 *idma64; +}; + +#endif /* __DMA_IDMA64_H__ */ diff --git a/kernel/drivers/dma/imx-dma.c b/kernel/drivers/dma/imx-dma.c index eed405976..48d85f8b9 100644 --- a/kernel/drivers/dma/imx-dma.c +++ b/kernel/drivers/dma/imx-dma.c @@ -193,7 +193,7 @@ struct imxdma_filter_data { int request; }; -static struct platform_device_id imx_dma_devtype[] = { +static const struct platform_device_id imx_dma_devtype[] = { { .name = "imx1-dma", .driver_data = IMX1_DMA, @@ -1083,8 +1083,12 @@ static int __init imxdma_probe(struct platform_device *pdev) if (IS_ERR(imxdma->dma_ahb)) return PTR_ERR(imxdma->dma_ahb); - clk_prepare_enable(imxdma->dma_ipg); - clk_prepare_enable(imxdma->dma_ahb); + ret = clk_prepare_enable(imxdma->dma_ipg); + if (ret) + return ret; + ret = clk_prepare_enable(imxdma->dma_ahb); + if (ret) + goto 
disable_dma_ipg_clk; /* reset DMA module */ imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); @@ -1094,20 +1098,20 @@ static int __init imxdma_probe(struct platform_device *pdev) dma_irq_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register IRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } irq_err = platform_get_irq(pdev, 1); if (irq_err < 0) { ret = irq_err; - goto err; + goto disable_dma_ahb_clk; } ret = devm_request_irq(&pdev->dev, irq_err, imxdma_err_handler, 0, "DMA", imxdma); if (ret) { dev_warn(imxdma->dev, "Can't register ERRIRQ for DMA\n"); - goto err; + goto disable_dma_ahb_clk; } } @@ -1144,7 +1148,7 @@ static int __init imxdma_probe(struct platform_device *pdev) dev_warn(imxdma->dev, "Can't register IRQ %d " "for DMA channel %d\n", irq + i, i); - goto err; + goto disable_dma_ahb_clk; } init_timer(&imxdmac->watchdog); imxdmac->watchdog.function = &imxdma_watchdog; @@ -1183,14 +1187,14 @@ static int __init imxdma_probe(struct platform_device *pdev) platform_set_drvdata(pdev, imxdma); - imxdma->dma_device.copy_align = 2; /* 2^2 = 4 bytes alignment */ + imxdma->dma_device.copy_align = DMAENGINE_ALIGN_4_BYTES; imxdma->dma_device.dev->dma_parms = &imxdma->dma_parms; dma_set_max_seg_size(imxdma->dma_device.dev, 0xffffff); ret = dma_async_device_register(&imxdma->dma_device); if (ret) { dev_err(&pdev->dev, "unable to register\n"); - goto err; + goto disable_dma_ahb_clk; } if (pdev->dev.of_node) { @@ -1206,9 +1210,10 @@ static int __init imxdma_probe(struct platform_device *pdev) err_of_dma_controller: dma_async_device_unregister(&imxdma->dma_device); -err: - clk_disable_unprepare(imxdma->dma_ipg); +disable_dma_ahb_clk: clk_disable_unprepare(imxdma->dma_ahb); +disable_dma_ipg_clk: + clk_disable_unprepare(imxdma->dma_ipg); return ret; } diff --git a/kernel/drivers/dma/imx-sdma.c b/kernel/drivers/dma/imx-sdma.c index 62bbd7933..0f6fd42f5 100644 --- a/kernel/drivers/dma/imx-sdma.c +++ b/kernel/drivers/dma/imx-sdma.c @@ -35,12 +35,16 @@ #include <linux/platform_device.h> #include <linux/dmaengine.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_dma.h> #include <asm/irq.h> #include <linux/platform_data/dma-imx-sdma.h> #include <linux/platform_data/dma-imx.h> +#include <linux/regmap.h> +#include <linux/mfd/syscon.h> +#include <linux/mfd/syscon/imx6q-iomuxc-gpr.h> #include "dmaengine.h" @@ -124,6 +128,56 @@ #define CHANGE_ENDIANNESS 0x80 /* + * p_2_p watermark_level description + * Bits Name Description + * 0-7 Lower WML Lower watermark level + * 8 PS 1: Pad Swallowing + * 0: No Pad Swallowing + * 9 PA 1: Pad Adding + * 0: No Pad Adding + * 10 SPDIF If this bit is set both source + * and destination are on SPBA + * 11 Source Bit(SP) 1: Source on SPBA + * 0: Source on AIPS + * 12 Destination Bit(DP) 1: Destination on SPBA + * 0: Destination on AIPS + * 13-15 --------- MUST BE 0 + * 16-23 Higher WML HWML + * 24-27 N Total number of samples after + * which Pad adding/Swallowing + * must be done. It must be odd. 
+ * 28 Lower WML Event(LWE) SDMA events reg to check for + * LWML event mask + * 0: LWE in EVENTS register + * 1: LWE in EVENTS2 register + * 29 Higher WML Event(HWE) SDMA events reg to check for + * HWML event mask + * 0: HWE in EVENTS register + * 1: HWE in EVENTS2 register + * 30 --------- MUST BE 0 + * 31 CONT 1: Amount of samples to be + * transferred is unknown and + * script will keep on + * transferring samples as long as + * both events are detected and + * script must be manually stopped + * by the application + * 0: The amount of samples to be + * transferred is equal to the + * count field of mode word + */ +#define SDMA_WATERMARK_LEVEL_LWML 0xFF +#define SDMA_WATERMARK_LEVEL_PS BIT(8) +#define SDMA_WATERMARK_LEVEL_PA BIT(9) +#define SDMA_WATERMARK_LEVEL_SPDIF BIT(10) +#define SDMA_WATERMARK_LEVEL_SP BIT(11) +#define SDMA_WATERMARK_LEVEL_DP BIT(12) +#define SDMA_WATERMARK_LEVEL_HWML (0xFF << 16) +#define SDMA_WATERMARK_LEVEL_LWE BIT(28) +#define SDMA_WATERMARK_LEVEL_HWE BIT(29) +#define SDMA_WATERMARK_LEVEL_CONT BIT(31) + +/* * Mode/Count of data node descriptors - IPCv2 */ struct sdma_mode_count { @@ -259,8 +313,9 @@ struct sdma_channel { struct sdma_buffer_descriptor *bd; dma_addr_t bd_phys; unsigned int pc_from_device, pc_to_device; + unsigned int device_to_device; unsigned long flags; - dma_addr_t per_address; + dma_addr_t per_address, per_address2; unsigned long event_mask[2]; unsigned long watermark_level; u32 shp_addr, per_addr; @@ -328,6 +383,8 @@ struct sdma_engine { u32 script_number; struct sdma_script_start_addrs *script_addrs; const struct sdma_driver_data *drvdata; + u32 spba_start_addr; + u32 spba_end_addr; }; static struct sdma_driver_data sdma_imx31 = { @@ -420,7 +477,7 @@ static struct sdma_driver_data sdma_imx6q = { .script_addrs = &sdma_script_imx6q, }; -static struct platform_device_id sdma_devtypes[] = { +static const struct platform_device_id sdma_devtypes[] = { { .name = "imx25-sdma", .driver_data = (unsigned long)&sdma_imx25, @@ -705,6 +762,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = 0; sdmac->pc_to_device = 0; + sdmac->device_to_device = 0; switch (peripheral_type) { case IMX_DMATYPE_MEMORY: @@ -780,6 +838,7 @@ static void sdma_get_pc(struct sdma_channel *sdmac, sdmac->pc_from_device = per_2_emi; sdmac->pc_to_device = emi_2_per; + sdmac->device_to_device = per_2_per; } static int sdma_load_context(struct sdma_channel *sdmac) @@ -792,11 +851,12 @@ static int sdma_load_context(struct sdma_channel *sdmac) int ret; unsigned long flags; - if (sdmac->direction == DMA_DEV_TO_MEM) { + if (sdmac->direction == DMA_DEV_TO_MEM) load_address = sdmac->pc_from_device; - } else { + else if (sdmac->direction == DMA_DEV_TO_DEV) + load_address = sdmac->device_to_device; + else load_address = sdmac->pc_to_device; - } if (load_address < 0) return load_address; @@ -851,6 +911,46 @@ static int sdma_disable_channel(struct dma_chan *chan) return 0; } +static void sdma_set_watermarklevel_for_p2p(struct sdma_channel *sdmac) +{ + struct sdma_engine *sdma = sdmac->sdma; + + int lwml = sdmac->watermark_level & SDMA_WATERMARK_LEVEL_LWML; + int hwml = (sdmac->watermark_level & SDMA_WATERMARK_LEVEL_HWML) >> 16; + + set_bit(sdmac->event_id0 % 32, &sdmac->event_mask[1]); + set_bit(sdmac->event_id1 % 32, &sdmac->event_mask[0]); + + if (sdmac->event_id0 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_LWE; + + if (sdmac->event_id1 > 31) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_HWE; + + /* + * If LWML(src_maxburst) > HWML(dst_maxburst), we 
need + * swap LWML and HWML of INFO(A.3.2.5.1), also need swap + * r0(event_mask[1]) and r1(event_mask[0]). + */ + if (lwml > hwml) { + sdmac->watermark_level &= ~(SDMA_WATERMARK_LEVEL_LWML | + SDMA_WATERMARK_LEVEL_HWML); + sdmac->watermark_level |= hwml; + sdmac->watermark_level |= lwml << 16; + swap(sdmac->event_mask[0], sdmac->event_mask[1]); + } + + if (sdmac->per_address2 >= sdma->spba_start_addr && + sdmac->per_address2 <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_SP; + + if (sdmac->per_address >= sdma->spba_start_addr && + sdmac->per_address <= sdma->spba_end_addr) + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_DP; + + sdmac->watermark_level |= SDMA_WATERMARK_LEVEL_CONT; +} + static int sdma_config_channel(struct dma_chan *chan) { struct sdma_channel *sdmac = to_sdma_chan(chan); @@ -869,6 +969,12 @@ static int sdma_config_channel(struct dma_chan *chan) sdma_event_enable(sdmac, sdmac->event_id0); } + if (sdmac->event_id1) { + if (sdmac->event_id1 >= sdmac->sdma->drvdata->num_events) + return -EINVAL; + sdma_event_enable(sdmac, sdmac->event_id1); + } + switch (sdmac->peripheral_type) { case IMX_DMATYPE_DSP: sdma_config_ownership(sdmac, false, true, true); @@ -887,19 +993,17 @@ static int sdma_config_channel(struct dma_chan *chan) (sdmac->peripheral_type != IMX_DMATYPE_DSP)) { /* Handle multiple event channels differently */ if (sdmac->event_id1) { - sdmac->event_mask[1] = BIT(sdmac->event_id1 % 32); - if (sdmac->event_id1 > 31) - __set_bit(31, &sdmac->watermark_level); - sdmac->event_mask[0] = BIT(sdmac->event_id0 % 32); - if (sdmac->event_id0 > 31) - __set_bit(30, &sdmac->watermark_level); - } else { + if (sdmac->peripheral_type == IMX_DMATYPE_ASRC_SP || + sdmac->peripheral_type == IMX_DMATYPE_ASRC) + sdma_set_watermarklevel_for_p2p(sdmac); + } else __set_bit(sdmac->event_id0, sdmac->event_mask); - } + /* Watermark Level */ sdmac->watermark_level |= sdmac->watermark_level; /* Address */ sdmac->shp_addr = sdmac->per_address; + sdmac->per_addr = sdmac->per_address2; } else { sdmac->watermark_level = 0; /* FIXME: M3_BASE_ADDRESS */ } @@ -987,17 +1091,22 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->peripheral_type = data->peripheral_type; sdmac->event_id0 = data->dma_request; + sdmac->event_id1 = data->dma_request2; - clk_enable(sdmac->sdma->clk_ipg); - clk_enable(sdmac->sdma->clk_ahb); + ret = clk_enable(sdmac->sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdmac->sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; ret = sdma_request_channel(sdmac); if (ret) - return ret; + goto disable_clk_ahb; ret = sdma_set_channel_priority(sdmac, prio); if (ret) - return ret; + goto disable_clk_ahb; dma_async_tx_descriptor_init(&sdmac->desc, chan); sdmac->desc.tx_submit = sdma_tx_submit; @@ -1005,6 +1114,12 @@ static int sdma_alloc_chan_resources(struct dma_chan *chan) sdmac->desc.flags = DMA_CTRL_ACK; return 0; + +disable_clk_ahb: + clk_disable(sdmac->sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdmac->sdma->clk_ipg); + return ret; } static void sdma_free_chan_resources(struct dma_chan *chan) @@ -1221,6 +1336,14 @@ static int sdma_config(struct dma_chan *chan, sdmac->watermark_level = dmaengine_cfg->src_maxburst * dmaengine_cfg->src_addr_width; sdmac->word_size = dmaengine_cfg->src_addr_width; + } else if (dmaengine_cfg->direction == DMA_DEV_TO_DEV) { + sdmac->per_address2 = dmaengine_cfg->src_addr; + sdmac->per_address = dmaengine_cfg->dst_addr; + sdmac->watermark_level = dmaengine_cfg->src_maxburst & + 
SDMA_WATERMARK_LEVEL_LWML; + sdmac->watermark_level |= (dmaengine_cfg->dst_maxburst << 16) & + SDMA_WATERMARK_LEVEL_HWML; + sdmac->word_size = dmaengine_cfg->dst_addr_width; } else { sdmac->per_address = dmaengine_cfg->dst_addr; sdmac->watermark_level = dmaengine_cfg->dst_maxburst * @@ -1337,6 +1460,72 @@ err_firmware: release_firmware(fw); } +#define EVENT_REMAP_CELLS 3 + +static int sdma_event_remap(struct sdma_engine *sdma) +{ + struct device_node *np = sdma->dev->of_node; + struct device_node *gpr_np = of_parse_phandle(np, "gpr", 0); + struct property *event_remap; + struct regmap *gpr; + char propname[] = "fsl,sdma-event-remap"; + u32 reg, val, shift, num_map, i; + int ret = 0; + + if (IS_ERR(np) || IS_ERR(gpr_np)) + goto out; + + event_remap = of_find_property(np, propname, NULL); + num_map = event_remap ? (event_remap->length / sizeof(u32)) : 0; + if (!num_map) { + dev_dbg(sdma->dev, "no event needs to be remapped\n"); + goto out; + } else if (num_map % EVENT_REMAP_CELLS) { + dev_err(sdma->dev, "the property %s must modulo %d\n", + propname, EVENT_REMAP_CELLS); + ret = -EINVAL; + goto out; + } + + gpr = syscon_node_to_regmap(gpr_np); + if (IS_ERR(gpr)) { + dev_err(sdma->dev, "failed to get gpr regmap\n"); + ret = PTR_ERR(gpr); + goto out; + } + + for (i = 0; i < num_map; i += EVENT_REMAP_CELLS) { + ret = of_property_read_u32_index(np, propname, i, ®); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 1, &shift); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 1); + goto out; + } + + ret = of_property_read_u32_index(np, propname, i + 2, &val); + if (ret) { + dev_err(sdma->dev, "failed to read property %s index %d\n", + propname, i + 2); + goto out; + } + + regmap_update_bits(gpr, reg, BIT(shift), val << shift); + } + +out: + if (!IS_ERR(gpr_np)) + of_node_put(gpr_np); + + return ret; +} + static int sdma_get_firmware(struct sdma_engine *sdma, const char *fw_name) { @@ -1354,8 +1543,12 @@ static int sdma_init(struct sdma_engine *sdma) int i, ret; dma_addr_t ccb_phys; - clk_enable(sdma->clk_ipg); - clk_enable(sdma->clk_ahb); + ret = clk_enable(sdma->clk_ipg); + if (ret) + return ret; + ret = clk_enable(sdma->clk_ahb); + if (ret) + goto disable_clk_ipg; /* Be sure SDMA has not started yet */ writel_relaxed(0, sdma->regs + SDMA_H_C0PTR); @@ -1411,8 +1604,9 @@ static int sdma_init(struct sdma_engine *sdma) return 0; err_dma_alloc: - clk_disable(sdma->clk_ipg); clk_disable(sdma->clk_ahb); +disable_clk_ipg: + clk_disable(sdma->clk_ipg); dev_err(sdma->dev, "initialisation failed with %d\n", ret); return ret; } @@ -1444,6 +1638,14 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec, data.dma_request = dma_spec->args[0]; data.peripheral_type = dma_spec->args[1]; data.priority = dma_spec->args[2]; + /* + * init dma_request2 to zero, which is not used by the dts. + * For P2P, dma_request2 is init from dma_request_channel(), + * chan->private will point to the imx_dma_data, and in + * device_alloc_chan_resources(), imx_dma_data.dma_request2 will + * be set to sdmac->event_id1. 
+ */ + data.dma_request2 = 0; return dma_request_channel(mask, sdma_filter_fn, &data); } @@ -1453,10 +1655,12 @@ static int sdma_probe(struct platform_device *pdev) const struct of_device_id *of_id = of_match_device(sdma_dt_ids, &pdev->dev); struct device_node *np = pdev->dev.of_node; + struct device_node *spba_bus; const char *fw_name; int ret; int irq; struct resource *iores; + struct resource spba_res; struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev); int i; struct sdma_engine *sdma; @@ -1551,6 +1755,10 @@ static int sdma_probe(struct platform_device *pdev) if (ret) goto err_init; + ret = sdma_event_remap(sdma); + if (ret) + goto err_init; + if (sdma->drvdata->script_addrs) sdma_add_scripts(sdma, sdma->drvdata->script_addrs); if (pdata && pdata->script_addrs) @@ -1608,9 +1816,15 @@ static int sdma_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register controller\n"); goto err_register; } - } - dev_info(sdma->dev, "initialized\n"); + spba_bus = of_find_compatible_node(NULL, NULL, "fsl,spba-bus"); + ret = of_address_to_resource(spba_bus, 0, &spba_res); + if (!ret) { + sdma->spba_start_addr = spba_res.start; + sdma->spba_end_addr = spba_res.end; + } + of_node_put(spba_bus); + } return 0; @@ -1636,7 +1850,6 @@ static int sdma_remove(struct platform_device *pdev) } platform_set_drvdata(pdev, NULL); - dev_info(&pdev->dev, "Removed...\n"); return 0; } diff --git a/kernel/drivers/dma/ioat/Makefile b/kernel/drivers/dma/ioat/Makefile index 0ff7270af..cf5fedbe2 100644 --- a/kernel/drivers/dma/ioat/Makefile +++ b/kernel/drivers/dma/ioat/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o +ioatdma-y := init.o dma.o prep.o dca.o sysfs.o diff --git a/kernel/drivers/dma/ioat/dca.c b/kernel/drivers/dma/ioat/dca.c index ea1e107ae..2cb7c308d 100644 --- a/kernel/drivers/dma/ioat/dca.c +++ b/kernel/drivers/dma/ioat/dca.c @@ -31,7 +31,6 @@ #include "dma.h" #include "registers.h" -#include "dma_v2.h" /* * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 @@ -71,14 +70,6 @@ static inline int dca2_tag_map_valid(u8 *tag_map) #define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) #define IOAT_TAG_MAP_LEN 8 -static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; -static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { - 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; -static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; - /* pack PCI B/D/F into a u16 */ static inline u16 dcaid_from_pcidev(struct pci_dev *pci) { @@ -126,96 +117,6 @@ struct ioat_dca_priv { struct ioat_dca_slot req_slots[0]; }; -/* 5000 series chipset DCA Port Requester ID Table Entry Format - * [15:8] PCI-Express Bus Number - * [7:3] PCI-Express Device Number - * [2:0] PCI-Express Function Number - * - * 5000 series chipset DCA control register format - * [7:1] Reserved (0) - * [0] Ignore Function Number - */ - -static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 id; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if 
(ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - writew(id, ioatdca->dca_base + (i * 4)); - /* make sure the ignore function bit is off */ - writeb(0, ioatdca->dca_base + (i * 4) + 2); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - writew(0, ioatdca->dca_base + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - int i, apic_id, bit, value; - u8 entry, tag; - - tag = 0; - apic_id = cpu_physical_id(cpu); - - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { - entry = ioatdca->tag_map[i]; - if (entry & DCA_TAG_MAP_VALID) { - bit = entry & ~DCA_TAG_MAP_VALID; - value = (apic_id & (1 << bit)) ? 1 : 0; - } else { - value = entry ? 1 : 0; - } - tag |= (value << i); - } - return tag; -} - static int ioat_dca_dev_managed(struct dca_provider *dca, struct device *dev) { @@ -231,260 +132,7 @@ static int ioat_dca_dev_managed(struct dca_provider *dca, return 0; } -static struct dca_ops ioat_dca_ops = { - .add_requester = ioat_dca_add_requester, - .remove_requester = ioat_dca_remove_requester, - .get_tag = ioat_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - - -struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - u8 *tag_map = NULL; - int i; - int err; - u8 version; - u8 max_requesters; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - /* I/OAT v1 systems must have a known tag_map to support DCA */ - switch (pdev->vendor) { - case PCI_VENDOR_ID_INTEL: - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT: - tag_map = ioat_tag_map_BNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_CNB: - tag_map = ioat_tag_map_CNB; - break; - case PCI_DEVICE_ID_INTEL_IOAT_SCNB: - tag_map = ioat_tag_map_SCNB; - break; - } - break; - case PCI_VENDOR_ID_UNISYS: - switch (pdev->device) { - case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: - tag_map = ioat_tag_map_UNISYS; - break; - } - break; - } - if (tag_map == NULL) - return NULL; - - version = readb(iobase + IOAT_VER_OFFSET); - if (version == IOAT_VER_3_0) - max_requesters = IOAT3_DCA_MAX_REQ; - else - max_requesters = IOAT_DCA_MAX_REQ; - - dca = alloc_dca_provider(&ioat_dca_ops, - sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * max_requesters)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->max_requesters = max_requesters; - ioatdca->dca_base = iobase + 0x54; - - /* copy over the APIC ID to DCA tag mapping */ - for (i = 0; i < IOAT_TAG_MAP_LEN; i++) - ioatdca->tag_map[i] = tag_map[i]; - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - - -static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev 
*pdev; - int i; - u16 id; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - id = dcaid_from_pcidev(pdev); - - if (ioatdca->requester_count == ioatdca->max_requesters) - return -ENODEV; - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == NULL) { - /* found an empty slot */ - ioatdca->requester_count++; - ioatdca->req_slots[i].pdev = pdev; - ioatdca->req_slots[i].rid = id; - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(id | IOAT_DCA_GREQID_VALID, - ioatdca->iobase + global_req_table + (i * 4)); - return i; - } - } - /* Error, ioatdma->requester_count is out of whack */ - return -EFAULT; -} - -static int ioat2_dca_remove_requester(struct dca_provider *dca, - struct device *dev) -{ - struct ioat_dca_priv *ioatdca = dca_priv(dca); - struct pci_dev *pdev; - int i; - u16 global_req_table; - - /* This implementation only supports PCI-Express */ - if (!dev_is_pci(dev)) - return -ENODEV; - pdev = to_pci_dev(dev); - - for (i = 0; i < ioatdca->max_requesters; i++) { - if (ioatdca->req_slots[i].pdev == pdev) { - global_req_table = - readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); - writel(0, ioatdca->iobase + global_req_table + (i * 4)); - ioatdca->req_slots[i].pdev = NULL; - ioatdca->req_slots[i].rid = 0; - ioatdca->requester_count--; - return i; - } - } - return -ENODEV; -} - -static u8 ioat2_dca_get_tag(struct dca_provider *dca, - struct device *dev, - int cpu) -{ - u8 tag; - - tag = ioat_dca_get_tag(dca, dev, cpu); - tag = (~tag) & 0x1F; - return tag; -} - -static struct dca_ops ioat2_dca_ops = { - .add_requester = ioat2_dca_add_requester, - .remove_requester = ioat2_dca_remove_requester, - .get_tag = ioat2_dca_get_tag, - .dev_managed = ioat_dca_dev_managed, -}; - -static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) -{ - int slots = 0; - u32 req; - u16 global_req_table; - - global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); - if (global_req_table == 0) - return 0; - do { - req = readl(iobase + global_req_table + (slots * sizeof(u32))); - slots++; - } while ((req & IOAT_DCA_GREQID_LASTID) == 0); - - return slots; -} - -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) -{ - struct dca_provider *dca; - struct ioat_dca_priv *ioatdca; - int slots; - int i; - int err; - u32 tag_map; - u16 dca_offset; - u16 csi_fsb_control; - u16 pcie_control; - u8 bit; - - if (!system_has_dca_enabled(pdev)) - return NULL; - - dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); - if (dca_offset == 0) - return NULL; - - slots = ioat2_dca_count_dca_slots(iobase, dca_offset); - if (slots == 0) - return NULL; - - dca = alloc_dca_provider(&ioat2_dca_ops, - sizeof(*ioatdca) - + (sizeof(struct ioat_dca_slot) * slots)); - if (!dca) - return NULL; - - ioatdca = dca_priv(dca); - ioatdca->iobase = iobase; - ioatdca->dca_base = iobase + dca_offset; - ioatdca->max_requesters = slots; - - /* some bios might not know to turn these on */ - csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { - csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; - writew(csi_fsb_control, - ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); - } - pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { - pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; - 
writew(pcie_control, - ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); - } - - - /* TODO version, compatibility and configuration checks */ - - /* copy out the APIC to DCA tag map */ - tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); - for (i = 0; i < 5; i++) { - bit = (tag_map >> (4 * i)) & 0x0f; - if (bit < 8) - ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; - else - ioatdca->tag_map[i] = 0; - } - - if (!dca2_tag_map_valid(ioatdca->tag_map)) { - WARN_TAINT_ONCE(1, TAINT_FIRMWARE_WORKAROUND, - "%s %s: APICID_TAG_MAP set incorrectly by BIOS, disabling DCA\n", - dev_driver_string(&pdev->dev), - dev_name(&pdev->dev)); - free_dca_provider(dca); - return NULL; - } - - err = register_dca_provider(dca, &pdev->dev); - if (err) { - free_dca_provider(dca); - return NULL; - } - - return dca; -} - -static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); struct pci_dev *pdev; @@ -518,7 +166,7 @@ static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) return -EFAULT; } -static int ioat3_dca_remove_requester(struct dca_provider *dca, +static int ioat_dca_remove_requester(struct dca_provider *dca, struct device *dev) { struct ioat_dca_priv *ioatdca = dca_priv(dca); @@ -545,7 +193,7 @@ static int ioat3_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat3_dca_get_tag(struct dca_provider *dca, +static u8 ioat_dca_get_tag(struct dca_provider *dca, struct device *dev, int cpu) { @@ -576,14 +224,14 @@ static u8 ioat3_dca_get_tag(struct dca_provider *dca, return tag; } -static struct dca_ops ioat3_dca_ops = { - .add_requester = ioat3_dca_add_requester, - .remove_requester = ioat3_dca_remove_requester, - .get_tag = ioat3_dca_get_tag, +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, .dev_managed = ioat_dca_dev_managed, }; -static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +static int ioat_dca_count_dca_slots(void *iobase, u16 dca_offset) { int slots = 0; u32 req; @@ -618,7 +266,7 @@ static inline int dca3_tag_map_invalid(u8 *tag_map) (tag_map[4] == DCA_TAG_MAP_VALID)); } -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) { struct dca_provider *dca; struct ioat_dca_priv *ioatdca; @@ -645,11 +293,11 @@ struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) if (dca_offset == 0) return NULL; - slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + slots = ioat_dca_count_dca_slots(iobase, dca_offset); if (slots == 0) return NULL; - dca = alloc_dca_provider(&ioat3_dca_ops, + dca = alloc_dca_provider(&ioat_dca_ops, sizeof(*ioatdca) + (sizeof(struct ioat_dca_slot) * slots)); if (!dca) diff --git a/kernel/drivers/dma/ioat/dma.c b/kernel/drivers/dma/ioat/dma.c index ee0aa9f4c..1d5df2ef1 100644 --- a/kernel/drivers/dma/ioat/dma.c +++ b/kernel/drivers/dma/ioat/dma.c @@ -1,6 +1,6 @@ /* * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. + * Copyright(c) 2004 - 2015 Intel Corporation. 
* * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -31,31 +31,23 @@ #include <linux/dma-mapping.h> #include <linux/workqueue.h> #include <linux/prefetch.h> -#include <linux/i7300_idle.h> #include "dma.h" #include "registers.h" #include "hw.h" #include "../dmaengine.h" -int ioat_pending_level = 4; -module_param(ioat_pending_level, int, 0644); -MODULE_PARM_DESC(ioat_pending_level, - "high-water mark for pushing ioat descriptors (default: 4)"); - -/* internal functions */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat); -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat); +static void ioat_eh(struct ioatdma_chan *ioat_chan); /** * ioat_dma_do_interrupt - handler used for single vector interrupt mode * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt(int irq, void *data) { struct ioatdma_device *instance = data; - struct ioat_chan_common *chan; + struct ioatdma_chan *ioat_chan; unsigned long attnstatus; int bit; u8 intrctrl; @@ -72,9 +64,9 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); for_each_set_bit(bit, &attnstatus, BITS_PER_LONG) { - chan = ioat_chan_by_index(instance, bit); - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + ioat_chan = ioat_chan_by_index(instance, bit); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); } writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); @@ -86,1161 +78,913 @@ static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) * @irq: interrupt id * @data: interrupt data */ -static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) { - struct ioat_chan_common *chan = data; + struct ioatdma_chan *ioat_chan = data; - if (test_bit(IOAT_RUN, &chan->state)) - tasklet_schedule(&chan->cleanup_task); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + tasklet_schedule(&ioat_chan->cleanup_task); return IRQ_HANDLED; } -/* common channel initialization */ -void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx) +void ioat_stop(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c = &chan->common; - unsigned long data = (unsigned long) c; - - chan->device = device; - chan->reg_base = device->reg_base + (0x80 * (idx + 1)); - spin_lock_init(&chan->cleanup_lock); - chan->common.device = dma; - dma_cookie_init(&chan->common); - list_add_tail(&chan->common.device_node, &dma->channels); - device->idx[idx] = chan; - init_timer(&chan->timer); - chan->timer.function = device->timer_fn; - chan->timer.data = data; - tasklet_init(&chan->cleanup_task, device->cleanup_fn, data); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + int chan_id = chan_num(ioat_chan); + struct msix_entry *msix; + + /* 1/ stop irq from firing tasklets + * 2/ stop the tasklet from re-arming irqs + */ + clear_bit(IOAT_RUN, &ioat_chan->state); + + /* flush inflight interrupts */ + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + msix = &ioat_dma->msix_entries[chan_id]; + synchronize_irq(msix->vector); + break; + case IOAT_MSI: + case IOAT_INTX: + synchronize_irq(pdev->irq); + break; + default: + break; + } + + /* flush inflight timers */ + 
del_timer_sync(&ioat_chan->timer); + + /* flush inflight tasklet runs */ + tasklet_kill(&ioat_chan->cleanup_task); + + /* final cleanup now that everything is quiesced and can't re-arm */ + ioat_cleanup_event((unsigned long)&ioat_chan->dma_chan); } -/** - * ioat1_dma_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -static int ioat1_enumerate_channels(struct ioatdma_device *device) +static void __ioat_issue_pending(struct ioatdma_chan *ioat_chan) { - u8 xfercap_scale; - u32 xfercap; - int i; - struct ioat_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_scale &= 0x1f; /* bits [4:0] valid */ - xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); - dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap); - -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; + ioat_chan->dmacount += ioat_ring_pending(ioat_chan); + ioat_chan->issued = ioat_chan->head; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); +} + +void ioat_issue_pending(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap = xfercap; - spin_lock_init(&ioat->desc_lock); - INIT_LIST_HEAD(&ioat->free_desc); - INIT_LIST_HEAD(&ioat->used_desc); + if (ioat_ring_pending(ioat_chan)) { + spin_lock_bh(&ioat_chan->prep_lock); + __ioat_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } - dma->chancnt = i; - return i; } /** - * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended - * descriptors to hw - * @chan: DMA channel handle + * ioat_update_pending - log pending descriptors + * @ioat: ioat+ channel + * + * Check if the number of unsubmitted descriptors has exceeded the + * watermark. 
Called with prep_lock held */ -static inline void -__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat) +static void ioat_update_pending(struct ioatdma_chan *ioat_chan) { - void __iomem *reg_base = ioat->base.reg_base; - - dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n", - __func__, ioat->pending); - ioat->pending = 0; - writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET); + if (ioat_ring_pending(ioat_chan) > ioat_pending_level) + __ioat_issue_pending(ioat_chan); } -static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +static void __ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_dma_chan *ioat = to_ioat_chan(chan); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; - if (ioat->pending > 0) { - spin_lock_bh(&ioat->desc_lock); - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + if (ioat_ring_space(ioat_chan) < 1) { + dev_err(to_dev(ioat_chan), + "Unable to start null desc - ring full\n"); + return; } + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.compl_write = 1; + /* set size to non-zero value (channel returns error when size is 0) */ + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + async_tx_ack(&desc->txd); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + dump_desc_dbg(ioat_chan, desc); + /* make sure descriptors are written before we submit */ + wmb(); + ioat_chan->head += 1; + __ioat_issue_pending(ioat_chan); } -/** - * ioat1_reset_channel - restart a channel - * @ioat: IOAT DMA channel handle - */ -static void ioat1_reset_channel(struct ioat_dma_chan *ioat) +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - void __iomem *reg_base = chan->reg_base; - u32 chansts, chanerr; - - dev_warn(to_dev(chan), "reset\n"); - chanerr = readl(reg_base + IOAT_CHANERR_OFFSET); - chansts = *chan->completion & IOAT_CHANSTS_STATUS; - if (chanerr) { - dev_err(to_dev(chan), - "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", - chan_num(chan), chansts, chanerr); - writel(chanerr, reg_base + IOAT_CHANERR_OFFSET); + spin_lock_bh(&ioat_chan->prep_lock); + if (!test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + __ioat_start_null_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); +} + +static void __ioat_restart_chan(struct ioatdma_chan *ioat_chan) +{ + /* set the tail to be re-issued */ + ioat_chan->issued = ioat_chan->tail; + ioat_chan->dmacount = 0; + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + + dev_dbg(to_dev(ioat_chan), + "%s: head: %#x tail: %#x issued: %#x count: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, + ioat_chan->issued, ioat_chan->dmacount); + + if (ioat_ring_pending(ioat_chan)) { + struct ioat_ring_ent *desc; + + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + ioat_set_chainaddr(ioat_chan, desc->txd.phys); + __ioat_issue_pending(ioat_chan); + } else + __ioat_start_null_desc(ioat_chan); +} + +static int ioat_quiesce(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + u32 status; + + status = ioat_chansts(ioat_chan); + if (is_ioat_active(status) || is_ioat_idle(status)) + ioat_suspend(ioat_chan); + while (is_ioat_active(status) || is_ioat_idle(status)) { + if (tmo && 
time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + status = ioat_chansts(ioat_chan); + cpu_relax(); } - /* - * whack it upside the head with a reset - * and wait for things to settle out. - * force the pending count to a really big negative - * to make sure no one forces an issue_pending - * while we're waiting. - */ + return err; +} - ioat->pending = INT_MIN; - writeb(IOAT_CHANCMD_RESET, - reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - set_bit(IOAT_RESET_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + RESET_DELAY); +static int ioat_reset_sync(struct ioatdma_chan *ioat_chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(ioat_chan); + while (ioat_reset_pending(ioat_chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; } -static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +static dma_cookie_t ioat_tx_submit_unlock(struct dma_async_tx_descriptor *tx) + __releases(&ioat_chan->prep_lock) { struct dma_chan *c = tx->chan; - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *first; - struct ioat_desc_sw *chain_tail; + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); dma_cookie_t cookie; - spin_lock_bh(&ioat->desc_lock); - /* cookie incr and addition to used_list must be atomic */ cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); + dev_dbg(to_dev(ioat_chan), "%s: cookie: %d\n", __func__, cookie); + + if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); - /* write address into NextDescriptor field of last desc in chain */ - first = to_ioat_desc(desc->tx_list.next); - chain_tail = to_ioat_desc(ioat->used_desc.prev); - /* make descriptor updates globally visible before chaining */ + /* make descriptor updates visible before advancing ioat->head, + * this is purposefully not smp_wmb() since we are also + * publishing the descriptor updates to a dma device + */ wmb(); - chain_tail->hw->next = first->txd.phys; - list_splice_tail_init(&desc->tx_list, &ioat->used_desc); - dump_desc_dbg(ioat, chain_tail); - dump_desc_dbg(ioat, first); - if (!test_and_set_bit(IOAT_COMPLETION_PENDING, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_chan->head += ioat_chan->produce; - ioat->active += desc->hw->tx_cnt; - ioat->pending += desc->hw->tx_cnt; - if (ioat->pending >= ioat_pending_level) - __ioat1_dma_memcpy_issue_pending(ioat); - spin_unlock_bh(&ioat->desc_lock); + ioat_update_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); return cookie; } -/** - * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair - * @ioat: the channel supplying the memory pool for the descriptors - * @flags: allocation flags - */ -static struct ioat_desc_sw * -ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags) +static struct ioat_ring_ent * +ioat_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) { - struct ioat_dma_descriptor *desc; - struct ioat_desc_sw *desc_sw; - struct ioatdma_device *ioatdma_device; + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + struct ioatdma_device *ioat_dma; dma_addr_t phys; - ioatdma_device = ioat->base.device; - desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); - if (unlikely(!desc)) + ioat_dma = 
to_ioatdma_device(chan->device); + hw = pci_pool_alloc(ioat_dma->dma_pool, flags, &phys); + if (!hw) return NULL; + memset(hw, 0, sizeof(*hw)); - desc_sw = kzalloc(sizeof(*desc_sw), flags); - if (unlikely(!desc_sw)) { - pci_pool_free(ioatdma_device->dma_pool, desc, phys); + desc = kmem_cache_zalloc(ioat_cache, flags); + if (!desc) { + pci_pool_free(ioat_dma->dma_pool, hw, phys); return NULL; } - memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = ioat_tx_submit_unlock; + desc->hw = hw; + desc->txd.phys = phys; + return desc; +} - INIT_LIST_HEAD(&desc_sw->tx_list); - dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common); - desc_sw->txd.tx_submit = ioat1_tx_submit; - desc_sw->hw = desc; - desc_sw->txd.phys = phys; - set_desc_id(desc_sw, -1); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) +{ + struct ioatdma_device *ioat_dma; - return desc_sw; + ioat_dma = to_ioatdma_device(chan->device); + pci_pool_free(ioat_dma->dma_pool, desc->hw, desc->txd.phys); + kmem_cache_free(ioat_cache, desc); } -static int ioat_initial_desc_count = 256; -module_param(ioat_initial_desc_count, int, 0644); -MODULE_PARM_DESC(ioat_initial_desc_count, - "ioat1: initial descriptors per channel (default: 256)"); -/** - * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors - * @chan: the channel to be filled out - */ -static int ioat1_dma_alloc_chan_resources(struct dma_chan *c) +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - u32 chanerr; + struct ioat_ring_ent **ring; + int descs = 1 << order; int i; - LIST_HEAD(tmp_list); - - /* have we already been set up? 
*/ - if (!list_empty(&ioat->free_desc)) - return ioat->desccount; - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); + if (order > ioat_get_max_alloc_order()) + return NULL; - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + /* allocate the array to hold the software ring */ + ring = kcalloc(descs, sizeof(*ring), flags); + if (!ring) + return NULL; + for (i = 0; i < descs; i++) { + ring[i] = ioat_alloc_ring_ent(c, flags); + if (!ring[i]) { + while (i--) + ioat_free_ring_ent(ring[i], c); + kfree(ring); + return NULL; + } + set_desc_id(ring[i], i); } - /* Allocate descriptors */ - for (i = 0; i < ioat_initial_desc_count; i++) { - desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL); - if (!desc) { - dev_err(to_dev(chan), "Only %d initial descriptors\n", i); - break; - } - set_desc_id(desc, i); - list_add_tail(&desc->node, &tmp_list); + /* link descs */ + for (i = 0; i < descs-1; i++) { + struct ioat_ring_ent *next = ring[i+1]; + struct ioat_dma_descriptor *hw = ring[i]->hw; + + hw->next = next->txd.phys; } - spin_lock_bh(&ioat->desc_lock); - ioat->desccount = i; - list_splice(&tmp_list, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - set_bit(IOAT_RUN, &chan->state); - ioat1_dma_start_null_desc(ioat); /* give chain to dma device */ - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, ioat->desccount); - return ioat->desccount; + ring[i]->hw->next = ring[0]->txd.phys; + + return ring; } -void ioat_stop(struct ioat_chan_common *chan) +static bool reshape_ring(struct ioatdma_chan *ioat_chan, int order) { - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - int chan_id = chan_num(chan); - struct msix_entry *msix; + /* reshape differs from normal ring allocation in that we want + * to allocate a new software ring while only + * extending/truncating the hardware ring + */ + struct dma_chan *c = &ioat_chan->dma_chan; + const u32 curr_size = ioat_ring_size(ioat_chan); + const u16 active = ioat_ring_active(ioat_chan); + const u32 new_size = 1 << order; + struct ioat_ring_ent **ring; + u32 i; + + if (order > ioat_get_max_alloc_order()) + return false; - /* 1/ stop irq from firing tasklets - * 2/ stop the tasklet from re-arming irqs + /* double check that we have at least 1 free descriptor */ + if (active == curr_size) + return false; + + /* when shrinking, verify that we can hold the current active + * set in the new ring */ - clear_bit(IOAT_RUN, &chan->state); + if (active >= new_size) + return false; - /* flush inflight interrupts */ - switch (device->irq_mode) { - case IOAT_MSIX: - msix = &device->msix_entries[chan_id]; - synchronize_irq(msix->vector); - break; - case IOAT_MSI: - case IOAT_INTX: - synchronize_irq(pdev->irq); - break; - default: - break; - } + /* allocate the array to hold the software ring */ + ring = kcalloc(new_size, 
sizeof(*ring), GFP_NOWAIT); + if (!ring) + return false; - /* flush inflight timers */ - del_timer_sync(&chan->timer); + /* allocate/trim descriptors as needed */ + if (new_size > curr_size) { + /* copy current descriptors to the new ring */ + for (i = 0; i < curr_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); - /* flush inflight tasklet runs */ - tasklet_kill(&chan->cleanup_task); + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } - /* final cleanup now that everything is quiesced and can't re-arm */ - device->cleanup_fn((unsigned long) &chan->common); -} + /* add new descriptors to the ring */ + for (i = curr_size; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); -/** - * ioat1_dma_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -static void ioat1_dma_free_chan_resources(struct dma_chan *c) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *ioatdma_device = chan->device; - struct ioat_desc_sw *desc, *_desc; - int in_use_descs = 0; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. - */ - if (ioat->desccount == 0) - return; + ring[new_idx] = ioat_alloc_ring_ent(c, GFP_NOWAIT); + if (!ring[new_idx]) { + while (i--) { + u16 new_idx = (ioat_chan->tail+i) & + (new_size-1); + + ioat_free_ring_ent(ring[new_idx], c); + } + kfree(ring); + return false; + } + set_desc_id(ring[new_idx], new_idx); + } - ioat_stop(chan); + /* hw link new descriptors */ + for (i = curr_size-1; i < new_size; i++) { + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + struct ioat_ring_ent *next = + ring[(new_idx+1) & (new_size-1)]; + struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. 
- */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); - - spin_lock_bh(&ioat->desc_lock); - list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) { - dev_dbg(to_dev(chan), "%s: freeing %d from used list\n", - __func__, desc_id(desc)); - dump_desc_dbg(ioat, desc); - in_use_descs++; - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); - } - list_for_each_entry_safe(desc, _desc, - &ioat->free_desc, node) { - list_del(&desc->node); - pci_pool_free(ioatdma_device->dma_pool, desc->hw, - desc->txd.phys); - kfree(desc); + hw->next = next->txd.phys; + } + } else { + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *next; + + /* copy current descriptors to the new ring, dropping the + * removed descriptors + */ + for (i = 0; i < new_size; i++) { + u16 curr_idx = (ioat_chan->tail+i) & (curr_size-1); + u16 new_idx = (ioat_chan->tail+i) & (new_size-1); + + ring[new_idx] = ioat_chan->ring[curr_idx]; + set_desc_id(ring[new_idx], new_idx); + } + + /* free deleted descriptors */ + for (i = new_size; i < curr_size; i++) { + struct ioat_ring_ent *ent; + + ent = ioat_get_ring_ent(ioat_chan, ioat_chan->tail+i); + ioat_free_ring_ent(ent, c); + } + + /* fix up hardware ring */ + hw = ring[(ioat_chan->tail+new_size-1) & (new_size-1)]->hw; + next = ring[(ioat_chan->tail+new_size) & (new_size-1)]; + hw->next = next->txd.phys; } - spin_unlock_bh(&ioat->desc_lock); - pci_pool_free(ioatdma_device->completion_pool, - chan->completion, - chan->completion_dma); + dev_dbg(to_dev(ioat_chan), "%s: allocated %d descriptors\n", + __func__, new_size); - /* one is ok since we left it on there on purpose */ - if (in_use_descs > 1) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - in_use_descs - 1); + kfree(ioat_chan->ring); + ioat_chan->ring = ring; + ioat_chan->alloc_order = order; - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->pending = 0; - ioat->desccount = 0; + return true; } /** - * ioat1_dma_get_next_descriptor - return the next available descriptor - * @ioat: IOAT DMA channel handle - * - * Gets the next descriptor from the chain, and must be called with the - * channel's desc_lock held. Allocates more descriptors if the channel - * has run out. + * ioat_check_space_lock - verify space and grab ring producer lock + * @ioat: ioat,3 channel (ring) to operate on + * @num_descs: allocation length */ -static struct ioat_desc_sw * -ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat) +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs) + __acquires(&ioat_chan->prep_lock) { - struct ioat_desc_sw *new; + bool retry; - if (!list_empty(&ioat->free_desc)) { - new = to_ioat_desc(ioat->free_desc.next); - list_del(&new->node); - } else { - /* try to get another desc */ - new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC); - if (!new) { - dev_err(to_dev(&ioat->base), "alloc failed\n"); - return NULL; - } + retry: + spin_lock_bh(&ioat_chan->prep_lock); + /* never allow the last descriptor to be consumed, we need at + * least one free at all times to allow for on-the-fly ring + * resizing. 
+ */ + if (likely(ioat_ring_space(ioat_chan) > num_descs)) { + dev_dbg(to_dev(ioat_chan), "%s: num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + ioat_chan->produce = num_descs; + return 0; /* with ioat->prep_lock held */ + } + retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + /* is another cpu already trying to expand the ring? */ + if (retry) + goto retry; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + retry = reshape_ring(ioat_chan, ioat_chan->alloc_order + 1); + clear_bit(IOAT_RESHAPE_PENDING, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + /* if we were able to expand the ring retry the allocation */ + if (retry) + goto retry; + + dev_dbg_ratelimited(to_dev(ioat_chan), + "%s: ring full! num_descs: %d (%x:%x:%x)\n", + __func__, num_descs, ioat_chan->head, + ioat_chan->tail, ioat_chan->issued); + + /* progress reclaim in the allocation failure case we may be + * called under bh_disabled so we need to trigger the timer + * event directly + */ + if (time_is_before_jiffies(ioat_chan->timer.expires) + && timer_pending(&ioat_chan->timer)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + ioat_timer_event((unsigned long)ioat_chan); } - dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n", - __func__, desc_id(new)); - prefetch(new->hw); - return new; + + return -ENOMEM; } -static struct dma_async_tx_descriptor * -ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) +static bool desc_has_ext(struct ioat_ring_ent *desc) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); - struct ioat_desc_sw *desc; - size_t copy; - LIST_HEAD(chain); - dma_addr_t src = dma_src; - dma_addr_t dest = dma_dest; - size_t total_len = len; - struct ioat_dma_descriptor *hw = NULL; - int tx_cnt = 0; - - spin_lock_bh(&ioat->desc_lock); - desc = ioat1_dma_get_next_descriptor(ioat); - do { - if (!desc) - break; - - tx_cnt++; - copy = min_t(size_t, len, ioat->xfercap); + struct ioat_dma_descriptor *hw = desc->hw; - hw = desc->hw; - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dest; + if (hw->ctl_f.op == IOAT_OP_XOR || + hw->ctl_f.op == IOAT_OP_XOR_VAL) { + struct ioat_xor_descriptor *xor = desc->xor; - list_add_tail(&desc->node, &chain); + if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) + return true; + } else if (hw->ctl_f.op == IOAT_OP_PQ || + hw->ctl_f.op == IOAT_OP_PQ_VAL) { + struct ioat_pq_descriptor *pq = desc->pq; - len -= copy; - dest += copy; - src += copy; - if (len) { - struct ioat_desc_sw *next; - - async_tx_ack(&desc->txd); - next = ioat1_dma_get_next_descriptor(ioat); - hw->next = next ? 
next->txd.phys : 0; - dump_desc_dbg(ioat, desc); - desc = next; - } else - hw->next = 0; - } while (len); - - if (!desc) { - struct ioat_chan_common *chan = &ioat->base; - - dev_err(to_dev(chan), - "chan%d - get_next_desc failed\n", chan_num(chan)); - list_splice(&chain, &ioat->free_desc); - spin_unlock_bh(&ioat->desc_lock); - return NULL; + if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) + return true; } - spin_unlock_bh(&ioat->desc_lock); - desc->txd.flags = flags; - desc->len = total_len; - list_splice(&chain, &desc->tx_list); - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->tx_cnt = tx_cnt; - dump_desc_dbg(ioat, desc); - - return &desc->txd; + return false; } -static void ioat1_cleanup_event(unsigned long data) +static void +ioat_free_sed(struct ioatdma_device *ioat_dma, struct ioat_sed_ent *sed) { - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat1_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) + if (!sed) return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); + + dma_pool_free(ioat_dma->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); + kmem_cache_free(ioat_sed_cache, sed); } -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) +static u64 ioat_get_current_completion(struct ioatdma_chan *ioat_chan) { - dma_addr_t phys_complete; + u64 phys_complete; u64 completion; - completion = *chan->completion; + completion = *ioat_chan->completion; phys_complete = ioat_chansts_to_addr(completion); - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, + dev_dbg(to_dev(ioat_chan), "%s: phys_complete: %#llx\n", __func__, (unsigned long long) phys_complete); - if (is_ioat_halted(completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "Channel halted, chanerr = %x\n", - chanerr); - - /* TODO do something to salvage the situation */ - } - return phys_complete; } -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete) +static bool ioat_cleanup_preamble(struct ioatdma_chan *ioat_chan, + u64 *phys_complete) { - *phys_complete = ioat_get_current_completion(chan); - if (*phys_complete == chan->last_completion) + *phys_complete = ioat_get_current_completion(ioat_chan); + if (*phys_complete == ioat_chan->last_completion) return false; - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + + clear_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); return true; } -static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) +static void +desc_get_errstat(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc) { - struct ioat_chan_common *chan = &ioat->base; - struct list_head *_desc, *n; - struct dma_async_tx_descriptor *tx; + struct ioat_dma_descriptor *hw = desc->hw; - dev_dbg(to_dev(chan), "%s: phys_complete: %llx\n", - __func__, (unsigned long long) phys_complete); - list_for_each_safe(_desc, n, &ioat->used_desc) { - struct ioat_desc_sw *desc; + switch (hw->ctl_f.op) { + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + { + struct ioat_pq_descriptor *pq = desc->pq; - prefetch(n); - desc = list_entry(_desc, typeof(*desc), node); - tx = &desc->txd; - /* - * Incoming DMA requests may use multiple descriptors, - * due to exceeding xfercap, perhaps. If so, only the - * last one will have a cookie, and require unmapping. 
- */ - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - ioat->active -= desc->hw->tx_cnt; - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } + /* check if there's error written */ + if (!pq->dwbes_f.wbes) + return; - if (tx->phys != phys_complete) { - /* - * a completed entry, but not the last, so clean - * up if the client is done with the descriptor - */ - if (async_tx_test_ack(tx)) - list_move_tail(&desc->node, &ioat->free_desc); - } else { - /* - * last used desc. Do not remove, so we can - * append from it. - */ - - /* if nothing else is pending, cancel the - * completion timeout - */ - if (n == &ioat->used_desc) { - dev_dbg(to_dev(chan), - "%s cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - } + /* need to set a chanerr var for checking to clear later */ - /* TODO check status bits? */ - break; - } - } + if (pq->dwbes_f.p_val_err) + *desc->result |= SUM_CHECK_P_RESULT; + + if (pq->dwbes_f.q_val_err) + *desc->result |= SUM_CHECK_Q_RESULT; - chan->last_completion = phys_complete; + return; + } + default: + return; + } } /** - * ioat1_cleanup - cleanup up finished descriptors - * @chan: ioat channel to be cleaned up - * - * To prevent lock contention we defer cleanup when the locks are - * contended with a terminal timeout that forces cleanup and catches - * completion notification errors. + * __cleanup - reclaim used descriptors + * @ioat: channel (ring) to clean */ -static void ioat1_cleanup(struct ioat_dma_chan *ioat) +static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete) { - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - prefetch(chan->completion); - - if (!spin_trylock_bh(&chan->cleanup_lock)) - return; + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + bool seen_current = false; + int idx = ioat_chan->tail, i; + u16 active; - if (!ioat_cleanup_preamble(chan, &phys_complete)) { - spin_unlock_bh(&chan->cleanup_lock); - return; - } + dev_dbg(to_dev(ioat_chan), "%s: head: %#x tail: %#x issued: %#x\n", + __func__, ioat_chan->head, ioat_chan->tail, ioat_chan->issued); - if (!spin_trylock_bh(&ioat->desc_lock)) { - spin_unlock_bh(&chan->cleanup_lock); + /* + * At restart of the channel, the completion address and the + * channel status will be 0 due to starting a new chain. Since + * it's new chain and the first descriptor "fails", there is + * nothing to clean up. We do not want to reap the entire submitted + * chain due to this 0 address value and then BUG. 
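[Editor's aside, not part of the patch: the cleanup loop below and the check-space/reshape paths earlier in this hunk all lean on the circ_buf-style occupancy arithmetic that the reworked struct ioatdma_chan (head, issued, tail, alloc_order) and the ioat_ring_active()/ioat_ring_pending()/ioat_ring_space() helpers in the dma.h hunk further down provide. A minimal standalone sketch of that arithmetic follows; the numeric values are made up for illustration and the CIRC_CNT macro is reproduced from linux/circ_buf.h semantics rather than included from the kernel headers.]

#include <stdio.h>

/* circ_buf-style occupancy count, same shape as the ring helpers in this diff */
#define CIRC_CNT(head, tail, size)	(((head) - (tail)) & ((size) - 1))

int main(void)
{
	/* hypothetical snapshot of one channel's bookkeeping fields */
	unsigned short head = 37;	/* next slot to allocate */
	unsigned short issued = 35;	/* last slot handed to hardware */
	unsigned short tail = 30;	/* next slot to clean up */
	unsigned short alloc_order = 4;	/* ring holds 1 << 4 = 16 descriptors */
	unsigned int size = 1u << alloc_order;

	unsigned int active  = CIRC_CNT(head, tail, size);	/* in flight: 7 */
	unsigned int pending = CIRC_CNT(head, issued, size);	/* not yet issued: 2 */
	unsigned int space   = size - active;			/* free slots: 9 */

	printf("active=%u pending=%u space=%u\n", active, pending, space);
	return 0;
}

[The cleanup walk resumes from tail for at most 'active' entries, which is why a zeroed completion address, as the comment above explains, must bail out before touching the ring.]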
+ */ + if (!phys_complete) return; - } - __cleanup(ioat, phys_complete); + active = ioat_ring_active(ioat_chan); + for (i = 0; i < active && !seen_current; i++) { + struct dma_async_tx_descriptor *tx; - spin_unlock_bh(&ioat->desc_lock); - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat1_timer_event(unsigned long data) -{ - struct ioat_dma_chan *ioat = to_ioat_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; + smp_read_barrier_depends(); + prefetch(ioat_get_ring_ent(ioat_chan, idx + i + 1)); + desc = ioat_get_ring_ent(ioat_chan, idx + i); + dump_desc_dbg(ioat_chan, desc); - dev_dbg(to_dev(chan), "%s: state: %lx\n", __func__, chan->state); + /* set err stat if we are using dwbes */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + desc_get_errstat(ioat_chan, desc); - spin_lock_bh(&chan->cleanup_lock); - if (test_and_clear_bit(IOAT_RESET_PENDING, &chan->state)) { - struct ioat_desc_sw *desc; - - spin_lock_bh(&ioat->desc_lock); + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; + } + } - /* restart active descriptors */ - desc = to_ioat_desc(ioat->used_desc.prev); - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); + if (tx->phys == phys_complete) + seen_current = true; - ioat->pending = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - spin_unlock_bh(&ioat->desc_lock); - } else if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { - dma_addr_t phys_complete; + /* skip extended descriptors */ + if (desc_has_ext(desc)) { + BUG_ON(i + 1 >= active); + i++; + } - spin_lock_bh(&ioat->desc_lock); - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) - ioat1_reset_channel(ioat); - else { - u64 status = ioat_chansts(chan); - - /* manually update the last completion address */ - if (ioat_chansts_to_addr(status) != 0) - *chan->completion = status; - - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); + /* cleanup super extended descriptors */ + if (desc->sed) { + ioat_free_sed(ioat_dma, desc->sed); + desc->sed = NULL; } - spin_unlock_bh(&ioat->desc_lock); } - spin_unlock_bh(&chan->cleanup_lock); -} - -enum dma_status -ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat_chan_common *chan = to_chan_common(c); - struct ioatdma_device *device = chan->device; - enum dma_status ret; - ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; + /* finish all descriptor reads before incrementing tail */ + smp_mb(); + ioat_chan->tail = idx + i; + /* no active descs have written a completion? 
*/ + BUG_ON(active && !seen_current); + ioat_chan->last_completion = phys_complete; - device->cleanup_fn((unsigned long) c); + if (active - i == 0) { + dev_dbg(to_dev(ioat_chan), "%s: cancel completion timeout\n", + __func__); + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + } - return dma_cookie_status(c, cookie, txstate); + /* 5 microsecond delay per pending descriptor */ + writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), + ioat_chan->ioat_dma->reg_base + IOAT_INTRDELAY_OFFSET); } -static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat) +static void ioat_cleanup(struct ioatdma_chan *ioat_chan) { - struct ioat_chan_common *chan = &ioat->base; - struct ioat_desc_sw *desc; - struct ioat_dma_descriptor *hw; + u64 phys_complete; - spin_lock_bh(&ioat->desc_lock); + spin_lock_bh(&ioat_chan->cleanup_lock); - desc = ioat1_dma_get_next_descriptor(ioat); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - if (!desc) { - dev_err(to_dev(chan), - "Unable to start null desc - get next desc failed\n"); - spin_unlock_bh(&ioat->desc_lock); - return; - } + if (is_ioat_halted(*ioat_chan->completion)) { + u32 chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - hw->next = 0; - list_add_tail(&desc->node, &ioat->used_desc); - dump_desc_dbg(ioat, desc); + if (chanerr & IOAT_CHANERR_HANDLE_MASK) { + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + ioat_eh(ioat_chan); + } + } - ioat_set_chainaddr(ioat, desc->txd.phys); - ioat_start(chan); - spin_unlock_bh(&ioat->desc_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); } -/* - * Perform a IOAT transaction to verify the HW works. - */ -#define IOAT_TEST_SIZE 2000 +void ioat_cleanup_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + + ioat_cleanup(ioat_chan); + if (!test_bit(IOAT_RUN, &ioat_chan->state)) + return; + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); +} -static void ioat_dma_test_callback(void *dma_async_param) +static void ioat_restart_channel(struct ioatdma_chan *ioat_chan) { - struct completion *cmp = dma_async_param; + u64 phys_complete; + + ioat_quiesce(ioat_chan, 0); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - complete(cmp); + __ioat_restart_chan(ioat_chan); } -/** - * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. 
- * @device: device to be tested - */ -int ioat_dma_self_test(struct ioatdma_device *device) +static void ioat_eh(struct ioatdma_chan *ioat_chan) { - int i; - u8 *src; - u8 *dest; - struct dma_device *dma = &device->common; - struct device *dev = &device->pdev->dev; - struct dma_chan *dma_chan; + struct pci_dev *pdev = to_pdev(ioat_chan); + struct ioat_dma_descriptor *hw; struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - int err = 0; - struct completion cmp; - unsigned long tmo; - unsigned long flags; - - src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!src) - return -ENOMEM; - dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); - if (!dest) { - kfree(src); - return -ENOMEM; - } + u64 phys_complete; + struct ioat_ring_ent *desc; + u32 err_handled = 0; + u32 chanerr_int; + u32 chanerr; - /* Fill in src buffer */ - for (i = 0; i < IOAT_TEST_SIZE; i++) - src[i] = (u8)i; - - /* Start copy, using first DMA channel */ - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - dev_err(dev, "selftest cannot allocate chan resource\n"); - err = -ENODEV; - goto out; - } + /* cleanup so tail points to descriptor that caused the error */ + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); - dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_src)) { - dev_err(dev, "mapping src buffer failed\n"); - goto free_resources; - } - dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dma_dest)) { - dev_err(dev, "mapping dest buffer failed\n"); - goto unmap_src; - } - flags = DMA_PREP_INTERRUPT; - tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, - IOAT_TEST_SIZE, flags); - if (!tx) { - dev_err(dev, "Self-test prep failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test setup failed, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - dma->device_issue_pending(dma_chan); + dev_dbg(to_dev(ioat_chan), "%s: error = %x:%x\n", + __func__, chanerr, chanerr_int); - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail); + hw = desc->hw; + dump_desc_dbg(ioat_chan, desc); - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) - != DMA_COMPLETE) { - dev_err(dev, "Self-test copy timed out, disabling\n"); - err = -ENODEV; - goto unmap_dma; - } - if (memcmp(src, dest, IOAT_TEST_SIZE)) { - dev_err(dev, "Self-test copy failed compare, disabling\n"); - err = -ENODEV; - goto free_resources; + switch (hw->ctl_f.op) { + case IOAT_OP_XOR_VAL: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + break; + case IOAT_OP_PQ_VAL: + case IOAT_OP_PQ_VAL_16S: + if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { + *desc->result |= SUM_CHECK_P_RESULT; + err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; + } + if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { + *desc->result |= SUM_CHECK_Q_RESULT; + err_handled |= IOAT_CHANERR_XOR_Q_ERR; + } + break; } -unmap_dma: - 
dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); -unmap_src: - dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - kfree(src); - kfree(dest); - return err; -} - -static char ioat_interrupt_style[32] = "msix"; -module_param_string(ioat_interrupt_style, ioat_interrupt_style, - sizeof(ioat_interrupt_style), 0644); -MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), msi, intx"); - -/** - * ioat_dma_setup_interrupts - setup interrupt handler - * @device: ioat device - */ -int ioat_dma_setup_interrupts(struct ioatdma_device *device) -{ - struct ioat_chan_common *chan; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - struct msix_entry *msix; - int i, j, msixcnt; - int err = -EINVAL; - u8 intrctrl = 0; - - if (!strcmp(ioat_interrupt_style, "msix")) - goto msix; - if (!strcmp(ioat_interrupt_style, "msi")) - goto msi; - if (!strcmp(ioat_interrupt_style, "intx")) - goto intx; - dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); - goto err_no_irq; - -msix: - /* The number of MSI-X vectors should equal the number of channels */ - msixcnt = device->common.chancnt; - for (i = 0; i < msixcnt; i++) - device->msix_entries[i].entry = i; - - err = pci_enable_msix_exact(pdev, device->msix_entries, msixcnt); - if (err) - goto msi; - - for (i = 0; i < msixcnt; i++) { - msix = &device->msix_entries[i]; - chan = ioat_chan_by_index(device, i); - err = devm_request_irq(dev, msix->vector, - ioat_dma_do_interrupt_msix, 0, - "ioat-msix", chan); - if (err) { - for (j = 0; j < i; j++) { - msix = &device->msix_entries[j]; - chan = ioat_chan_by_index(device, j); - devm_free_irq(dev, msix->vector, chan); + /* fault on unhandled error or spurious halt */ + if (chanerr ^ err_handled || chanerr == 0) { + dev_err(to_dev(ioat_chan), "%s: fatal error (%x:%x)\n", + __func__, chanerr, err_handled); + BUG(); + } else { /* cleanup the faulty descriptor */ + tx = &desc->txd; + if (tx->cookie) { + dma_cookie_complete(tx); + dma_descriptor_unmap(tx); + if (tx->callback) { + tx->callback(tx->callback_param); + tx->callback = NULL; } - goto msi; } } - intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; - device->irq_mode = IOAT_MSIX; - goto done; -msi: - err = pci_enable_msi(pdev); - if (err) - goto intx; + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, - "ioat-msi", device); - if (err) { - pci_disable_msi(pdev); - goto intx; - } - device->irq_mode = IOAT_MSI; - goto done; - -intx: - err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, - IRQF_SHARED, "ioat-intx", device); - if (err) - goto err_no_irq; - - device->irq_mode = IOAT_INTX; -done: - if (device->intr_quirk) - device->intr_quirk(device); - intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; - writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); - return 0; - -err_no_irq: - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); - device->irq_mode = IOAT_NOIRQ; - dev_err(dev, "no usable interrupts\n"); - return err; -} -EXPORT_SYMBOL(ioat_dma_setup_interrupts); + /* mark faulting descriptor as complete */ + *ioat_chan->completion = desc->txd.phys; -static void ioat_disable_interrupts(struct ioatdma_device *device) -{ - /* Disable all interrupt generation */ - writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + 
spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); } -int ioat_probe(struct ioatdma_device *device) +static void check_active(struct ioatdma_chan *ioat_chan) { - int err = -ENODEV; - struct dma_device *dma = &device->common; - struct pci_dev *pdev = device->pdev; - struct device *dev = &pdev->dev; - - /* DMA coherent memory pool for DMA descriptor allocations */ - device->dma_pool = pci_pool_create("dma_desc_pool", pdev, - sizeof(struct ioat_dma_descriptor), - 64, 0); - if (!device->dma_pool) { - err = -ENOMEM; - goto err_dma_pool; - } - - device->completion_pool = pci_pool_create("completion_pool", pdev, - sizeof(u64), SMP_CACHE_BYTES, - SMP_CACHE_BYTES); - - if (!device->completion_pool) { - err = -ENOMEM; - goto err_completion_pool; + if (ioat_ring_active(ioat_chan)) { + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + return; } - device->enumerate_channels(device); - - dma_cap_set(DMA_MEMCPY, dma->cap_mask); - dma->dev = &pdev->dev; + if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &ioat_chan->state)) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); + else if (ioat_chan->alloc_order > ioat_get_alloc_order()) { + /* if the ring is idle, empty, and oversized try to step + * down the size + */ + reshape_ring(ioat_chan, ioat_chan->alloc_order - 1); - if (!dma->chancnt) { - dev_err(dev, "channel enumeration error\n"); - goto err_setup_interrupts; + /* keep shrinking until we get back to our minimum + * default size + */ + if (ioat_chan->alloc_order > ioat_get_alloc_order()) + mod_timer(&ioat_chan->timer, jiffies + IDLE_TIMEOUT); } - err = ioat_dma_setup_interrupts(device); - if (err) - goto err_setup_interrupts; +} - err = device->self_test(device); - if (err) - goto err_self_test; +void ioat_timer_event(unsigned long data) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan((void *)data); + dma_addr_t phys_complete; + u64 status; - return 0; + status = ioat_chansts(ioat_chan); -err_self_test: - ioat_disable_interrupts(device); -err_setup_interrupts: - pci_pool_destroy(device->completion_pool); -err_completion_pool: - pci_pool_destroy(device->dma_pool); -err_dma_pool: - return err; -} + /* when halted due to errors check for channel + * programming errors before advancing the completion state + */ + if (is_ioat_halted(status)) { + u32 chanerr; -int ioat_register(struct ioatdma_device *device) -{ - int err = dma_async_device_register(&device->common); + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + dev_err(to_dev(ioat_chan), "%s: Channel halted (%x)\n", + __func__, chanerr); + if (test_bit(IOAT_RUN, &ioat_chan->state)) + BUG_ON(is_ioat_bug(chanerr)); + else /* we never got off the ground */ + return; + } - if (err) { - ioat_disable_interrupts(device); - pci_pool_destroy(device->completion_pool); - pci_pool_destroy(device->dma_pool); + /* if we haven't made progress and we have already + * acknowledged a pending completion once, then be more + * forceful with a restart + */ + spin_lock_bh(&ioat_chan->cleanup_lock); + if (ioat_cleanup_preamble(ioat_chan, &phys_complete)) + __cleanup(ioat_chan, phys_complete); + else if (test_bit(IOAT_COMPLETION_ACK, &ioat_chan->state)) { + spin_lock_bh(&ioat_chan->prep_lock); + ioat_restart_channel(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } else { + set_bit(IOAT_COMPLETION_ACK, &ioat_chan->state); + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); } - return err; -} -/* ioat1_intr_quirk - fix 
up dma ctrl register to enable / disable msi */ -static void ioat1_intr_quirk(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - u32 dmactrl; - - pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl); - if (pdev->msi_enabled) - dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; - else - dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN; - pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl); + if (ioat_ring_active(ioat_chan)) + mod_timer(&ioat_chan->timer, jiffies + COMPLETION_TIMEOUT); + else { + spin_lock_bh(&ioat_chan->prep_lock); + check_active(ioat_chan); + spin_unlock_bh(&ioat_chan->prep_lock); + } + spin_unlock_bh(&ioat_chan->cleanup_lock); } -static ssize_t ring_size_show(struct dma_chan *c, char *page) +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate) { - struct ioat_dma_chan *ioat = to_ioat_chan(c); + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + enum dma_status ret; - return sprintf(page, "%d\n", ioat->desccount); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + ret = dma_cookie_status(c, cookie, txstate); + if (ret == DMA_COMPLETE) + return ret; -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat_dma_chan *ioat = to_ioat_chan(c); + ioat_cleanup(ioat_chan); - return sprintf(page, "%d\n", ioat->active); + return dma_cookie_status(c, cookie, txstate); } -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); -static ssize_t cap_show(struct dma_chan *c, char *page) +static int ioat_irq_reinit(struct ioatdma_device *ioat_dma) { - struct dma_device *dma = c->device; + struct pci_dev *pdev = ioat_dma->pdev; + int irq = pdev->irq, i; - return sprintf(page, "copy%s%s%s%s%s\n", - dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", - dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", - dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", - dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", - dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? 
" intr" : ""); + if (!is_bwd_ioat(pdev)) + return 0; -} -struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); - -static ssize_t version_show(struct dma_chan *c, char *page) -{ - struct dma_device *dma = c->device; - struct ioatdma_device *device = to_ioatdma_device(dma); + switch (ioat_dma->irq_mode) { + case IOAT_MSIX: + for (i = 0; i < ioat_dma->dma_dev.chancnt; i++) { + struct msix_entry *msix = &ioat_dma->msix_entries[i]; + struct ioatdma_chan *ioat_chan; - return sprintf(page, "%d.%d\n", - device->version >> 4, device->version & 0xf); -} -struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); - -static struct attribute *ioat1_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -static ssize_t -ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) -{ - struct ioat_sysfs_entry *entry; - struct ioat_chan_common *chan; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + devm_free_irq(&pdev->dev, msix->vector, ioat_chan); + } - entry = container_of(attr, struct ioat_sysfs_entry, attr); - chan = container_of(kobj, struct ioat_chan_common, kobj); + pci_disable_msix(pdev); + break; + case IOAT_MSI: + pci_disable_msi(pdev); + /* fall through */ + case IOAT_INTX: + devm_free_irq(&pdev->dev, irq, ioat_dma); + break; + default: + return 0; + } + ioat_dma->irq_mode = IOAT_NOIRQ; - if (!entry->show) - return -EIO; - return entry->show(&chan->common, page); + return ioat_dma_setup_interrupts(ioat_dma); } -const struct sysfs_ops ioat_sysfs_ops = { - .show = ioat_attr_show, -}; - -static struct kobj_type ioat1_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat1_attrs, -}; - -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type) +int ioat_reset_hw(struct ioatdma_chan *ioat_chan) { - struct dma_device *dma = &device->common; - struct dma_chan *c; + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct pci_dev *pdev = ioat_dma->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat_quiesce(ioat_chan, msecs_to_jiffies(100)); - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - struct kobject *parent = &c->dev->device.kobj; - int err; + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); - err = kobject_init_and_add(&chan->kobj, type, parent, "quickdata"); + if (ioat_dma->version < IOAT_VER_3_3) { + /* clear any pending errors */ + err = pci_read_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); if (err) { - dev_warn(to_dev(chan), - "sysfs init error (%d), continuing...\n", err); - kobject_put(&chan->kobj); - set_bit(IOAT_KOBJ_INIT_FAIL, &chan->state); + dev_err(&pdev->dev, + "channel error register unreachable\n"); + return err; } - } -} + pci_write_config_dword(pdev, + IOAT_PCI_CHANERR_INT_OFFSET, chanerr); -void ioat_kobject_del(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - struct dma_chan *c; - - list_for_each_entry(c, &dma->channels, device_node) { - struct ioat_chan_common *chan = to_chan_common(c); - - if (!test_bit(IOAT_KOBJ_INIT_FAIL, &chan->state)) { - kobject_del(&chan->kobj); - kobject_put(&chan->kobj); + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, 
IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + pci_write_config_dword(pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + 0x10); } } -} -int ioat1_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - int err; + err = ioat_reset_sync(ioat_chan, msecs_to_jiffies(200)); + if (!err) + err = ioat_irq_reinit(ioat_dma); - device->intr_quirk = ioat1_intr_quirk; - device->enumerate_channels = ioat1_enumerate_channels; - device->self_test = ioat_dma_self_test; - device->timer_fn = ioat1_timer_event; - device->cleanup_fn = ioat1_cleanup_event; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy; - dma->device_issue_pending = ioat1_dma_memcpy_issue_pending; - dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources; - dma->device_free_chan_resources = ioat1_dma_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - err = ioat_register(device); if (err) - return err; - ioat_kobject_add(device, &ioat1_ktype); - - if (dca) - device->dca = ioat_dca_init(pdev, device->reg_base); + dev_err(&pdev->dev, "Failed to reset: %d\n", err); return err; } - -void ioat_dma_remove(struct ioatdma_device *device) -{ - struct dma_device *dma = &device->common; - - ioat_disable_interrupts(device); - - ioat_kobject_del(device); - - dma_async_device_unregister(dma); - - pci_pool_destroy(device->dma_pool); - pci_pool_destroy(device->completion_pool); - - INIT_LIST_HEAD(&dma->channels); -} diff --git a/kernel/drivers/dma/ioat/dma.h b/kernel/drivers/dma/ioat/dma.h index 30f5c7eed..8f4e607d5 100644 --- a/kernel/drivers/dma/ioat/dma.h +++ b/kernel/drivers/dma/ioat/dma.h @@ -18,26 +18,32 @@ #define IOATDMA_H #include <linux/dmaengine.h> -#include "hw.h" -#include "registers.h" #include <linux/init.h> #include <linux/dmapool.h> #include <linux/cache.h> #include <linux/pci_ids.h> -#include <net/tcp.h> +#include <linux/circ_buf.h> +#include <linux/interrupt.h> +#include "registers.h" +#include "hw.h" #define IOAT_DMA_VERSION "4.00" -#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 -#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) -#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) -#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, txd) -#define to_dev(ioat_chan) (&(ioat_chan)->device->pdev->dev) -#define to_pdev(ioat_chan) ((ioat_chan)->device->pdev) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, dma_dev) +#define to_dev(ioat_chan) (&(ioat_chan)->ioat_dma->pdev->dev) +#define to_pdev(ioat_chan) ((ioat_chan)->ioat_dma->pdev) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->ioat_dma->reg_base) / 0x80) -#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) +/* ioat hardware assumes at least two sources for raid operations */ +#define src_cnt_to_sw(x) ((x) + 2) +#define src_cnt_to_hw(x) ((x) - 2) +#define ndest_to_sw(x) ((x) + 1) +#define ndest_to_hw(x) ((x) - 1) +#define src16_cnt_to_sw(x) ((x) + 9) +#define src16_cnt_to_hw(x) ((x) - 9) /* * workaround for IOAT ver.3.0 null descriptor issue @@ -57,19 +63,15 @@ enum ioat_irq_mode { * @pdev: PCI-Express device * @reg_base: MMIO register space base address * @dma_pool: for allocating DMA descriptors - * @common: embedded struct dma_device + * @completion_pool: DMA buffers for completion ops + * @sed_hw_pool: DMA super descriptor pools + * @dma_dev: 
embedded struct dma_device * @version: version of ioatdma device * @msix_entries: irq handlers * @idx: per channel data * @dca: direct cache access context - * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) - * @enumerate_channels: hw version specific channel enumeration - * @reset_hw: hw version specific channel (re)initialization - * @cleanup_fn: select between the v2 and v3 cleanup routines - * @timer_fn: select between the v2 and v3 timer watchdog routines - * @self_test: hardware version specific self test for each supported op type - * - * Note: the v3 cleanup routine supports raid operations + * @irq_mode: interrupt mode (INTX, MSI, MSIX) + * @cap: read DMA capabilities register */ struct ioatdma_device { struct pci_dev *pdev; @@ -78,28 +80,23 @@ struct ioatdma_device { struct pci_pool *completion_pool; #define MAX_SED_POOLS 5 struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; - struct dma_device common; + struct dma_device dma_dev; u8 version; - struct msix_entry msix_entries[4]; - struct ioat_chan_common *idx[4]; +#define IOAT_MAX_CHANS 4 + struct msix_entry msix_entries[IOAT_MAX_CHANS]; + struct ioatdma_chan *idx[IOAT_MAX_CHANS]; struct dca_provider *dca; enum ioat_irq_mode irq_mode; u32 cap; - void (*intr_quirk)(struct ioatdma_device *device); - int (*enumerate_channels)(struct ioatdma_device *device); - int (*reset_hw)(struct ioat_chan_common *chan); - void (*cleanup_fn)(unsigned long data); - void (*timer_fn)(unsigned long data); - int (*self_test)(struct ioatdma_device *device); }; -struct ioat_chan_common { - struct dma_chan common; +struct ioatdma_chan { + struct dma_chan dma_chan; void __iomem *reg_base; dma_addr_t last_completion; spinlock_t cleanup_lock; unsigned long state; - #define IOAT_COMPLETION_PENDING 0 + #define IOAT_CHAN_DOWN 0 #define IOAT_COMPLETION_ACK 1 #define IOAT_RESET_PENDING 2 #define IOAT_KOBJ_INIT_FAIL 3 @@ -110,11 +107,32 @@ struct ioat_chan_common { #define COMPLETION_TIMEOUT msecs_to_jiffies(100) #define IDLE_TIMEOUT msecs_to_jiffies(2000) #define RESET_DELAY msecs_to_jiffies(100) - struct ioatdma_device *device; + struct ioatdma_device *ioat_dma; dma_addr_t completion_dma; u64 *completion; struct tasklet_struct cleanup_task; struct kobject kobj; + +/* ioat v2 / v3 channel attributes + * @xfercap_log; log2 of channel max transfer length (for fast division) + * @head: allocated index + * @issued: hardware notification point + * @tail: cleanup index + * @dmacount: identical to 'head' except for occasionally resetting to zero + * @alloc_order: log2 of the number of allocated descriptors + * @produce: number of descriptors to produce at submit time + * @ring: software ring buffer implementation of hardware ring + * @prep_lock: serializes descriptor preparation (producers) + */ + size_t xfercap_log; + u16 head; + u16 issued; + u16 tail; + u16 dmacount; + u16 alloc_order; + u16 produce; + struct ioat_ring_ent **ring; + spinlock_t prep_lock; }; struct ioat_sysfs_entry { @@ -123,28 +141,11 @@ struct ioat_sysfs_entry { }; /** - * struct ioat_dma_chan - internal representation of a DMA channel - */ -struct ioat_dma_chan { - struct ioat_chan_common base; - - size_t xfercap; /* XFERCAP register value expanded out */ - - spinlock_t desc_lock; - struct list_head free_desc; - struct list_head used_desc; - - int pending; - u16 desccount; - u16 active; -}; - -/** * struct ioat_sed_ent - wrapper around super extended hardware descriptor * @hw: hardware SED - * @sed_dma: dma address for the SED - * @list: list member + * @dma: dma address for the SED * @parent: 
point to the dma descriptor that's the parent + * @hw_pool: descriptor pool index */ struct ioat_sed_ent { struct ioat_sed_raw_descriptor *hw; @@ -153,39 +154,57 @@ struct ioat_sed_ent { unsigned int hw_pool; }; -static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) -{ - return container_of(c, struct ioat_chan_common, common); -} - -static inline struct ioat_dma_chan *to_ioat_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat_dma_chan, base); -} - -/* wrapper around hardware descriptor format + additional software fields */ - /** - * struct ioat_desc_sw - wrapper around hardware descriptor + * struct ioat_ring_ent - wrapper around hardware descriptor * @hw: hardware DMA descriptor (for memcpy) - * @node: this descriptor will either be on the free list, - * or attached to a transaction list (tx_list) + * @xor: hardware xor descriptor + * @xor_ex: hardware xor extension descriptor + * @pq: hardware pq descriptor + * @pq_ex: hardware pq extension descriptor + * @pqu: hardware pq update descriptor + * @raw: hardware raw (un-typed) descriptor * @txd: the generic software descriptor for all engines + * @len: total transaction length for unmap + * @result: asynchronous result of validate operations * @id: identifier for debug + * @sed: pointer to super extended descriptor sw desc */ -struct ioat_desc_sw { - struct ioat_dma_descriptor *hw; - struct list_head node; + +struct ioat_ring_ent { + union { + struct ioat_dma_descriptor *hw; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex; + struct ioat_pq_update_descriptor *pqu; + struct ioat_raw_descriptor *raw; + }; size_t len; - struct list_head tx_list; struct dma_async_tx_descriptor txd; + enum sum_check_flags *result; #ifdef DEBUG int id; #endif + struct ioat_sed_ent *sed; }; +extern const struct sysfs_ops ioat_sysfs_ops; +extern struct ioat_sysfs_entry ioat_version_attr; +extern struct ioat_sysfs_entry ioat_cap_attr; +extern int ioat_pending_level; +extern int ioat_ring_alloc_order; +extern struct kobj_type ioat_ktype; +extern struct kmem_cache *ioat_cache; +extern int ioat_ring_max_alloc_order; +extern struct kmem_cache *ioat_sed_cache; + +static inline struct ioatdma_chan *to_ioat_chan(struct dma_chan *c) +{ + return container_of(c, struct ioatdma_chan, dma_chan); +} + +/* wrapper around hardware descriptor format + additional software fields */ #ifdef DEBUG #define set_desc_id(desc, i) ((desc)->id = (i)) #define desc_id(desc) ((desc)->id) @@ -195,10 +214,10 @@ struct ioat_desc_sw { #endif static inline void -__dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, +__dump_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_dma_descriptor *hw, struct dma_async_tx_descriptor *tx, int id) { - struct device *dev = to_dev(chan); + struct device *dev = to_dev(ioat_chan); dev_dbg(dev, "desc[%d]: (%#llx->%#llx) cookie: %d flags: %#x" " ctl: %#10.8x (op: %#x int_en: %d compl: %d)\n", id, @@ -208,25 +227,25 @@ __dump_desc_dbg(struct ioat_chan_common *chan, struct ioat_dma_descriptor *hw, } #define dump_desc_dbg(c, d) \ - ({ if (d) __dump_desc_dbg(&c->base, d->hw, &d->txd, desc_id(d)); 0; }) + ({ if (d) __dump_desc_dbg(c, d->hw, &d->txd, desc_id(d)); 0; }) -static inline struct ioat_chan_common * -ioat_chan_by_index(struct ioatdma_device *device, int index) +static inline struct ioatdma_chan * +ioat_chan_by_index(struct ioatdma_device 
*ioat_dma, int index) { - return device->idx[index]; + return ioat_dma->idx[index]; } -static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) +static inline u64 ioat_chansts_32(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; u32 status_lo; /* We need to read the low address first as this causes the * chipset to latch the upper bits for the subsequent read */ - status_lo = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); - status = readl(chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); + status_lo = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_LOW(ver)); + status = readl(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET_HIGH(ver)); status <<= 32; status |= status_lo; @@ -235,16 +254,16 @@ static inline u64 ioat_chansts_32(struct ioat_chan_common *chan) #if BITS_PER_LONG == 64 -static inline u64 ioat_chansts(struct ioat_chan_common *chan) +static inline u64 ioat_chansts(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u64 status; /* With IOAT v3.3 the status register is 64bit. */ if (ver >= IOAT_VER_3_3) - status = readq(chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); + status = readq(ioat_chan->reg_base + IOAT_CHANSTS_OFFSET(ver)); else - status = ioat_chansts_32(chan); + status = ioat_chansts_32(ioat_chan); return status; } @@ -253,56 +272,41 @@ static inline u64 ioat_chansts(struct ioat_chan_common *chan) #define ioat_chansts ioat_chansts_32 #endif -static inline void ioat_start(struct ioat_chan_common *chan) -{ - u8 ver = chan->device->version; - - writeb(IOAT_CHANCMD_START, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); -} - static inline u64 ioat_chansts_to_addr(u64 status) { return status & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; } -static inline u32 ioat_chanerr(struct ioat_chan_common *chan) +static inline u32 ioat_chanerr(struct ioatdma_chan *ioat_chan) { - return readl(chan->reg_base + IOAT_CHANERR_OFFSET); + return readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); } -static inline void ioat_suspend(struct ioat_chan_common *chan) +static inline void ioat_suspend(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_SUSPEND, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline void ioat_reset(struct ioat_chan_common *chan) +static inline void ioat_reset(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; - writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } -static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +static inline bool ioat_reset_pending(struct ioatdma_chan *ioat_chan) { - u8 ver = chan->device->version; + u8 ver = ioat_chan->ioat_dma->version; u8 cmd; - cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + cmd = readb(ioat_chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; } -static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); -} - static inline bool is_ioat_active(unsigned long status) { return ((status & 
IOAT_CHANSTS_STATUS) == IOAT_CHANSTS_ACTIVE); @@ -329,24 +333,111 @@ static inline bool is_ioat_bug(unsigned long err) return !!err; } -int ioat_probe(struct ioatdma_device *device); -int ioat_register(struct ioatdma_device *device); -int ioat1_dma_probe(struct ioatdma_device *dev, int dca); -int ioat_dma_self_test(struct ioatdma_device *device); -void ioat_dma_remove(struct ioatdma_device *device); +#define IOAT_MAX_ORDER 16 +#define ioat_get_alloc_order() \ + (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) +#define ioat_get_max_alloc_order() \ + (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) + +static inline u32 ioat_ring_size(struct ioatdma_chan *ioat_chan) +{ + return 1 << ioat_chan->alloc_order; +} + +/* count of descriptors in flight with the engine */ +static inline u16 ioat_ring_active(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->tail, + ioat_ring_size(ioat_chan)); +} + +/* count of descriptors pending submission to hardware */ +static inline u16 ioat_ring_pending(struct ioatdma_chan *ioat_chan) +{ + return CIRC_CNT(ioat_chan->head, ioat_chan->issued, + ioat_ring_size(ioat_chan)); +} + +static inline u32 ioat_ring_space(struct ioatdma_chan *ioat_chan) +{ + return ioat_ring_size(ioat_chan) - ioat_ring_active(ioat_chan); +} + +static inline u16 +ioat_xferlen_to_descs(struct ioatdma_chan *ioat_chan, size_t len) +{ + u16 num_descs = len >> ioat_chan->xfercap_log; + + num_descs += !!(len & ((1 << ioat_chan->xfercap_log) - 1)); + return num_descs; +} + +static inline struct ioat_ring_ent * +ioat_get_ring_ent(struct ioatdma_chan *ioat_chan, u16 idx) +{ + return ioat_chan->ring[idx & (ioat_ring_size(ioat_chan) - 1)]; +} + +static inline void +ioat_set_chainaddr(struct ioatdma_chan *ioat_chan, u64 addr) +{ + writel(addr & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(addr >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); +} + +/* IOAT Prep functions */ +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags); +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags); + +/* IOAT Operation functions */ +irqreturn_t ioat_dma_do_interrupt(int irq, void *data); +irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data); +struct ioat_ring_ent ** +ioat_alloc_ring(struct dma_chan *c, 
int order, gfp_t flags); +void ioat_start_null_desc(struct ioatdma_chan *ioat_chan); +void ioat_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan); +int ioat_reset_hw(struct ioatdma_chan *ioat_chan); +enum dma_status +ioat_tx_status(struct dma_chan *c, dma_cookie_t cookie, + struct dma_tx_state *txstate); +void ioat_cleanup_event(unsigned long data); +void ioat_timer_event(unsigned long data); +int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs); +void ioat_issue_pending(struct dma_chan *chan); +void ioat_timer_event(unsigned long data); + +/* IOAT Init functions */ +bool is_bwd_ioat(struct pci_dev *pdev); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); -dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan); -void ioat_init_channel(struct ioatdma_device *device, - struct ioat_chan_common *chan, int idx); -enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate); -bool ioat_cleanup_preamble(struct ioat_chan_common *chan, - dma_addr_t *phys_complete); -void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); -void ioat_kobject_del(struct ioatdma_device *device); -int ioat_dma_setup_interrupts(struct ioatdma_device *device); -void ioat_stop(struct ioat_chan_common *chan); -extern const struct sysfs_ops ioat_sysfs_ops; -extern struct ioat_sysfs_entry ioat_version_attr; -extern struct ioat_sysfs_entry ioat_cap_attr; +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type); +void ioat_kobject_del(struct ioatdma_device *ioat_dma); +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma); +void ioat_stop(struct ioatdma_chan *ioat_chan); #endif /* IOATDMA_H */ diff --git a/kernel/drivers/dma/ioat/dma_v2.c b/kernel/drivers/dma/ioat/dma_v2.c deleted file mode 100644 index 69c7dfcad..000000000 --- a/kernel/drivers/dma/ioat/dma_v2.c +++ /dev/null @@ -1,916 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2004 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine (versions >= 2), which - * does asynchronous data movement and checksumming operations. 
- */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dmaengine.h> -#include <linux/delay.h> -#include <linux/dma-mapping.h> -#include <linux/workqueue.h> -#include <linux/prefetch.h> -#include <linux/i7300_idle.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -#include "../dmaengine.h" - -int ioat_ring_alloc_order = 8; -module_param(ioat_ring_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_alloc_order, - "ioat2+: allocate 2^n descriptors per channel" - " (default: 8 max: 16)"); -static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; -module_param(ioat_ring_max_alloc_order, int, 0644); -MODULE_PARM_DESC(ioat_ring_max_alloc_order, - "ioat2+: upper limit for ring size (default: 16)"); - -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - ioat->dmacount += ioat2_ring_pending(ioat); - ioat->issued = ioat->head; - writew(ioat->dmacount, chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); -} - -void ioat2_issue_pending(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - if (ioat2_ring_pending(ioat)) { - spin_lock_bh(&ioat->prep_lock); - __ioat2_issue_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - } -} - -/** - * ioat2_update_pending - log pending descriptors - * @ioat: ioat2+ channel - * - * Check if the number of unsubmitted descriptors has exceeded the - * watermark. Called with prep_lock held - */ -static void ioat2_update_pending(struct ioat2_dma_chan *ioat) -{ - if (ioat2_ring_pending(ioat) > ioat_pending_level) - __ioat2_issue_pending(ioat); -} - -static void __ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_ring_space(ioat) < 1) { - dev_err(to_dev(&ioat->base), - "Unable to start null desc - ring full\n"); - return; - } - - dev_dbg(to_dev(&ioat->base), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - desc = ioat2_get_ring_ent(ioat, ioat->head); - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.compl_write = 1; - /* set size to non-zero value (channel returns error when size is 0) */ - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - async_tx_ack(&desc->txd); - ioat2_set_chainaddr(ioat, desc->txd.phys); - dump_desc_dbg(ioat, desc); - wmb(); - ioat->head += 1; - __ioat2_issue_pending(ioat); -} - -static void ioat2_start_null_desc(struct ioat2_dma_chan *ioat) -{ - spin_lock_bh(&ioat->prep_lock); - __ioat2_start_null_desc(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct dma_async_tx_descriptor *tx; - struct ioat_ring_ent *desc; - bool seen_current = false; - u16 active; - int idx = ioat->tail, i; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - tx = &desc->txd; - dump_desc_dbg(ioat, desc); - if (tx->cookie) { - 
dma_descriptor_unmap(tx); - dma_cookie_complete(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - - chan->last_completion = phys_complete; - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } -} - -/** - * ioat2_cleanup - clean finished descriptors (advance tail pointer) - * @chan: ioat channel to be cleaned up - */ -static void ioat2_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - spin_unlock_bh(&chan->cleanup_lock); -} - -void ioat2_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat2_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - /* set the tail to be re-issued */ - ioat->issued = ioat->tail; - ioat->dmacount = 0; - set_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - dev_dbg(to_dev(chan), - "%s: head: %#x tail: %#x issued: %#x count: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued, ioat->dmacount); - - if (ioat2_ring_pending(ioat)) { - struct ioat_ring_ent *desc; - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - ioat2_set_chainaddr(ioat, desc->txd.phys); - __ioat2_issue_pending(ioat); - } else - __ioat2_start_null_desc(ioat); -} - -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - u32 status; - - status = ioat_chansts(chan); - if (is_ioat_active(status) || is_ioat_idle(status)) - ioat_suspend(chan); - while (is_ioat_active(status) || is_ioat_idle(status)) { - if (tmo && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - status = ioat_chansts(chan); - cpu_relax(); - } - - return err; -} - -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) -{ - unsigned long end = jiffies + tmo; - int err = 0; - - ioat_reset(chan); - while (ioat_reset_pending(chan)) { - if (end && time_after(jiffies, end)) { - err = -ETIMEDOUT; - break; - } - cpu_relax(); - } - - return err; -} - -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the 
size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -void ioat2_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat2_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static int ioat2_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it initialized */ - u32 chanerr; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - return ioat2_reset_sync(chan, msecs_to_jiffies(200)); -} - -/** - * ioat2_enumerate_channels - find and initialize the device's channels - * @device: the device to be enumerated - */ -int ioat2_enumerate_channels(struct ioatdma_device *device) -{ - struct ioat2_dma_chan *ioat; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 xfercap_log; - int i; - - INIT_LIST_HEAD(&dma->channels); - dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); - dma->chancnt &= 0x1f; /* bits [4:0] valid */ - if (dma->chancnt > ARRAY_SIZE(device->idx)) { - dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", - dma->chancnt, ARRAY_SIZE(device->idx)); - dma->chancnt = ARRAY_SIZE(device->idx); - } - xfercap_log = readb(device->reg_base + IOAT_XFERCAP_OFFSET); - xfercap_log &= 0x1f; /* bits [4:0] valid */ - if (xfercap_log == 0) - return 0; - dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); - - /* FIXME which i/oat version is i7300? 
*/ -#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL - if (i7300_idle_platform_probe(NULL, NULL, 1) == 0) - dma->chancnt--; -#endif - for (i = 0; i < dma->chancnt; i++) { - ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL); - if (!ioat) - break; - - ioat_init_channel(device, &ioat->base, i); - ioat->xfercap_log = xfercap_log; - spin_lock_init(&ioat->prep_lock); - if (device->reset_hw(&ioat->base)) { - i = 0; - break; - } - } - dma->chancnt = i; - return i; -} - -static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) -{ - struct dma_chan *c = tx->chan; - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - dma_cookie_t cookie; - - cookie = dma_cookie_assign(tx); - dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie); - - if (!test_and_set_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - /* make descriptor updates visible before advancing ioat->head, - * this is purposefully not smp_wmb() since we are also - * publishing the descriptor updates to a dma device - */ - wmb(); - - ioat->head += ioat->produce; - - ioat2_update_pending(ioat); - spin_unlock_bh(&ioat->prep_lock); - - return cookie; -} - -static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) -{ - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - struct ioatdma_device *dma; - dma_addr_t phys; - - dma = to_ioatdma_device(chan->device); - hw = pci_pool_alloc(dma->dma_pool, flags, &phys); - if (!hw) - return NULL; - memset(hw, 0, sizeof(*hw)); - - desc = kmem_cache_zalloc(ioat2_cache, flags); - if (!desc) { - pci_pool_free(dma->dma_pool, hw, phys); - return NULL; - } - - dma_async_tx_descriptor_init(&desc->txd, chan); - desc->txd.tx_submit = ioat2_tx_submit_unlock; - desc->hw = hw; - desc->txd.phys = phys; - return desc; -} - -static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *chan) -{ - struct ioatdma_device *dma; - - dma = to_ioatdma_device(chan->device); - pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys); - kmem_cache_free(ioat2_cache, desc); -} - -static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) -{ - struct ioat_ring_ent **ring; - int descs = 1 << order; - int i; - - if (order > ioat_get_max_alloc_order()) - return NULL; - - /* allocate the array to hold the software ring */ - ring = kcalloc(descs, sizeof(*ring), flags); - if (!ring) - return NULL; - for (i = 0; i < descs; i++) { - ring[i] = ioat2_alloc_ring_ent(c, flags); - if (!ring[i]) { - while (i--) - ioat2_free_ring_ent(ring[i], c); - kfree(ring); - return NULL; - } - set_desc_id(ring[i], i); - } - - /* link descs */ - for (i = 0; i < descs-1; i++) { - struct ioat_ring_ent *next = ring[i+1]; - struct ioat_dma_descriptor *hw = ring[i]->hw; - - hw->next = next->txd.phys; - } - ring[i]->hw->next = ring[0]->txd.phys; - - return ring; -} - -void ioat2_free_chan_resources(struct dma_chan *c); - -/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring - * @chan: channel to be initialized - */ -int ioat2_alloc_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioat_ring_ent **ring; - u64 status; - int order; - int i = 0; - - /* have we already been set up? 
*/ - if (ioat->ring) - return 1 << ioat->alloc_order; - - /* Setup register to interrupt and write completion status on error */ - writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - - /* allocate a completion writeback area */ - /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ - chan->completion = pci_pool_alloc(chan->device->completion_pool, - GFP_KERNEL, &chan->completion_dma); - if (!chan->completion) - return -ENOMEM; - - memset(chan->completion, 0, sizeof(*chan->completion)); - writel(((u64) chan->completion_dma) & 0x00000000FFFFFFFF, - chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); - writel(((u64) chan->completion_dma) >> 32, - chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); - - order = ioat_get_alloc_order(); - ring = ioat2_alloc_ring(c, order, GFP_KERNEL); - if (!ring) - return -ENOMEM; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - ioat->ring = ring; - ioat->head = 0; - ioat->issued = 0; - ioat->tail = 0; - ioat->alloc_order = order; - set_bit(IOAT_RUN, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - ioat2_start_null_desc(ioat); - - /* check that we got off the ground */ - do { - udelay(1); - status = ioat_chansts(chan); - } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); - - if (is_ioat_active(status) || is_ioat_idle(status)) { - return 1 << ioat->alloc_order; - } else { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - dev_WARN(to_dev(chan), - "failed to start channel chanerr: %#x\n", chanerr); - ioat2_free_chan_resources(c); - return -EFAULT; - } -} - -bool reshape_ring(struct ioat2_dma_chan *ioat, int order) -{ - /* reshape differs from normal ring allocation in that we want - * to allocate a new software ring while only - * extending/truncating the hardware ring - */ - struct ioat_chan_common *chan = &ioat->base; - struct dma_chan *c = &chan->common; - const u32 curr_size = ioat2_ring_size(ioat); - const u16 active = ioat2_ring_active(ioat); - const u32 new_size = 1 << order; - struct ioat_ring_ent **ring; - u16 i; - - if (order > ioat_get_max_alloc_order()) - return false; - - /* double check that we have at least 1 free descriptor */ - if (active == curr_size) - return false; - - /* when shrinking, verify that we can hold the current active - * set in the new ring - */ - if (active >= new_size) - return false; - - /* allocate the array to hold the software ring */ - ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); - if (!ring) - return false; - - /* allocate/trim descriptors as needed */ - if (new_size > curr_size) { - /* copy current descriptors to the new ring */ - for (i = 0; i < curr_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* add new descriptors to the ring */ - for (i = curr_size; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); - if (!ring[new_idx]) { - while (i--) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ioat2_free_ring_ent(ring[new_idx], c); - } - kfree(ring); - return false; - } - set_desc_id(ring[new_idx], new_idx); - } - - /* hw link new descriptors */ - for (i = curr_size-1; i < new_size; i++) { - u16 new_idx = (ioat->tail+i) & (new_size-1); - struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; - struct ioat_dma_descriptor *hw = ring[new_idx]->hw; - - hw->next = 
next->txd.phys; - } - } else { - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *next; - - /* copy current descriptors to the new ring, dropping the - * removed descriptors - */ - for (i = 0; i < new_size; i++) { - u16 curr_idx = (ioat->tail+i) & (curr_size-1); - u16 new_idx = (ioat->tail+i) & (new_size-1); - - ring[new_idx] = ioat->ring[curr_idx]; - set_desc_id(ring[new_idx], new_idx); - } - - /* free deleted descriptors */ - for (i = new_size; i < curr_size; i++) { - struct ioat_ring_ent *ent; - - ent = ioat2_get_ring_ent(ioat, ioat->tail+i); - ioat2_free_ring_ent(ent, c); - } - - /* fix up hardware ring */ - hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; - next = ring[(ioat->tail+new_size) & (new_size-1)]; - hw->next = next->txd.phys; - } - - dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", - __func__, new_size); - - kfree(ioat->ring); - ioat->ring = ring; - ioat->alloc_order = order; - - return true; -} - -/** - * ioat2_check_space_lock - verify space and grab ring producer lock - * @ioat: ioat2,3 channel (ring) to operate on - * @num_descs: allocation length - */ -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs) -{ - struct ioat_chan_common *chan = &ioat->base; - bool retry; - - retry: - spin_lock_bh(&ioat->prep_lock); - /* never allow the last descriptor to be consumed, we need at - * least one free at all times to allow for on-the-fly ring - * resizing. - */ - if (likely(ioat2_ring_space(ioat) > num_descs)) { - dev_dbg(to_dev(chan), "%s: num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - ioat->produce = num_descs; - return 0; /* with ioat->prep_lock held */ - } - retry = test_and_set_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - - /* is another cpu already trying to expand the ring? */ - if (retry) - goto retry; - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - retry = reshape_ring(ioat, ioat->alloc_order + 1); - clear_bit(IOAT_RESHAPE_PENDING, &chan->state); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - /* if we were able to expand the ring retry the allocation */ - if (retry) - goto retry; - - if (printk_ratelimit()) - dev_dbg(to_dev(chan), "%s: ring full! 
num_descs: %d (%x:%x:%x)\n", - __func__, num_descs, ioat->head, ioat->tail, ioat->issued); - - /* progress reclaim in the allocation failure case we may be - * called under bh_disabled so we need to trigger the timer - * event directly - */ - if (time_is_before_jiffies(chan->timer.expires) - && timer_pending(&chan->timer)) { - struct ioatdma_device *device = chan->device; - - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - device->timer_fn((unsigned long) &chan->common); - } - - return -ENOMEM; -} - -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_dma_descriptor *hw; - struct ioat_ring_ent *desc; - dma_addr_t dst = dma_dest; - dma_addr_t src = dma_src; - size_t total_len = len; - int num_descs, idx, i; - - num_descs = ioat2_xferlen_to_descs(ioat, len); - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - size_t copy = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - hw = desc->hw; - - hw->size = copy; - hw->ctl = 0; - hw->src_addr = src; - hw->dst_addr = dst; - - len -= copy; - dst += copy; - src += copy; - dump_desc_dbg(ioat, desc); - } while (++i < num_descs); - - desc->txd.flags = flags; - desc->len = total_len; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - dump_desc_dbg(ioat, desc); - /* we leave the channel locked to ensure in order submission */ - - return &desc->txd; -} - -/** - * ioat2_free_chan_resources - release all the descriptors - * @chan: the channel to be cleaned - */ -void ioat2_free_chan_resources(struct dma_chan *c) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - const u16 total_descs = 1 << ioat->alloc_order; - int descs; - int i; - - /* Before freeing channel resources first check - * if they have been previously allocated for this channel. 
- */ - if (!ioat->ring) - return; - - ioat_stop(chan); - device->reset_hw(chan); - - spin_lock_bh(&chan->cleanup_lock); - spin_lock_bh(&ioat->prep_lock); - descs = ioat2_ring_space(ioat); - dev_dbg(to_dev(chan), "freeing %d idle descriptors\n", descs); - for (i = 0; i < descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->head + i); - ioat2_free_ring_ent(desc, c); - } - - if (descs < total_descs) - dev_err(to_dev(chan), "Freeing %d in use descriptors!\n", - total_descs - descs); - - for (i = 0; i < total_descs - descs; i++) { - desc = ioat2_get_ring_ent(ioat, ioat->tail + i); - dump_desc_dbg(ioat, desc); - ioat2_free_ring_ent(desc, c); - } - - kfree(ioat->ring); - ioat->ring = NULL; - ioat->alloc_order = 0; - pci_pool_free(device->completion_pool, chan->completion, - chan->completion_dma); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - - chan->last_completion = 0; - chan->completion_dma = 0; - ioat->dmacount = 0; -} - -static ssize_t ring_size_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - return sprintf(page, "%d\n", (1 << ioat->alloc_order) & ~1); -} -static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); - -static ssize_t ring_active_show(struct dma_chan *c, char *page) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - - /* ...taken outside the lock, no need to be precise */ - return sprintf(page, "%d\n", ioat2_ring_active(ioat)); -} -static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); - -static struct attribute *ioat2_attrs[] = { - &ring_size_attr.attr, - &ring_active_attr.attr, - &ioat_cap_attr.attr, - &ioat_version_attr.attr, - NULL, -}; - -struct kobj_type ioat2_ktype = { - .sysfs_ops = &ioat_sysfs_ops, - .default_attrs = ioat2_attrs, -}; - -int ioat2_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat2_reset_hw; - device->cleanup_fn = ioat2_cleanup_event; - device->timer_fn = ioat2_timer_event; - device->self_test = ioat_dma_self_test; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - dma->device_tx_status = ioat_dma_tx_status; - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat2_dca_init(pdev, device->reg_base); - - return err; -} diff --git a/kernel/drivers/dma/ioat/dma_v2.h b/kernel/drivers/dma/ioat/dma_v2.h deleted file mode 100644 index bf24ebe87..000000000 --- a/kernel/drivers/dma/ioat/dma_v2.h +++ /dev/null @@ -1,175 +0,0 @@ -/* - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
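As a minimal sketch of the arithmetic the removed ioat2_dma_prep_memcpy_lock() and ioat2_check_space_lock() rely on — slicing a transfer into chunks of at most 1 << xfercap_log bytes while always leaving one ring slot free for on-the-fly resizing — the following standalone C fragment (illustrative only, not driver code; the ring_space value is a made-up placeholder) models both calculations:

    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative model, not driver code: mirrors ioat2_xferlen_to_descs(). */
    static size_t xferlen_to_descs(size_t len, unsigned int xfercap_log)
    {
            size_t num_descs = len >> xfercap_log;

            num_descs += !!(len & ((1u << xfercap_log) - 1));   /* round up for the tail chunk */
            return num_descs;
    }

    int main(void)
    {
            unsigned int xfercap_log = 20;              /* 1 MiB per descriptor */
            size_t len = (5u << 20) + 123;              /* 5 MiB + 123 bytes */
            size_t chunk, used = 0, left = len;
            unsigned int ring_space = 8;                /* free slots, hypothetical */

            /* never consume the last slot: ">" rather than ">=", as in check_space_lock */
            if (ring_space > xferlen_to_descs(len, xfercap_log)) {
                    while (left) {
                            chunk = left < (1u << xfercap_log) ? left : (1u << xfercap_log);
                            printf("descriptor %zu: %zu bytes\n", used++, chunk);
                            left -= chunk;
                    }
            }
            assert(used == xferlen_to_descs(len, xfercap_log));
            return 0;
    }

With these values the copy splits into five full 1 MiB descriptors plus one 123-byte tail descriptor, exactly the count the round-up formula predicts.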
- * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in the - * file called COPYING. - */ -#ifndef IOATDMA_V2_H -#define IOATDMA_V2_H - -#include <linux/dmaengine.h> -#include <linux/circ_buf.h> -#include "dma.h" -#include "hw.h" - - -extern int ioat_pending_level; -extern int ioat_ring_alloc_order; - -/* - * workaround for IOAT ver.3.0 null descriptor issue - * (channel returns error when size is 0) - */ -#define NULL_DESC_BUFFER_SIZE 1 - -#define IOAT_MAX_ORDER 16 -#define ioat_get_alloc_order() \ - (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) -#define ioat_get_max_alloc_order() \ - (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) - -/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes - * @base: common ioat channel parameters - * @xfercap_log; log2 of channel max transfer length (for fast division) - * @head: allocated index - * @issued: hardware notification point - * @tail: cleanup index - * @dmacount: identical to 'head' except for occasionally resetting to zero - * @alloc_order: log2 of the number of allocated descriptors - * @produce: number of descriptors to produce at submit time - * @ring: software ring buffer implementation of hardware ring - * @prep_lock: serializes descriptor preparation (producers) - */ -struct ioat2_dma_chan { - struct ioat_chan_common base; - size_t xfercap_log; - u16 head; - u16 issued; - u16 tail; - u16 dmacount; - u16 alloc_order; - u16 produce; - struct ioat_ring_ent **ring; - spinlock_t prep_lock; -}; - -static inline struct ioat2_dma_chan *to_ioat2_chan(struct dma_chan *c) -{ - struct ioat_chan_common *chan = to_chan_common(c); - - return container_of(chan, struct ioat2_dma_chan, base); -} - -static inline u32 ioat2_ring_size(struct ioat2_dma_chan *ioat) -{ - return 1 << ioat->alloc_order; -} - -/* count of descriptors in flight with the engine */ -static inline u16 ioat2_ring_active(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->tail, ioat2_ring_size(ioat)); -} - -/* count of descriptors pending submission to hardware */ -static inline u16 ioat2_ring_pending(struct ioat2_dma_chan *ioat) -{ - return CIRC_CNT(ioat->head, ioat->issued, ioat2_ring_size(ioat)); -} - -static inline u32 ioat2_ring_space(struct ioat2_dma_chan *ioat) -{ - return ioat2_ring_size(ioat) - ioat2_ring_active(ioat); -} - -static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len) -{ - u16 num_descs = len >> ioat->xfercap_log; - - num_descs += !!(len & ((1 << ioat->xfercap_log) - 1)); - return num_descs; -} - -/** - * struct ioat_ring_ent - wrapper around hardware descriptor - * @hw: hardware DMA descriptor (for memcpy) - * @fill: hardware fill descriptor - * @xor: hardware xor descriptor - * @xor_ex: hardware xor extension descriptor - * @pq: hardware pq descriptor - * @pq_ex: hardware pq extension descriptor - * @pqu: hardware pq update descriptor - * @raw: hardware raw (un-typed) descriptor - * @txd: the generic software descriptor for all engines - * @len: total transaction length for unmap - * @result: asynchronous result of validate operations - * @id: identifier for debug - */ - -struct ioat_ring_ent { - union { - struct ioat_dma_descriptor *hw; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex; - struct ioat_pq_descriptor *pq; 
- struct ioat_pq_ext_descriptor *pq_ex; - struct ioat_pq_update_descriptor *pqu; - struct ioat_raw_descriptor *raw; - }; - size_t len; - struct dma_async_tx_descriptor txd; - enum sum_check_flags *result; - #ifdef DEBUG - int id; - #endif - struct ioat_sed_ent *sed; -}; - -static inline struct ioat_ring_ent * -ioat2_get_ring_ent(struct ioat2_dma_chan *ioat, u16 idx) -{ - return ioat->ring[idx & (ioat2_ring_size(ioat) - 1)]; -} - -static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) -{ - struct ioat_chan_common *chan = &ioat->base; - - writel(addr & 0x00000000FFFFFFFF, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); - writel(addr >> 32, - chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); -} - -int ioat2_dma_probe(struct ioatdma_device *dev, int dca); -int ioat3_dma_probe(struct ioatdma_device *dev, int dca); -struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); -struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); -int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); -int ioat2_enumerate_channels(struct ioatdma_device *device); -struct dma_async_tx_descriptor * -ioat2_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, - dma_addr_t dma_src, size_t len, unsigned long flags); -void ioat2_issue_pending(struct dma_chan *chan); -int ioat2_alloc_chan_resources(struct dma_chan *c); -void ioat2_free_chan_resources(struct dma_chan *c); -void __ioat2_restart_chan(struct ioat2_dma_chan *ioat); -bool reshape_ring(struct ioat2_dma_chan *ioat, int order); -void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); -void ioat2_cleanup_event(unsigned long data); -void ioat2_timer_event(unsigned long data); -int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); -int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); -extern struct kobj_type ioat2_ktype; -extern struct kmem_cache *ioat2_cache; -#endif /* IOATDMA_V2_H */ diff --git a/kernel/drivers/dma/ioat/dma_v3.c b/kernel/drivers/dma/ioat/dma_v3.c deleted file mode 100644 index 64790a45e..000000000 --- a/kernel/drivers/dma/ioat/dma_v3.c +++ /dev/null @@ -1,1717 +0,0 @@ -/* - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY - * - * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - * BSD LICENSE - * - * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
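The inline helpers in the header above reduce all ring bookkeeping to three values: a power-of-two size (1 << alloc_order), an ever-incrementing head, and an ever-incrementing tail, combined through CIRC_CNT() and index masking. A minimal standalone sketch of that arithmetic (illustrative values, not driver code; the CIRC_CNT definition is repeated from linux/circ_buf.h so the sketch stands alone):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Same definition as linux/circ_buf.h, repeated here for a self-contained example. */
    #define CIRC_CNT(head, tail, size)      (((head) - (tail)) & ((size) - 1))

    int main(void)
    {
            unsigned int alloc_order = 4;           /* ring of 1 << 4 = 16 descriptors */
            uint16_t size = 1u << alloc_order;
            uint16_t head = 65534, tail = 65530;    /* u16 counters close to wrapping */

            uint16_t active = CIRC_CNT(head, tail, size);   /* descriptors in flight */
            uint16_t space = size - active;                 /* free slots */

            assert(active == 4 && space == 12);
            /* ring indices are reduced with "& (size - 1)", so counter wrap is harmless */
            printf("head slot %u, tail slot %u\n", head & (size - 1), tail & (size - 1));
            return 0;
    }

Because the size is a power of two, the masking in ioat2_get_ring_ent() and the subtraction in CIRC_CNT() keep working even after the 16-bit head and tail counters overflow.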
- * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * Support routines for v3+ hardware - */ -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/dmaengine.h> -#include <linux/dma-mapping.h> -#include <linux/prefetch.h> -#include "../dmaengine.h" -#include "registers.h" -#include "hw.h" -#include "dma.h" -#include "dma_v2.h" - -extern struct kmem_cache *ioat3_sed_cache; - -/* ioat hardware assumes at least two sources for raid operations */ -#define src_cnt_to_sw(x) ((x) + 2) -#define src_cnt_to_hw(x) ((x) - 2) -#define ndest_to_sw(x) ((x) + 1) -#define ndest_to_hw(x) ((x) - 1) -#define src16_cnt_to_sw(x) ((x) + 9) -#define src16_cnt_to_hw(x) ((x) - 9) - -/* provide a lookup table for setting the source address in the base or - * extended descriptor of an xor or pq descriptor - */ -static const u8 xor_idx_to_desc = 0xe0; -static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; -static const u8 pq_idx_to_desc = 0xf8; -static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2 }; -static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; -static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, - 0, 1, 2, 3, 4, 5, 6 }; - -static void ioat3_eh(struct ioat2_dma_chan *ioat); - -static void xor_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - raw->field[xor_idx_to_field[idx]] = addr + offset; -} - -static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - return raw->field[pq_idx_to_field[idx]]; -} - -static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) -{ - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - return raw->field[pq16_idx_to_field[idx]]; -} - -static void pq_set_src(struct ioat_raw_descriptor *descs[2], - dma_addr_t addr, u32 offset, u8 coef, int idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; - struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; - - raw->field[pq_idx_to_field[idx]] = addr + offset; - pq->coef[idx] = coef; -} - -static bool is_jf_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case 
PCI_DEVICE_ID_INTEL_IOAT_JSF0: - case PCI_DEVICE_ID_INTEL_IOAT_JSF1: - case PCI_DEVICE_ID_INTEL_IOAT_JSF2: - case PCI_DEVICE_ID_INTEL_IOAT_JSF3: - case PCI_DEVICE_ID_INTEL_IOAT_JSF4: - case PCI_DEVICE_ID_INTEL_IOAT_JSF5: - case PCI_DEVICE_ID_INTEL_IOAT_JSF6: - case PCI_DEVICE_ID_INTEL_IOAT_JSF7: - case PCI_DEVICE_ID_INTEL_IOAT_JSF8: - case PCI_DEVICE_ID_INTEL_IOAT_JSF9: - return true; - default: - return false; - } -} - -static bool is_snb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_SNB0: - case PCI_DEVICE_ID_INTEL_IOAT_SNB1: - case PCI_DEVICE_ID_INTEL_IOAT_SNB2: - case PCI_DEVICE_ID_INTEL_IOAT_SNB3: - case PCI_DEVICE_ID_INTEL_IOAT_SNB4: - case PCI_DEVICE_ID_INTEL_IOAT_SNB5: - case PCI_DEVICE_ID_INTEL_IOAT_SNB6: - case PCI_DEVICE_ID_INTEL_IOAT_SNB7: - case PCI_DEVICE_ID_INTEL_IOAT_SNB8: - case PCI_DEVICE_ID_INTEL_IOAT_SNB9: - return true; - default: - return false; - } -} - -static bool is_ivb_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_IVB0: - case PCI_DEVICE_ID_INTEL_IOAT_IVB1: - case PCI_DEVICE_ID_INTEL_IOAT_IVB2: - case PCI_DEVICE_ID_INTEL_IOAT_IVB3: - case PCI_DEVICE_ID_INTEL_IOAT_IVB4: - case PCI_DEVICE_ID_INTEL_IOAT_IVB5: - case PCI_DEVICE_ID_INTEL_IOAT_IVB6: - case PCI_DEVICE_ID_INTEL_IOAT_IVB7: - case PCI_DEVICE_ID_INTEL_IOAT_IVB8: - case PCI_DEVICE_ID_INTEL_IOAT_IVB9: - return true; - default: - return false; - } - -} - -static bool is_hsw_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_HSW0: - case PCI_DEVICE_ID_INTEL_IOAT_HSW1: - case PCI_DEVICE_ID_INTEL_IOAT_HSW2: - case PCI_DEVICE_ID_INTEL_IOAT_HSW3: - case PCI_DEVICE_ID_INTEL_IOAT_HSW4: - case PCI_DEVICE_ID_INTEL_IOAT_HSW5: - case PCI_DEVICE_ID_INTEL_IOAT_HSW6: - case PCI_DEVICE_ID_INTEL_IOAT_HSW7: - case PCI_DEVICE_ID_INTEL_IOAT_HSW8: - case PCI_DEVICE_ID_INTEL_IOAT_HSW9: - return true; - default: - return false; - } - -} - -static bool is_xeon_cb32(struct pci_dev *pdev) -{ - return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || - is_hsw_ioat(pdev); -} - -static bool is_bwd_ioat(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD0: - case PCI_DEVICE_ID_INTEL_IOAT_BWD1: - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - /* even though not Atom, BDX-DE has same DMA silicon */ - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } -} - -static bool is_bwd_noraid(struct pci_dev *pdev) -{ - switch (pdev->device) { - case PCI_DEVICE_ID_INTEL_IOAT_BWD2: - case PCI_DEVICE_ID_INTEL_IOAT_BWD3: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: - case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: - return true; - default: - return false; - } - -} - -static void pq16_set_src(struct ioat_raw_descriptor *desc[3], - dma_addr_t addr, u32 offset, u8 coef, unsigned idx) -{ - struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; - struct ioat_pq16a_descriptor *pq16 = - (struct ioat_pq16a_descriptor *)desc[1]; - struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; - - raw->field[pq16_idx_to_field[idx]] = addr + offset; - - if (idx < 8) - pq->coef[idx] = coef; - else - pq16->coef[idx - 8] = coef; -} - -static struct ioat_sed_ent * -ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) -{ - struct 
ioat_sed_ent *sed; - gfp_t flags = __GFP_ZERO | GFP_ATOMIC; - - sed = kmem_cache_alloc(ioat3_sed_cache, flags); - if (!sed) - return NULL; - - sed->hw_pool = hw_pool; - sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], - flags, &sed->dma); - if (!sed->hw) { - kmem_cache_free(ioat3_sed_cache, sed); - return NULL; - } - - return sed; -} - -static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) -{ - if (!sed) - return; - - dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); - kmem_cache_free(ioat3_sed_cache, sed); -} - -static bool desc_has_ext(struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - if (hw->ctl_f.op == IOAT_OP_XOR || - hw->ctl_f.op == IOAT_OP_XOR_VAL) { - struct ioat_xor_descriptor *xor = desc->xor; - - if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) - return true; - } else if (hw->ctl_f.op == IOAT_OP_PQ || - hw->ctl_f.op == IOAT_OP_PQ_VAL) { - struct ioat_pq_descriptor *pq = desc->pq; - - if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) - return true; - } - - return false; -} - -static u64 ioat3_get_current_completion(struct ioat_chan_common *chan) -{ - u64 phys_complete; - u64 completion; - - completion = *chan->completion; - phys_complete = ioat_chansts_to_addr(completion); - - dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__, - (unsigned long long) phys_complete); - - return phys_complete; -} - -static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, - u64 *phys_complete) -{ - *phys_complete = ioat3_get_current_completion(chan); - if (*phys_complete == chan->last_completion) - return false; - - clear_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - - return true; -} - -static void -desc_get_errstat(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc) -{ - struct ioat_dma_descriptor *hw = desc->hw; - - switch (hw->ctl_f.op) { - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - { - struct ioat_pq_descriptor *pq = desc->pq; - - /* check if there's error written */ - if (!pq->dwbes_f.wbes) - return; - - /* need to set a chanerr var for checking to clear later */ - - if (pq->dwbes_f.p_val_err) - *desc->result |= SUM_CHECK_P_RESULT; - - if (pq->dwbes_f.q_val_err) - *desc->result |= SUM_CHECK_Q_RESULT; - - return; - } - default: - return; - } -} - -/** - * __cleanup - reclaim used descriptors - * @ioat: channel (ring) to clean - * - * The difference from the dma_v2.c __cleanup() is that this routine - * handles extended descriptors and dma-unmapping raid operations. - */ -static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) -{ - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - bool seen_current = false; - int idx = ioat->tail, i; - u16 active; - - dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", - __func__, ioat->head, ioat->tail, ioat->issued); - - /* - * At restart of the channel, the completion address and the - * channel status will be 0 due to starting a new chain. Since - * it's new chain and the first descriptor "fails", there is - * nothing to clean up. We do not want to reap the entire submitted - * chain due to this 0 address value and then BUG. 
- */ - if (!phys_complete) - return; - - active = ioat2_ring_active(ioat); - for (i = 0; i < active && !seen_current; i++) { - struct dma_async_tx_descriptor *tx; - - smp_read_barrier_depends(); - prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); - desc = ioat2_get_ring_ent(ioat, idx + i); - dump_desc_dbg(ioat, desc); - - /* set err stat if we are using dwbes */ - if (device->cap & IOAT_CAP_DWBES) - desc_get_errstat(ioat, desc); - - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - - if (tx->phys == phys_complete) - seen_current = true; - - /* skip extended descriptors */ - if (desc_has_ext(desc)) { - BUG_ON(i + 1 >= active); - i++; - } - - /* cleanup super extended descriptors */ - if (desc->sed) { - ioat3_free_sed(device, desc->sed); - desc->sed = NULL; - } - } - smp_mb(); /* finish all descriptor reads before incrementing tail */ - ioat->tail = idx + i; - BUG_ON(active && !seen_current); /* no active descs have written a completion? */ - chan->last_completion = phys_complete; - - if (active - i == 0) { - dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", - __func__); - clear_bit(IOAT_COMPLETION_PENDING, &chan->state); - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - /* 5 microsecond delay per pending descriptor */ - writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), - chan->device->reg_base + IOAT_INTRDELAY_OFFSET); -} - -static void ioat3_cleanup(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - spin_lock_bh(&chan->cleanup_lock); - - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - if (is_ioat_halted(*chan->completion)) { - u32 chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - - if (chanerr & IOAT_CHANERR_HANDLE_MASK) { - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - ioat3_eh(ioat); - } - } - - spin_unlock_bh(&chan->cleanup_lock); -} - -static void ioat3_cleanup_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - - ioat3_cleanup(ioat); - if (!test_bit(IOAT_RUN, &chan->state)) - return; - writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); -} - -static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - u64 phys_complete; - - ioat2_quiesce(chan, 0); - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - __ioat2_restart_chan(ioat); -} - -static void ioat3_eh(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - struct pci_dev *pdev = to_pdev(chan); - struct ioat_dma_descriptor *hw; - struct dma_async_tx_descriptor *tx; - u64 phys_complete; - struct ioat_ring_ent *desc; - u32 err_handled = 0; - u32 chanerr_int; - u32 chanerr; - - /* cleanup so tail points to descriptor that caused the error */ - if (ioat3_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr_int); - - dev_dbg(to_dev(chan), "%s: error = %x:%x\n", - __func__, chanerr, chanerr_int); - - desc = ioat2_get_ring_ent(ioat, ioat->tail); - hw = desc->hw; - dump_desc_dbg(ioat, desc); - - switch (hw->ctl_f.op) { - case IOAT_OP_XOR_VAL: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= 
SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - break; - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ_VAL_16S: - if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { - *desc->result |= SUM_CHECK_P_RESULT; - err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; - } - if (chanerr & IOAT_CHANERR_XOR_Q_ERR) { - *desc->result |= SUM_CHECK_Q_RESULT; - err_handled |= IOAT_CHANERR_XOR_Q_ERR; - } - break; - } - - /* fault on unhandled error or spurious halt */ - if (chanerr ^ err_handled || chanerr == 0) { - dev_err(to_dev(chan), "%s: fatal error (%x:%x)\n", - __func__, chanerr, err_handled); - BUG(); - } else { /* cleanup the faulty descriptor */ - tx = &desc->txd; - if (tx->cookie) { - dma_cookie_complete(tx); - dma_descriptor_unmap(tx); - if (tx->callback) { - tx->callback(tx->callback_param); - tx->callback = NULL; - } - } - } - - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr_int); - - /* mark faulting descriptor as complete */ - *chan->completion = desc->txd.phys; - - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); -} - -static void check_active(struct ioat2_dma_chan *ioat) -{ - struct ioat_chan_common *chan = &ioat->base; - - if (ioat2_ring_active(ioat)) { - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - return; - } - - if (test_and_clear_bit(IOAT_CHAN_ACTIVE, &chan->state)) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - else if (ioat->alloc_order > ioat_get_alloc_order()) { - /* if the ring is idle, empty, and oversized try to step - * down the size - */ - reshape_ring(ioat, ioat->alloc_order - 1); - - /* keep shrinking until we get back to our minimum - * default size - */ - if (ioat->alloc_order > ioat_get_alloc_order()) - mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); - } - -} - -static void ioat3_timer_event(unsigned long data) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); - struct ioat_chan_common *chan = &ioat->base; - dma_addr_t phys_complete; - u64 status; - - status = ioat_chansts(chan); - - /* when halted due to errors check for channel - * programming errors before advancing the completion state - */ - if (is_ioat_halted(status)) { - u32 chanerr; - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - dev_err(to_dev(chan), "%s: Channel halted (%x)\n", - __func__, chanerr); - if (test_bit(IOAT_RUN, &chan->state)) - BUG_ON(is_ioat_bug(chanerr)); - else /* we never got off the ground */ - return; - } - - /* if we haven't made progress and we have already - * acknowledged a pending completion once, then be more - * forceful with a restart - */ - spin_lock_bh(&chan->cleanup_lock); - if (ioat_cleanup_preamble(chan, &phys_complete)) - __cleanup(ioat, phys_complete); - else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { - spin_lock_bh(&ioat->prep_lock); - ioat3_restart_channel(ioat); - spin_unlock_bh(&ioat->prep_lock); - spin_unlock_bh(&chan->cleanup_lock); - return; - } else { - set_bit(IOAT_COMPLETION_ACK, &chan->state); - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - } - - - if (ioat2_ring_active(ioat)) - mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); - else { - spin_lock_bh(&ioat->prep_lock); - check_active(ioat); - spin_unlock_bh(&ioat->prep_lock); - } - spin_unlock_bh(&chan->cleanup_lock); -} - -static enum dma_status -ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie, - struct dma_tx_state *txstate) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - enum dma_status ret; - - 
ret = dma_cookie_status(c, cookie, txstate); - if (ret == DMA_COMPLETE) - return ret; - - ioat3_cleanup(ioat); - - return dma_cookie_status(c, cookie, txstate); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, - dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_xor_descriptor *xor; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - struct ioat_dma_descriptor *hw; - int num_descs, with_ext, idx, i; - u32 offset = 0; - u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; - - BUG_ON(src_cnt < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 5 - * sources - */ - if (src_cnt > 5) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - int s; - - desc = ioat2_get_ring_ent(ioat, idx + i); - xor = desc->xor; - - /* save a branch by unconditionally retrieving the - * extended descriptor xor_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + 1); - xor_ex = ext->xor_ex; - - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (s = 0; s < src_cnt; s++) - xor_set_src(descs, src[s], offset, s); - xor->size = xfer_size; - xor->dst_addr = dest + offset; - xor->ctl = 0; - xor->ctl_f.op = op; - xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); - - len -= xfer_size; - offset += xfer_size; - dump_desc_dbg(ioat, desc); - } while ((i += 1 + with_ext) < num_descs); - - /* last xor descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... 
so clear it here - */ - *result = 0; - - return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], - src_cnt - 1, len, flags); -} - -static void -dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; - struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) (pq_ex ? pq_ex->next : pq->next), - desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq_get_src(descs, i), pq->coef[i]); - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); - dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); -} - -static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc) -{ - struct device *dev = to_dev(&ioat->base); - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_raw_descriptor *descs[] = { (void *)pq, - (void *)pq, - (void *)pq }; - int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); - int i; - - if (desc->sed) { - descs[1] = (void *)desc->sed->hw; - descs[2] = (void *)desc->sed->hw + 64; - } - - dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" - " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" - " src_cnt: %d)\n", - desc_id(desc), (unsigned long long) desc->txd.phys, - (unsigned long long) pq->next, - desc->txd.flags, pq->size, pq->ctl, - pq->ctl_f.op, pq->ctl_f.int_en, - pq->ctl_f.compl_write, - pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", - pq->ctl_f.src_cnt); - for (i = 0; i < src_cnt; i++) { - dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, - (unsigned long long) pq16_get_src(descs, i), - pq->coef[i]); - } - dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); - dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *compl_desc; - struct ioat_ring_ent *desc; - struct ioat_ring_ent *ext; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - struct ioat_dma_descriptor *hw; - u32 offset = 0; - u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; - int i, s, idx, with_ext, num_descs; - int cb32 = (device->version < IOAT_VER_3_3) ? 
1 : 0; - - dev_dbg(to_dev(chan), "%s\n", __func__); - /* the engine requires at least two sources (we provide - * at least 1 implied source in the DMA_PREP_CONTINUE case) - */ - BUG_ON(src_cnt + dmaf_continue(flags) < 2); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - /* we need 2x the number of descriptors to cover greater than 3 - * sources (we need 1 extra source in the q-only continuation - * case and 3 extra sources in the p+q continuation case. - */ - if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || - (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { - with_ext = 1; - num_descs *= 2; - } else - with_ext = 0; - - /* completion writes from the raid engine may pass completion - * writes from the legacy engine, so we need one extra null - * (legacy) descriptor to ensure all completion writes arrive in - * order. - */ - if (likely(num_descs) && - ioat2_check_space_lock(ioat, num_descs + cb32) == 0) - idx = ioat->head; - else - return NULL; - i = 0; - do { - struct ioat_raw_descriptor *descs[2]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - /* save a branch by unconditionally retrieving the - * extended descriptor pq_set_src() knows to not write - * to it in the single descriptor case - */ - ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); - pq_ex = ext->pq_ex; - - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - - for (s = 0; s < src_cnt; s++) - pq_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq_set_src(descs, dst[0], offset, 0, s++); - pq_set_src(descs, dst[1], offset, 1, s++); - pq_set_src(descs, dst[1], offset, 0, s++); - } - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 
1 : 0; - pq->ctl_f.src_cnt = src_cnt_to_hw(s); - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while ((i += 1 + with_ext) < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - dump_pq_desc_dbg(ioat, desc, ext); - - if (!cb32) { - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - compl_desc = desc; - } else { - /* completion descriptor carries interrupt bit */ - compl_desc = ioat2_get_ring_ent(ioat, idx + i); - compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; - hw = compl_desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - dump_desc_dbg(ioat, compl_desc); - } - - - /* we leave the channel locked to ensure in order submission */ - return &compl_desc->txd; -} - -static struct dma_async_tx_descriptor * -__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, - const dma_addr_t *dst, const dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, - size_t len, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_chan_common *chan = &ioat->base; - struct ioatdma_device *device = chan->device; - struct ioat_ring_ent *desc; - size_t total_len = len; - struct ioat_pq_descriptor *pq; - u32 offset = 0; - u8 op; - int i, s, idx, num_descs; - - /* this function is only called with 9-16 sources */ - op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; - - dev_dbg(to_dev(chan), "%s\n", __func__); - - num_descs = ioat2_xferlen_to_descs(ioat, len); - - /* - * 16 source pq is only available on cb3.3 and has no completion - * write hw bug. - */ - if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) - idx = ioat->head; - else - return NULL; - - i = 0; - - do { - struct ioat_raw_descriptor *descs[4]; - size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); - - desc = ioat2_get_ring_ent(ioat, idx + i); - pq = desc->pq; - - descs[0] = (struct ioat_raw_descriptor *) pq; - - desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3); - if (!desc->sed) { - dev_err(to_dev(chan), - "%s: no free sed entries\n", __func__); - return NULL; - } - - pq->sed_addr = desc->sed->dma; - desc->sed->parent = desc; - - descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; - descs[2] = (void *)descs[1] + 64; - - for (s = 0; s < src_cnt; s++) - pq16_set_src(descs, src[s], offset, scf[s], s); - - /* see the comment for dma_maxpq in include/linux/dmaengine.h */ - if (dmaf_p_disabled_continue(flags)) - pq16_set_src(descs, dst[1], offset, 1, s++); - else if (dmaf_continue(flags)) { - pq16_set_src(descs, dst[0], offset, 0, s++); - pq16_set_src(descs, dst[1], offset, 1, s++); - pq16_set_src(descs, dst[1], offset, 0, s++); - } - - pq->size = xfer_size; - pq->p_addr = dst[0] + offset; - pq->q_addr = dst[1] + offset; - pq->ctl = 0; - pq->ctl_f.op = op; - pq->ctl_f.src_cnt = src16_cnt_to_hw(s); - /* we turn on descriptor write back error status */ - if (device->cap & IOAT_CAP_DWBES) - pq->ctl_f.wb_en = result ? 
1 : 0; - pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); - pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); - - len -= xfer_size; - offset += xfer_size; - } while (++i < num_descs); - - /* last pq descriptor carries the unmap parameters and fence bit */ - desc->txd.flags = flags; - desc->len = total_len; - if (result) - desc->result = result; - pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - - /* with cb3.3 we should be able to do completion w/o a null desc */ - pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); - pq->ctl_f.compl_write = 1; - - dump_pq16_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) -{ - if (dmaf_p_disabled_continue(flags)) - return src_cnt + 1; - else if (dmaf_continue(flags)) - return src_cnt + 3; - else - return src_cnt; -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - dst[0] = dst[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - dst[1] = dst[0]; - - /* handle the single source multiply case from the raid6 - * recovery path - */ - if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { - dma_addr_t single_source[2]; - unsigned char single_source_coef[2]; - - BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); - single_source[0] = src[0]; - single_source[1] = src[0]; - single_source_coef[0] = scf[0]; - single_source_coef[1] = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, - 2, single_source_coef, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, - single_source_coef, len, flags); - - } else { - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, - scf, len, flags); - } -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, - unsigned int src_cnt, const unsigned char *scf, size_t len, - enum sum_check_flags *pqres, unsigned long flags) -{ - /* specify valid address for disabled result */ - if (flags & DMA_PREP_PQ_DISABLE_P) - pq[0] = pq[1]; - if (flags & DMA_PREP_PQ_DISABLE_Q) - pq[1] = pq[0]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *pqres = 0; - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, - unsigned int src_cnt, size_t len, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - memset(scf, 0, src_cnt); - pq[0] = dst; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = dst; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? 
- __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags) : - __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, - flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, - unsigned int src_cnt, size_t len, - enum sum_check_flags *result, unsigned long flags) -{ - unsigned char scf[src_cnt]; - dma_addr_t pq[2]; - - /* the cleanup routine only sets bits on validate failure, it - * does not clear bits on validate success... so clear it here - */ - *result = 0; - - memset(scf, 0, src_cnt); - pq[0] = src[0]; - flags |= DMA_PREP_PQ_DISABLE_Q; - pq[1] = pq[0]; /* specify valid address for disabled result */ - - return src_cnt_flags(src_cnt, flags) > 8 ? - __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags) : - __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, - scf, len, flags); -} - -static struct dma_async_tx_descriptor * -ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) -{ - struct ioat2_dma_chan *ioat = to_ioat2_chan(c); - struct ioat_ring_ent *desc; - struct ioat_dma_descriptor *hw; - - if (ioat2_check_space_lock(ioat, 1) == 0) - desc = ioat2_get_ring_ent(ioat, ioat->head); - else - return NULL; - - hw = desc->hw; - hw->ctl = 0; - hw->ctl_f.null = 1; - hw->ctl_f.int_en = 1; - hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); - hw->ctl_f.compl_write = 1; - hw->size = NULL_DESC_BUFFER_SIZE; - hw->src_addr = 0; - hw->dst_addr = 0; - - desc->txd.flags = flags; - desc->len = 1; - - dump_desc_dbg(ioat, desc); - - /* we leave the channel locked to ensure in order submission */ - return &desc->txd; -} - -static void ioat3_dma_test_callback(void *dma_async_param) -{ - struct completion *cmp = dma_async_param; - - complete(cmp); -} - -#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ -static int ioat_xor_val_self_test(struct ioatdma_device *device) -{ - int i, src_idx; - struct page *dest; - struct page *xor_srcs[IOAT_NUM_SRC_TEST]; - struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; - dma_addr_t dest_dma; - struct dma_async_tx_descriptor *tx; - struct dma_chan *dma_chan; - dma_cookie_t cookie; - u8 cmp_byte = 0; - u32 cmp_word; - u32 xor_val_result; - int err = 0; - struct completion cmp; - unsigned long tmo; - struct device *dev = &device->pdev->dev; - struct dma_device *dma = &device->common; - u8 op = 0; - - dev_dbg(dev, "%s\n", __func__); - - if (!dma_has_cap(DMA_XOR, dma->cap_mask)) - return 0; - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - xor_srcs[src_idx] = alloc_page(GFP_KERNEL); - if (!xor_srcs[src_idx]) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - } - - dest = alloc_page(GFP_KERNEL); - if (!dest) { - while (src_idx--) - __free_page(xor_srcs[src_idx]); - return -ENOMEM; - } - - /* Fill in src buffers */ - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { - u8 *ptr = page_address(xor_srcs[src_idx]); - for (i = 0; i < PAGE_SIZE; i++) - ptr[i] = (1 << src_idx); - } - - for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) - cmp_byte ^= (u8) (1 << src_idx); - - cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | - (cmp_byte << 8) | cmp_byte; - - memset(page_address(dest), 0, PAGE_SIZE); - - dma_chan = container_of(dma->channels.next, struct dma_chan, - device_node); - if (dma->device_alloc_chan_resources(dma_chan) < 1) { - err = -ENODEV; - goto out; - } - - /* test xor */ - op = IOAT_OP_XOR; - - dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, 
DMA_FROM_DEVICE); - if (dma_mapping_error(dev, dest_dma)) - goto dma_unmap; - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) { - dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, - IOAT_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT); - - if (!tx) { - dev_err(dev, "Self-test xor prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test xor setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test xor timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { - u32 *ptr = page_address(dest); - if (ptr[i] != cmp_word) { - dev_err(dev, "Self-test xor failed compare\n"); - err = -ENODEV; - goto free_resources; - } - } - dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); - - /* skip validate if the capability is not present */ - if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) - goto free_resources; - - op = IOAT_OP_XOR_VAL; - - /* validate the sources with the destintation page */ - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - xor_val_srcs[i] = xor_srcs[i]; - xor_val_srcs[i] = dest; - - xor_val_result = 1; - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - if (xor_val_result != 0) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto free_resources; - } - - memset(page_address(dest), 0, PAGE_SIZE); - - /* test for non-zero parity sum */ - op = IOAT_OP_XOR_VAL; - - xor_val_result = 0; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_srcs[i] = DMA_ERROR_CODE; - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { - dma_srcs[i] = dma_map_page(dev, 
xor_val_srcs[i], 0, PAGE_SIZE, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, dma_srcs[i])) - goto dma_unmap; - } - tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, - IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT); - if (!tx) { - dev_err(dev, "Self-test 2nd zero prep failed\n"); - err = -ENODEV; - goto dma_unmap; - } - - async_tx_ack(tx); - init_completion(&cmp); - tx->callback = ioat3_dma_test_callback; - tx->callback_param = &cmp; - cookie = tx->tx_submit(tx); - if (cookie < 0) { - dev_err(dev, "Self-test 2nd zero setup failed\n"); - err = -ENODEV; - goto dma_unmap; - } - dma->device_issue_pending(dma_chan); - - tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); - - if (tmo == 0 || - dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { - dev_err(dev, "Self-test 2nd validate timed out\n"); - err = -ENODEV; - goto dma_unmap; - } - - if (xor_val_result != SUM_CHECK_P_RESULT) { - dev_err(dev, "Self-test validate failed compare\n"); - err = -ENODEV; - goto dma_unmap; - } - - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); - - goto free_resources; -dma_unmap: - if (op == IOAT_OP_XOR) { - if (dest_dma != DMA_ERROR_CODE) - dma_unmap_page(dev, dest_dma, PAGE_SIZE, - DMA_FROM_DEVICE); - for (i = 0; i < IOAT_NUM_SRC_TEST; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } else if (op == IOAT_OP_XOR_VAL) { - for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) - if (dma_srcs[i] != DMA_ERROR_CODE) - dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, - DMA_TO_DEVICE); - } -free_resources: - dma->device_free_chan_resources(dma_chan); -out: - src_idx = IOAT_NUM_SRC_TEST; - while (src_idx--) - __free_page(xor_srcs[src_idx]); - __free_page(dest); - return err; -} - -static int ioat3_dma_self_test(struct ioatdma_device *device) -{ - int rc = ioat_dma_self_test(device); - - if (rc) - return rc; - - rc = ioat_xor_val_self_test(device); - if (rc) - return rc; - - return 0; -} - -static int ioat3_irq_reinit(struct ioatdma_device *device) -{ - struct pci_dev *pdev = device->pdev; - int irq = pdev->irq, i; - - if (!is_bwd_ioat(pdev)) - return 0; - - switch (device->irq_mode) { - case IOAT_MSIX: - for (i = 0; i < device->common.chancnt; i++) { - struct msix_entry *msix = &device->msix_entries[i]; - struct ioat_chan_common *chan; - - chan = ioat_chan_by_index(device, i); - devm_free_irq(&pdev->dev, msix->vector, chan); - } - - pci_disable_msix(pdev); - break; - case IOAT_MSI: - pci_disable_msi(pdev); - /* fall through */ - case IOAT_INTX: - devm_free_irq(&pdev->dev, irq, device); - break; - default: - return 0; - } - device->irq_mode = IOAT_NOIRQ; - - return ioat_dma_setup_interrupts(device); -} - -static int ioat3_reset_hw(struct ioat_chan_common *chan) -{ - /* throw away whatever the channel was doing and get it - * initialized, with ioat3 specific workarounds - */ - struct ioatdma_device *device = chan->device; - struct pci_dev *pdev = device->pdev; - u32 chanerr; - u16 dev_id; - int err; - - ioat2_quiesce(chan, msecs_to_jiffies(100)); - - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - - if (device->version < IOAT_VER_3_3) { - /* clear any pending errors */ - err = pci_read_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); - if (err) { - dev_err(&pdev->dev, - "channel error register unreachable\n"); - return err; - } - pci_write_config_dword(pdev, - IOAT_PCI_CHANERR_INT_OFFSET, 
chanerr); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { - pci_write_config_dword(pdev, - IOAT_PCI_DMAUNCERRSTS_OFFSET, - 0x10); - } - } - - err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); - if (!err) - err = ioat3_irq_reinit(device); - - if (err) - dev_err(&pdev->dev, "Failed to reset: %d\n", err); - - return err; -} - -static void ioat3_intr_quirk(struct ioatdma_device *device) -{ - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - u32 errmask; - - dma = &device->common; - - /* - * if we have descriptor write back error status, we mask the - * error interrupts - */ - if (device->cap & IOAT_CAP_DWBES) { - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - errmask = readl(chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | - IOAT_CHANERR_XOR_Q_ERR; - writel(errmask, chan->reg_base + - IOAT_CHANERR_MASK_OFFSET); - } - } -} - -int ioat3_dma_probe(struct ioatdma_device *device, int dca) -{ - struct pci_dev *pdev = device->pdev; - int dca_en = system_has_dca_enabled(pdev); - struct dma_device *dma; - struct dma_chan *c; - struct ioat_chan_common *chan; - bool is_raid_device = false; - int err; - - device->enumerate_channels = ioat2_enumerate_channels; - device->reset_hw = ioat3_reset_hw; - device->self_test = ioat3_dma_self_test; - device->intr_quirk = ioat3_intr_quirk; - dma = &device->common; - dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; - dma->device_issue_pending = ioat2_issue_pending; - dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; - dma->device_free_chan_resources = ioat2_free_chan_resources; - - dma_cap_set(DMA_INTERRUPT, dma->cap_mask); - dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; - - device->cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); - - if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) - device->cap &= ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); - - /* dca is incompatible with raid operations */ - if (dca_en && (device->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) - device->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); - - if (device->cap & IOAT_CAP_XOR) { - is_raid_device = true; - dma->max_xor = 8; - - dma_cap_set(DMA_XOR, dma->cap_mask); - dma->device_prep_dma_xor = ioat3_prep_xor; - - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - dma->device_prep_dma_xor_val = ioat3_prep_xor_val; - } - - if (device->cap & IOAT_CAP_PQ) { - is_raid_device = true; - - dma->device_prep_dma_pq = ioat3_prep_pq; - dma->device_prep_dma_pq_val = ioat3_prep_pq_val; - dma_cap_set(DMA_PQ, dma->cap_mask); - dma_cap_set(DMA_PQ_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma_set_maxpq(dma, 16, 0); - } else { - dma_set_maxpq(dma, 8, 0); - } - - if (!(device->cap & IOAT_CAP_XOR)) { - dma->device_prep_dma_xor = ioat3_prep_pqxor; - dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; - dma_cap_set(DMA_XOR, dma->cap_mask); - dma_cap_set(DMA_XOR_VAL, dma->cap_mask); - - if (device->cap & IOAT_CAP_RAID16SS) { - dma->max_xor = 16; - } else { - dma->max_xor = 8; - } - } - } - - dma->device_tx_status = ioat3_tx_status; - device->cleanup_fn = ioat3_cleanup_event; - device->timer_fn = ioat3_timer_event; - - /* starting with CB3.3 super extended descriptors are supported */ - if (device->cap & IOAT_CAP_RAID16SS) { - char pool_name[14]; - int i; - - for (i = 0; i < 
MAX_SED_POOLS; i++) { - snprintf(pool_name, 14, "ioat_hw%d_sed", i); - - /* allocate SED DMA pool */ - device->sed_hw_pool[i] = dmam_pool_create(pool_name, - &pdev->dev, - SED_SIZE * (i + 1), 64, 0); - if (!device->sed_hw_pool[i]) - return -ENOMEM; - - } - } - - err = ioat_probe(device); - if (err) - return err; - - list_for_each_entry(c, &dma->channels, device_node) { - chan = to_chan_common(c); - writel(IOAT_DMA_DCA_ANY_CPU, - chan->reg_base + IOAT_DCACTRL_OFFSET); - } - - err = ioat_register(device); - if (err) - return err; - - ioat_kobject_add(device, &ioat2_ktype); - - if (dca) - device->dca = ioat3_dca_init(pdev, device->reg_base); - - return 0; -} diff --git a/kernel/drivers/dma/ioat/hw.h b/kernel/drivers/dma/ioat/hw.h index a3e731edc..690e3b4f8 100644 --- a/kernel/drivers/dma/ioat/hw.h +++ b/kernel/drivers/dma/ioat/hw.h @@ -21,11 +21,6 @@ #define IOAT_MMIO_BAR 0 /* CB device ID's */ -#define IOAT_PCI_DID_5000 0x1A38 -#define IOAT_PCI_DID_CNB 0x360B -#define IOAT_PCI_DID_SCNB 0x65FF -#define IOAT_PCI_DID_SNB 0x402F - #define PCI_DEVICE_ID_INTEL_IOAT_IVB0 0x0e20 #define PCI_DEVICE_ID_INTEL_IOAT_IVB1 0x0e21 #define PCI_DEVICE_ID_INTEL_IOAT_IVB2 0x0e22 @@ -58,6 +53,17 @@ #define PCI_DEVICE_ID_INTEL_IOAT_BDXDE2 0x6f52 #define PCI_DEVICE_ID_INTEL_IOAT_BDXDE3 0x6f53 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX0 0x6f20 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX1 0x6f21 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX2 0x6f22 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX3 0x6f23 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX4 0x6f24 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX5 0x6f25 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX6 0x6f26 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX7 0x6f27 +#define PCI_DEVICE_ID_INTEL_IOAT_BDX8 0x6f2e +#define PCI_DEVICE_ID_INTEL_IOAT_BDX9 0x6f2f + #define IOAT_VER_1_2 0x12 /* Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ #define IOAT_VER_3_0 0x30 /* Version 3.0 */ diff --git a/kernel/drivers/dma/ioat/init.c b/kernel/drivers/dma/ioat/init.c new file mode 100644 index 000000000..4ef0c5e07 --- /dev/null +++ b/kernel/drivers/dma/ioat/init.c @@ -0,0 +1,1424 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". 
+ * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/prefetch.h> +#include <linux/dca.h> +#include <linux/aer.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + + /* I/OAT v3.2 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX2) }, 
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX3) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX4) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX5) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX6) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX7) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX8) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDX9) }, + + /* I/OAT v3.3 platforms */ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, + + { 0, } +}; +MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); +static void ioat_remove(struct pci_dev *pdev); +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx); +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); +static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); +int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); +int ioat_ring_alloc_order = 8; +module_param(ioat_ring_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_alloc_order, + "ioat+: allocate 2^n descriptors per channel (default: 8 max: 16)"); +int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; +module_param(ioat_ring_max_alloc_order, int, 0644); +MODULE_PARM_DESC(ioat_ring_max_alloc_order, + "ioat+: upper limit for ring size (default: 16)"); +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), msi, intx"); + +struct kmem_cache *ioat_cache; +struct kmem_cache *ioat_sed_cache; + +static bool is_jf_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_JSF0: + case PCI_DEVICE_ID_INTEL_IOAT_JSF1: + case PCI_DEVICE_ID_INTEL_IOAT_JSF2: + case PCI_DEVICE_ID_INTEL_IOAT_JSF3: + case PCI_DEVICE_ID_INTEL_IOAT_JSF4: + case PCI_DEVICE_ID_INTEL_IOAT_JSF5: + case PCI_DEVICE_ID_INTEL_IOAT_JSF6: + case PCI_DEVICE_ID_INTEL_IOAT_JSF7: + case PCI_DEVICE_ID_INTEL_IOAT_JSF8: + case PCI_DEVICE_ID_INTEL_IOAT_JSF9: + return true; + default: + return false; + } +} + +static bool is_snb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_SNB0: + case PCI_DEVICE_ID_INTEL_IOAT_SNB1: + case PCI_DEVICE_ID_INTEL_IOAT_SNB2: + case PCI_DEVICE_ID_INTEL_IOAT_SNB3: + case PCI_DEVICE_ID_INTEL_IOAT_SNB4: + case PCI_DEVICE_ID_INTEL_IOAT_SNB5: + case PCI_DEVICE_ID_INTEL_IOAT_SNB6: + case PCI_DEVICE_ID_INTEL_IOAT_SNB7: + case PCI_DEVICE_ID_INTEL_IOAT_SNB8: + case PCI_DEVICE_ID_INTEL_IOAT_SNB9: + return true; + default: + return false; + } +} + +static bool is_ivb_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case 
PCI_DEVICE_ID_INTEL_IOAT_IVB0: + case PCI_DEVICE_ID_INTEL_IOAT_IVB1: + case PCI_DEVICE_ID_INTEL_IOAT_IVB2: + case PCI_DEVICE_ID_INTEL_IOAT_IVB3: + case PCI_DEVICE_ID_INTEL_IOAT_IVB4: + case PCI_DEVICE_ID_INTEL_IOAT_IVB5: + case PCI_DEVICE_ID_INTEL_IOAT_IVB6: + case PCI_DEVICE_ID_INTEL_IOAT_IVB7: + case PCI_DEVICE_ID_INTEL_IOAT_IVB8: + case PCI_DEVICE_ID_INTEL_IOAT_IVB9: + return true; + default: + return false; + } + +} + +static bool is_hsw_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_HSW0: + case PCI_DEVICE_ID_INTEL_IOAT_HSW1: + case PCI_DEVICE_ID_INTEL_IOAT_HSW2: + case PCI_DEVICE_ID_INTEL_IOAT_HSW3: + case PCI_DEVICE_ID_INTEL_IOAT_HSW4: + case PCI_DEVICE_ID_INTEL_IOAT_HSW5: + case PCI_DEVICE_ID_INTEL_IOAT_HSW6: + case PCI_DEVICE_ID_INTEL_IOAT_HSW7: + case PCI_DEVICE_ID_INTEL_IOAT_HSW8: + case PCI_DEVICE_ID_INTEL_IOAT_HSW9: + return true; + default: + return false; + } + +} + +static bool is_bdx_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BDX0: + case PCI_DEVICE_ID_INTEL_IOAT_BDX1: + case PCI_DEVICE_ID_INTEL_IOAT_BDX2: + case PCI_DEVICE_ID_INTEL_IOAT_BDX3: + case PCI_DEVICE_ID_INTEL_IOAT_BDX4: + case PCI_DEVICE_ID_INTEL_IOAT_BDX5: + case PCI_DEVICE_ID_INTEL_IOAT_BDX6: + case PCI_DEVICE_ID_INTEL_IOAT_BDX7: + case PCI_DEVICE_ID_INTEL_IOAT_BDX8: + case PCI_DEVICE_ID_INTEL_IOAT_BDX9: + return true; + default: + return false; + } +} + +static bool is_xeon_cb32(struct pci_dev *pdev) +{ + return is_jf_ioat(pdev) || is_snb_ioat(pdev) || is_ivb_ioat(pdev) || + is_hsw_ioat(pdev) || is_bdx_ioat(pdev); +} + +bool is_bwd_ioat(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD0: + case PCI_DEVICE_ID_INTEL_IOAT_BWD1: + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + /* even though not Atom, BDX-DE has same DMA silicon */ + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } +} + +static bool is_bwd_noraid(struct pci_dev *pdev) +{ + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT_BWD2: + case PCI_DEVICE_ID_INTEL_IOAT_BWD3: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE0: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE1: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE2: + case PCI_DEVICE_ID_INTEL_IOAT_BDXDE3: + return true; + default: + return false; + } + +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. 
+ * @ioat_dma: dma device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int i; + u8 *src; + u8 *dest; + struct dma_device *dma = &ioat_dma->dma_dev; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + unsigned long tmo; + unsigned long flags; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(dma->channels.next, struct dma_chan, + device_node); + if (dma->device_alloc_chan_resources(dma_chan) < 1) { + dev_err(dev, "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_src)) { + dev_err(dev, "mapping src buffer failed\n"); + goto free_resources; + } + dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, dma_dest)) { + dev_err(dev, "mapping dest buffer failed\n"); + goto unmap_src; + } + flags = DMA_PREP_INTERRUPT; + tx = ioat_dma->dma_dev.device_prep_dma_memcpy(dma_chan, dma_dest, + dma_src, IOAT_TEST_SIZE, + flags); + if (!tx) { + dev_err(dev, "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) + != DMA_COMPLETE) { + dev_err(dev, "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto unmap_dma; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(dev, "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +unmap_dma: + dma_unmap_single(dev, dma_dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); +unmap_src: + dma_unmap_single(dev, dma_src, IOAT_TEST_SIZE, DMA_TO_DEVICE); +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @ioat_dma: ioat dma device + */ +int ioat_dma_setup_interrupts(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + struct msix_entry *msix; + int i, j, msixcnt; + int err = -EINVAL; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = ioat_dma->dma_dev.chancnt; + for (i = 0; i < msixcnt; i++) + ioat_dma->msix_entries[i].entry = i; + + err = pci_enable_msix_exact(pdev, ioat_dma->msix_entries, msixcnt); + if (err) + goto msi; + + for (i = 0; i < msixcnt; i++) { + msix = 
&ioat_dma->msix_entries[i]; + ioat_chan = ioat_chan_by_index(ioat_dma, i); + err = devm_request_irq(dev, msix->vector, + ioat_dma_do_interrupt_msix, 0, + "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + msix = &ioat_dma->msix_entries[j]; + ioat_chan = ioat_chan_by_index(ioat_dma, j); + devm_free_irq(dev, msix->vector, ioat_chan); + } + goto msi; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + ioat_dma->irq_mode = IOAT_MSIX; + goto done; + +msi: + err = pci_enable_msi(pdev); + if (err) + goto intx; + + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0, + "ioat-msi", ioat_dma); + if (err) { + pci_disable_msi(pdev); + goto intx; + } + ioat_dma->irq_mode = IOAT_MSI; + goto done; + +intx: + err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", ioat_dma); + if (err) + goto err_no_irq; + + ioat_dma->irq_mode = IOAT_INTX; +done: + if (is_bwd_ioat(pdev)) + ioat_intr_quirk(ioat_dma); + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); + ioat_dma->irq_mode = IOAT_NOIRQ; + dev_err(dev, "no usable interrupts\n"); + return err; +} + +static void ioat_disable_interrupts(struct ioatdma_device *ioat_dma) +{ + /* Disable all interrupt generation */ + writeb(0, ioat_dma->reg_base + IOAT_INTRCTRL_OFFSET); +} + +static int ioat_probe(struct ioatdma_device *ioat_dma) +{ + int err = -ENODEV; + struct dma_device *dma = &ioat_dma->dma_dev; + struct pci_dev *pdev = ioat_dma->pdev; + struct device *dev = &pdev->dev; + + /* DMA coherent memory pool for DMA descriptor allocations */ + ioat_dma->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!ioat_dma->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + ioat_dma->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), + SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + + if (!ioat_dma->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + ioat_enumerate_channels(ioat_dma); + + dma_cap_set(DMA_MEMCPY, dma->cap_mask); + dma->dev = &pdev->dev; + + if (!dma->chancnt) { + dev_err(dev, "channel enumeration error\n"); + goto err_setup_interrupts; + } + + err = ioat_dma_setup_interrupts(ioat_dma); + if (err) + goto err_setup_interrupts; + + err = ioat3_dma_self_test(ioat_dma); + if (err) + goto err_self_test; + + return 0; + +err_self_test: + ioat_disable_interrupts(ioat_dma); +err_setup_interrupts: + pci_pool_destroy(ioat_dma->completion_pool); +err_completion_pool: + pci_pool_destroy(ioat_dma->dma_pool); +err_dma_pool: + return err; +} + +static int ioat_register(struct ioatdma_device *ioat_dma) +{ + int err = dma_async_device_register(&ioat_dma->dma_dev); + + if (err) { + ioat_disable_interrupts(ioat_dma); + pci_pool_destroy(ioat_dma->completion_pool); + pci_pool_destroy(ioat_dma->dma_pool); + } + + return err; +} + +static void ioat_dma_remove(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + + ioat_disable_interrupts(ioat_dma); + + ioat_kobject_del(ioat_dma); + + dma_async_device_unregister(dma); + + pci_pool_destroy(ioat_dma->dma_pool); + pci_pool_destroy(ioat_dma->completion_pool); + + INIT_LIST_HEAD(&dma->channels); +} + +/** + * ioat_enumerate_channels - find and initialize the device's channels + * @ioat_dma: the ioat dma device to be enumerated + */ +static int ioat_enumerate_channels(struct 
ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 xfercap_log; + int i; + + INIT_LIST_HEAD(&dma->channels); + dma->chancnt = readb(ioat_dma->reg_base + IOAT_CHANCNT_OFFSET); + dma->chancnt &= 0x1f; /* bits [4:0] valid */ + if (dma->chancnt > ARRAY_SIZE(ioat_dma->idx)) { + dev_warn(dev, "(%d) exceeds max supported channels (%zu)\n", + dma->chancnt, ARRAY_SIZE(ioat_dma->idx)); + dma->chancnt = ARRAY_SIZE(ioat_dma->idx); + } + xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); + xfercap_log &= 0x1f; /* bits [4:0] valid */ + if (xfercap_log == 0) + return 0; + dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); + + for (i = 0; i < dma->chancnt; i++) { + ioat_chan = devm_kzalloc(dev, sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) + break; + + ioat_init_channel(ioat_dma, ioat_chan, i); + ioat_chan->xfercap_log = xfercap_log; + spin_lock_init(&ioat_chan->prep_lock); + if (ioat_reset_hw(ioat_chan)) { + i = 0; + break; + } + } + dma->chancnt = i; + return i; +} + +/** + * ioat_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat_free_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + const int total_descs = 1 << ioat_chan->alloc_order; + int descs; + int i; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (!ioat_chan->ring) + return; + + ioat_stop(ioat_chan); + ioat_reset_hw(ioat_chan); + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + descs = ioat_ring_space(ioat_chan); + dev_dbg(to_dev(ioat_chan), "freeing %d idle descriptors\n", descs); + for (i = 0; i < descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head + i); + ioat_free_ring_ent(desc, c); + } + + if (descs < total_descs) + dev_err(to_dev(ioat_chan), "Freeing %d in use descriptors!\n", + total_descs - descs); + + for (i = 0; i < total_descs - descs; i++) { + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->tail + i); + dump_desc_dbg(ioat_chan, desc); + ioat_free_ring_ent(desc, c); + } + + kfree(ioat_chan->ring); + ioat_chan->ring = NULL; + ioat_chan->alloc_order = 0; + pci_pool_free(ioat_dma->completion_pool, ioat_chan->completion, + ioat_chan->completion_dma); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_chan->last_completion = 0; + ioat_chan->completion_dma = 0; + ioat_chan->dmacount = 0; +} + +/* ioat_alloc_chan_resources - allocate/initialize ioat descriptor ring + * @chan: channel to be initialized + */ +static int ioat_alloc_chan_resources(struct dma_chan *c) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent **ring; + u64 status; + int order; + int i = 0; + u32 chanerr; + + /* have we already been set up? 
*/ + if (ioat_chan->ring) + return 1 << ioat_chan->alloc_order; + + /* Setup register to interrupt and write completion status on error */ + writew(IOAT_CHANCTRL_RUN, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion = + pci_pool_alloc(ioat_chan->ioat_dma->completion_pool, + GFP_KERNEL, &ioat_chan->completion_dma); + if (!ioat_chan->completion) + return -ENOMEM; + + memset(ioat_chan->completion, 0, sizeof(*ioat_chan->completion)); + writel(((u64)ioat_chan->completion_dma) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64)ioat_chan->completion_dma) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + order = ioat_get_alloc_order(); + ring = ioat_alloc_ring(c, order, GFP_KERNEL); + if (!ring) + return -ENOMEM; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->prep_lock); + ioat_chan->ring = ring; + ioat_chan->head = 0; + ioat_chan->issued = 0; + ioat_chan->tail = 0; + ioat_chan->alloc_order = order; + set_bit(IOAT_RUN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); + + ioat_start_null_desc(ioat_chan); + + /* check that we got off the ground */ + do { + udelay(1); + status = ioat_chansts(ioat_chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + + if (is_ioat_active(status) || is_ioat_idle(status)) + return 1 << ioat_chan->alloc_order; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + dev_WARN(to_dev(ioat_chan), + "failed to start channel chanerr: %#x\n", chanerr); + ioat_free_chan_resources(c); + return -EFAULT; +} + +/* common channel initialization */ +static void +ioat_init_channel(struct ioatdma_device *ioat_dma, + struct ioatdma_chan *ioat_chan, int idx) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c = &ioat_chan->dma_chan; + unsigned long data = (unsigned long) c; + + ioat_chan->ioat_dma = ioat_dma; + ioat_chan->reg_base = ioat_dma->reg_base + (0x80 * (idx + 1)); + spin_lock_init(&ioat_chan->cleanup_lock); + ioat_chan->dma_chan.device = dma; + dma_cookie_init(&ioat_chan->dma_chan); + list_add_tail(&ioat_chan->dma_chan.device_node, &dma->channels); + ioat_dma->idx[idx] = ioat_chan; + init_timer(&ioat_chan->timer); + ioat_chan->timer.function = ioat_timer_event; + ioat_chan->timer.data = data; + tasklet_init(&ioat_chan->cleanup_task, ioat_cleanup_event, data); +} + +#define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ +static int ioat_xor_val_self_test(struct ioatdma_device *ioat_dma) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOAT_NUM_SRC_TEST]; + struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 xor_val_result; + int err = 0; + struct completion cmp; + unsigned long tmo; + struct device *dev = &ioat_dma->pdev->dev; + struct dma_device *dma = &ioat_dma->dma_dev; + u8 op = 0; + + dev_dbg(dev, "%s\n", __func__); + + if (!dma_has_cap(DMA_XOR, dma->cap_mask)) + return 0; + + for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) { + while (src_idx--) + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) { + while (src_idx--) 
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(dma->channels.next, struct dma_chan,
+ device_node);
+ if (dma->device_alloc_chan_resources(dma_chan) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ op = IOAT_OP_XOR;
+
+ dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dest_dma))
+ goto dma_unmap;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_srcs[i] = DMA_ERROR_CODE;
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i]))
+ goto dma_unmap;
+ }
+ tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOAT_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT);
+
+ if (!tx) {
+ dev_err(dev, "Self-test xor prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test xor setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (tmo == 0 ||
+ dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
+ dev_err(dev, "Self-test xor timed out\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE);
+
+ dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+
+ if (ptr[i] != cmp_word) {
+ dev_err(dev, "Self-test xor failed compare\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ dma_unmap_page(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE);
+
+ /* skip validate if the capability is not present */
+ if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ op = IOAT_OP_XOR_VAL;
+
+ /* validate the sources with the destination page */
+ for (i = 0; i < IOAT_NUM_SRC_TEST; i++)
+ xor_val_srcs[i] = xor_srcs[i];
+ xor_val_srcs[i] = dest;
+
+ xor_val_result = 1;
+
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = DMA_ERROR_CODE;
+ for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) {
+ dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, dma_srcs[i]))
+ goto dma_unmap;
+ }
+ tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs,
+ IOAT_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &xor_val_result, DMA_PREP_INTERRUPT);
+ if (!tx) {
+ dev_err(dev, "Self-test zero prep failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(dev, "Self-test zero setup failed\n");
+ err = -ENODEV;
+ goto dma_unmap;
+ }
+ dma->device_issue_pending(dma_chan);
+
+ tmo =
wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + if (xor_val_result != 0) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto free_resources; + } + + memset(page_address(dest), 0, PAGE_SIZE); + + /* test for non-zero parity sum */ + op = IOAT_OP_XOR_VAL; + + xor_val_result = 0; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = DMA_ERROR_CODE; + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) { + dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + if (dma_mapping_error(dev, dma_srcs[i])) + goto dma_unmap; + } + tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, + IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, + &xor_val_result, DMA_PREP_INTERRUPT); + if (!tx) { + dev_err(dev, "Self-test 2nd zero prep failed\n"); + err = -ENODEV; + goto dma_unmap; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(dev, "Self-test 2nd zero setup failed\n"); + err = -ENODEV; + goto dma_unmap; + } + dma->device_issue_pending(dma_chan); + + tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (tmo == 0 || + dma->device_tx_status(dma_chan, cookie, NULL) != DMA_COMPLETE) { + dev_err(dev, "Self-test 2nd validate timed out\n"); + err = -ENODEV; + goto dma_unmap; + } + + if (xor_val_result != SUM_CHECK_P_RESULT) { + dev_err(dev, "Self-test validate failed compare\n"); + err = -ENODEV; + goto dma_unmap; + } + + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, DMA_TO_DEVICE); + + goto free_resources; +dma_unmap: + if (op == IOAT_OP_XOR) { + if (dest_dma != DMA_ERROR_CODE) + dma_unmap_page(dev, dest_dma, PAGE_SIZE, + DMA_FROM_DEVICE); + for (i = 0; i < IOAT_NUM_SRC_TEST; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } else if (op == IOAT_OP_XOR_VAL) { + for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) + if (dma_srcs[i] != DMA_ERROR_CODE) + dma_unmap_page(dev, dma_srcs[i], PAGE_SIZE, + DMA_TO_DEVICE); + } +free_resources: + dma->device_free_chan_resources(dma_chan); +out: + src_idx = IOAT_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma) +{ + int rc; + + rc = ioat_dma_self_test(ioat_dma); + if (rc) + return rc; + + rc = ioat_xor_val_self_test(ioat_dma); + + return rc; +} + +static void ioat_intr_quirk(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + u32 errmask; + + dma = &ioat_dma->dma_dev; + + /* + * if we have descriptor write back error status, we mask the + * error interrupts + */ + if (ioat_dma->cap & IOAT_CAP_DWBES) { + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + errmask = readl(ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + errmask |= IOAT_CHANERR_XOR_P_OR_CRC_ERR | + IOAT_CHANERR_XOR_Q_ERR; + writel(errmask, ioat_chan->reg_base + + IOAT_CHANERR_MASK_OFFSET); + } + } +} + +static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca) +{ + struct pci_dev *pdev = 
ioat_dma->pdev; + int dca_en = system_has_dca_enabled(pdev); + struct dma_device *dma; + struct dma_chan *c; + struct ioatdma_chan *ioat_chan; + bool is_raid_device = false; + int err; + + dma = &ioat_dma->dma_dev; + dma->device_prep_dma_memcpy = ioat_dma_prep_memcpy_lock; + dma->device_issue_pending = ioat_issue_pending; + dma->device_alloc_chan_resources = ioat_alloc_chan_resources; + dma->device_free_chan_resources = ioat_free_chan_resources; + + dma_cap_set(DMA_INTERRUPT, dma->cap_mask); + dma->device_prep_dma_interrupt = ioat_prep_interrupt_lock; + + ioat_dma->cap = readl(ioat_dma->reg_base + IOAT_DMA_CAP_OFFSET); + + if (is_xeon_cb32(pdev) || is_bwd_noraid(pdev)) + ioat_dma->cap &= + ~(IOAT_CAP_XOR | IOAT_CAP_PQ | IOAT_CAP_RAID16SS); + + /* dca is incompatible with raid operations */ + if (dca_en && (ioat_dma->cap & (IOAT_CAP_XOR|IOAT_CAP_PQ))) + ioat_dma->cap &= ~(IOAT_CAP_XOR|IOAT_CAP_PQ); + + if (ioat_dma->cap & IOAT_CAP_XOR) { + is_raid_device = true; + dma->max_xor = 8; + + dma_cap_set(DMA_XOR, dma->cap_mask); + dma->device_prep_dma_xor = ioat_prep_xor; + + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + dma->device_prep_dma_xor_val = ioat_prep_xor_val; + } + + if (ioat_dma->cap & IOAT_CAP_PQ) { + is_raid_device = true; + + dma->device_prep_dma_pq = ioat_prep_pq; + dma->device_prep_dma_pq_val = ioat_prep_pq_val; + dma_cap_set(DMA_PQ, dma->cap_mask); + dma_cap_set(DMA_PQ_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma_set_maxpq(dma, 16, 0); + else + dma_set_maxpq(dma, 8, 0); + + if (!(ioat_dma->cap & IOAT_CAP_XOR)) { + dma->device_prep_dma_xor = ioat_prep_pqxor; + dma->device_prep_dma_xor_val = ioat_prep_pqxor_val; + dma_cap_set(DMA_XOR, dma->cap_mask); + dma_cap_set(DMA_XOR_VAL, dma->cap_mask); + + if (ioat_dma->cap & IOAT_CAP_RAID16SS) + dma->max_xor = 16; + else + dma->max_xor = 8; + } + } + + dma->device_tx_status = ioat_tx_status; + + /* starting with CB3.3 super extended descriptors are supported */ + if (ioat_dma->cap & IOAT_CAP_RAID16SS) { + char pool_name[14]; + int i; + + for (i = 0; i < MAX_SED_POOLS; i++) { + snprintf(pool_name, 14, "ioat_hw%d_sed", i); + + /* allocate SED DMA pool */ + ioat_dma->sed_hw_pool[i] = dmam_pool_create(pool_name, + &pdev->dev, + SED_SIZE * (i + 1), 64, 0); + if (!ioat_dma->sed_hw_pool[i]) + return -ENOMEM; + + } + } + + if (!(ioat_dma->cap & (IOAT_CAP_XOR | IOAT_CAP_PQ))) + dma_cap_set(DMA_PRIVATE, dma->cap_mask); + + err = ioat_probe(ioat_dma); + if (err) + return err; + + list_for_each_entry(c, &dma->channels, device_node) { + ioat_chan = to_ioat_chan(c); + writel(IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + + err = ioat_register(ioat_dma); + if (err) + return err; + + ioat_kobject_add(ioat_dma, &ioat_ktype); + + if (dca) + ioat_dma->dca = ioat_dca_init(pdev, ioat_dma->reg_base); + + return 0; +} + +static void ioat_shutdown(struct pci_dev *pdev) +{ + struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev); + struct ioatdma_chan *ioat_chan; + int i; + + if (!ioat_dma) + return; + + for (i = 0; i < IOAT_MAX_CHANS; i++) { + ioat_chan = ioat_dma->idx[i]; + if (!ioat_chan) + continue; + + spin_lock_bh(&ioat_chan->prep_lock); + set_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + del_timer_sync(&ioat_chan->timer); + spin_unlock_bh(&ioat_chan->prep_lock); + /* this should quiesce then reset */ + ioat_reset_hw(ioat_chan); + } + + ioat_disable_interrupts(ioat_dma); +} + +void ioat_resume(struct ioatdma_device *ioat_dma) +{ + struct ioatdma_chan *ioat_chan; + u32 chanerr; + int i; + + for (i = 0; i < 
IOAT_MAX_CHANS; i++) { + ioat_chan = ioat_dma->idx[i]; + if (!ioat_chan) + continue; + + spin_lock_bh(&ioat_chan->prep_lock); + clear_bit(IOAT_CHAN_DOWN, &ioat_chan->state); + spin_unlock_bh(&ioat_chan->prep_lock); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + + /* no need to reset as shutdown already did that */ + } +} + +#define DRV_NAME "ioatdma" + +static pci_ers_result_t ioat_pcie_error_detected(struct pci_dev *pdev, + enum pci_channel_state error) +{ + dev_dbg(&pdev->dev, "%s: PCIe AER error %d\n", DRV_NAME, error); + + /* quiesce and block I/O */ + ioat_shutdown(pdev); + + return PCI_ERS_RESULT_NEED_RESET; +} + +static pci_ers_result_t ioat_pcie_error_slot_reset(struct pci_dev *pdev) +{ + pci_ers_result_t result = PCI_ERS_RESULT_RECOVERED; + int err; + + dev_dbg(&pdev->dev, "%s post reset handling\n", DRV_NAME); + + if (pci_enable_device_mem(pdev) < 0) { + dev_err(&pdev->dev, + "Failed to enable PCIe device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + pci_wake_from_d3(pdev, false); + } + + err = pci_cleanup_aer_uncorrect_error_status(pdev); + if (err) { + dev_err(&pdev->dev, + "AER uncorrect error status clear failed: %#x\n", err); + } + + return result; +} + +static void ioat_pcie_error_resume(struct pci_dev *pdev) +{ + struct ioatdma_device *ioat_dma = pci_get_drvdata(pdev); + + dev_dbg(&pdev->dev, "%s: AER handling resuming\n", DRV_NAME); + + /* initialize and bring everything back */ + ioat_resume(ioat_dma); +} + +static const struct pci_error_handlers ioat_err_handler = { + .error_detected = ioat_pcie_error_detected, + .slot_reset = ioat_pcie_error_slot_reset, + .resume = ioat_pcie_error_resume, +}; + +static struct pci_driver ioat_pci_driver = { + .name = DRV_NAME, + .id_table = ioat_pci_tbl, + .probe = ioat_pci_probe, + .remove = ioat_remove, + .shutdown = ioat_shutdown, + .err_handler = &ioat_err_handler, +}; + +static struct ioatdma_device * +alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) +{ + struct device *dev = &pdev->dev; + struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); + + if (!d) + return NULL; + d->pdev = pdev; + d->reg_base = iobase; + return d; +} + +static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + void __iomem * const *iomap; + struct device *dev = &pdev->dev; + struct ioatdma_device *device; + int err; + + err = pcim_enable_device(pdev); + if (err) + return err; + + err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); + if (err) + return err; + iomap = pcim_iomap_table(pdev); + if (!iomap) + return -ENOMEM; + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); + if (err) + return err; + + device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); + if (!device) + return -ENOMEM; + pci_set_master(pdev); + pci_set_drvdata(pdev, device); + + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + if (device->version >= IOAT_VER_3_0) { + err = ioat3_dma_probe(device, ioat_dca_enabled); + + if (device->version >= IOAT_VER_3_3) + pci_enable_pcie_error_reporting(pdev); + } else + return -ENODEV; + + if (err) { + dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); + 
pci_disable_pcie_error_reporting(pdev); + return -ENODEV; + } + + return 0; +} + +static void ioat_remove(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device) + return; + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; + } + + pci_disable_pcie_error_reporting(pdev); + ioat_dma_remove(device); +} + +static int __init ioat_init_module(void) +{ + int err = -ENOMEM; + + pr_info("%s: Intel(R) QuickData Technology Driver %s\n", + DRV_NAME, IOAT_DMA_VERSION); + + ioat_cache = kmem_cache_create("ioat", sizeof(struct ioat_ring_ent), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!ioat_cache) + return -ENOMEM; + + ioat_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat_sed_cache) + goto err_ioat_cache; + + err = pci_register_driver(&ioat_pci_driver); + if (err) + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat_sed_cache); + + err_ioat_cache: + kmem_cache_destroy(ioat_cache); + + return err; +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); + kmem_cache_destroy(ioat_cache); +} +module_exit(ioat_exit_module); diff --git a/kernel/drivers/dma/ioat/pci.c b/kernel/drivers/dma/ioat/pci.c deleted file mode 100644 index 76f0dc688..000000000 --- a/kernel/drivers/dma/ioat/pci.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - * Intel I/OAT DMA Linux driver - * Copyright(c) 2007 - 2009 Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * The full GNU General Public License is included in this distribution in - * the file called "COPYING". - * - */ - -/* - * This driver supports an Intel I/OAT DMA engine, which does asynchronous - * copy operations. 
- */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/dca.h> -#include <linux/slab.h> -#include "dma.h" -#include "dma_v2.h" -#include "registers.h" -#include "hw.h" - -MODULE_VERSION(IOAT_DMA_VERSION); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Intel Corporation"); - -static struct pci_device_id ioat_pci_tbl[] = { - /* I/OAT v1 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, - { PCI_VDEVICE(UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, - - /* I/OAT v2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, - - /* I/OAT v3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, - - /* I/OAT v3.2 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_JSF9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_IVB9) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW3) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW4) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW5) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW6) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW7) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW8) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_HSW9) }, - - /* I/OAT 
v3.3 platforms */ - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BWD3) }, - - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE0) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE1) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) }, - - { 0, } -}; -MODULE_DEVICE_TABLE(pci, ioat_pci_tbl); - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id); -static void ioat_remove(struct pci_dev *pdev); - -static int ioat_dca_enabled = 1; -module_param(ioat_dca_enabled, int, 0644); -MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); - -struct kmem_cache *ioat2_cache; -struct kmem_cache *ioat3_sed_cache; - -#define DRV_NAME "ioatdma" - -static struct pci_driver ioat_pci_driver = { - .name = DRV_NAME, - .id_table = ioat_pci_tbl, - .probe = ioat_pci_probe, - .remove = ioat_remove, -}; - -static struct ioatdma_device * -alloc_ioatdma(struct pci_dev *pdev, void __iomem *iobase) -{ - struct device *dev = &pdev->dev; - struct ioatdma_device *d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL); - - if (!d) - return NULL; - d->pdev = pdev; - d->reg_base = iobase; - return d; -} - -static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) -{ - void __iomem * const *iomap; - struct device *dev = &pdev->dev; - struct ioatdma_device *device; - int err; - - err = pcim_enable_device(pdev); - if (err) - return err; - - err = pcim_iomap_regions(pdev, 1 << IOAT_MMIO_BAR, DRV_NAME); - if (err) - return err; - iomap = pcim_iomap_table(pdev); - if (!iomap) - return -ENOMEM; - - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (err) - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); - if (err) - return err; - - device = alloc_ioatdma(pdev, iomap[IOAT_MMIO_BAR]); - if (!device) - return -ENOMEM; - pci_set_master(pdev); - pci_set_drvdata(pdev, device); - - device->version = readb(device->reg_base + IOAT_VER_OFFSET); - if (device->version == IOAT_VER_1_2) - err = ioat1_dma_probe(device, ioat_dca_enabled); - else if (device->version == IOAT_VER_2_0) - err = ioat2_dma_probe(device, ioat_dca_enabled); - else if (device->version >= IOAT_VER_3_0) - err = ioat3_dma_probe(device, ioat_dca_enabled); - else - return -ENODEV; - - if (err) { - dev_err(dev, "Intel(R) I/OAT DMA Engine init failed\n"); - return -ENODEV; - } - - return 0; -} - -static void ioat_remove(struct pci_dev *pdev) -{ - struct ioatdma_device *device = pci_get_drvdata(pdev); - - if (!device) - return; - - dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca, &pdev->dev); - free_dca_provider(device->dca); - device->dca = NULL; - } - ioat_dma_remove(device); -} - -static int __init ioat_init_module(void) -{ - int err = -ENOMEM; - - pr_info("%s: Intel(R) QuickData Technology Driver %s\n", - DRV_NAME, IOAT_DMA_VERSION); - - ioat2_cache = kmem_cache_create("ioat2", sizeof(struct ioat_ring_ent), - 0, SLAB_HWCACHE_ALIGN, NULL); - if (!ioat2_cache) - return -ENOMEM; - - ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); - if (!ioat3_sed_cache) - goto err_ioat2_cache; - - err = pci_register_driver(&ioat_pci_driver); - if (err) - goto 
err_ioat3_cache; - - return 0; - - err_ioat3_cache: - kmem_cache_destroy(ioat3_sed_cache); - - err_ioat2_cache: - kmem_cache_destroy(ioat2_cache); - - return err; -} -module_init(ioat_init_module); - -static void __exit ioat_exit_module(void) -{ - pci_unregister_driver(&ioat_pci_driver); - kmem_cache_destroy(ioat2_cache); -} -module_exit(ioat_exit_module); diff --git a/kernel/drivers/dma/ioat/prep.c b/kernel/drivers/dma/ioat/prep.c new file mode 100644 index 000000000..6bb4a13a8 --- /dev/null +++ b/kernel/drivers/dma/ioat/prep.c @@ -0,0 +1,749 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/gfp.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/prefetch.h> +#include "../dmaengine.h" +#include "registers.h" +#include "hw.h" +#include "dma.h" + +#define MAX_SCF 1024 + +/* provide a lookup table for setting the source address in the base or + * extended descriptor of an xor or pq descriptor + */ +static const u8 xor_idx_to_desc = 0xe0; +static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; +static const u8 pq_idx_to_desc = 0xf8; +static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2 }; +static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; +static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, + 0, 1, 2, 3, 4, 5, 6 }; + +static void xor_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, int idx) +{ + struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; + + raw->field[xor_idx_to_field[idx]] = addr + offset; +} + +static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) +{ + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + return raw->field[pq_idx_to_field[idx]]; +} + +static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) +{ + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + return raw->field[pq16_idx_to_field[idx]]; +} + +static void pq_set_src(struct ioat_raw_descriptor *descs[2], + dma_addr_t addr, u32 offset, u8 coef, int idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; + struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; + + raw->field[pq_idx_to_field[idx]] = addr + offset; + pq->coef[idx] = coef; +} + +static void pq16_set_src(struct ioat_raw_descriptor *desc[3], + dma_addr_t addr, u32 offset, u8 coef, unsigned idx) +{ + struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; + struct ioat_pq16a_descriptor *pq16 = + (struct ioat_pq16a_descriptor *)desc[1]; + struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; + + raw->field[pq16_idx_to_field[idx]] = addr + offset; + + if (idx < 8) + pq->coef[idx] = coef; + else + pq16->coef[idx - 8] = coef; +} + +static struct ioat_sed_ent * +ioat3_alloc_sed(struct ioatdma_device 
*ioat_dma, unsigned int hw_pool) +{ + struct ioat_sed_ent *sed; + gfp_t flags = __GFP_ZERO | GFP_ATOMIC; + + sed = kmem_cache_alloc(ioat_sed_cache, flags); + if (!sed) + return NULL; + + sed->hw_pool = hw_pool; + sed->hw = dma_pool_alloc(ioat_dma->sed_hw_pool[hw_pool], + flags, &sed->dma); + if (!sed->hw) { + kmem_cache_free(ioat_sed_cache, sed); + return NULL; + } + + return sed; +} + +struct dma_async_tx_descriptor * +ioat_dma_prep_memcpy_lock(struct dma_chan *c, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_dma_descriptor *hw; + struct ioat_ring_ent *desc; + dma_addr_t dst = dma_dest; + dma_addr_t src = dma_src; + size_t total_len = len; + int num_descs, idx, i; + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + size_t copy = min_t(size_t, len, 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + hw = desc->hw; + + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + dump_desc_dbg(ioat_chan, desc); + } while (++i < num_descs); + + desc->txd.flags = flags; + desc->len = total_len; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + dump_desc_dbg(ioat_chan, desc); + /* we leave the channel locked to ensure in order submission */ + + return &desc->txd; +} + + +static struct dma_async_tx_descriptor * +__ioat_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, + dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_xor_descriptor *xor; + struct ioat_xor_ext_descriptor *xor_ex = NULL; + struct ioat_dma_descriptor *hw; + int num_descs, with_ext, idx, i; + u32 offset = 0; + u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; + + BUG_ON(src_cnt < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 5 + * sources + */ + if (src_cnt > 5) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. 
+ */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs+1) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, + len, 1 << ioat_chan->xfercap_log); + int s; + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + xor = desc->xor; + + /* save a branch by unconditionally retrieving the + * extended descriptor xor_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + 1); + xor_ex = ext->xor_ex; + + descs[0] = (struct ioat_raw_descriptor *) xor; + descs[1] = (struct ioat_raw_descriptor *) xor_ex; + for (s = 0; s < src_cnt; s++) + xor_set_src(descs, src[s], offset, s); + xor->size = xfer_size; + xor->dst_addr = dest + offset; + xor->ctl = 0; + xor->ctl_f.op = op; + xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); + + len -= xfer_size; + offset += xfer_size; + dump_desc_dbg(ioat_chan, desc); + } while ((i += 1 + with_ext) < num_descs); + + /* last xor descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +struct dma_async_tx_descriptor * +ioat_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + return __ioat_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + return __ioat_prep_xor_lock(chan, result, src[0], &src[1], + src_cnt - 1, len, flags); +} + +static void +dump_pq_desc_dbg(struct ioatdma_chan *ioat_chan, struct ioat_ring_ent *desc, + struct ioat_ring_ent *ext) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; + struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; + int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) (pq_ex ? pq_ex->next : pq->next), + desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, + pq->ctl_f.int_en, pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? 
"" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq_get_src(descs, i), pq->coef[i]); + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); + dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); +} + +static void dump_pq16_desc_dbg(struct ioatdma_chan *ioat_chan, + struct ioat_ring_ent *desc) +{ + struct device *dev = to_dev(ioat_chan); + struct ioat_pq_descriptor *pq = desc->pq; + struct ioat_raw_descriptor *descs[] = { (void *)pq, + (void *)pq, + (void *)pq }; + int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); + int i; + + if (desc->sed) { + descs[1] = (void *)desc->sed->hw; + descs[2] = (void *)desc->sed->hw + 64; + } + + dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" + " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" + " src_cnt: %d)\n", + desc_id(desc), (unsigned long long) desc->txd.phys, + (unsigned long long) pq->next, + desc->txd.flags, pq->size, pq->ctl, + pq->ctl_f.op, pq->ctl_f.int_en, + pq->ctl_f.compl_write, + pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", + pq->ctl_f.src_cnt); + for (i = 0; i < src_cnt; i++) { + dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, + (unsigned long long) pq16_get_src(descs, i), + pq->coef[i]); + } + dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); + dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *compl_desc; + struct ioat_ring_ent *desc; + struct ioat_ring_ent *ext; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + struct ioat_pq_ext_descriptor *pq_ex = NULL; + struct ioat_dma_descriptor *hw; + u32 offset = 0; + u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; + int i, s, idx, with_ext, num_descs; + int cb32 = (ioat_dma->version < IOAT_VER_3_3) ? 1 : 0; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + /* the engine requires at least two sources (we provide + * at least 1 implied source in the DMA_PREP_CONTINUE case) + */ + BUG_ON(src_cnt + dmaf_continue(flags) < 2); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + /* we need 2x the number of descriptors to cover greater than 3 + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. + */ + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { + with_ext = 1; + num_descs *= 2; + } else + with_ext = 0; + + /* completion writes from the raid engine may pass completion + * writes from the legacy engine, so we need one extra null + * (legacy) descriptor to ensure all completion writes arrive in + * order. 
+ */ + if (likely(num_descs) && + ioat_check_space_lock(ioat_chan, num_descs + cb32) == 0) + idx = ioat_chan->head; + else + return NULL; + i = 0; + do { + struct ioat_raw_descriptor *descs[2]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + /* save a branch by unconditionally retrieving the + * extended descriptor pq_set_src() knows to not write + * to it in the single descriptor case + */ + ext = ioat_get_ring_ent(ioat_chan, idx + i + with_ext); + pq_ex = ext->pq_ex; + + descs[0] = (struct ioat_raw_descriptor *) pq; + descs[1] = (struct ioat_raw_descriptor *) pq_ex; + + for (s = 0; s < src_cnt; s++) + pq_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq_set_src(descs, dst[0], offset, 0, s++); + pq_set_src(descs, dst[1], offset, 1, s++); + pq_set_src(descs, dst[1], offset, 0, s++); + } + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.src_cnt = src_cnt_to_hw(s); + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while ((i += 1 + with_ext) < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + dump_pq_desc_dbg(ioat_chan, desc, ext); + + if (!cb32) { + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + compl_desc = desc; + } else { + /* completion descriptor carries interrupt bit */ + compl_desc = ioat_get_ring_ent(ioat_chan, idx + i); + compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; + hw = compl_desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + dump_desc_dbg(ioat_chan, compl_desc); + } + + + /* we leave the channel locked to ensure in order submission */ + return &compl_desc->txd; +} + +static struct dma_async_tx_descriptor * +__ioat_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, + const dma_addr_t *dst, const dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, + size_t len, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma; + struct ioat_ring_ent *desc; + size_t total_len = len; + struct ioat_pq_descriptor *pq; + u32 offset = 0; + u8 op; + int i, s, idx, num_descs; + + /* this function is only called with 9-16 sources */ + op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; + + dev_dbg(to_dev(ioat_chan), "%s\n", __func__); + + num_descs = ioat_xferlen_to_descs(ioat_chan, len); + + /* + * 16 source pq is only available on cb3.3 and has no completion + * write hw bug. 
+ */ + if (num_descs && ioat_check_space_lock(ioat_chan, num_descs) == 0) + idx = ioat_chan->head; + else + return NULL; + + i = 0; + + do { + struct ioat_raw_descriptor *descs[4]; + size_t xfer_size = min_t(size_t, len, + 1 << ioat_chan->xfercap_log); + + desc = ioat_get_ring_ent(ioat_chan, idx + i); + pq = desc->pq; + + descs[0] = (struct ioat_raw_descriptor *) pq; + + desc->sed = ioat3_alloc_sed(ioat_dma, (src_cnt-2) >> 3); + if (!desc->sed) { + dev_err(to_dev(ioat_chan), + "%s: no free sed entries\n", __func__); + return NULL; + } + + pq->sed_addr = desc->sed->dma; + desc->sed->parent = desc; + + descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; + descs[2] = (void *)descs[1] + 64; + + for (s = 0; s < src_cnt; s++) + pq16_set_src(descs, src[s], offset, scf[s], s); + + /* see the comment for dma_maxpq in include/linux/dmaengine.h */ + if (dmaf_p_disabled_continue(flags)) + pq16_set_src(descs, dst[1], offset, 1, s++); + else if (dmaf_continue(flags)) { + pq16_set_src(descs, dst[0], offset, 0, s++); + pq16_set_src(descs, dst[1], offset, 1, s++); + pq16_set_src(descs, dst[1], offset, 0, s++); + } + + pq->size = xfer_size; + pq->p_addr = dst[0] + offset; + pq->q_addr = dst[1] + offset; + pq->ctl = 0; + pq->ctl_f.op = op; + pq->ctl_f.src_cnt = src16_cnt_to_hw(s); + /* we turn on descriptor write back error status */ + if (ioat_dma->cap & IOAT_CAP_DWBES) + pq->ctl_f.wb_en = result ? 1 : 0; + pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); + pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); + + len -= xfer_size; + offset += xfer_size; + } while (++i < num_descs); + + /* last pq descriptor carries the unmap parameters and fence bit */ + desc->txd.flags = flags; + desc->len = total_len; + if (result) + desc->result = result; + pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + + /* with cb3.3 we should be able to do completion w/o a null desc */ + pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); + pq->ctl_f.compl_write = 1; + + dump_pq16_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + +struct dma_async_tx_descriptor * +ioat_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + dst[0] = dst[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + dst[1] = dst[0]; + + /* handle the single source multiply case from the raid6 + * recovery path + */ + if ((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1) { + dma_addr_t single_source[2]; + unsigned char single_source_coef[2]; + + BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); + single_source[0] = src[0]; + single_source[1] = src[0]; + single_source_coef[0] = scf[0]; + single_source_coef[1] = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, dst, single_source, + 2, single_source_coef, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, dst, single_source, 2, + single_source_coef, len, flags); + + } else { + return src_cnt_flags(src_cnt, flags) > 8 ? 
+ __ioat_prep_pq16_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags) : + __ioat_prep_pq_lock(chan, NULL, dst, src, src_cnt, + scf, len, flags); + } +} + +struct dma_async_tx_descriptor * +ioat_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, + unsigned int src_cnt, const unsigned char *scf, size_t len, + enum sum_check_flags *pqres, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + /* specify valid address for disabled result */ + if (flags & DMA_PREP_PQ_DISABLE_P) + pq[0] = pq[1]; + if (flags & DMA_PREP_PQ_DISABLE_Q) + pq[1] = pq[0]; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *pqres = 0; + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (src_cnt > MAX_SCF) + return NULL; + + memset(scf, 0, src_cnt); + pq[0] = dst; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = dst; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? + __ioat_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags) : + __ioat_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, + flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, + unsigned int src_cnt, size_t len, + enum sum_check_flags *result, unsigned long flags) +{ + unsigned char scf[MAX_SCF]; + dma_addr_t pq[2]; + struct ioatdma_chan *ioat_chan = to_ioat_chan(chan); + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (src_cnt > MAX_SCF) + return NULL; + + /* the cleanup routine only sets bits on validate failure, it + * does not clear bits on validate success... so clear it here + */ + *result = 0; + + memset(scf, 0, src_cnt); + pq[0] = src[0]; + flags |= DMA_PREP_PQ_DISABLE_Q; + pq[1] = pq[0]; /* specify valid address for disabled result */ + + return src_cnt_flags(src_cnt, flags) > 8 ? 
+ __ioat_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags) : + __ioat_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, + scf, len, flags); +} + +struct dma_async_tx_descriptor * +ioat_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct ioat_ring_ent *desc; + struct ioat_dma_descriptor *hw; + + if (test_bit(IOAT_CHAN_DOWN, &ioat_chan->state)) + return NULL; + + if (ioat_check_space_lock(ioat_chan, 1) == 0) + desc = ioat_get_ring_ent(ioat_chan, ioat_chan->head); + else + return NULL; + + hw = desc->hw; + hw->ctl = 0; + hw->ctl_f.null = 1; + hw->ctl_f.int_en = 1; + hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); + hw->ctl_f.compl_write = 1; + hw->size = NULL_DESC_BUFFER_SIZE; + hw->src_addr = 0; + hw->dst_addr = 0; + + desc->txd.flags = flags; + desc->len = 1; + + dump_desc_dbg(ioat_chan, desc); + + /* we leave the channel locked to ensure in order submission */ + return &desc->txd; +} + diff --git a/kernel/drivers/dma/ioat/sysfs.c b/kernel/drivers/dma/ioat/sysfs.c new file mode 100644 index 000000000..cb4a857ee --- /dev/null +++ b/kernel/drivers/dma/ioat/sysfs.c @@ -0,0 +1,135 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/dmaengine.h> +#include <linux/pci.h> +#include "dma.h" +#include "registers.h" +#include "hw.h" + +#include "../dmaengine.h" + +static ssize_t cap_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + + return sprintf(page, "copy%s%s%s%s%s\n", + dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "", + dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "", + dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "", + dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "", + dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? 
" intr" : ""); + +} +struct ioat_sysfs_entry ioat_cap_attr = __ATTR_RO(cap); + +static ssize_t version_show(struct dma_chan *c, char *page) +{ + struct dma_device *dma = c->device; + struct ioatdma_device *ioat_dma = to_ioatdma_device(dma); + + return sprintf(page, "%d.%d\n", + ioat_dma->version >> 4, ioat_dma->version & 0xf); +} +struct ioat_sysfs_entry ioat_version_attr = __ATTR_RO(version); + +static ssize_t +ioat_attr_show(struct kobject *kobj, struct attribute *attr, char *page) +{ + struct ioat_sysfs_entry *entry; + struct ioatdma_chan *ioat_chan; + + entry = container_of(attr, struct ioat_sysfs_entry, attr); + ioat_chan = container_of(kobj, struct ioatdma_chan, kobj); + + if (!entry->show) + return -EIO; + return entry->show(&ioat_chan->dma_chan, page); +} + +const struct sysfs_ops ioat_sysfs_ops = { + .show = ioat_attr_show, +}; + +void ioat_kobject_add(struct ioatdma_device *ioat_dma, struct kobj_type *type) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + struct kobject *parent = &c->dev->device.kobj; + int err; + + err = kobject_init_and_add(&ioat_chan->kobj, type, + parent, "quickdata"); + if (err) { + dev_warn(to_dev(ioat_chan), + "sysfs init error (%d), continuing...\n", err); + kobject_put(&ioat_chan->kobj); + set_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state); + } + } +} + +void ioat_kobject_del(struct ioatdma_device *ioat_dma) +{ + struct dma_device *dma = &ioat_dma->dma_dev; + struct dma_chan *c; + + list_for_each_entry(c, &dma->channels, device_node) { + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + if (!test_bit(IOAT_KOBJ_INIT_FAIL, &ioat_chan->state)) { + kobject_del(&ioat_chan->kobj); + kobject_put(&ioat_chan->kobj); + } + } +} + +static ssize_t ring_size_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + return sprintf(page, "%d\n", (1 << ioat_chan->alloc_order) & ~1); +} +static struct ioat_sysfs_entry ring_size_attr = __ATTR_RO(ring_size); + +static ssize_t ring_active_show(struct dma_chan *c, char *page) +{ + struct ioatdma_chan *ioat_chan = to_ioat_chan(c); + + /* ...taken outside the lock, no need to be precise */ + return sprintf(page, "%d\n", ioat_ring_active(ioat_chan)); +} +static struct ioat_sysfs_entry ring_active_attr = __ATTR_RO(ring_active); + +static struct attribute *ioat_attrs[] = { + &ring_size_attr.attr, + &ring_active_attr.attr, + &ioat_cap_attr.attr, + &ioat_version_attr.attr, + NULL, +}; + +struct kobj_type ioat_ktype = { + .sysfs_ops = &ioat_sysfs_ops, + .default_attrs = ioat_attrs, +}; diff --git a/kernel/drivers/dma/iop-adma.c b/kernel/drivers/dma/iop-adma.c index 998826854..e4f43125e 100644 --- a/kernel/drivers/dma/iop-adma.c +++ b/kernel/drivers/dma/iop-adma.c @@ -1300,10 +1300,11 @@ static int iop_adma_probe(struct platform_device *pdev) * note: writecombine gives slightly better performance, but * requires that we explicitly flush the writes */ - if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, - plat_data->pool_size, - &adev->dma_desc_pool, - GFP_KERNEL)) == NULL) { + adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL); + if (!adev->dma_desc_pool_virt) { ret = -ENOMEM; goto err_free_adev; } diff --git a/kernel/drivers/dma/ipu/ipu_irq.c b/kernel/drivers/dma/ipu/ipu_irq.c index 2e284a443..2bf37e68a 100644 --- a/kernel/drivers/dma/ipu/ipu_irq.c +++ 
b/kernel/drivers/dma/ipu/ipu_irq.c @@ -265,10 +265,10 @@ int ipu_irq_unmap(unsigned int source) return ret; } -/* Chained IRQ handler for IPU error interrupt */ -static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) +/* Chained IRQ handler for IPU function and error interrupt */ +static void ipu_irq_handler(struct irq_desc *desc) { - struct ipu *ipu = irq_get_handler_data(irq); + struct ipu *ipu = irq_desc_get_handler_data(desc); u32 status; int i, line; @@ -286,43 +286,7 @@ static void ipu_irq_err(unsigned int irq, struct irq_desc *desc) raw_spin_unlock(&bank_lock); while ((line = ffs(status))) { struct ipu_irq_map *map; - - line--; - status &= ~(1UL << line); - - raw_spin_lock(&bank_lock); - map = src2map(32 * i + line); - if (map) - irq = map->irq; - raw_spin_unlock(&bank_lock); - - if (!map) { - pr_err("IPU: Interrupt on unmapped source %u bank %d\n", - line, i); - continue; - } - generic_handle_irq(irq); - } - } -} - -/* Chained IRQ handler for IPU function interrupt */ -static void ipu_irq_fn(unsigned int irq, struct irq_desc *desc) -{ - struct ipu *ipu = irq_desc_get_handler_data(desc); - u32 status; - int i, line; - - for (i = 0; i < IPU_IRQ_NR_FN_BANKS; i++) { - struct ipu_irq_bank *bank = irq_bank + i; - - raw_spin_lock(&bank_lock); - status = ipu_read_reg(ipu, bank->status); - /* Not clearing all interrupts, see above */ - status &= ipu_read_reg(ipu, bank->control); - raw_spin_unlock(&bank_lock); - while ((line = ffs(status))) { - struct ipu_irq_map *map; + unsigned int irq = NO_IRQ; line--; status &= ~(1UL << line); @@ -377,16 +341,12 @@ int __init ipu_irq_attach_irq(struct ipu *ipu, struct platform_device *dev) irq_map[i].irq = irq; irq_map[i].source = -EINVAL; irq_set_handler(irq, handle_level_irq); -#ifdef CONFIG_ARM - set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); -#endif + irq_clear_status_flags(irq, IRQ_NOREQUEST | IRQ_NOPROBE); } - irq_set_handler_data(ipu->irq_fn, ipu); - irq_set_chained_handler(ipu->irq_fn, ipu_irq_fn); + irq_set_chained_handler_and_data(ipu->irq_fn, ipu_irq_handler, ipu); - irq_set_handler_data(ipu->irq_err, ipu); - irq_set_chained_handler(ipu->irq_err, ipu_irq_err); + irq_set_chained_handler_and_data(ipu->irq_err, ipu_irq_handler, ipu); ipu->irq_base = irq_base; @@ -399,16 +359,12 @@ void ipu_irq_detach_irq(struct ipu *ipu, struct platform_device *dev) irq_base = ipu->irq_base; - irq_set_chained_handler(ipu->irq_fn, NULL); - irq_set_handler_data(ipu->irq_fn, NULL); + irq_set_chained_handler_and_data(ipu->irq_fn, NULL, NULL); - irq_set_chained_handler(ipu->irq_err, NULL); - irq_set_handler_data(ipu->irq_err, NULL); + irq_set_chained_handler_and_data(ipu->irq_err, NULL, NULL); for (irq = irq_base; irq < irq_base + CONFIG_MX3_IPU_IRQS; irq++) { -#ifdef CONFIG_ARM - set_irq_flags(irq, 0); -#endif + irq_set_status_flags(irq, IRQ_NOREQUEST); irq_set_chip(irq, NULL); irq_set_chip_data(irq, NULL); } diff --git a/kernel/drivers/dma/k3dma.c b/kernel/drivers/dma/k3dma.c index 647e362f0..1ba2fd738 100644 --- a/kernel/drivers/dma/k3dma.c +++ b/kernel/drivers/dma/k3dma.c @@ -24,7 +24,6 @@ #include "virt-dma.h" #define DRIVER_NAME "k3-dma" -#define DMA_ALIGN 3 #define DMA_MAX_SIZE 0x1ffc #define INT_STAT 0x00 @@ -732,7 +731,7 @@ static int k3_dma_probe(struct platform_device *op) d->slave.device_pause = k3_dma_transfer_pause; d->slave.device_resume = k3_dma_transfer_resume; d->slave.device_terminate_all = k3_dma_terminate_all; - d->slave.copy_align = DMA_ALIGN; + d->slave.copy_align = DMAENGINE_ALIGN_8_BYTES; /* init virtual channel */ d->chans = 
devm_kzalloc(&op->dev, diff --git a/kernel/drivers/dma/lpc18xx-dmamux.c b/kernel/drivers/dma/lpc18xx-dmamux.c new file mode 100644 index 000000000..761f32687 --- /dev/null +++ b/kernel/drivers/dma/lpc18xx-dmamux.c @@ -0,0 +1,183 @@ +/* + * DMA Router driver for LPC18xx/43xx DMA MUX + * + * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com> + * + * Based on TI DMA Crossbar driver by: + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/err.h> +#include <linux/init.h> +#include <linux/mfd/syscon.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/regmap.h> +#include <linux/spinlock.h> + +/* CREG register offset and macros for mux manipulation */ +#define LPC18XX_CREG_DMAMUX 0x11c +#define LPC18XX_DMAMUX_VAL(v, n) ((v) << (n * 2)) +#define LPC18XX_DMAMUX_MASK(n) (0x3 << (n * 2)) +#define LPC18XX_DMAMUX_MAX_VAL 0x3 + +struct lpc18xx_dmamux { + u32 value; + bool busy; +}; + +struct lpc18xx_dmamux_data { + struct dma_router dmarouter; + struct lpc18xx_dmamux *muxes; + u32 dma_master_requests; + u32 dma_mux_requests; + struct regmap *reg; + spinlock_t lock; +}; + +static void lpc18xx_dmamux_free(struct device *dev, void *route_data) +{ + struct lpc18xx_dmamux_data *dmamux = dev_get_drvdata(dev); + struct lpc18xx_dmamux *mux = route_data; + unsigned long flags; + + spin_lock_irqsave(&dmamux->lock, flags); + mux->busy = false; + spin_unlock_irqrestore(&dmamux->lock, flags); +} + +static void *lpc18xx_dmamux_reserve(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct lpc18xx_dmamux_data *dmamux = platform_get_drvdata(pdev); + unsigned long flags; + unsigned mux; + + if (dma_spec->args_count != 3) { + dev_err(&pdev->dev, "invalid number of dma mux args\n"); + return ERR_PTR(-EINVAL); + } + + mux = dma_spec->args[0]; + if (mux >= dmamux->dma_master_requests) { + dev_err(&pdev->dev, "invalid mux number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[1] > LPC18XX_DMAMUX_MAX_VAL) { + dev_err(&pdev->dev, "invalid dma mux value: %d\n", + dma_spec->args[1]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return ERR_PTR(-EINVAL); + } + + spin_lock_irqsave(&dmamux->lock, flags); + if (dmamux->muxes[mux].busy) { + spin_unlock_irqrestore(&dmamux->lock, flags); + dev_err(&pdev->dev, "dma request %u busy with %u.%u\n", + mux, mux, dmamux->muxes[mux].value); + of_node_put(dma_spec->np); + return ERR_PTR(-EBUSY); + } + + dmamux->muxes[mux].busy = true; + dmamux->muxes[mux].value = dma_spec->args[1]; + + regmap_update_bits(dmamux->reg, LPC18XX_CREG_DMAMUX, + LPC18XX_DMAMUX_MASK(mux), + LPC18XX_DMAMUX_VAL(dmamux->muxes[mux].value, mux)); + spin_unlock_irqrestore(&dmamux->lock, flags); + + dma_spec->args[1] = dma_spec->args[2]; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "mapping dmamux %u.%u to dma request %u\n", mux, + dmamux->muxes[mux].value, mux); + + return &dmamux->muxes[mux]; +} + +static int lpc18xx_dmamux_probe(struct platform_device *pdev) +{ + struct device_node 
*dma_np, *np = pdev->dev.of_node; + struct lpc18xx_dmamux_data *dmamux; + int ret; + + dmamux = devm_kzalloc(&pdev->dev, sizeof(*dmamux), GFP_KERNEL); + if (!dmamux) + return -ENOMEM; + + dmamux->reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); + if (IS_ERR(dmamux->reg)) { + dev_err(&pdev->dev, "syscon lookup failed\n"); + return PTR_ERR(dmamux->reg); + } + + ret = of_property_read_u32(np, "dma-requests", + &dmamux->dma_mux_requests); + if (ret) { + dev_err(&pdev->dev, "missing dma-requests property\n"); + return ret; + } + + dma_np = of_parse_phandle(np, "dma-masters", 0); + if (!dma_np) { + dev_err(&pdev->dev, "can't get dma master\n"); + return -ENODEV; + } + + ret = of_property_read_u32(dma_np, "dma-requests", + &dmamux->dma_master_requests); + of_node_put(dma_np); + if (ret) { + dev_err(&pdev->dev, "missing master dma-requests property\n"); + return ret; + } + + dmamux->muxes = devm_kcalloc(&pdev->dev, dmamux->dma_master_requests, + sizeof(struct lpc18xx_dmamux), + GFP_KERNEL); + if (!dmamux->muxes) + return -ENOMEM; + + spin_lock_init(&dmamux->lock); + platform_set_drvdata(pdev, dmamux); + dmamux->dmarouter.dev = &pdev->dev; + dmamux->dmarouter.route_free = lpc18xx_dmamux_free; + + return of_dma_router_register(np, lpc18xx_dmamux_reserve, + &dmamux->dmarouter); +} + +static const struct of_device_id lpc18xx_dmamux_match[] = { + { .compatible = "nxp,lpc1850-dmamux" }, + {}, +}; + +static struct platform_driver lpc18xx_dmamux_driver = { + .probe = lpc18xx_dmamux_probe, + .driver = { + .name = "lpc18xx-dmamux", + .of_match_table = lpc18xx_dmamux_match, + }, +}; + +static int __init lpc18xx_dmamux_init(void) +{ + return platform_driver_register(&lpc18xx_dmamux_driver); +} +arch_initcall(lpc18xx_dmamux_init); diff --git a/kernel/drivers/dma/mic_x100_dma.c b/kernel/drivers/dma/mic_x100_dma.c index 6de2e677b..068e920ec 100644 --- a/kernel/drivers/dma/mic_x100_dma.c +++ b/kernel/drivers/dma/mic_x100_dma.c @@ -22,6 +22,7 @@ #include <linux/module.h> #include <linux/io.h> #include <linux/seq_file.h> +#include <linux/vmalloc.h> #include "mic_x100_dma.h" @@ -192,8 +193,16 @@ static void mic_dma_prog_intr(struct mic_dma_chan *ch) static int mic_dma_do_dma(struct mic_dma_chan *ch, int flags, dma_addr_t src, dma_addr_t dst, size_t len) { - if (-ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) + if (len && -ENOMEM == mic_dma_prog_memcpy_desc(ch, src, dst, len)) { return -ENOMEM; + } else { + /* 3 is the maximum number of status descriptors */ + int ret = mic_dma_avail_desc_ring_space(ch, 3); + + if (ret < 0) + return ret; + } + /* Above mic_dma_prog_memcpy_desc() makes sure we have enough space */ if (flags & DMA_PREP_FENCE) { mic_dma_prep_status_desc(&ch->desc_ring[ch->head], 0, @@ -269,6 +278,33 @@ allocate_tx(struct mic_dma_chan *ch) return tx; } +/* Program a status descriptor with dst as address and value to be written */ +static struct dma_async_tx_descriptor * +mic_dma_prep_status_lock(struct dma_chan *ch, dma_addr_t dst, u64 src_val, + unsigned long flags) +{ + struct mic_dma_chan *mic_ch = to_mic_dma_chan(ch); + int result; + + spin_lock(&mic_ch->prep_lock); + result = mic_dma_avail_desc_ring_space(mic_ch, 4); + if (result < 0) + goto error; + mic_dma_prep_status_desc(&mic_ch->desc_ring[mic_ch->head], src_val, dst, + false); + mic_dma_hw_ring_inc_head(mic_ch); + result = mic_dma_do_dma(mic_ch, flags, 0, 0, 0); + if (result < 0) + goto error; + + return allocate_tx(mic_ch); +error: + dev_err(mic_dma_ch_to_device(mic_ch), + "Error enqueueing dma status descriptor, 
error=%d\n", result); + spin_unlock(&mic_ch->prep_lock); + return NULL; +} + /* * Prepare a memcpy descriptor to be added to the ring. * Note that the temporary descriptor adds an extra overhead of copying the @@ -586,6 +622,8 @@ static int mic_dma_register_dma_device(struct mic_dma_device *mic_dma_dev, mic_dma_free_chan_resources; mic_dma_dev->dma_dev.device_tx_status = mic_dma_tx_status; mic_dma_dev->dma_dev.device_prep_dma_memcpy = mic_dma_prep_memcpy_lock; + mic_dma_dev->dma_dev.device_prep_dma_imm_data = + mic_dma_prep_status_lock; mic_dma_dev->dma_dev.device_prep_dma_interrupt = mic_dma_prep_interrupt_lock; mic_dma_dev->dma_dev.device_issue_pending = mic_dma_issue_pending; diff --git a/kernel/drivers/dma/mic_x100_dma.h b/kernel/drivers/dma/mic_x100_dma.h index f663b0bdd..d89982034 100644 --- a/kernel/drivers/dma/mic_x100_dma.h +++ b/kernel/drivers/dma/mic_x100_dma.h @@ -39,7 +39,7 @@ */ #define MIC_DMA_MAX_NUM_CHAN 8 #define MIC_DMA_NUM_CHAN 4 -#define MIC_DMA_ALIGN_SHIFT 6 +#define MIC_DMA_ALIGN_SHIFT DMAENGINE_ALIGN_64_BYTES #define MIC_DMA_ALIGN_BYTES (1 << MIC_DMA_ALIGN_SHIFT) #define MIC_DMA_DESC_RX_SIZE (128 * 1024 - 4) diff --git a/kernel/drivers/dma/mmp_pdma.c b/kernel/drivers/dma/mmp_pdma.c index 462a0229a..e39457f13 100644 --- a/kernel/drivers/dma/mmp_pdma.c +++ b/kernel/drivers/dma/mmp_pdma.c @@ -72,7 +72,6 @@ #define DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ #define DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ -#define PDMA_ALIGNMENT 3 #define PDMA_MAX_DESC_BYTES DCMD_LENGTH struct mmp_pdma_desc_hw { @@ -1071,7 +1070,7 @@ static int mmp_pdma_probe(struct platform_device *op) pdev->device.device_issue_pending = mmp_pdma_issue_pending; pdev->device.device_config = mmp_pdma_config; pdev->device.device_terminate_all = mmp_pdma_terminate_all; - pdev->device.copy_align = PDMA_ALIGNMENT; + pdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; pdev->device.src_addr_widths = widths; pdev->device.dst_addr_widths = widths; pdev->device.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); diff --git a/kernel/drivers/dma/mmp_tdma.c b/kernel/drivers/dma/mmp_tdma.c index 449e785de..3df042260 100644 --- a/kernel/drivers/dma/mmp_tdma.c +++ b/kernel/drivers/dma/mmp_tdma.c @@ -100,7 +100,6 @@ enum mmp_tdma_type { PXA910_SQU, }; -#define TDMA_ALIGNMENT 3 #define TDMA_MAX_XFER_BYTES SZ_64K struct mmp_tdma_chan { @@ -657,7 +656,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) INIT_LIST_HEAD(&tdev->device.channels); if (pdev->dev.of_node) - pool = of_get_named_gen_pool(pdev->dev.of_node, "asram", 0); + pool = of_gen_pool_get(pdev->dev.of_node, "asram", 0); else pool = sram_get_gpool("asram"); if (!pool) { @@ -695,7 +694,7 @@ static int mmp_tdma_probe(struct platform_device *pdev) tdev->device.device_pause = mmp_tdma_pause_chan; tdev->device.device_resume = mmp_tdma_resume_chan; tdev->device.device_terminate_all = mmp_tdma_terminate_all; - tdev->device.copy_align = TDMA_ALIGNMENT; + tdev->device.copy_align = DMAENGINE_ALIGN_8_BYTES; dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); platform_set_drvdata(pdev, tdev); diff --git a/kernel/drivers/dma/moxart-dma.c b/kernel/drivers/dma/moxart-dma.c index b4634109e..631c4435e 100644 --- a/kernel/drivers/dma/moxart-dma.c +++ b/kernel/drivers/dma/moxart-dma.c @@ -652,6 +652,7 @@ static const struct of_device_id moxart_dma_match[] = { { .compatible = "moxa,moxart-dma" }, { } }; +MODULE_DEVICE_TABLE(of, moxart_dma_match); static struct platform_driver moxart_driver = { .probe = moxart_probe, diff --git a/kernel/drivers/dma/mpc512x_dma.c 
b/kernel/drivers/dma/mpc512x_dma.c index e6281e7aa..aae76fb39 100644 --- a/kernel/drivers/dma/mpc512x_dma.c +++ b/kernel/drivers/dma/mpc512x_dma.c @@ -1073,6 +1073,7 @@ static const struct of_device_id mpc_dma_match[] = { { .compatible = "fsl,mpc8308-dma", }, {}, }; +MODULE_DEVICE_TABLE(of, mpc_dma_match); static struct platform_driver mpc_dma_driver = { .probe = mpc_dma_probe, diff --git a/kernel/drivers/dma/mv_xor.c b/kernel/drivers/dma/mv_xor.c index 50f1b422d..1c2de9a83 100644 --- a/kernel/drivers/dma/mv_xor.c +++ b/kernel/drivers/dma/mv_xor.c @@ -13,23 +13,29 @@ */ #include <linux/init.h> -#include <linux/module.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/spinlock.h> #include <linux/interrupt.h> +#include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/memory.h> #include <linux/clk.h> #include <linux/of.h> #include <linux/of_irq.h> #include <linux/irqdomain.h> +#include <linux/cpumask.h> #include <linux/platform_data/dma-mv_xor.h> #include "dmaengine.h" #include "mv_xor.h" +enum mv_xor_mode { + XOR_MODE_IN_REG, + XOR_MODE_IN_DESC, +}; + static void mv_xor_issue_pending(struct dma_chan *chan); #define to_mv_xor_chan(chan) \ @@ -56,18 +62,30 @@ static void mv_desc_init(struct mv_xor_desc_slot *desc, hw_desc->byte_count = byte_count; } -static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, - u32 next_desc_addr) +static void mv_desc_set_mode(struct mv_xor_desc_slot *desc) { struct mv_xor_desc *hw_desc = desc->hw_desc; - BUG_ON(hw_desc->phy_next_desc); - hw_desc->phy_next_desc = next_desc_addr; + + switch (desc->type) { + case DMA_XOR: + case DMA_INTERRUPT: + hw_desc->desc_command |= XOR_DESC_OPERATION_XOR; + break; + case DMA_MEMCPY: + hw_desc->desc_command |= XOR_DESC_OPERATION_MEMCPY; + break; + default: + BUG(); + return; + } } -static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) { struct mv_xor_desc *hw_desc = desc->hw_desc; - hw_desc->phy_next_desc = 0; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; } static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, @@ -104,7 +122,7 @@ static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) return intr_cause; } -static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +static void mv_chan_clear_eoc_cause(struct mv_xor_chan *chan) { u32 val; @@ -114,14 +132,14 @@ static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +static void mv_chan_clear_err_status(struct mv_xor_chan *chan) { u32 val = 0xFFFF0000 >> (chan->idx * 16); writel_relaxed(val, XOR_INTR_CAUSE(chan)); } -static void mv_set_mode(struct mv_xor_chan *chan, - enum dma_transaction_type type) +static void mv_chan_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) { u32 op_mode; u32 config = readl_relaxed(XOR_CONFIG(chan)); @@ -154,6 +172,25 @@ static void mv_set_mode(struct mv_xor_chan *chan, chan->current_type = type; } +static void mv_chan_set_mode_to_desc(struct mv_xor_chan *chan) +{ + u32 op_mode; + u32 config = readl_relaxed(XOR_CONFIG(chan)); + + op_mode = XOR_OPERATION_MODE_IN_DESC; + + config &= ~0x7; + config |= op_mode; + +#if defined(__BIG_ENDIAN) + config |= XOR_DESCRIPTOR_SWAP; +#else + config &= ~XOR_DESCRIPTOR_SWAP; +#endif + + writel_relaxed(config, XOR_CONFIG(chan)); +} + static void 
mv_chan_activate(struct mv_xor_chan *chan) { dev_dbg(mv_chan_to_devp(chan), " activate chan.\n"); @@ -171,28 +208,13 @@ static char mv_chan_is_busy(struct mv_xor_chan *chan) return (state == 1) ? 1 : 0; } -/** - * mv_xor_free_slots - flags descriptor slots for reuse - * @slot: Slot to free - * Caller must hold &mv_chan->lock while calling this function - */ -static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *slot) -{ - dev_dbg(mv_chan_to_devp(mv_chan), "%s %d slot %p\n", - __func__, __LINE__, slot); - - slot->slot_used = 0; - -} - /* - * mv_xor_start_new_chain - program the engine to operate on new chain headed by - * sw_desc + * mv_chan_start_new_chain - program the engine to operate on new + * chain headed by sw_desc * Caller must hold &mv_chan->lock while calling this function */ -static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, - struct mv_xor_desc_slot *sw_desc) +static void mv_chan_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: sw_desc %p\n", __func__, __LINE__, sw_desc); @@ -205,8 +227,9 @@ static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, } static dma_cookie_t -mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +mv_desc_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, + dma_cookie_t cookie) { BUG_ON(desc->async_tx.cookie < 0); @@ -230,44 +253,41 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, } static int -mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +mv_chan_clean_completed_slots(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { - if (async_tx_test_ack(&iter->async_tx)) { - list_del(&iter->completed_node); - mv_xor_free_slots(mv_chan, iter); - } + if (async_tx_test_ack(&iter->async_tx)) + list_move_tail(&iter->node, &mv_chan->free_slots); } return 0; } static int -mv_xor_clean_slot(struct mv_xor_desc_slot *desc, - struct mv_xor_chan *mv_chan) +mv_desc_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) { dev_dbg(mv_chan_to_devp(mv_chan), "%s %d: desc %p flags %d\n", __func__, __LINE__, desc, desc->async_tx.flags); - list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations * until 'ack' is set */ - if (!async_tx_test_ack(&desc->async_tx)) { + if (!async_tx_test_ack(&desc->async_tx)) /* move this slot to the completed_slots */ - list_add_tail(&desc->completed_node, &mv_chan->completed_slots); - return 0; - } + list_move_tail(&desc->node, &mv_chan->completed_slots); + else + list_move_tail(&desc->node, &mv_chan->free_slots); - mv_xor_free_slots(mv_chan, desc); return 0; } /* This function must be called with the mv_xor_chan spinlock held */ -static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +static void mv_chan_slot_cleanup(struct mv_xor_chan *mv_chan) { struct mv_xor_desc_slot *iter, *_iter; dma_cookie_t cookie = 0; @@ -278,23 +298,23 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) dev_dbg(mv_chan_to_devp(mv_chan), "%s %d\n", __func__, __LINE__); dev_dbg(mv_chan_to_devp(mv_chan), "current_desc %x\n", current_desc); - mv_xor_clean_completed_slots(mv_chan); + mv_chan_clean_completed_slots(mv_chan); /* free completed slots from the chain starting with * the oldest 
descriptor */ list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { /* clean finished descriptors */ hw_desc = iter->hw_desc; if (hw_desc->status & XOR_DESC_SUCCESS) { - cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, - cookie); + cookie = mv_desc_run_tx_complete_actions(iter, mv_chan, + cookie); /* done processing desc, clean slot */ - mv_xor_clean_slot(iter, mv_chan); + mv_desc_clean_slot(iter, mv_chan); /* break if we did cleaned the current */ if (iter->async_tx.phys == current_desc) { @@ -317,18 +337,18 @@ static void mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) */ iter = list_entry(mv_chan->chain.next, struct mv_xor_desc_slot, - chain_node); - mv_xor_start_new_chain(mv_chan, iter); + node); + mv_chan_start_new_chain(mv_chan, iter); } else { - if (!list_is_last(&iter->chain_node, &mv_chan->chain)) { + if (!list_is_last(&iter->node, &mv_chan->chain)) { /* * descriptors are still waiting after * current, trigger them */ - iter = list_entry(iter->chain_node.next, + iter = list_entry(iter->node.next, struct mv_xor_desc_slot, - chain_node); - mv_xor_start_new_chain(mv_chan, iter); + node); + mv_chan_start_new_chain(mv_chan, iter); } else { /* * some descriptors are still waiting @@ -348,56 +368,35 @@ static void mv_xor_tasklet(unsigned long data) struct mv_xor_chan *chan = (struct mv_xor_chan *) data; spin_lock_bh(&chan->lock); - mv_xor_slot_cleanup(chan); + mv_chan_slot_cleanup(chan); spin_unlock_bh(&chan->lock); } static struct mv_xor_desc_slot * -mv_xor_alloc_slot(struct mv_xor_chan *mv_chan) +mv_chan_alloc_slot(struct mv_xor_chan *mv_chan) { - struct mv_xor_desc_slot *iter, *_iter; - int retry = 0; + struct mv_xor_desc_slot *iter; - /* start search from the last allocated descrtiptor - * if a contiguous allocation can not be found start searching - * from the beginning of the list - */ -retry: - if (retry == 0) - iter = mv_chan->last_used; - else - iter = list_entry(&mv_chan->all_slots, - struct mv_xor_desc_slot, - slot_node); - - list_for_each_entry_safe_continue( - iter, _iter, &mv_chan->all_slots, slot_node) { - - prefetch(_iter); - prefetch(&_iter->async_tx); - if (iter->slot_used) { - /* give up after finding the first busy slot - * on the second pass through the list - */ - if (retry) - break; - continue; - } + spin_lock_bh(&mv_chan->lock); + + if (!list_empty(&mv_chan->free_slots)) { + iter = list_first_entry(&mv_chan->free_slots, + struct mv_xor_desc_slot, + node); + + list_move_tail(&iter->node, &mv_chan->allocated_slots); + + spin_unlock_bh(&mv_chan->lock); /* pre-ack descriptor */ async_tx_ack(&iter->async_tx); - - iter->slot_used = 1; - INIT_LIST_HEAD(&iter->chain_node); iter->async_tx.cookie = -EBUSY; - mv_chan->last_used = iter; - mv_desc_clear_next_desc(iter); return iter; } - if (!retry++) - goto retry; + + spin_unlock_bh(&mv_chan->lock); /* try to free some slots if the allocation fails */ tasklet_schedule(&mv_chan->irq_tasklet); @@ -423,14 +422,14 @@ mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) cookie = dma_cookie_assign(tx); if (list_empty(&mv_chan->chain)) - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + list_move_tail(&sw_desc->node, &mv_chan->chain); else { new_hw_chain = 0; old_chain_tail = list_entry(mv_chan->chain.prev, struct mv_xor_desc_slot, - chain_node); - list_add_tail(&sw_desc->chain_node, &mv_chan->chain); + node); + list_move_tail(&sw_desc->node, &mv_chan->chain); dev_dbg(mv_chan_to_devp(mv_chan), "Append to last desc %pa\n", &old_chain_tail->async_tx.phys); @@ -451,7 +450,7 @@ 
mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) } if (new_hw_chain) - mv_xor_start_new_chain(mv_chan, sw_desc); + mv_chan_start_new_chain(mv_chan, sw_desc); spin_unlock_bh(&mv_chan->lock); @@ -483,26 +482,20 @@ static int mv_xor_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&slot->async_tx, chan); slot->async_tx.tx_submit = mv_xor_tx_submit; - INIT_LIST_HEAD(&slot->chain_node); - INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->node); dma_desc = mv_chan->dma_desc_pool; slot->async_tx.phys = dma_desc + idx * MV_XOR_SLOT_SIZE; slot->idx = idx++; spin_lock_bh(&mv_chan->lock); mv_chan->slots_allocated = idx; - list_add_tail(&slot->slot_node, &mv_chan->all_slots); + list_add_tail(&slot->node, &mv_chan->free_slots); spin_unlock_bh(&mv_chan->lock); } - if (mv_chan->slots_allocated && !mv_chan->last_used) - mv_chan->last_used = list_entry(mv_chan->all_slots.next, - struct mv_xor_desc_slot, - slot_node); - dev_dbg(mv_chan_to_devp(mv_chan), - "allocated %d descriptor slots last_used: %p\n", - mv_chan->slots_allocated, mv_chan->last_used); + "allocated %d descriptor slots\n", + mv_chan->slots_allocated); return mv_chan->slots_allocated ? : -ENOMEM; } @@ -523,16 +516,17 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, "%s src_cnt: %d len: %u dest %pad flags: %ld\n", __func__, src_cnt, len, &dest, flags); - spin_lock_bh(&mv_chan->lock); - sw_desc = mv_xor_alloc_slot(mv_chan); + sw_desc = mv_chan_alloc_slot(mv_chan); if (sw_desc) { sw_desc->type = DMA_XOR; sw_desc->async_tx.flags = flags; mv_desc_init(sw_desc, dest, len, flags); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_desc_set_mode(sw_desc); while (src_cnt--) mv_desc_set_src_addr(sw_desc, src_cnt, src[src_cnt]); } - spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan_to_devp(mv_chan), "%s sw_desc %p async_tx %p \n", __func__, sw_desc, &sw_desc->async_tx); @@ -576,25 +570,29 @@ static void mv_xor_free_chan_resources(struct dma_chan *chan) spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); list_for_each_entry_safe(iter, _iter, &mv_chan->chain, - chain_node) { + node) { in_use_descs++; - list_del(&iter->chain_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, - completed_node) { + node) { + in_use_descs++; + list_move_tail(&iter->node, &mv_chan->free_slots); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->allocated_slots, + node) { in_use_descs++; - list_del(&iter->completed_node); + list_move_tail(&iter->node, &mv_chan->free_slots); } list_for_each_entry_safe_reverse( - iter, _iter, &mv_chan->all_slots, slot_node) { - list_del(&iter->slot_node); + iter, _iter, &mv_chan->free_slots, node) { + list_del(&iter->node); kfree(iter); mv_chan->slots_allocated--; } - mv_chan->last_used = NULL; dev_dbg(mv_chan_to_devp(mv_chan), "%s slots_allocated %d\n", __func__, mv_chan->slots_allocated); @@ -623,13 +621,13 @@ static enum dma_status mv_xor_status(struct dma_chan *chan, return ret; spin_lock_bh(&mv_chan->lock); - mv_xor_slot_cleanup(mv_chan); + mv_chan_slot_cleanup(mv_chan); spin_unlock_bh(&mv_chan->lock); return dma_cookie_status(chan, cookie, txstate); } -static void mv_dump_xor_regs(struct mv_xor_chan *chan) +static void mv_chan_dump_regs(struct mv_xor_chan *chan) { u32 val; @@ -652,8 +650,8 @@ static void mv_dump_xor_regs(struct mv_xor_chan *chan) dev_err(mv_chan_to_devp(chan), "error addr 0x%08x\n", val); } -static void mv_xor_err_interrupt_handler(struct 
mv_xor_chan *chan, - u32 intr_cause) +static void mv_chan_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) { if (intr_cause & XOR_INT_ERR_DECODE) { dev_dbg(mv_chan_to_devp(chan), "ignoring address decode error\n"); @@ -663,7 +661,7 @@ static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, dev_err(mv_chan_to_devp(chan), "error on chan %d. intr cause 0x%08x\n", chan->idx, intr_cause); - mv_dump_xor_regs(chan); + mv_chan_dump_regs(chan); WARN_ON(1); } @@ -675,11 +673,11 @@ static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) dev_dbg(mv_chan_to_devp(chan), "intr cause %x\n", intr_cause); if (intr_cause & XOR_INTR_ERRORS) - mv_xor_err_interrupt_handler(chan, intr_cause); + mv_chan_err_interrupt_handler(chan, intr_cause); tasklet_schedule(&chan->irq_tasklet); - mv_xor_device_clear_eoc_cause(chan); + mv_chan_clear_eoc_cause(chan); return IRQ_HANDLED; } @@ -698,7 +696,7 @@ static void mv_xor_issue_pending(struct dma_chan *chan) * Perform a transaction to verify the HW works. */ -static int mv_xor_memcpy_self_test(struct mv_xor_chan *mv_chan) +static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan) { int i, ret; void *src, *dest; @@ -807,7 +805,7 @@ out: #define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ static int -mv_xor_xor_self_test(struct mv_xor_chan *mv_chan) +mv_chan_xor_self_test(struct mv_xor_chan *mv_chan) { int i, src_idx, ret; struct page *dest; @@ -971,7 +969,7 @@ static int mv_xor_channel_remove(struct mv_xor_chan *mv_chan) static struct mv_xor_chan * mv_xor_channel_add(struct mv_xor_device *xordev, struct platform_device *pdev, - int idx, dma_cap_mask_t cap_mask, int irq) + int idx, dma_cap_mask_t cap_mask, int irq, int op_in_desc) { int ret = 0; struct mv_xor_chan *mv_chan; @@ -983,6 +981,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->idx = idx; mv_chan->irq = irq; + mv_chan->op_in_desc = op_in_desc; dma_dev = &mv_chan->dmadev; @@ -1034,7 +1033,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan); /* clear errors before enabling interrupts */ - mv_xor_device_clear_err_status(mv_chan); + mv_chan_clear_err_status(mv_chan); ret = request_irq(mv_chan->irq, mv_xor_interrupt_handler, 0, dev_name(&pdev->dev), mv_chan); @@ -1043,32 +1042,37 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan_unmask_interrupts(mv_chan); - mv_set_mode(mv_chan, DMA_XOR); + if (mv_chan->op_in_desc == XOR_MODE_IN_DESC) + mv_chan_set_mode_to_desc(mv_chan); + else + mv_chan_set_mode(mv_chan, DMA_XOR); spin_lock_init(&mv_chan->lock); INIT_LIST_HEAD(&mv_chan->chain); INIT_LIST_HEAD(&mv_chan->completed_slots); - INIT_LIST_HEAD(&mv_chan->all_slots); + INIT_LIST_HEAD(&mv_chan->free_slots); + INIT_LIST_HEAD(&mv_chan->allocated_slots); mv_chan->dmachan.device = dma_dev; dma_cookie_init(&mv_chan->dmachan); list_add_tail(&mv_chan->dmachan.device_node, &dma_dev->channels); if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { - ret = mv_xor_memcpy_self_test(mv_chan); + ret = mv_chan_memcpy_self_test(mv_chan); dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); if (ret) goto err_free_irq; } if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { - ret = mv_xor_xor_self_test(mv_chan); + ret = mv_chan_xor_self_test(mv_chan); dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); if (ret) goto err_free_irq; } - dev_info(&pdev->dev, "Marvell XOR: ( %s%s%s)\n", + dev_info(&pdev->dev, "Marvell XOR (%s): ( %s%s%s)\n", + mv_chan->op_in_desc ? "Descriptor Mode" : "Registers Mode", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? 
"xor " : "", dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); @@ -1117,13 +1121,23 @@ mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, writel(0, base + WINDOW_OVERRIDE_CTRL(1)); } +static const struct of_device_id mv_xor_dt_ids[] = { + { .compatible = "marvell,orion-xor", .data = (void *)XOR_MODE_IN_REG }, + { .compatible = "marvell,armada-380-xor", .data = (void *)XOR_MODE_IN_DESC }, + {}, +}; + +static unsigned int mv_xor_engine_count; + static int mv_xor_probe(struct platform_device *pdev) { const struct mbus_dram_target_info *dram; struct mv_xor_device *xordev; struct mv_xor_platform_data *pdata = dev_get_platdata(&pdev->dev); struct resource *res; + unsigned int max_engines, max_channels; int i, ret; + int op_in_desc; dev_notice(&pdev->dev, "Marvell shared XOR driver\n"); @@ -1165,22 +1179,41 @@ static int mv_xor_probe(struct platform_device *pdev) if (!IS_ERR(xordev->clk)) clk_prepare_enable(xordev->clk); + /* + * We don't want to have more than one channel per CPU in + * order for async_tx to perform well. So we limit the number + * of engines and channels so that we take into account this + * constraint. Note that we also want to use channels from + * separate engines when possible. + */ + max_engines = num_present_cpus(); + max_channels = min_t(unsigned int, + MV_XOR_MAX_CHANNELS, + DIV_ROUND_UP(num_present_cpus(), 2)); + + if (mv_xor_engine_count >= max_engines) + return 0; + if (pdev->dev.of_node) { struct device_node *np; int i = 0; + const struct of_device_id *of_id = + of_match_device(mv_xor_dt_ids, + &pdev->dev); for_each_child_of_node(pdev->dev.of_node, np) { struct mv_xor_chan *chan; dma_cap_mask_t cap_mask; int irq; + op_in_desc = (int)of_id->data; + + if (i >= max_channels) + continue; dma_cap_zero(cap_mask); - if (of_property_read_bool(np, "dmacap,memcpy")) - dma_cap_set(DMA_MEMCPY, cap_mask); - if (of_property_read_bool(np, "dmacap,xor")) - dma_cap_set(DMA_XOR, cap_mask); - if (of_property_read_bool(np, "dmacap,interrupt")) - dma_cap_set(DMA_INTERRUPT, cap_mask); + dma_cap_set(DMA_MEMCPY, cap_mask); + dma_cap_set(DMA_XOR, cap_mask); + dma_cap_set(DMA_INTERRUPT, cap_mask); irq = irq_of_parse_and_map(np, 0); if (!irq) { @@ -1189,7 +1222,7 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cap_mask, irq); + cap_mask, irq, op_in_desc); if (IS_ERR(chan)) { ret = PTR_ERR(chan); irq_dispose_mapping(irq); @@ -1200,7 +1233,7 @@ static int mv_xor_probe(struct platform_device *pdev) i++; } } else if (pdata && pdata->channels) { - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { + for (i = 0; i < max_channels; i++) { struct mv_xor_channel_data *cd; struct mv_xor_chan *chan; int irq; @@ -1218,7 +1251,8 @@ static int mv_xor_probe(struct platform_device *pdev) } chan = mv_xor_channel_add(xordev, pdev, i, - cd->cap_mask, irq); + cd->cap_mask, irq, + XOR_MODE_IN_REG); if (IS_ERR(chan)) { ret = PTR_ERR(chan); goto err_channel_add; @@ -1246,35 +1280,8 @@ err_channel_add: return ret; } -static int mv_xor_remove(struct platform_device *pdev) -{ - struct mv_xor_device *xordev = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < MV_XOR_MAX_CHANNELS; i++) { - if (xordev->channels[i]) - mv_xor_channel_remove(xordev->channels[i]); - } - - if (!IS_ERR(xordev->clk)) { - clk_disable_unprepare(xordev->clk); - clk_put(xordev->clk); - } - - return 0; -} - -#ifdef CONFIG_OF -static const struct of_device_id mv_xor_dt_ids[] = { - { .compatible = "marvell,orion-xor", }, - 
{}, -}; -MODULE_DEVICE_TABLE(of, mv_xor_dt_ids); -#endif - static struct platform_driver mv_xor_driver = { .probe = mv_xor_probe, - .remove = mv_xor_remove, .driver = { .name = MV_XOR_NAME, .of_match_table = of_match_ptr(mv_xor_dt_ids), @@ -1286,19 +1293,10 @@ static int __init mv_xor_init(void) { return platform_driver_register(&mv_xor_driver); } -module_init(mv_xor_init); - -/* it's currently unsafe to unload this module */ -#if 0 -static void __exit mv_xor_exit(void) -{ - platform_driver_unregister(&mv_xor_driver); - return; -} - -module_exit(mv_xor_exit); -#endif +device_initcall(mv_xor_init); +/* MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine"); MODULE_LICENSE("GPL"); +*/ diff --git a/kernel/drivers/dma/mv_xor.h b/kernel/drivers/dma/mv_xor.h index 0e302b3a3..b7455b421 100644 --- a/kernel/drivers/dma/mv_xor.h +++ b/kernel/drivers/dma/mv_xor.h @@ -19,7 +19,7 @@ #include <linux/dmaengine.h> #include <linux/interrupt.h> -#define MV_XOR_POOL_SIZE PAGE_SIZE +#define MV_XOR_POOL_SIZE (MV_XOR_SLOT_SIZE * 3072) #define MV_XOR_SLOT_SIZE 64 #define MV_XOR_THRESHOLD 1 #define MV_XOR_MAX_CHANNELS 2 @@ -30,9 +30,14 @@ /* Values for the XOR_CONFIG register */ #define XOR_OPERATION_MODE_XOR 0 #define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_IN_DESC 7 #define XOR_DESCRIPTOR_SWAP BIT(14) #define XOR_DESC_SUCCESS 0x40000000 +#define XOR_DESC_OPERATION_XOR (0 << 24) +#define XOR_DESC_OPERATION_CRC32C (1 << 24) +#define XOR_DESC_OPERATION_MEMCPY (2 << 24) + #define XOR_DESC_DMA_OWNED BIT(31) #define XOR_DESC_EOD_INT_EN BIT(31) @@ -89,13 +94,14 @@ struct mv_xor_device { * @mmr_base: memory mapped register base * @idx: the index of the xor channel * @chain: device chain view of the descriptors + * @free_slots: free slots usable by the channel + * @allocated_slots: slots allocated by the driver * @completed_slots: slots completed by HW but still need to be acked * @device: parent device * @common: common dmaengine channel object members - * @last_used: place holder for allocation to continue from where it left off - * @all_slots: complete domain of slots usable by the channel * @slots_allocated: records the actual size of the descriptor slot pool * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + * @op_in_desc: new mode of driver, each op is writen to descriptor. 
*/ struct mv_xor_chan { int pending; @@ -106,16 +112,17 @@ struct mv_xor_chan { int irq; enum dma_transaction_type current_type; struct list_head chain; + struct list_head free_slots; + struct list_head allocated_slots; struct list_head completed_slots; dma_addr_t dma_desc_pool; void *dma_desc_pool_virt; size_t pool_size; struct dma_device dmadev; struct dma_chan dmachan; - struct mv_xor_desc_slot *last_used; - struct list_head all_slots; int slots_allocated; struct tasklet_struct irq_tasklet; + int op_in_desc; char dummy_src[MV_XOR_MIN_BYTE_COUNT]; char dummy_dst[MV_XOR_MIN_BYTE_COUNT]; dma_addr_t dummy_src_addr, dummy_dst_addr; @@ -123,9 +130,7 @@ struct mv_xor_chan { /** * struct mv_xor_desc_slot - software descriptor - * @slot_node: node on the mv_xor_chan.all_slots list - * @chain_node: node on the mv_xor_chan.chain list - * @completed_node: node on the mv_xor_chan.completed_slots list + * @node: node on the mv_xor_chan lists * @hw_desc: virtual address of the hardware descriptor chain * @phys: hardware address of the hardware descriptor chain * @slot_used: slot in use or not @@ -134,12 +139,9 @@ struct mv_xor_chan { * @async_tx: support for the async_tx api */ struct mv_xor_desc_slot { - struct list_head slot_node; - struct list_head chain_node; - struct list_head completed_node; + struct list_head node; enum dma_transaction_type type; void *hw_desc; - u16 slot_used; u16 idx; struct dma_async_tx_descriptor async_tx; }; diff --git a/kernel/drivers/dma/mxs-dma.c b/kernel/drivers/dma/mxs-dma.c index 829ec686d..60de35251 100644 --- a/kernel/drivers/dma/mxs-dma.c +++ b/kernel/drivers/dma/mxs-dma.c @@ -170,7 +170,7 @@ static struct mxs_dma_type mxs_dma_types[] = { } }; -static struct platform_device_id mxs_dma_ids[] = { +static const struct platform_device_id mxs_dma_ids[] = { { .name = "imx23-dma-apbh", .driver_data = (kernel_ulong_t) &mxs_dma_types[0], diff --git a/kernel/drivers/dma/nbpfaxi.c b/kernel/drivers/dma/nbpfaxi.c index 88b77c983..2b5a198ac 100644 --- a/kernel/drivers/dma/nbpfaxi.c +++ b/kernel/drivers/dma/nbpfaxi.c @@ -1455,7 +1455,7 @@ static int nbpf_remove(struct platform_device *pdev) return 0; } -static struct platform_device_id nbpf_ids[] = { +static const struct platform_device_id nbpf_ids[] = { {"nbpfaxi64dmac1b4", (kernel_ulong_t)&nbpf_cfg[NBPF1B4]}, {"nbpfaxi64dmac1b8", (kernel_ulong_t)&nbpf_cfg[NBPF1B8]}, {"nbpfaxi64dmac1b16", (kernel_ulong_t)&nbpf_cfg[NBPF1B16]}, diff --git a/kernel/drivers/dma/of-dma.c b/kernel/drivers/dma/of-dma.c index cbd4a8aff..1e1f2986e 100644 --- a/kernel/drivers/dma/of-dma.c +++ b/kernel/drivers/dma/of-dma.c @@ -45,6 +45,50 @@ static struct of_dma *of_dma_find_controller(struct of_phandle_args *dma_spec) } /** + * of_dma_router_xlate - translation function for router devices + * @dma_spec: pointer to DMA specifier as found in the device tree + * @of_dma: pointer to DMA controller data (router information) + * + * The function creates new dma_spec to be passed to the router driver's + * of_dma_route_allocate() function to prepare a dma_spec which will be used + * to request channel from the real DMA controller. 
+ */ +static struct dma_chan *of_dma_router_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct dma_chan *chan; + struct of_dma *ofdma_target; + struct of_phandle_args dma_spec_target; + void *route_data; + + /* translate the request for the real DMA controller */ + memcpy(&dma_spec_target, dma_spec, sizeof(dma_spec_target)); + route_data = ofdma->of_dma_route_allocate(&dma_spec_target, ofdma); + if (IS_ERR(route_data)) + return NULL; + + ofdma_target = of_dma_find_controller(&dma_spec_target); + if (!ofdma_target) + return NULL; + + chan = ofdma_target->of_dma_xlate(&dma_spec_target, ofdma_target); + if (chan) { + chan->router = ofdma->dma_router; + chan->route_data = route_data; + } else { + ofdma->dma_router->route_free(ofdma->dma_router->dev, + route_data); + } + + /* + * Need to put the node back since the ofdma->of_dma_route_allocate + * has taken it for generating the new, translated dma_spec + */ + of_node_put(dma_spec_target.np); + return chan; +} + +/** * of_dma_controller_register - Register a DMA controller to DT DMA helpers * @np: device node of DMA controller * @of_dma_xlate: translation function which converts a phandle @@ -110,6 +154,51 @@ void of_dma_controller_free(struct device_node *np) EXPORT_SYMBOL_GPL(of_dma_controller_free); /** + * of_dma_router_register - Register a DMA router to DT DMA helpers as a + * controller + * @np: device node of DMA router + * @of_dma_route_allocate: setup function for the router which need to + * modify the dma_spec for the DMA controller to + * use and to set up the requested route. + * @dma_router: pointer to dma_router structure to be used when + * the route need to be free up. + * + * Returns 0 on success or appropriate errno value on error. + * + * Allocated memory should be freed with appropriate of_dma_controller_free() + * call. 
+ */ +int of_dma_router_register(struct device_node *np, + void *(*of_dma_route_allocate) + (struct of_phandle_args *, struct of_dma *), + struct dma_router *dma_router) +{ + struct of_dma *ofdma; + + if (!np || !of_dma_route_allocate || !dma_router) { + pr_err("%s: not enough information provided\n", __func__); + return -EINVAL; + } + + ofdma = kzalloc(sizeof(*ofdma), GFP_KERNEL); + if (!ofdma) + return -ENOMEM; + + ofdma->of_node = np; + ofdma->of_dma_xlate = of_dma_router_xlate; + ofdma->of_dma_route_allocate = of_dma_route_allocate; + ofdma->dma_router = dma_router; + + /* Now queue of_dma controller structure in list */ + mutex_lock(&of_dma_lock); + list_add_tail(&ofdma->of_dma_controllers, &of_dma_list); + mutex_unlock(&of_dma_lock); + + return 0; +} +EXPORT_SYMBOL_GPL(of_dma_router_register); + +/** * of_dma_match_channel - Check if a DMA specifier matches name * @np: device node to look for DMA channels * @name: channel name to be matched diff --git a/kernel/drivers/dma/omap-dma.c b/kernel/drivers/dma/omap-dma.c index 167dbaf65..1dfc71c90 100644 --- a/kernel/drivers/dma/omap-dma.c +++ b/kernel/drivers/dma/omap-dma.c @@ -22,6 +22,9 @@ #include "virt-dma.h" +#define OMAP_SDMA_REQUESTS 127 +#define OMAP_SDMA_CHANNELS 32 + struct omap_dmadev { struct dma_device ddev; spinlock_t lock; @@ -31,9 +34,10 @@ struct omap_dmadev { const struct omap_dma_reg *reg_map; struct omap_system_dma_plat_info *plat; bool legacy; + unsigned dma_requests; spinlock_t irq_lock; uint32_t irq_enable_mask; - struct omap_chan *lch_map[32]; + struct omap_chan *lch_map[OMAP_SDMA_CHANNELS]; }; struct omap_chan { @@ -362,7 +366,7 @@ static void omap_dma_start_sg(struct omap_chan *c, struct omap_desc *d, struct omap_sg *sg = d->sg + idx; unsigned cxsa, cxei, cxfi; - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CDSA; cxei = CDEI; cxfi = CDFI; @@ -408,7 +412,7 @@ static void omap_dma_start_desc(struct omap_chan *c) if (dma_omap1()) omap_dma_chan_write(c, CCR2, d->ccr >> 16); - if (d->dir == DMA_DEV_TO_MEM) { + if (d->dir == DMA_DEV_TO_MEM || d->dir == DMA_MEM_TO_MEM) { cxsa = CSSA; cxei = CSEI; cxfi = CSFI; @@ -589,6 +593,7 @@ static void omap_dma_free_chan_resources(struct dma_chan *chan) omap_free_dma(c->dma_ch); dev_dbg(od->ddev.dev, "freeing channel for %u\n", c->dma_sig); + c->dma_sig = 0; } static size_t omap_dma_sg_size(struct omap_sg *sg) @@ -930,8 +935,12 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( else d->ccr |= CCR_SYNC_ELEMENT; - if (dir == DMA_DEV_TO_MEM) + if (dir == DMA_DEV_TO_MEM) { d->ccr |= CCR_TRIGGER_SRC; + d->csdp |= CSDP_DST_PACKED; + } else { + d->csdp |= CSDP_SRC_PACKED; + } d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; @@ -948,6 +957,51 @@ static struct dma_async_tx_descriptor *omap_dma_prep_dma_cyclic( return vchan_tx_prep(&c->vc, &d->vd, flags); } +static struct dma_async_tx_descriptor *omap_dma_prep_dma_memcpy( + struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long tx_flags) +{ + struct omap_chan *c = to_omap_dma_chan(chan); + struct omap_desc *d; + uint8_t data_type; + + d = kzalloc(sizeof(*d) + sizeof(d->sg[0]), GFP_ATOMIC); + if (!d) + return NULL; + + data_type = __ffs((src | dest | len)); + if (data_type > CSDP_DATA_TYPE_32) + data_type = CSDP_DATA_TYPE_32; + + d->dir = DMA_MEM_TO_MEM; + d->dev_addr = src; + d->fi = 0; + d->es = data_type; + d->sg[0].en = len / BIT(data_type); + d->sg[0].fn = 1; + d->sg[0].addr = dest; + d->sglen = 1; + d->ccr = c->ccr; + d->ccr |= 
CCR_DST_AMODE_POSTINC | CCR_SRC_AMODE_POSTINC; + + d->cicr = CICR_DROP_IE; + if (tx_flags & DMA_PREP_INTERRUPT) + d->cicr |= CICR_FRAME_IE; + + d->csdp = data_type; + + if (dma_omap1()) { + d->cicr |= CICR_TOUT_IE; + d->csdp |= CSDP_DST_PORT_EMIFF | CSDP_SRC_PORT_EMIFF; + } else { + d->csdp |= CSDP_DST_PACKED | CSDP_SRC_PACKED; + d->cicr |= CICR_MISALIGNED_ERR_IE | CICR_TRANS_ERR_IE; + d->csdp |= CSDP_DST_BURST_64 | CSDP_SRC_BURST_64; + } + + return vchan_tx_prep(&c->vc, &d->vd, tx_flags); +} + static int omap_dma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg) { struct omap_chan *c = to_omap_dma_chan(chan); @@ -1037,7 +1091,7 @@ static int omap_dma_resume(struct dma_chan *chan) return 0; } -static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) +static int omap_dma_chan_init(struct omap_dmadev *od) { struct omap_chan *c; @@ -1046,7 +1100,6 @@ static int omap_dma_chan_init(struct omap_dmadev *od, int dma_sig) return -ENOMEM; c->reg_map = od->reg_map; - c->dma_sig = dma_sig; c->vc.desc_free = omap_dma_desc_free; vchan_init(&c->vc, &od->ddev); INIT_LIST_HEAD(&c->node); @@ -1094,12 +1147,14 @@ static int omap_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_SLAVE, od->ddev.cap_mask); dma_cap_set(DMA_CYCLIC, od->ddev.cap_mask); + dma_cap_set(DMA_MEMCPY, od->ddev.cap_mask); od->ddev.device_alloc_chan_resources = omap_dma_alloc_chan_resources; od->ddev.device_free_chan_resources = omap_dma_free_chan_resources; od->ddev.device_tx_status = omap_dma_tx_status; od->ddev.device_issue_pending = omap_dma_issue_pending; od->ddev.device_prep_slave_sg = omap_dma_prep_slave_sg; od->ddev.device_prep_dma_cyclic = omap_dma_prep_dma_cyclic; + od->ddev.device_prep_dma_memcpy = omap_dma_prep_dma_memcpy; od->ddev.device_config = omap_dma_slave_config; od->ddev.device_pause = omap_dma_pause; od->ddev.device_resume = omap_dma_resume; @@ -1116,8 +1171,17 @@ static int omap_dma_probe(struct platform_device *pdev) tasklet_init(&od->task, omap_dma_sched, (unsigned long)od); - for (i = 0; i < 127; i++) { - rc = omap_dma_chan_init(od, i); + od->dma_requests = OMAP_SDMA_REQUESTS; + if (pdev->dev.of_node && of_property_read_u32(pdev->dev.of_node, + "dma-requests", + &od->dma_requests)) { + dev_info(&pdev->dev, + "Missing dma-requests property, using %u.\n", + OMAP_SDMA_REQUESTS); + } + + for (i = 0; i < OMAP_SDMA_CHANNELS; i++) { + rc = omap_dma_chan_init(od); if (rc) { omap_dma_free(od); return rc; @@ -1208,10 +1272,14 @@ static struct platform_driver omap_dma_driver = { bool omap_dma_filter_fn(struct dma_chan *chan, void *param) { if (chan->device->dev->driver == &omap_dma_driver.driver) { + struct omap_dmadev *od = to_omap_dma_dev(chan->device); struct omap_chan *c = to_omap_dma_chan(chan); unsigned req = *(unsigned *)param; - return req == c->dma_sig; + if (req <= od->dma_requests) { + c->dma_sig = req; + return true; + } } return false; } diff --git a/kernel/drivers/dma/pch_dma.c b/kernel/drivers/dma/pch_dma.c index b859792dd..113605f6f 100644 --- a/kernel/drivers/dma/pch_dma.c +++ b/kernel/drivers/dma/pch_dma.c @@ -11,10 +11,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #include <linux/dmaengine.h> diff --git a/kernel/drivers/dma/pl330.c b/kernel/drivers/dma/pl330.c index 3dabc52b9..17ee758b4 100644 --- a/kernel/drivers/dma/pl330.c +++ b/kernel/drivers/dma/pl330.c @@ -1198,6 +1198,9 @@ static inline int _loop(unsigned dry_run, u8 buf[], unsigned lcnt0, lcnt1, ljmp0, ljmp1; struct _arg_LPEND lpend; + if (*bursts == 1) + return _bursts(dry_run, buf, pxs, 1); + /* Max iterations possible in DMALP is 256 */ if (*bursts >= 256*256) { lcnt1 = 256; @@ -1424,8 +1427,8 @@ static int pl330_submit_req(struct pl330_thread *thrd, goto xfer_exit; if (ret > pl330->mcbufsz / 2) { - dev_info(pl330->ddma.dev, "%s:%d Trying increasing mcbufsz\n", - __func__, __LINE__); + dev_info(pl330->ddma.dev, "%s:%d Try increasing mcbufsz (%i/%i)\n", + __func__, __LINE__, ret, pl330->mcbufsz / 2); ret = -ENOMEM; goto xfer_exit; } @@ -2584,12 +2587,14 @@ pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dst, { struct dma_pl330_desc *desc; struct dma_pl330_chan *pch = to_pchan(chan); - struct pl330_dmac *pl330 = pch->dmac; + struct pl330_dmac *pl330; int burst; if (unlikely(!pch || !len)) return NULL; + pl330 = pch->dmac; + desc = __pl330_prep_dma_memcpy(pch, dst, src, len); if (!desc) return NULL; diff --git a/kernel/drivers/dma/pxa_dma.c b/kernel/drivers/dma/pxa_dma.c new file mode 100644 index 000000000..a59061e42 --- /dev/null +++ b/kernel/drivers/dma/pxa_dma.c @@ -0,0 +1,1491 @@ +/* + * Copyright 2015 Robert Jarzmik <robert.jarzmik@free.fr> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/err.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/interrupt.h> +#include <linux/dma-mapping.h> +#include <linux/slab.h> +#include <linux/dmaengine.h> +#include <linux/platform_device.h> +#include <linux/device.h> +#include <linux/platform_data/mmp_dma.h> +#include <linux/dmapool.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> +#include <linux/of.h> +#include <linux/dma/pxa-dma.h> + +#include "dmaengine.h" +#include "virt-dma.h" + +#define DCSR(n) (0x0000 + ((n) << 2)) +#define DALGN(n) 0x00a0 +#define DINT 0x00f0 +#define DDADR(n) (0x0200 + ((n) << 4)) +#define DSADR(n) (0x0204 + ((n) << 4)) +#define DTADR(n) (0x0208 + ((n) << 4)) +#define DCMD(n) (0x020c + ((n) << 4)) + +#define PXA_DCSR_RUN BIT(31) /* Run Bit (read / write) */ +#define PXA_DCSR_NODESC BIT(30) /* No-Descriptor Fetch (read / write) */ +#define PXA_DCSR_STOPIRQEN BIT(29) /* Stop Interrupt Enable (R/W) */ +#define PXA_DCSR_REQPEND BIT(8) /* Request Pending (read-only) */ +#define PXA_DCSR_STOPSTATE BIT(3) /* Stop State (read-only) */ +#define PXA_DCSR_ENDINTR BIT(2) /* End Interrupt (read / write) */ +#define PXA_DCSR_STARTINTR BIT(1) /* Start Interrupt (read / write) */ +#define PXA_DCSR_BUSERR BIT(0) /* Bus Error Interrupt (read / write) */ + +#define PXA_DCSR_EORIRQEN BIT(28) /* End of Receive IRQ Enable (R/W) */ +#define PXA_DCSR_EORJMPEN BIT(27) /* Jump to next descriptor on EOR */ +#define PXA_DCSR_EORSTOPEN BIT(26) /* STOP on an EOR */ +#define PXA_DCSR_SETCMPST BIT(25) /* Set Descriptor Compare Status */ +#define PXA_DCSR_CLRCMPST BIT(24) /* Clear Descriptor Compare Status */ +#define PXA_DCSR_CMPST BIT(10) /* The Descriptor Compare Status */ +#define PXA_DCSR_EORINTR BIT(9) /* The end of Receive */ + +#define DRCMR_MAPVLD BIT(7) /* Map Valid (read / write) */ +#define DRCMR_CHLNUM 0x1f /* 
mask for Channel Number (read / write) */ + +#define DDADR_DESCADDR 0xfffffff0 /* Address of next descriptor (mask) */ +#define DDADR_STOP BIT(0) /* Stop (read / write) */ + +#define PXA_DCMD_INCSRCADDR BIT(31) /* Source Address Increment Setting. */ +#define PXA_DCMD_INCTRGADDR BIT(30) /* Target Address Increment Setting. */ +#define PXA_DCMD_FLOWSRC BIT(29) /* Flow Control by the source. */ +#define PXA_DCMD_FLOWTRG BIT(28) /* Flow Control by the target. */ +#define PXA_DCMD_STARTIRQEN BIT(22) /* Start Interrupt Enable */ +#define PXA_DCMD_ENDIRQEN BIT(21) /* End Interrupt Enable */ +#define PXA_DCMD_ENDIAN BIT(18) /* Device Endian-ness. */ +#define PXA_DCMD_BURST8 (1 << 16) /* 8 byte burst */ +#define PXA_DCMD_BURST16 (2 << 16) /* 16 byte burst */ +#define PXA_DCMD_BURST32 (3 << 16) /* 32 byte burst */ +#define PXA_DCMD_WIDTH1 (1 << 14) /* 1 byte width */ +#define PXA_DCMD_WIDTH2 (2 << 14) /* 2 byte width (HalfWord) */ +#define PXA_DCMD_WIDTH4 (3 << 14) /* 4 byte width (Word) */ +#define PXA_DCMD_LENGTH 0x01fff /* length mask (max = 8K - 1) */ + +#define PDMA_ALIGNMENT 3 +#define PDMA_MAX_DESC_BYTES (PXA_DCMD_LENGTH & ~((1 << PDMA_ALIGNMENT) - 1)) + +struct pxad_desc_hw { + u32 ddadr; /* Points to the next descriptor + flags */ + u32 dsadr; /* DSADR value for the current transfer */ + u32 dtadr; /* DTADR value for the current transfer */ + u32 dcmd; /* DCMD value for the current transfer */ +} __aligned(16); + +struct pxad_desc_sw { + struct virt_dma_desc vd; /* Virtual descriptor */ + int nb_desc; /* Number of hw. descriptors */ + size_t len; /* Number of bytes xfered */ + dma_addr_t first; /* First descriptor's addr */ + + /* At least one descriptor has an src/dst address not multiple of 8 */ + bool misaligned; + bool cyclic; + struct dma_pool *desc_pool; /* Channel's used allocator */ + + struct pxad_desc_hw *hw_desc[]; /* DMA coherent descriptors */ +}; + +struct pxad_phy { + int idx; + void __iomem *base; + struct pxad_chan *vchan; +}; + +struct pxad_chan { + struct virt_dma_chan vc; /* Virtual channel */ + u32 drcmr; /* Requestor of the channel */ + enum pxad_chan_prio prio; /* Required priority of phy */ + /* + * At least one desc_sw in submitted or issued transfers on this channel + * has one address such as: addr % 8 != 0. This implies the DALGN + * setting on the phy. 
+ */ + bool misaligned; + struct dma_slave_config cfg; /* Runtime config */ + + /* protected by vc->lock */ + struct pxad_phy *phy; + struct dma_pool *desc_pool; /* Descriptors pool */ +}; + +struct pxad_device { + struct dma_device slave; + int nr_chans; + void __iomem *base; + struct pxad_phy *phys; + spinlock_t phy_lock; /* Phy association */ +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_root; + struct dentry *dbgfs_state; + struct dentry **dbgfs_chan; +#endif +}; + +#define tx_to_pxad_desc(tx) \ + container_of(tx, struct pxad_desc_sw, async_tx) +#define to_pxad_chan(dchan) \ + container_of(dchan, struct pxad_chan, vc.chan) +#define to_pxad_dev(dmadev) \ + container_of(dmadev, struct pxad_device, slave) +#define to_pxad_sw_desc(_vd) \ + container_of((_vd), struct pxad_desc_sw, vd) + +#define _phy_readl_relaxed(phy, _reg) \ + readl_relaxed((phy)->base + _reg((phy)->idx)) +#define phy_readl_relaxed(phy, _reg) \ + ({ \ + u32 _v; \ + _v = readl_relaxed((phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): readl(%s): 0x%08x\n", __func__, #_reg, \ + _v); \ + _v; \ + }) +#define phy_writel(phy, val, _reg) \ + do { \ + writel((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) +#define phy_writel_relaxed(phy, val, _reg) \ + do { \ + writel_relaxed((val), (phy)->base + _reg((phy)->idx)); \ + dev_vdbg(&phy->vchan->vc.chan.dev->device, \ + "%s(): writel_relaxed(0x%08x, %s)\n", \ + __func__, (u32)(val), #_reg); \ + } while (0) + +static unsigned int pxad_drcmr(unsigned int line) +{ + if (line < 64) + return 0x100 + line * 4; + return 0x1000 + line * 4; +} + +/* + * Debug fs + */ +#ifdef CONFIG_DEBUG_FS +#include <linux/debugfs.h> +#include <linux/uaccess.h> +#include <linux/seq_file.h> + +static int dbg_show_requester_chan(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i; + u32 drcmr; + + seq_printf(s, "DMA channel %d requester :\n", phy->idx); + for (i = 0; i < 70; i++) { + drcmr = readl_relaxed(phy->base + pxad_drcmr(i)); + if ((drcmr & DRCMR_CHLNUM) == phy->idx) + seq_printf(s, "\tRequester %d (MAPVLD=%d)\n", i, + !!(drcmr & DRCMR_MAPVLD)); + } + return 0; +} + +static inline int dbg_burst_from_dcmd(u32 dcmd) +{ + int burst = (dcmd >> 16) & 0x3; + + return burst ? 4 << burst : 0; +} + +static int is_phys_valid(unsigned long addr) +{ + return pfn_valid(__phys_to_pfn(addr)); +} + +#define PXA_DCSR_STR(flag) (dcsr & PXA_DCSR_##flag ? #flag" " : "") +#define PXA_DCMD_STR(flag) (dcmd & PXA_DCMD_##flag ? 
#flag" " : "") + +static int dbg_show_descriptors(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + int i, max_show = 20, burst, width; + u32 dcmd; + unsigned long phys_desc, ddadr; + struct pxad_desc_hw *desc; + + phys_desc = ddadr = _phy_readl_relaxed(phy, DDADR); + + seq_printf(s, "DMA channel %d descriptors :\n", phy->idx); + seq_printf(s, "[%03d] First descriptor unknown\n", 0); + for (i = 1; i < max_show && is_phys_valid(phys_desc); i++) { + desc = phys_to_virt(phys_desc); + dcmd = desc->dcmd; + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "[%03d] Desc at %08lx(virt %p)\n", + i, phys_desc, desc); + seq_printf(s, "\tDDADR = %08x\n", desc->ddadr); + seq_printf(s, "\tDSADR = %08x\n", desc->dsadr); + seq_printf(s, "\tDTADR = %08x\n", desc->dtadr); + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, + dcmd & PXA_DCMD_LENGTH); + phys_desc = desc->ddadr; + } + if (i == max_show) + seq_printf(s, "[%03d] Desc at %08lx ... max display reached\n", + i, phys_desc); + else + seq_printf(s, "[%03d] Desc at %08lx is %s\n", + i, phys_desc, phys_desc == DDADR_STOP ? + "DDADR_STOP" : "invalid"); + + return 0; +} + +static int dbg_show_chan_state(struct seq_file *s, void *p) +{ + struct pxad_phy *phy = s->private; + u32 dcsr, dcmd; + int burst, width; + static const char * const str_prio[] = { + "high", "normal", "low", "invalid" + }; + + dcsr = _phy_readl_relaxed(phy, DCSR); + dcmd = _phy_readl_relaxed(phy, DCMD); + burst = dbg_burst_from_dcmd(dcmd); + width = (1 << ((dcmd >> 14) & 0x3)) >> 1; + + seq_printf(s, "DMA channel %d\n", phy->idx); + seq_printf(s, "\tPriority : %s\n", + str_prio[(phy->idx & 0xf) / 4]); + seq_printf(s, "\tUnaligned transfer bit: %s\n", + _phy_readl_relaxed(phy, DALGN) & BIT(phy->idx) ? 
+ "yes" : "no"); + seq_printf(s, "\tDCSR = %08x (%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + dcsr, PXA_DCSR_STR(RUN), PXA_DCSR_STR(NODESC), + PXA_DCSR_STR(STOPIRQEN), PXA_DCSR_STR(EORIRQEN), + PXA_DCSR_STR(EORJMPEN), PXA_DCSR_STR(EORSTOPEN), + PXA_DCSR_STR(SETCMPST), PXA_DCSR_STR(CLRCMPST), + PXA_DCSR_STR(CMPST), PXA_DCSR_STR(EORINTR), + PXA_DCSR_STR(REQPEND), PXA_DCSR_STR(STOPSTATE), + PXA_DCSR_STR(ENDINTR), PXA_DCSR_STR(STARTINTR), + PXA_DCSR_STR(BUSERR)); + + seq_printf(s, "\tDCMD = %08x (%s%s%s%s%s%s%sburst=%d width=%d len=%d)\n", + dcmd, + PXA_DCMD_STR(INCSRCADDR), PXA_DCMD_STR(INCTRGADDR), + PXA_DCMD_STR(FLOWSRC), PXA_DCMD_STR(FLOWTRG), + PXA_DCMD_STR(STARTIRQEN), PXA_DCMD_STR(ENDIRQEN), + PXA_DCMD_STR(ENDIAN), burst, width, dcmd & PXA_DCMD_LENGTH); + seq_printf(s, "\tDSADR = %08x\n", _phy_readl_relaxed(phy, DSADR)); + seq_printf(s, "\tDTADR = %08x\n", _phy_readl_relaxed(phy, DTADR)); + seq_printf(s, "\tDDADR = %08x\n", _phy_readl_relaxed(phy, DDADR)); + + return 0; +} + +static int dbg_show_state(struct seq_file *s, void *p) +{ + struct pxad_device *pdev = s->private; + + /* basic device status */ + seq_puts(s, "DMA engine status\n"); + seq_printf(s, "\tChannel number: %d\n", pdev->nr_chans); + + return 0; +} + +#define DBGFS_FUNC_DECL(name) \ +static int dbg_open_##name(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, dbg_show_##name, inode->i_private); \ +} \ +static const struct file_operations dbg_fops_##name = { \ + .owner = THIS_MODULE, \ + .open = dbg_open_##name, \ + .llseek = seq_lseek, \ + .read = seq_read, \ + .release = single_release, \ +} + +DBGFS_FUNC_DECL(state); +DBGFS_FUNC_DECL(chan_state); +DBGFS_FUNC_DECL(descriptors); +DBGFS_FUNC_DECL(requester_chan); + +static struct dentry *pxad_dbg_alloc_chan(struct pxad_device *pdev, + int ch, struct dentry *chandir) +{ + char chan_name[11]; + struct dentry *chan, *chan_state = NULL, *chan_descr = NULL; + struct dentry *chan_reqs = NULL; + void *dt; + + scnprintf(chan_name, sizeof(chan_name), "%d", ch); + chan = debugfs_create_dir(chan_name, chandir); + dt = (void *)&pdev->phys[ch]; + + if (chan) + chan_state = debugfs_create_file("state", 0400, chan, dt, + &dbg_fops_chan_state); + if (chan_state) + chan_descr = debugfs_create_file("descriptors", 0400, chan, dt, + &dbg_fops_descriptors); + if (chan_descr) + chan_reqs = debugfs_create_file("requesters", 0400, chan, dt, + &dbg_fops_requester_chan); + if (!chan_reqs) + goto err_state; + + return chan; + +err_state: + debugfs_remove_recursive(chan); + return NULL; +} + +static void pxad_init_debugfs(struct pxad_device *pdev) +{ + int i; + struct dentry *chandir; + + pdev->dbgfs_root = debugfs_create_dir(dev_name(pdev->slave.dev), NULL); + if (IS_ERR(pdev->dbgfs_root) || !pdev->dbgfs_root) + goto err_root; + + pdev->dbgfs_state = debugfs_create_file("state", 0400, pdev->dbgfs_root, + pdev, &dbg_fops_state); + if (!pdev->dbgfs_state) + goto err_state; + + pdev->dbgfs_chan = + kmalloc_array(pdev->nr_chans, sizeof(*pdev->dbgfs_state), + GFP_KERNEL); + if (!pdev->dbgfs_chan) + goto err_alloc; + + chandir = debugfs_create_dir("channels", pdev->dbgfs_root); + if (!chandir) + goto err_chandir; + + for (i = 0; i < pdev->nr_chans; i++) { + pdev->dbgfs_chan[i] = pxad_dbg_alloc_chan(pdev, i, chandir); + if (!pdev->dbgfs_chan[i]) + goto err_chans; + } + + return; +err_chans: +err_chandir: + kfree(pdev->dbgfs_chan); +err_alloc: +err_state: + debugfs_remove_recursive(pdev->dbgfs_root); +err_root: + pr_err("pxad: debugfs is not available\n"); +} + +static void 
pxad_cleanup_debugfs(struct pxad_device *pdev) +{ + debugfs_remove_recursive(pdev->dbgfs_root); +} +#else +static inline void pxad_init_debugfs(struct pxad_device *pdev) {} +static inline void pxad_cleanup_debugfs(struct pxad_device *pdev) {} +#endif + +/* + * In the transition phase where legacy pxa handling is done at the same time as + * mmp_dma, the DMA physical channel split between the 2 DMA providers is done + * through legacy_reserved. Legacy code reserves DMA channels by settings + * corresponding bits in legacy_reserved. + */ +static u32 legacy_reserved; +static u32 legacy_unavailable; + +static struct pxad_phy *lookup_phy(struct pxad_chan *pchan) +{ + int prio, i; + struct pxad_device *pdev = to_pxad_dev(pchan->vc.chan.device); + struct pxad_phy *phy, *found = NULL; + unsigned long flags; + + /* + * dma channel priorities + * ch 0 - 3, 16 - 19 <--> (0) + * ch 4 - 7, 20 - 23 <--> (1) + * ch 8 - 11, 24 - 27 <--> (2) + * ch 12 - 15, 28 - 31 <--> (3) + */ + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (prio = pchan->prio; prio >= PXAD_PRIO_HIGHEST; prio--) { + for (i = 0; i < pdev->nr_chans; i++) { + if (prio != (i & 0xf) >> 2) + continue; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + phy = &pdev->phys[i]; + if (!phy->vchan) { + phy->vchan = pchan; + found = phy; + if (i < 32) + legacy_unavailable |= BIT(i); + goto out_unlock; + } + } + } + +out_unlock: + spin_unlock_irqrestore(&pdev->phy_lock, flags); + dev_dbg(&pchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, found, + found ? found->idx : -1); + + return found; +} + +static void pxad_free_phy(struct pxad_chan *chan) +{ + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + unsigned long flags; + u32 reg; + int i; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): freeing\n", __func__); + if (!chan->phy) + return; + + /* clear the channel mapping in DRCMR */ + if (chan->drcmr <= DRCMR_CHLNUM) { + reg = pxad_drcmr(chan->drcmr); + writel_relaxed(0, chan->phy->base + reg); + } + + spin_lock_irqsave(&pdev->phy_lock, flags); + for (i = 0; i < 32; i++) + if (chan->phy == &pdev->phys[i]) + legacy_unavailable &= ~BIT(i); + chan->phy->vchan = NULL; + chan->phy = NULL; + spin_unlock_irqrestore(&pdev->phy_lock, flags); +} + +static bool is_chan_running(struct pxad_chan *chan) +{ + u32 dcsr; + struct pxad_phy *phy = chan->phy; + + if (!phy) + return false; + dcsr = phy_readl_relaxed(phy, DCSR); + return dcsr & PXA_DCSR_RUN; +} + +static bool is_running_chan_misaligned(struct pxad_chan *chan) +{ + u32 dalgn; + + BUG_ON(!chan->phy); + dalgn = phy_readl_relaxed(chan->phy, DALGN); + return dalgn & (BIT(chan->phy->idx)); +} + +static void phy_enable(struct pxad_phy *phy, bool misaligned) +{ + u32 reg, dalgn; + + if (!phy->vchan) + return; + + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(); phy=%p(%d) misaligned=%d\n", __func__, + phy, phy->idx, misaligned); + + if (phy->vchan->drcmr <= DRCMR_CHLNUM) { + reg = pxad_drcmr(phy->vchan->drcmr); + writel_relaxed(DRCMR_MAPVLD | phy->idx, phy->base + reg); + } + + dalgn = phy_readl_relaxed(phy, DALGN); + if (misaligned) + dalgn |= BIT(phy->idx); + else + dalgn &= ~BIT(phy->idx); + phy_writel_relaxed(phy, dalgn, DALGN); + + phy_writel(phy, PXA_DCSR_STOPIRQEN | PXA_DCSR_ENDINTR | + PXA_DCSR_BUSERR | PXA_DCSR_RUN, DCSR); +} + +static void phy_disable(struct pxad_phy *phy) +{ + u32 dcsr; + + if (!phy) + return; + + dcsr = phy_readl_relaxed(phy, DCSR); + dev_dbg(&phy->vchan->vc.chan.dev->device, + "%s(): phy=%p(%d)\n", __func__, phy, phy->idx); + phy_writel(phy, dcsr 
& ~PXA_DCSR_RUN & ~PXA_DCSR_STOPIRQEN, DCSR); +} + +static void pxad_launch_chan(struct pxad_chan *chan, + struct pxad_desc_sw *desc) +{ + dev_dbg(&chan->vc.chan.dev->device, + "%s(): desc=%p\n", __func__, desc); + if (!chan->phy) { + chan->phy = lookup_phy(chan); + if (!chan->phy) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): no free dma channel\n", __func__); + return; + } + } + + /* + * Program the descriptor's address into the DMA controller, + * then start the DMA transaction + */ + phy_writel(chan->phy, desc->first, DDADR); + phy_enable(chan->phy, chan->misaligned); +} + +static void set_updater_desc(struct pxad_desc_sw *sw_desc, + unsigned long flags) +{ + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + dma_addr_t dma = sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr; + + updater->ddadr = DDADR_STOP; + updater->dsadr = dma; + updater->dtadr = dma + 8; + updater->dcmd = PXA_DCMD_WIDTH4 | PXA_DCMD_BURST32 | + (PXA_DCMD_LENGTH & sizeof(u32)); + if (flags & DMA_PREP_INTERRUPT) + updater->dcmd |= PXA_DCMD_ENDIRQEN; + if (sw_desc->cyclic) + sw_desc->hw_desc[sw_desc->nb_desc - 2]->ddadr = sw_desc->first; +} + +static bool is_desc_completed(struct virt_dma_desc *vd) +{ + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + struct pxad_desc_hw *updater = + sw_desc->hw_desc[sw_desc->nb_desc - 1]; + + return updater->dtadr != (updater->dsadr + 8); +} + +static void pxad_desc_chain(struct virt_dma_desc *vd1, + struct virt_dma_desc *vd2) +{ + struct pxad_desc_sw *desc1 = to_pxad_sw_desc(vd1); + struct pxad_desc_sw *desc2 = to_pxad_sw_desc(vd2); + dma_addr_t dma_to_chain; + + dma_to_chain = desc2->first; + desc1->hw_desc[desc1->nb_desc - 1]->ddadr = dma_to_chain; +} + +static bool pxad_try_hotchain(struct virt_dma_chan *vc, + struct virt_dma_desc *vd) +{ + struct virt_dma_desc *vd_last_issued = NULL; + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + + /* + * Attempt to hot chain the tx if the phy is still running. This is + * considered successful only if either the channel is still running + * after the chaining, or if the chained transfer is completed after + * having been hot chained. + * A change of alignment is not allowed, and forbids hotchaining. 
+ */ + if (is_chan_running(chan)) { + BUG_ON(list_empty(&vc->desc_issued)); + + if (!is_running_chan_misaligned(chan) && + to_pxad_sw_desc(vd)->misaligned) + return false; + + vd_last_issued = list_entry(vc->desc_issued.prev, + struct virt_dma_desc, node); + pxad_desc_chain(vd_last_issued, vd); + if (is_chan_running(chan) || is_desc_completed(vd_last_issued)) + return true; + } + + return false; +} + +static unsigned int clear_chan_irq(struct pxad_phy *phy) +{ + u32 dcsr; + u32 dint = readl(phy->base + DINT); + + if (!(dint & BIT(phy->idx))) + return PXA_DCSR_RUN; + + /* clear irq */ + dcsr = phy_readl_relaxed(phy, DCSR); + phy_writel(phy, dcsr, DCSR); + if ((dcsr & PXA_DCSR_BUSERR) && (phy->vchan)) + dev_warn(&phy->vchan->vc.chan.dev->device, + "%s(chan=%p): PXA_DCSR_BUSERR\n", + __func__, &phy->vchan); + + return dcsr & ~PXA_DCSR_RUN; +} + +static irqreturn_t pxad_chan_handler(int irq, void *dev_id) +{ + struct pxad_phy *phy = dev_id; + struct pxad_chan *chan = phy->vchan; + struct virt_dma_desc *vd, *tmp; + unsigned int dcsr; + unsigned long flags; + + BUG_ON(!chan); + + dcsr = clear_chan_irq(phy); + if (dcsr & PXA_DCSR_RUN) + return IRQ_NONE; + + spin_lock_irqsave(&chan->vc.lock, flags); + list_for_each_entry_safe(vd, tmp, &chan->vc.desc_issued, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): checking txd %p[%x]: completed=%d\n", + __func__, vd, vd->tx.cookie, is_desc_completed(vd)); + if (to_pxad_sw_desc(vd)->cyclic) { + vchan_cyclic_callback(vd); + break; + } + if (is_desc_completed(vd)) { + list_del(&vd->node); + vchan_cookie_complete(vd); + } else { + break; + } + } + + if (dcsr & PXA_DCSR_STOPSTATE) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): channel stopped, submitted_empty=%d issued_empty=%d", + __func__, + list_empty(&chan->vc.desc_submitted), + list_empty(&chan->vc.desc_issued)); + phy_writel_relaxed(phy, dcsr & ~PXA_DCSR_STOPIRQEN, DCSR); + + if (list_empty(&chan->vc.desc_issued)) { + chan->misaligned = + !list_empty(&chan->vc.desc_submitted); + } else { + vd = list_first_entry(&chan->vc.desc_issued, + struct virt_dma_desc, node); + pxad_launch_chan(chan, to_pxad_sw_desc(vd)); + } + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + + return IRQ_HANDLED; +} + +static irqreturn_t pxad_int_handler(int irq, void *dev_id) +{ + struct pxad_device *pdev = dev_id; + struct pxad_phy *phy; + u32 dint = readl(pdev->base + DINT); + int i, ret = IRQ_NONE; + + while (dint) { + i = __ffs(dint); + dint &= (dint - 1); + phy = &pdev->phys[i]; + if ((i < 32) && (legacy_reserved & BIT(i))) + continue; + if (pxad_chan_handler(irq, phy) == IRQ_HANDLED) + ret = IRQ_HANDLED; + } + + return ret; +} + +static int pxad_alloc_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + + if (chan->desc_pool) + return 1; + + chan->desc_pool = dma_pool_create(dma_chan_name(dchan), + pdev->slave.dev, + sizeof(struct pxad_desc_hw), + __alignof__(struct pxad_desc_hw), + 0); + if (!chan->desc_pool) { + dev_err(&chan->vc.chan.dev->device, + "%s(): unable to allocate descriptor pool\n", + __func__); + return -ENOMEM; + } + + return 1; +} + +static void pxad_free_chan_resources(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + vchan_free_chan_resources(&chan->vc); + dma_pool_destroy(chan->desc_pool); + chan->desc_pool = NULL; + +} + +static void pxad_free_desc(struct virt_dma_desc *vd) +{ + int i; + dma_addr_t dma; + struct pxad_desc_sw *sw_desc = to_pxad_sw_desc(vd); + + 
BUG_ON(sw_desc->nb_desc == 0); + for (i = sw_desc->nb_desc - 1; i >= 0; i--) { + if (i > 0) + dma = sw_desc->hw_desc[i - 1]->ddadr; + else + dma = sw_desc->first; + dma_pool_free(sw_desc->desc_pool, + sw_desc->hw_desc[i], dma); + } + sw_desc->nb_desc = 0; + kfree(sw_desc); +} + +static struct pxad_desc_sw * +pxad_alloc_desc(struct pxad_chan *chan, unsigned int nb_hw_desc) +{ + struct pxad_desc_sw *sw_desc; + dma_addr_t dma; + int i; + + sw_desc = kzalloc(sizeof(*sw_desc) + + nb_hw_desc * sizeof(struct pxad_desc_hw *), + GFP_NOWAIT); + if (!sw_desc) + return NULL; + sw_desc->desc_pool = chan->desc_pool; + + for (i = 0; i < nb_hw_desc; i++) { + sw_desc->hw_desc[i] = dma_pool_alloc(sw_desc->desc_pool, + GFP_NOWAIT, &dma); + if (!sw_desc->hw_desc[i]) { + dev_err(&chan->vc.chan.dev->device, + "%s(): Couldn't allocate the %dth hw_desc from dma_pool %p\n", + __func__, i, sw_desc->desc_pool); + goto err; + } + + if (i == 0) + sw_desc->first = dma; + else + sw_desc->hw_desc[i - 1]->ddadr = dma; + sw_desc->nb_desc++; + } + + return sw_desc; +err: + pxad_free_desc(&sw_desc->vd); + return NULL; +} + +static dma_cookie_t pxad_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct virt_dma_chan *vc = to_virt_chan(tx->chan); + struct pxad_chan *chan = to_pxad_chan(&vc->chan); + struct virt_dma_desc *vd_chained = NULL, + *vd = container_of(tx, struct virt_dma_desc, tx); + dma_cookie_t cookie; + unsigned long flags; + + set_updater_desc(to_pxad_sw_desc(vd), tx->flags); + + spin_lock_irqsave(&vc->lock, flags); + cookie = dma_cookie_assign(tx); + + if (list_empty(&vc->desc_submitted) && pxad_try_hotchain(vc, vd)) { + list_move_tail(&vd->node, &vc->desc_issued); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (hot linked)\n", + __func__, vd, cookie); + goto out; + } + + /* + * Fallback to placing the tx in the submitted queue + */ + if (!list_empty(&vc->desc_submitted)) { + vd_chained = list_entry(vc->desc_submitted.prev, + struct virt_dma_desc, node); + /* + * Only chain the descriptors if no new misalignment is + * introduced. If a new misalignment is chained, let the channel + * stop, and be relaunched in misalign mode from the irq + * handler. + */ + if (chan->misaligned || !to_pxad_sw_desc(vd)->misaligned) + pxad_desc_chain(vd_chained, vd); + else + vd_chained = NULL; + } + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]: submitted (%s linked)\n", + __func__, vd, cookie, vd_chained ? 
"cold" : "not"); + list_move_tail(&vd->node, &vc->desc_submitted); + chan->misaligned |= to_pxad_sw_desc(vd)->misaligned; + +out: + spin_unlock_irqrestore(&vc->lock, flags); + return cookie; +} + +static void pxad_issue_pending(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct virt_dma_desc *vd_first; + unsigned long flags; + + spin_lock_irqsave(&chan->vc.lock, flags); + if (list_empty(&chan->vc.desc_submitted)) + goto out; + + vd_first = list_first_entry(&chan->vc.desc_submitted, + struct virt_dma_desc, node); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x]", __func__, vd_first, vd_first->tx.cookie); + + vchan_issue_pending(&chan->vc); + if (!pxad_try_hotchain(&chan->vc, vd_first)) + pxad_launch_chan(chan, to_pxad_sw_desc(vd_first)); +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); +} + +static inline struct dma_async_tx_descriptor * +pxad_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, + unsigned long tx_flags) +{ + struct dma_async_tx_descriptor *tx; + struct pxad_chan *chan = container_of(vc, struct pxad_chan, vc); + + INIT_LIST_HEAD(&vd->node); + tx = vchan_tx_prep(vc, vd, tx_flags); + tx->tx_submit = pxad_tx_submit; + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vc=%p txd=%p[%x] flags=0x%lx\n", __func__, + vc, vd, vd->tx.cookie, + tx_flags); + + return tx; +} + +static void pxad_get_config(struct pxad_chan *chan, + enum dma_transfer_direction dir, + u32 *dcmd, u32 *dev_src, u32 *dev_dst) +{ + u32 maxburst = 0, dev_addr = 0; + enum dma_slave_buswidth width = DMA_SLAVE_BUSWIDTH_UNDEFINED; + + *dcmd = 0; + if (dir == DMA_DEV_TO_MEM) { + maxburst = chan->cfg.src_maxburst; + width = chan->cfg.src_addr_width; + dev_addr = chan->cfg.src_addr; + *dev_src = dev_addr; + *dcmd |= PXA_DCMD_INCTRGADDR; + if (chan->drcmr <= DRCMR_CHLNUM) + *dcmd |= PXA_DCMD_FLOWSRC; + } + if (dir == DMA_MEM_TO_DEV) { + maxburst = chan->cfg.dst_maxburst; + width = chan->cfg.dst_addr_width; + dev_addr = chan->cfg.dst_addr; + *dev_dst = dev_addr; + *dcmd |= PXA_DCMD_INCSRCADDR; + if (chan->drcmr <= DRCMR_CHLNUM) + *dcmd |= PXA_DCMD_FLOWTRG; + } + if (dir == DMA_MEM_TO_MEM) + *dcmd |= PXA_DCMD_BURST32 | PXA_DCMD_INCTRGADDR | + PXA_DCMD_INCSRCADDR; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dev_addr=0x%x maxburst=%d width=%d dir=%d\n", + __func__, dev_addr, maxburst, width, dir); + + if (width == DMA_SLAVE_BUSWIDTH_1_BYTE) + *dcmd |= PXA_DCMD_WIDTH1; + else if (width == DMA_SLAVE_BUSWIDTH_2_BYTES) + *dcmd |= PXA_DCMD_WIDTH2; + else if (width == DMA_SLAVE_BUSWIDTH_4_BYTES) + *dcmd |= PXA_DCMD_WIDTH4; + + if (maxburst == 8) + *dcmd |= PXA_DCMD_BURST8; + else if (maxburst == 16) + *dcmd |= PXA_DCMD_BURST16; + else if (maxburst == 32) + *dcmd |= PXA_DCMD_BURST32; + + /* FIXME: drivers should be ported over to use the filter + * function. Once that's done, the following two lines can + * be removed. 
+ */ + if (chan->cfg.slave_id) + chan->drcmr = chan->cfg.slave_id; +} + +static struct dma_async_tx_descriptor * +pxad_prep_memcpy(struct dma_chan *dchan, + dma_addr_t dma_dst, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw *hw_desc; + u32 dcmd; + unsigned int i, nb_desc = 0; + size_t copy; + + if (!dchan || !len) + return NULL; + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dma_dst=0x%lx dma_src=0x%lx len=%zu flags=%lx\n", + __func__, (unsigned long)dma_dst, (unsigned long)dma_src, + len, flags); + pxad_get_config(chan, DMA_MEM_TO_MEM, &dcmd, NULL, NULL); + + nb_desc = DIV_ROUND_UP(len, PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->len = len; + + if (!IS_ALIGNED(dma_src, 1 << PDMA_ALIGNMENT) || + !IS_ALIGNED(dma_dst, 1 << PDMA_ALIGNMENT)) + sw_desc->misaligned = true; + + i = 0; + do { + hw_desc = sw_desc->hw_desc[i++]; + copy = min_t(size_t, len, PDMA_MAX_DESC_BYTES); + hw_desc->dcmd = dcmd | (PXA_DCMD_LENGTH & copy); + hw_desc->dsadr = dma_src; + hw_desc->dtadr = dma_dst; + len -= copy; + dma_src += copy; + dma_dst += copy; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + size_t len, avail; + struct scatterlist *sg; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0, i, j = 0; + + if ((sgl == NULL) || (sg_len == 0)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): dir=%d flags=%lx\n", __func__, dir, flags); + + for_each_sg(sgl, sg, sg_len, i) + nb_desc += DIV_ROUND_UP(sg_dma_len(sg), PDMA_MAX_DESC_BYTES); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + + for_each_sg(sgl, sg, sg_len, i) { + dma = sg_dma_address(sg); + avail = sg_dma_len(sg); + sw_desc->len += avail; + + do { + len = min_t(size_t, avail, PDMA_MAX_DESC_BYTES); + if (dma & 0x7) + sw_desc->misaligned = true; + + sw_desc->hw_desc[j]->dcmd = + dcmd | (PXA_DCMD_LENGTH & len); + sw_desc->hw_desc[j]->dsadr = dsadr ? dsadr : dma; + sw_desc->hw_desc[j++]->dtadr = dtadr ? 
dtadr : dma; + + dma += len; + avail -= len; + } while (avail); + } + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static struct dma_async_tx_descriptor * +pxad_prep_dma_cyclic(struct dma_chan *dchan, + dma_addr_t buf_addr, size_t len, size_t period_len, + enum dma_transfer_direction dir, unsigned long flags) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_desc_sw *sw_desc; + struct pxad_desc_hw **phw_desc; + dma_addr_t dma; + u32 dcmd, dsadr = 0, dtadr = 0; + unsigned int nb_desc = 0; + + if (!dchan || !len || !period_len) + return NULL; + if ((dir != DMA_DEV_TO_MEM) && (dir != DMA_MEM_TO_DEV)) { + dev_err(&chan->vc.chan.dev->device, + "Unsupported direction for cyclic DMA\n"); + return NULL; + } + /* the buffer length must be a multiple of period_len */ + if (len % period_len != 0 || period_len > PDMA_MAX_DESC_BYTES || + !IS_ALIGNED(period_len, 1 << PDMA_ALIGNMENT)) + return NULL; + + pxad_get_config(chan, dir, &dcmd, &dsadr, &dtadr); + dcmd |= PXA_DCMD_ENDIRQEN | (PXA_DCMD_LENGTH & period_len); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): buf_addr=0x%lx len=%zu period=%zu dir=%d flags=%lx\n", + __func__, (unsigned long)buf_addr, len, period_len, dir, flags); + + nb_desc = DIV_ROUND_UP(period_len, PDMA_MAX_DESC_BYTES); + nb_desc *= DIV_ROUND_UP(len, period_len); + sw_desc = pxad_alloc_desc(chan, nb_desc + 1); + if (!sw_desc) + return NULL; + sw_desc->cyclic = true; + sw_desc->len = len; + + phw_desc = sw_desc->hw_desc; + dma = buf_addr; + do { + phw_desc[0]->dsadr = dsadr ? dsadr : dma; + phw_desc[0]->dtadr = dtadr ? dtadr : dma; + phw_desc[0]->dcmd = dcmd; + phw_desc++; + dma += period_len; + len -= period_len; + } while (len); + set_updater_desc(sw_desc, flags); + + return pxad_tx_prep(&chan->vc, &sw_desc->vd, flags); +} + +static int pxad_config(struct dma_chan *dchan, + struct dma_slave_config *cfg) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + + if (!dchan) + return -EINVAL; + + chan->cfg = *cfg; + return 0; +} + +static int pxad_terminate_all(struct dma_chan *dchan) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + struct pxad_device *pdev = to_pxad_dev(chan->vc.chan.device); + struct virt_dma_desc *vd = NULL; + unsigned long flags; + struct pxad_phy *phy; + LIST_HEAD(head); + + dev_dbg(&chan->vc.chan.dev->device, + "%s(): vchan %p: terminate all\n", __func__, &chan->vc); + + spin_lock_irqsave(&chan->vc.lock, flags); + vchan_get_all_descriptors(&chan->vc, &head); + + list_for_each_entry(vd, &head, node) { + dev_dbg(&chan->vc.chan.dev->device, + "%s(): cancelling txd %p[%x] (completed=%d)", __func__, + vd, vd->tx.cookie, is_desc_completed(vd)); + } + + phy = chan->phy; + if (phy) { + phy_disable(chan->phy); + pxad_free_phy(chan); + chan->phy = NULL; + spin_lock(&pdev->phy_lock); + phy->vchan = NULL; + spin_unlock(&pdev->phy_lock); + } + spin_unlock_irqrestore(&chan->vc.lock, flags); + vchan_dma_desc_free_list(&chan->vc, &head); + + return 0; +} + +static unsigned int pxad_residue(struct pxad_chan *chan, + dma_cookie_t cookie) +{ + struct virt_dma_desc *vd = NULL; + struct pxad_desc_sw *sw_desc = NULL; + struct pxad_desc_hw *hw_desc = NULL; + u32 curr, start, len, end, residue = 0; + unsigned long flags; + bool passed = false; + int i; + + /* + * If the channel does not have a phy pointer anymore, it has already + * been completed. Therefore, its residue is 0. 
+ */ + if (!chan->phy) + return 0; + + spin_lock_irqsave(&chan->vc.lock, flags); + + vd = vchan_find_desc(&chan->vc, cookie); + if (!vd) + goto out; + + sw_desc = to_pxad_sw_desc(vd); + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + curr = phy_readl_relaxed(chan->phy, DSADR); + else + curr = phy_readl_relaxed(chan->phy, DTADR); + + /* + * curr has to be actually read before checking descriptor + * completion, so that a curr inside a status updater + * descriptor implies the following test returns true, and + * preventing reordering of curr load and the test. + */ + rmb(); + if (is_desc_completed(vd)) + goto out; + + for (i = 0; i < sw_desc->nb_desc - 1; i++) { + hw_desc = sw_desc->hw_desc[i]; + if (sw_desc->hw_desc[0]->dcmd & PXA_DCMD_INCSRCADDR) + start = hw_desc->dsadr; + else + start = hw_desc->dtadr; + len = hw_desc->dcmd & PXA_DCMD_LENGTH; + end = start + len; + + /* + * 'passed' will be latched once we found the descriptor + * which lies inside the boundaries of the curr + * pointer. All descriptors that occur in the list + * _after_ we found that partially handled descriptor + * are still to be processed and are hence added to the + * residual bytes counter. + */ + + if (passed) { + residue += len; + } else if (curr >= start && curr <= end) { + residue += end - curr; + passed = true; + } + } + if (!passed) + residue = sw_desc->len; + +out: + spin_unlock_irqrestore(&chan->vc.lock, flags); + dev_dbg(&chan->vc.chan.dev->device, + "%s(): txd %p[%x] sw_desc=%p: %d\n", + __func__, vd, cookie, sw_desc, residue); + return residue; +} + +static enum dma_status pxad_tx_status(struct dma_chan *dchan, + dma_cookie_t cookie, + struct dma_tx_state *txstate) +{ + struct pxad_chan *chan = to_pxad_chan(dchan); + enum dma_status ret; + + ret = dma_cookie_status(dchan, cookie, txstate); + if (likely(txstate && (ret != DMA_ERROR))) + dma_set_residue(txstate, pxad_residue(chan, cookie)); + + return ret; +} + +static void pxad_free_channels(struct dma_device *dmadev) +{ + struct pxad_chan *c, *cn; + + list_for_each_entry_safe(c, cn, &dmadev->channels, + vc.chan.device_node) { + list_del(&c->vc.chan.device_node); + tasklet_kill(&c->vc.task); + } +} + +static int pxad_remove(struct platform_device *op) +{ + struct pxad_device *pdev = platform_get_drvdata(op); + + pxad_cleanup_debugfs(pdev); + pxad_free_channels(&pdev->slave); + dma_async_device_unregister(&pdev->slave); + return 0; +} + +static int pxad_init_phys(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nb_phy_chans) +{ + int irq0, irq, nr_irq = 0, i, ret; + struct pxad_phy *phy; + + irq0 = platform_get_irq(op, 0); + if (irq0 < 0) + return irq0; + + pdev->phys = devm_kcalloc(&op->dev, nb_phy_chans, + sizeof(pdev->phys[0]), GFP_KERNEL); + if (!pdev->phys) + return -ENOMEM; + + for (i = 0; i < nb_phy_chans; i++) + if (platform_get_irq(op, i) > 0) + nr_irq++; + + for (i = 0; i < nb_phy_chans; i++) { + phy = &pdev->phys[i]; + phy->base = pdev->base; + phy->idx = i; + irq = platform_get_irq(op, i); + if ((nr_irq > 1) && (irq > 0)) + ret = devm_request_irq(&op->dev, irq, + pxad_chan_handler, + IRQF_SHARED, "pxa-dma", phy); + if ((nr_irq == 1) && (i == 0)) + ret = devm_request_irq(&op->dev, irq0, + pxad_int_handler, + IRQF_SHARED, "pxa-dma", pdev); + if (ret) { + dev_err(pdev->slave.dev, + "%s(): can't request irq %d:%d\n", __func__, + irq, ret); + return ret; + } + } + + return 0; +} + +static const struct of_device_id const pxad_dt_ids[] = { + { .compatible = "marvell,pdma-1.0", }, + {} +}; +MODULE_DEVICE_TABLE(of, 
pxad_dt_ids); + +static struct dma_chan *pxad_dma_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct pxad_device *d = ofdma->of_dma_data; + struct dma_chan *chan; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) + return NULL; + + to_pxad_chan(chan)->drcmr = dma_spec->args[0]; + to_pxad_chan(chan)->prio = dma_spec->args[1]; + + return chan; +} + +static int pxad_init_dmadev(struct platform_device *op, + struct pxad_device *pdev, + unsigned int nr_phy_chans) +{ + int ret; + unsigned int i; + struct pxad_chan *c; + + pdev->nr_chans = nr_phy_chans; + INIT_LIST_HEAD(&pdev->slave.channels); + pdev->slave.device_alloc_chan_resources = pxad_alloc_chan_resources; + pdev->slave.device_free_chan_resources = pxad_free_chan_resources; + pdev->slave.device_tx_status = pxad_tx_status; + pdev->slave.device_issue_pending = pxad_issue_pending; + pdev->slave.device_config = pxad_config; + pdev->slave.device_terminate_all = pxad_terminate_all; + + if (op->dev.coherent_dma_mask) + dma_set_mask(&op->dev, op->dev.coherent_dma_mask); + else + dma_set_mask(&op->dev, DMA_BIT_MASK(32)); + + ret = pxad_init_phys(op, pdev, nr_phy_chans); + if (ret) + return ret; + + for (i = 0; i < nr_phy_chans; i++) { + c = devm_kzalloc(&op->dev, sizeof(*c), GFP_KERNEL); + if (!c) + return -ENOMEM; + c->vc.desc_free = pxad_free_desc; + vchan_init(&c->vc, &pdev->slave); + } + + return dma_async_device_register(&pdev->slave); +} + +static int pxad_probe(struct platform_device *op) +{ + struct pxad_device *pdev; + const struct of_device_id *of_id; + struct mmp_dma_platdata *pdata = dev_get_platdata(&op->dev); + struct resource *iores; + int ret, dma_channels = 0; + const enum dma_slave_buswidth widths = + DMA_SLAVE_BUSWIDTH_1_BYTE | DMA_SLAVE_BUSWIDTH_2_BYTES | + DMA_SLAVE_BUSWIDTH_4_BYTES; + + pdev = devm_kzalloc(&op->dev, sizeof(*pdev), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + + spin_lock_init(&pdev->phy_lock); + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + pdev->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(pdev->base)) + return PTR_ERR(pdev->base); + + of_id = of_match_device(pxad_dt_ids, &op->dev); + if (of_id) + of_property_read_u32(op->dev.of_node, "#dma-channels", + &dma_channels); + else if (pdata && pdata->dma_channels) + dma_channels = pdata->dma_channels; + else + dma_channels = 32; /* default 32 channel */ + + dma_cap_set(DMA_SLAVE, pdev->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, pdev->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, pdev->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, pdev->slave.cap_mask); + pdev->slave.device_prep_dma_memcpy = pxad_prep_memcpy; + pdev->slave.device_prep_slave_sg = pxad_prep_slave_sg; + pdev->slave.device_prep_dma_cyclic = pxad_prep_dma_cyclic; + + pdev->slave.copy_align = PDMA_ALIGNMENT; + pdev->slave.src_addr_widths = widths; + pdev->slave.dst_addr_widths = widths; + pdev->slave.directions = BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM); + pdev->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + + pdev->slave.dev = &op->dev; + ret = pxad_init_dmadev(op, pdev, dma_channels); + if (ret) { + dev_err(pdev->slave.dev, "unable to register\n"); + return ret; + } + + if (op->dev.of_node) { + /* Device-tree DMA controller registration */ + ret = of_dma_controller_register(op->dev.of_node, + pxad_dma_xlate, pdev); + if (ret < 0) { + dev_err(pdev->slave.dev, + "of_dma_controller_register failed\n"); + return ret; + } + } + + platform_set_drvdata(op, pdev); + pxad_init_debugfs(pdev); + dev_info(pdev->slave.dev, "initialized 
%d channels\n", dma_channels); + return 0; +} + +static const struct platform_device_id pxad_id_table[] = { + { "pxa-dma", }, + { }, +}; + +static struct platform_driver pxad_driver = { + .driver = { + .name = "pxa-dma", + .of_match_table = pxad_dt_ids, + }, + .id_table = pxad_id_table, + .probe = pxad_probe, + .remove = pxad_remove, +}; + +bool pxad_filter_fn(struct dma_chan *chan, void *param) +{ + struct pxad_chan *c = to_pxad_chan(chan); + struct pxad_param *p = param; + + if (chan->device->dev->driver != &pxad_driver.driver) + return false; + + c->drcmr = p->drcmr; + c->prio = p->prio; + + return true; +} +EXPORT_SYMBOL_GPL(pxad_filter_fn); + +int pxad_toggle_reserved_channel(int legacy_channel) +{ + if (legacy_unavailable & (BIT(legacy_channel))) + return -EBUSY; + legacy_reserved ^= BIT(legacy_channel); + return 0; +} +EXPORT_SYMBOL_GPL(pxad_toggle_reserved_channel); + +module_platform_driver(pxad_driver); + +MODULE_DESCRIPTION("Marvell PXA Peripheral DMA Driver"); +MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>"); +MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/dma/s3c24xx-dma.c b/kernel/drivers/dma/s3c24xx-dma.c index 01dcaf21b..17ccdfd28 100644 --- a/kernel/drivers/dma/s3c24xx-dma.c +++ b/kernel/drivers/dma/s3c24xx-dma.c @@ -1168,7 +1168,7 @@ static struct soc_data soc_s3c2443 = { .has_clocks = true, }; -static struct platform_device_id s3c24xx_dma_driver_ids[] = { +static const struct platform_device_id s3c24xx_dma_driver_ids[] = { { .name = "s3c2410-dma", .driver_data = (kernel_ulong_t)&soc_s3c2410, diff --git a/kernel/drivers/dma/sh/Kconfig b/kernel/drivers/dma/sh/Kconfig index 0f371524a..9fda65af8 100644 --- a/kernel/drivers/dma/sh/Kconfig +++ b/kernel/drivers/dma/sh/Kconfig @@ -39,18 +39,6 @@ config SH_DMAE_R8A73A4 endif -config SUDMAC - tristate "Renesas SUDMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas SUDMAC controllers. - -config RCAR_HPB_DMAE - tristate "Renesas R-Car HPB DMAC support" - depends on SH_DMAE_BASE - help - Enable support for the Renesas R-Car series DMA controllers. - config RCAR_DMAC tristate "Renesas R-Car Gen2 DMA Controller" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -59,6 +47,12 @@ config RCAR_DMAC This driver supports the general purpose DMA controller found in the Renesas R-Car second generation SoCs. +config RCAR_HPB_DMAE + tristate "Renesas R-Car HPB DMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas R-Car series DMA controllers. + config RENESAS_USB_DMAC tristate "Renesas USB-DMA Controller" depends on ARCH_SHMOBILE || COMPILE_TEST @@ -67,3 +61,9 @@ config RENESAS_USB_DMAC help This driver supports the USB-DMA controller found in the Renesas SoCs. + +config SUDMAC + tristate "Renesas SUDMAC support" + depends on SH_DMAE_BASE + help + Enable support for the Renesas SUDMAC controllers. 
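The pxa_dma driver above gives clients two ways to obtain a channel: the device-tree path via pxad_dma_xlate(), which reads the request line from args[0] and the channel priority from args[1], and the legacy board-file path via the exported pxad_filter_fn(). The sketch below shows the filter-function path. Only the drcmr/prio fields of struct pxad_param and the behaviour of pxad_filter_fn() are taken from the code above; the header location and the priority value 0 are assumptions made purely for illustration.

#include <linux/dmaengine.h>
#include <linux/dma/pxa-dma.h>	/* assumed location of struct pxad_param / pxad_filter_fn */

static struct dma_chan *claim_pxa_channel(unsigned int drcmr)
{
	dma_cap_mask_t mask;
	struct pxad_param param = {
		.drcmr = drcmr,	/* request line, same value a DT node passes in cell 0 */
		.prio  = 0,	/* channel priority, cell 1 equivalent; 0 used for illustration */
	};

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);

	/*
	 * pxad_filter_fn() rejects channels that do not belong to this
	 * driver and latches drcmr/prio into the selected pxad_chan,
	 * mirroring what pxad_dma_xlate() does for DT users.
	 */
	return dma_request_channel(mask, pxad_filter_fn, &param);
}

Either way the channel carries the same two values, so subsequent dmaengine_slave_config() and prep calls behave identically whether the channel came from the DT xlate callback or from this filter.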
diff --git a/kernel/drivers/dma/sh/Makefile b/kernel/drivers/dma/sh/Makefile index b8a598066..0133e4658 100644 --- a/kernel/drivers/dma/sh/Makefile +++ b/kernel/drivers/dma/sh/Makefile @@ -13,7 +13,7 @@ shdma-$(CONFIG_SH_DMAE_R8A73A4) += shdma-r8a73a4.o shdma-objs := $(shdma-y) obj-$(CONFIG_SH_DMAE) += shdma.o -obj-$(CONFIG_SUDMAC) += sudmac.o -obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o +obj-$(CONFIG_RCAR_HPB_DMAE) += rcar-hpbdma.o obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o +obj-$(CONFIG_SUDMAC) += sudmac.o diff --git a/kernel/drivers/dma/sh/rcar-dmac.c b/kernel/drivers/dma/sh/rcar-dmac.c index a18d16cc4..7820d07e7 100644 --- a/kernel/drivers/dma/sh/rcar-dmac.c +++ b/kernel/drivers/dma/sh/rcar-dmac.c @@ -183,7 +183,7 @@ struct rcar_dmac { unsigned int n_channels; struct rcar_dmac_chan *channels; - unsigned long modules[256 / BITS_PER_LONG]; + DECLARE_BITMAP(modules, 256); }; #define to_rcar_dmac(d) container_of(d, struct rcar_dmac, engine) @@ -465,6 +465,7 @@ static dma_cookie_t rcar_dmac_tx_submit(struct dma_async_tx_descriptor *tx) static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) { struct rcar_dmac_desc_page *page; + unsigned long flags; LIST_HEAD(list); unsigned int i; @@ -482,10 +483,10 @@ static int rcar_dmac_desc_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) list_add_tail(&desc->node, &list); } - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice_tail(&list, &chan->desc.free); list_add_tail(&page->node, &chan->desc.pages); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return 0; } @@ -516,6 +517,7 @@ static void rcar_dmac_desc_put(struct rcar_dmac_chan *chan, static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) { struct rcar_dmac_desc *desc, *_desc; + unsigned long flags; LIST_HEAD(list); /* @@ -524,9 +526,9 @@ static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) * list_for_each_entry_safe, isn't safe if we release the channel lock * around the rcar_dmac_desc_put() call. */ - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice_init(&chan->desc.wait, &list); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); list_for_each_entry_safe(desc, _desc, &list, node) { if (async_tx_test_ack(&desc->async_tx)) { @@ -539,9 +541,9 @@ static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) return; /* Put the remaining descriptors back in the wait list. */ - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice(&list, &chan->desc.wait); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); } /* @@ -556,12 +558,13 @@ static void rcar_dmac_desc_recycle_acked(struct rcar_dmac_chan *chan) static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) { struct rcar_dmac_desc *desc; + unsigned long flags; int ret; /* Recycle acked descriptors before attempting allocation. */ rcar_dmac_desc_recycle_acked(chan); - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); while (list_empty(&chan->desc.free)) { /* @@ -570,17 +573,17 @@ static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) * allocated descriptors. If the allocation fails return an * error. 
*/ - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); ret = rcar_dmac_desc_alloc(chan, GFP_NOWAIT); if (ret < 0) return NULL; - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); } desc = list_first_entry(&chan->desc.free, struct rcar_dmac_desc, node); list_del(&desc->node); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return desc; } @@ -593,6 +596,7 @@ static struct rcar_dmac_desc *rcar_dmac_desc_get(struct rcar_dmac_chan *chan) static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) { struct rcar_dmac_desc_page *page; + unsigned long flags; LIST_HEAD(list); unsigned int i; @@ -606,10 +610,10 @@ static int rcar_dmac_xfer_chunk_alloc(struct rcar_dmac_chan *chan, gfp_t gfp) list_add_tail(&chunk->node, &list); } - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); list_splice_tail(&list, &chan->desc.chunks_free); list_add_tail(&page->node, &chan->desc.pages); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return 0; } @@ -627,9 +631,10 @@ static struct rcar_dmac_xfer_chunk * rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan) { struct rcar_dmac_xfer_chunk *chunk; + unsigned long flags; int ret; - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); while (list_empty(&chan->desc.chunks_free)) { /* @@ -638,18 +643,18 @@ rcar_dmac_xfer_chunk_get(struct rcar_dmac_chan *chan) * allocated descriptors. If the allocation fails return an * error. */ - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); ret = rcar_dmac_xfer_chunk_alloc(chan, GFP_NOWAIT); if (ret < 0) return NULL; - spin_lock_irq(&chan->lock); + spin_lock_irqsave(&chan->lock, flags); } chunk = list_first_entry(&chan->desc.chunks_free, struct rcar_dmac_xfer_chunk, node); list_del(&chunk->node); - spin_unlock_irq(&chan->lock); + spin_unlock_irqrestore(&chan->lock, flags); return chunk; } diff --git a/kernel/drivers/dma/sh/shdma-r8a73a4.c b/kernel/drivers/dma/sh/shdma-r8a73a4.c index 4fb99970a..96ea3828c 100644 --- a/kernel/drivers/dma/sh/shdma-r8a73a4.c +++ b/kernel/drivers/dma/sh/shdma-r8a73a4.c @@ -11,7 +11,7 @@ #include "shdma-arm.h" -const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; +static const unsigned int dma_ts_shift[] = SH_DMAE_TS_SHIFT; static const struct sh_dmae_slave_config dma_slaves[] = { { diff --git a/kernel/drivers/dma/sh/usb-dmac.c b/kernel/drivers/dma/sh/usb-dmac.c index ebd8a5f39..f1bcc2a16 100644 --- a/kernel/drivers/dma/sh/usb-dmac.c +++ b/kernel/drivers/dma/sh/usb-dmac.c @@ -679,8 +679,11 @@ static int usb_dmac_runtime_suspend(struct device *dev) struct usb_dmac *dmac = dev_get_drvdata(dev); int i; - for (i = 0; i < dmac->n_channels; ++i) + for (i = 0; i < dmac->n_channels; ++i) { + if (!dmac->channels[i].iomem) + break; usb_dmac_chan_halt(&dmac->channels[i]); + } return 0; } @@ -799,11 +802,10 @@ static int usb_dmac_probe(struct platform_device *pdev) ret = pm_runtime_get_sync(&pdev->dev); if (ret < 0) { dev_err(&pdev->dev, "runtime PM get sync failed (%d)\n", ret); - return ret; + goto error_pm; } ret = usb_dmac_init(dmac); - pm_runtime_put(&pdev->dev); if (ret) { dev_err(&pdev->dev, "failed to reset device\n"); @@ -851,10 +853,13 @@ static int usb_dmac_probe(struct platform_device *pdev) if (ret < 0) goto error; + pm_runtime_put(&pdev->dev); return 0; error: of_dma_controller_free(pdev->dev.of_node); + pm_runtime_put(&pdev->dev); +error_pm: pm_runtime_disable(&pdev->dev); return ret; } diff --git 
a/kernel/drivers/dma/sirf-dma.c b/kernel/drivers/dma/sirf-dma.c index a1afda43b..22ea2419e 100644 --- a/kernel/drivers/dma/sirf-dma.c +++ b/kernel/drivers/dma/sirf-dma.c @@ -23,8 +23,13 @@ #include "dmaengine.h" +#define SIRFSOC_DMA_VER_A7V1 1 +#define SIRFSOC_DMA_VER_A7V2 2 +#define SIRFSOC_DMA_VER_A6 4 + #define SIRFSOC_DMA_DESCRIPTORS 16 #define SIRFSOC_DMA_CHANNELS 16 +#define SIRFSOC_DMA_TABLE_NUM 256 #define SIRFSOC_DMA_CH_ADDR 0x00 #define SIRFSOC_DMA_CH_XLEN 0x04 @@ -35,15 +40,44 @@ #define SIRFSOC_DMA_CH_VALID 0x140 #define SIRFSOC_DMA_CH_INT 0x144 #define SIRFSOC_DMA_INT_EN 0x148 -#define SIRFSOC_DMA_INT_EN_CLR 0x14C +#define SIRFSOC_DMA_INT_EN_CLR 0x14C #define SIRFSOC_DMA_CH_LOOP_CTRL 0x150 -#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x15C +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR 0x154 +#define SIRFSOC_DMA_WIDTH_ATLAS7 0x10 +#define SIRFSOC_DMA_VALID_ATLAS7 0x14 +#define SIRFSOC_DMA_INT_ATLAS7 0x18 +#define SIRFSOC_DMA_INT_EN_ATLAS7 0x1c +#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7 0x20 +#define SIRFSOC_DMA_CUR_DATA_ADDR 0x34 +#define SIRFSOC_DMA_MUL_ATLAS7 0x38 +#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7 0x158 +#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7 0x15C +#define SIRFSOC_DMA_IOBG_SCMD_EN 0x800 +#define SIRFSOC_DMA_EARLY_RESP_SET 0x818 +#define SIRFSOC_DMA_EARLY_RESP_CLR 0x81C #define SIRFSOC_DMA_MODE_CTRL_BIT 4 #define SIRFSOC_DMA_DIR_CTRL_BIT 5 +#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7 2 +#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7 3 +#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7 4 +#define SIRFSOC_DMA_TAB_NUM_ATLAS7 7 +#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7 5 +#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7 25 +#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT 32 + +#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7 BIT(0) +#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7 BIT(1) +#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7 BIT(2) +#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7 BIT(3) +#define SIRFSOC_DMA_INT_INV_INT_ATLAS7 BIT(4) +#define SIRFSOC_DMA_INT_END_INT_ATLAS7 BIT(5) +#define SIRFSOC_DMA_INT_ALL_ATLAS7 0x3F /* xlen and dma_width register is in 4 bytes boundary */ #define SIRFSOC_DMA_WORD_LEN 4 +#define SIRFSOC_DMA_XLEN_MAX_V1 0x800 +#define SIRFSOC_DMA_XLEN_MAX_V2 0x1000 struct sirfsoc_dma_desc { struct dma_async_tx_descriptor desc; @@ -56,7 +90,9 @@ struct sirfsoc_dma_desc { int width; /* DMA width */ int dir; bool cyclic; /* is loop DMA? */ + bool chain; /* is chain DMA? 
*/ u32 addr; /* DMA buffer address */ + u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */ }; struct sirfsoc_dma_chan { @@ -87,10 +123,25 @@ struct sirfsoc_dma { void __iomem *base; int irq; struct clk *clk; - bool is_marco; + int type; + void (*exec_desc)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); struct sirfsoc_dma_regs regs_save; }; +struct sirfsoc_dmadata { + void (*exec)(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base); + int type; +}; + +enum sirfsoc_dma_chain_flag { + SIRFSOC_DMA_CHAIN_NORMAL = 0x01, + SIRFSOC_DMA_CHAIN_PAUSE = 0x02, + SIRFSOC_DMA_CHAIN_LOOP = 0x03, + SIRFSOC_DMA_CHAIN_END = 0x04 +}; + #define DRV_NAME "sirfsoc_dma" static int sirfsoc_dma_runtime_suspend(struct device *dev); @@ -109,48 +160,105 @@ static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c) return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]); } +static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + if (sdesc->chain) { + /* DMA v2 HW chain mode */ + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3, + base + SIRFSOC_DMA_CH_CTRL); + } else { + /* DMA v2 legacy mode */ + writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7); + writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)), + base + SIRFSOC_DMA_MUL_ATLAS7); + writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) | + (sdesc->chain << + SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) | + 0x3, base + SIRFSOC_DMA_CH_CTRL); + } + writel_relaxed(sdesc->chain ? 
SIRFSOC_DMA_INT_END_INT_ATLAS7 : + (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 | + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7), + base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) + writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); +} + +static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN); + writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET); + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7), + base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + } + +} + +static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc, + int cid, int burst_mode, void __iomem *base) +{ + writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4); + writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) | + (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), + base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); + writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN); + writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN); + writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) | + (1 << cid), base + SIRFSOC_DMA_INT_EN); + writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdesc->cyclic) { + writel((1 << cid) | 1 << (cid + 16) | + readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL), + base + SIRFSOC_DMA_CH_LOOP_CTRL); + } + +} + /* Execute all queued DMA descriptors */ static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan) { struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan); int cid = schan->chan.chan_id; struct sirfsoc_dma_desc *sdesc = NULL; + void __iomem *base; /* * lock has been held by functions calling this, so we don't hold * lock again */ - + base = sdma->base; sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc, - node); + node); /* Move the first queued descriptor to active list */ list_move_tail(&sdesc->node, &schan->active); - /* Start the DMA transfer */ - writel_relaxed(sdesc->width, sdma->base + SIRFSOC_DMA_WIDTH_0 + - cid * 4); - writel_relaxed(cid | (schan->mode << SIRFSOC_DMA_MODE_CTRL_BIT) | - (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT), - sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->xlen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_XLEN); - writel_relaxed(sdesc->ylen, sdma->base + cid * 0x10 + - SIRFSOC_DMA_CH_YLEN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) | - (1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; - /* - * writel has an implict memory write barrier to make sure data is - * flushed into memory before starting DMA - */ - writel(sdesc->addr >> 2, sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR); + /* Start the DMA transfer */ + sdma->exec_desc(sdesc, cid, schan->mode, base); - if (sdesc->cyclic) { - writel((1 << cid) | 1 << (cid + 16) | - 
readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + if (sdesc->cyclic) schan->happened_cyclic = schan->completed_cyclic = 0; - } } /* Interrupt handler */ @@ -160,27 +268,65 @@ static irqreturn_t sirfsoc_dma_irq(int irq, void *data) struct sirfsoc_dma_chan *schan; struct sirfsoc_dma_desc *sdesc = NULL; u32 is; + bool chain; int ch; + void __iomem *reg; + + switch (sdma->type) { + case SIRFSOC_DMA_VER_A6: + case SIRFSOC_DMA_VER_A7V1: + is = readl(sdma->base + SIRFSOC_DMA_CH_INT); + reg = sdma->base + SIRFSOC_DMA_CH_INT; + while ((ch = fls(is) - 1) >= 0) { + is &= ~(1 << ch); + writel_relaxed(1 << ch, reg); + schan = &sdma->channels[ch]; + spin_lock(&schan->lock); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + if (!sdesc->cyclic) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } else + schan->happened_cyclic++; + spin_unlock(&schan->lock); + } + break; - is = readl(sdma->base + SIRFSOC_DMA_CH_INT); - while ((ch = fls(is) - 1) >= 0) { - is &= ~(1 << ch); - writel_relaxed(1 << ch, sdma->base + SIRFSOC_DMA_CH_INT); - schan = &sdma->channels[ch]; + case SIRFSOC_DMA_VER_A7V2: + is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7); + reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7; + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg); + schan = &sdma->channels[0]; spin_lock(&schan->lock); - - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); if (!sdesc->cyclic) { - /* Execute queued descriptors */ - list_splice_tail_init(&schan->active, &schan->completed); - if (!list_empty(&schan->queued)) - sirfsoc_dma_execute(schan); - } else + chain = sdesc->chain; + if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) || + (!chain && + (is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) { + /* Execute queued descriptors */ + list_splice_tail_init(&schan->active, + &schan->completed); + dma_cookie_complete(&sdesc->desc); + if (!list_empty(&schan->queued)) + sirfsoc_dma_execute(schan); + } + } else if (sdesc->cyclic && (is & + SIRFSOC_DMA_INT_LOOP_INT_ATLAS7)) schan->happened_cyclic++; spin_unlock(&schan->lock); + break; + + default: + break; } /* Schedule tasklet */ @@ -227,16 +373,15 @@ static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma) schan->chan.completed_cookie = last_cookie; spin_unlock_irqrestore(&schan->lock, flags); } else { - /* for cyclic channel, desc is always in active list */ - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - - if (!sdesc || (sdesc && !sdesc->cyclic)) { - /* without active cyclic DMA */ + if (list_empty(&schan->active)) { spin_unlock_irqrestore(&schan->lock, flags); continue; } + /* for cyclic channel, desc is always in active list */ + sdesc = list_first_entry(&schan->active, + struct sirfsoc_dma_desc, node); + /* cyclic DMA */ happened_cyclic = schan->happened_cyclic; spin_unlock_irqrestore(&schan->lock, flags); @@ -307,20 +452,35 @@ static int sirfsoc_dma_terminate_all(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) { - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & - ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - } else { + 
switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_INT); writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7); + writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, + sdma->base + SIRFSOC_DMA_INT_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) & + ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN); + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); + break; + default: + break; } - writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID); - list_splice_tail_init(&schan->active, &schan->free); list_splice_tail_init(&schan->queued, &schan->free); @@ -338,13 +498,25 @@ static int sirfsoc_dma_pause_chan(struct dma_chan *chan) spin_lock_irqsave(&schan->lock, flags); - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - & ~((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_CLR); + sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) & + ~((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -359,14 +531,25 @@ static int sirfsoc_dma_resume_chan(struct dma_chan *chan) unsigned long flags; spin_lock_irqsave(&schan->lock, flags); - - if (!sdma->is_marco) - writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL) - | ((1 << cid) | 1 << (cid + 16)), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); - else + switch (sdma->type) { + case SIRFSOC_DMA_VER_A7V1: writel_relaxed((1 << cid) | 1 << (cid + 16), - sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A7V2: + writel_relaxed(0x10001, + sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7); + break; + case SIRFSOC_DMA_VER_A6: + writel_relaxed(readl_relaxed(sdma->base + + SIRFSOC_DMA_CH_LOOP_CTRL) | + ((1 << cid) | 1 << (cid + 16)), + sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL); + break; + + default: + break; + } spin_unlock_irqrestore(&schan->lock, flags); @@ -473,14 +656,31 @@ sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie, spin_lock_irqsave(&schan->lock, flags); - sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, - node); - dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * - (sdesc->width * SIRFSOC_DMA_WORD_LEN); + if (list_empty(&schan->active)) { + ret = dma_cookie_status(chan, cookie, txstate); + dma_set_residue(txstate, 0); + spin_unlock_irqrestore(&schan->lock, flags); + return ret; + } + sdesc = list_first_entry(&schan->active, struct 
sirfsoc_dma_desc, node); + if (sdesc->cyclic) + dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) * + (sdesc->width * SIRFSOC_DMA_WORD_LEN); + else + dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN; ret = dma_cookie_status(chan, cookie, txstate); - dma_pos = readl_relaxed(sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) - << 2; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) + cid = 0; + + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR); + } else { + dma_pos = readl_relaxed( + sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2; + } + residue = dma_request_bytes - (dma_pos - sdesc->addr); dma_set_residue(txstate, residue); @@ -647,6 +847,7 @@ static int sirfsoc_dma_probe(struct platform_device *op) struct dma_device *dma; struct sirfsoc_dma *sdma; struct sirfsoc_dma_chan *schan; + struct sirfsoc_dmadata *data; struct resource res; ulong regs_start, regs_size; u32 id; @@ -657,9 +858,11 @@ static int sirfsoc_dma_probe(struct platform_device *op) dev_err(dev, "Memory exhausted!\n"); return -ENOMEM; } - - if (of_device_is_compatible(dn, "sirf,marco-dmac")) - sdma->is_marco = true; + data = (struct sirfsoc_dmadata *) + (of_match_device(op->dev.driver->of_match_table, + &op->dev)->data); + sdma->exec_desc = data->exec; + sdma->type = data->type; if (of_property_read_u32(dn, "cell-index", &id)) { dev_err(dev, "Fail to get DMAC index\n"); @@ -816,6 +1019,8 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; /* * if we were runtime-suspended before, resume to enable clock @@ -827,11 +1032,19 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) return ret; } + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + } + /* * DMA controller will lose all registers while suspending * so we need to save registers for active channels */ - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -841,7 +1054,7 @@ static int sirfsoc_dma_pm_suspend(struct device *dev) save->ctrl[ch] = readl_relaxed(sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); } - save->interrupt_en = readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN); + save->interrupt_en = readl_relaxed(sdma->base + int_offset); /* Disable clock */ sirfsoc_dma_runtime_suspend(dev); @@ -857,14 +1070,27 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_chan *schan; int ch; int ret; + int count; + u32 int_offset; + u32 width_offset; /* Enable clock before accessing register */ ret = sirfsoc_dma_runtime_resume(dev); if (ret < 0) return ret; - writel_relaxed(save->interrupt_en, sdma->base + SIRFSOC_DMA_INT_EN); - for (ch = 0; ch < SIRFSOC_DMA_CHANNELS; ch++) { + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + count = 1; + int_offset = SIRFSOC_DMA_INT_EN_ATLAS7; + width_offset = SIRFSOC_DMA_WIDTH_ATLAS7; + } else { + count = SIRFSOC_DMA_CHANNELS; + int_offset = SIRFSOC_DMA_INT_EN; + width_offset = SIRFSOC_DMA_WIDTH_0; + } + + writel_relaxed(save->interrupt_en, sdma->base + int_offset); + for (ch = 0; ch < count; ch++) { schan = &sdma->channels[ch]; if (list_empty(&schan->active)) continue; @@ -872,15 +1098,21 @@ static int sirfsoc_dma_pm_resume(struct device *dev) struct sirfsoc_dma_desc, node); writel_relaxed(sdesc->width, - sdma->base + SIRFSOC_DMA_WIDTH_0 + ch * 4); + 
sdma->base + width_offset + ch * 4); writel_relaxed(sdesc->xlen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN); writel_relaxed(sdesc->ylen, sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN); writel_relaxed(save->ctrl[ch], sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL); - writel_relaxed(sdesc->addr >> 2, - sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + if (sdma->type == SIRFSOC_DMA_VER_A7V2) { + writel_relaxed(sdesc->addr, + sdma->base + SIRFSOC_DMA_CH_ADDR); + } else { + writel_relaxed(sdesc->addr >> 2, + sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR); + + } } /* if we were runtime-suspended before, suspend again */ @@ -896,11 +1128,28 @@ static const struct dev_pm_ops sirfsoc_dma_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume) }; +struct sirfsoc_dmadata sirfsoc_dmadata_a6 = { + .exec = sirfsoc_dma_execute_hw_a6, + .type = SIRFSOC_DMA_VER_A6, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = { + .exec = sirfsoc_dma_execute_hw_a7v1, + .type = SIRFSOC_DMA_VER_A7V1, +}; + +struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = { + .exec = sirfsoc_dma_execute_hw_a7v2, + .type = SIRFSOC_DMA_VER_A7V2, +}; + static const struct of_device_id sirfsoc_dma_match[] = { - { .compatible = "sirf,prima2-dmac", }, - { .compatible = "sirf,marco-dmac", }, + { .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,}, + { .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,}, + { .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,}, {}, }; +MODULE_DEVICE_TABLE(of, sirfsoc_dma_match); static struct platform_driver sirfsoc_dma_driver = { .probe = sirfsoc_dma_probe, @@ -925,7 +1174,7 @@ static void __exit sirfsoc_dma_exit(void) subsys_initcall(sirfsoc_dma_init); module_exit(sirfsoc_dma_exit); -MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>, " - "Barry Song <baohua.song@csr.com>"); +MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>"); +MODULE_AUTHOR("Barry Song <baohua.song@csr.com>"); MODULE_DESCRIPTION("SIRFSOC DMA control driver"); MODULE_LICENSE("GPL v2"); diff --git a/kernel/drivers/dma/ste_dma40.c b/kernel/drivers/dma/ste_dma40.c index 3c10f034d..dd3e7ba27 100644 --- a/kernel/drivers/dma/ste_dma40.c +++ b/kernel/drivers/dma/ste_dma40.c @@ -2853,7 +2853,7 @@ static void d40_ops_init(struct d40_base *base, struct dma_device *dev) * This controller can only access address at even * 32bit boundaries, i.e. 2^2 */ - dev->copy_align = 2; + dev->copy_align = DMAENGINE_ALIGN_4_BYTES; } if (dma_has_cap(DMA_SG, dev->cap_mask)) @@ -2907,7 +2907,7 @@ static int __init d40_dmaengine_init(struct d40_base *base, if (err) { d40_err(base->dev, - "Failed to regsiter memcpy only channels\n"); + "Failed to register memcpy only channels\n"); goto failure2; } diff --git a/kernel/drivers/dma/sun4i-dma.c b/kernel/drivers/dma/sun4i-dma.c new file mode 100644 index 000000000..1661d5182 --- /dev/null +++ b/kernel/drivers/dma/sun4i-dma.c @@ -0,0 +1,1288 @@ +/* + * Copyright (C) 2014 Emilio López + * Emilio López <emilio@elopez.com.ar> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/dmaengine.h> +#include <linux/dmapool.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of_dma.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#include "virt-dma.h" + +/** Common macros to normal and dedicated DMA registers **/ + +#define SUN4I_DMA_CFG_LOADING BIT(31) +#define SUN4I_DMA_CFG_DST_DATA_WIDTH(width) ((width) << 25) +#define SUN4I_DMA_CFG_DST_BURST_LENGTH(len) ((len) << 23) +#define SUN4I_DMA_CFG_DST_ADDR_MODE(mode) ((mode) << 21) +#define SUN4I_DMA_CFG_DST_DRQ_TYPE(type) ((type) << 16) +#define SUN4I_DMA_CFG_SRC_DATA_WIDTH(width) ((width) << 9) +#define SUN4I_DMA_CFG_SRC_BURST_LENGTH(len) ((len) << 7) +#define SUN4I_DMA_CFG_SRC_ADDR_MODE(mode) ((mode) << 5) +#define SUN4I_DMA_CFG_SRC_DRQ_TYPE(type) (type) + +/** Normal DMA register values **/ + +/* Normal DMA source/destination data request type values */ +#define SUN4I_NDMA_DRQ_TYPE_SDRAM 0x16 +#define SUN4I_NDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Normal DMA register layout **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_NDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_NDMA_ADDR_MODE_IO 1 + +/* Normal DMA configuration register layout */ +#define SUN4I_NDMA_CFG_CONT_MODE BIT(30) +#define SUN4I_NDMA_CFG_WAIT_STATE(n) ((n) << 27) +#define SUN4I_NDMA_CFG_DST_NON_SECURE BIT(22) +#define SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_NDMA_CFG_SRC_NON_SECURE BIT(6) + +/** Dedicated DMA register values **/ + +/* Dedicated DMA source/destination address mode values */ +#define SUN4I_DDMA_ADDR_MODE_LINEAR 0 +#define SUN4I_DDMA_ADDR_MODE_IO 1 +#define SUN4I_DDMA_ADDR_MODE_HORIZONTAL_PAGE 2 +#define SUN4I_DDMA_ADDR_MODE_VERTICAL_PAGE 3 + +/* Dedicated DMA source/destination data request type values */ +#define SUN4I_DDMA_DRQ_TYPE_SDRAM 0x1 +#define SUN4I_DDMA_DRQ_TYPE_LIMIT (0x1F + 1) + +/** Dedicated DMA register layout **/ + +/* Dedicated DMA configuration register layout */ +#define SUN4I_DDMA_CFG_BUSY BIT(30) +#define SUN4I_DDMA_CFG_CONT_MODE BIT(29) +#define SUN4I_DDMA_CFG_DST_NON_SECURE BIT(28) +#define SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN BIT(15) +#define SUN4I_DDMA_CFG_SRC_NON_SECURE BIT(12) + +/* Dedicated DMA parameter register layout */ +#define SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(n) (((n) - 1) << 24) +#define SUN4I_DDMA_PARA_DST_WAIT_CYCLES(n) (((n) - 1) << 16) +#define SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(n) (((n) - 1) << 8) +#define SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(n) (((n) - 1) << 0) + +/** DMA register offsets **/ + +/* General register offsets */ +#define SUN4I_DMA_IRQ_ENABLE_REG 0x0 +#define SUN4I_DMA_IRQ_PENDING_STATUS_REG 0x4 + +/* Normal DMA register offsets */ +#define SUN4I_NDMA_CHANNEL_REG_BASE(n) (0x100 + (n) * 0x20) +#define SUN4I_NDMA_CFG_REG 0x0 +#define SUN4I_NDMA_SRC_ADDR_REG 0x4 +#define SUN4I_NDMA_DST_ADDR_REG 0x8 +#define SUN4I_NDMA_BYTE_COUNT_REG 0xC + +/* Dedicated DMA register offsets */ +#define SUN4I_DDMA_CHANNEL_REG_BASE(n) (0x300 + (n) * 0x20) +#define SUN4I_DDMA_CFG_REG 0x0 +#define SUN4I_DDMA_SRC_ADDR_REG 0x4 +#define SUN4I_DDMA_DST_ADDR_REG 0x8 +#define SUN4I_DDMA_BYTE_COUNT_REG 0xC +#define SUN4I_DDMA_PARA_REG 0x18 + +/** DMA Driver **/ + +/* + * Normal DMA has 8 channels, and Dedicated DMA has another 8, so + * that's 16 channels. As for endpoints, there's 29 and 21 + * respectively. 
Given that the Normal DMA endpoints (other than + * SDRAM) can be used as tx/rx, we need 78 vchans in total + */ +#define SUN4I_NDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DDMA_NR_MAX_CHANNELS 8 +#define SUN4I_DMA_NR_MAX_CHANNELS \ + (SUN4I_NDMA_NR_MAX_CHANNELS + SUN4I_DDMA_NR_MAX_CHANNELS) +#define SUN4I_NDMA_NR_MAX_VCHANS (29 * 2 - 1) +#define SUN4I_DDMA_NR_MAX_VCHANS 21 +#define SUN4I_DMA_NR_MAX_VCHANS \ + (SUN4I_NDMA_NR_MAX_VCHANS + SUN4I_DDMA_NR_MAX_VCHANS) + +/* This set of SUN4I_DDMA timing parameters were found experimentally while + * working with the SPI driver and seem to make it behave correctly */ +#define SUN4I_DDMA_MAGIC_SPI_PARAMETERS \ + (SUN4I_DDMA_PARA_DST_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_SRC_DATA_BLK_SIZE(1) | \ + SUN4I_DDMA_PARA_DST_WAIT_CYCLES(2) | \ + SUN4I_DDMA_PARA_SRC_WAIT_CYCLES(2)) + +struct sun4i_dma_pchan { + /* Register base of channel */ + void __iomem *base; + /* vchan currently being serviced */ + struct sun4i_dma_vchan *vchan; + /* Is this a dedicated pchan? */ + int is_dedicated; +}; + +struct sun4i_dma_vchan { + struct virt_dma_chan vc; + struct dma_slave_config cfg; + struct sun4i_dma_pchan *pchan; + struct sun4i_dma_promise *processing; + struct sun4i_dma_contract *contract; + u8 endpoint; + int is_dedicated; +}; + +struct sun4i_dma_promise { + u32 cfg; + u32 para; + dma_addr_t src; + dma_addr_t dst; + size_t len; + struct list_head list; +}; + +/* A contract is a set of promises */ +struct sun4i_dma_contract { + struct virt_dma_desc vd; + struct list_head demands; + struct list_head completed_demands; + int is_cyclic; +}; + +struct sun4i_dma_dev { + DECLARE_BITMAP(pchans_used, SUN4I_DMA_NR_MAX_CHANNELS); + struct dma_device slave; + struct sun4i_dma_pchan *pchans; + struct sun4i_dma_vchan *vchans; + void __iomem *base; + struct clk *clk; + int irq; + spinlock_t lock; +}; + +static struct sun4i_dma_dev *to_sun4i_dma_dev(struct dma_device *dev) +{ + return container_of(dev, struct sun4i_dma_dev, slave); +} + +static struct sun4i_dma_vchan *to_sun4i_dma_vchan(struct dma_chan *chan) +{ + return container_of(chan, struct sun4i_dma_vchan, vc.chan); +} + +static struct sun4i_dma_contract *to_sun4i_dma_contract(struct virt_dma_desc *vd) +{ + return container_of(vd, struct sun4i_dma_contract, vd); +} + +static struct device *chan2dev(struct dma_chan *chan) +{ + return &chan->dev->device; +} + +static int convert_burst(u32 maxburst) +{ + if (maxburst > 8) + return -EINVAL; + + /* 1 -> 0, 4 -> 1, 8 -> 2 */ + return (maxburst >> 2); +} + +static int convert_buswidth(enum dma_slave_buswidth addr_width) +{ + if (addr_width > DMA_SLAVE_BUSWIDTH_4_BYTES) + return -EINVAL; + + /* 8 (1 byte) -> 0, 16 (2 bytes) -> 1, 32 (4 bytes) -> 2 */ + return (addr_width >> 1); +} + +static void sun4i_dma_free_chan_resources(struct dma_chan *chan) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + vchan_free_chan_resources(&vchan->vc); +} + +static struct sun4i_dma_pchan *find_and_use_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_pchan *pchan = NULL, *pchans = priv->pchans; + unsigned long flags; + int i, max; + + /* + * pchans 0-SUN4I_NDMA_NR_MAX_CHANNELS are normal, and + * SUN4I_NDMA_NR_MAX_CHANNELS+ are dedicated ones + */ + if (vchan->is_dedicated) { + i = SUN4I_NDMA_NR_MAX_CHANNELS; + max = SUN4I_DMA_NR_MAX_CHANNELS; + } else { + i = 0; + max = SUN4I_NDMA_NR_MAX_CHANNELS; + } + + spin_lock_irqsave(&priv->lock, flags); + for_each_clear_bit_from(i, &priv->pchans_used, max) { + pchan = &pchans[i]; + pchan->vchan = vchan; 
+ set_bit(i, priv->pchans_used); + break; + } + spin_unlock_irqrestore(&priv->lock, flags); + + return pchan; +} + +static void release_pchan(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan) +{ + unsigned long flags; + int nr = pchan - priv->pchans; + + spin_lock_irqsave(&priv->lock, flags); + + pchan->vchan = NULL; + clear_bit(nr, priv->pchans_used); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +static void configure_pchan(struct sun4i_dma_pchan *pchan, + struct sun4i_dma_promise *d) +{ + /* + * Configure addresses and misc parameters depending on type + * SUN4I_DDMA has an extra field with timing parameters + */ + if (pchan->is_dedicated) { + writel_relaxed(d->src, pchan->base + SUN4I_DDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_DDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + writel_relaxed(d->para, pchan->base + SUN4I_DDMA_PARA_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_DDMA_CFG_REG); + } else { + writel_relaxed(d->src, pchan->base + SUN4I_NDMA_SRC_ADDR_REG); + writel_relaxed(d->dst, pchan->base + SUN4I_NDMA_DST_ADDR_REG); + writel_relaxed(d->len, pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + writel_relaxed(d->cfg, pchan->base + SUN4I_NDMA_CFG_REG); + } +} + +static void set_pchan_interrupt(struct sun4i_dma_dev *priv, + struct sun4i_dma_pchan *pchan, + int half, int end) +{ + u32 reg; + int pchan_number = pchan - priv->pchans; + unsigned long flags; + + spin_lock_irqsave(&priv->lock, flags); + + reg = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + if (half) + reg |= BIT(pchan_number * 2); + else + reg &= ~BIT(pchan_number * 2); + + if (end) + reg |= BIT(pchan_number * 2 + 1); + else + reg &= ~BIT(pchan_number * 2 + 1); + + writel_relaxed(reg, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + + spin_unlock_irqrestore(&priv->lock, flags); +} + +/** + * Execute pending operations on a vchan + * + * When given a vchan, this function will try to acquire a suitable + * pchan and, if successful, will configure it to fulfill a promise + * from the next pending contract. + * + * This function must be called with &vchan->vc.lock held. 
+ */ +static int __execute_vchan_pending(struct sun4i_dma_dev *priv, + struct sun4i_dma_vchan *vchan) +{ + struct sun4i_dma_promise *promise = NULL; + struct sun4i_dma_contract *contract = NULL; + struct sun4i_dma_pchan *pchan; + struct virt_dma_desc *vd; + int ret; + + lockdep_assert_held(&vchan->vc.lock); + + /* We need a pchan to do anything, so secure one if available */ + pchan = find_and_use_pchan(priv, vchan); + if (!pchan) + return -EBUSY; + + /* + * Channel endpoints must not be repeated, so if this vchan + * has already submitted some work, we can't do anything else + */ + if (vchan->processing) { + dev_dbg(chan2dev(&vchan->vc.chan), + "processing something to this endpoint already\n"); + ret = -EBUSY; + goto release_pchan; + } + + do { + /* Figure out which contract we're working with today */ + vd = vchan_next_desc(&vchan->vc); + if (!vd) { + dev_dbg(chan2dev(&vchan->vc.chan), + "No pending contract found"); + ret = 0; + goto release_pchan; + } + + contract = to_sun4i_dma_contract(vd); + if (list_empty(&contract->demands)) { + /* The contract has been completed so mark it as such */ + list_del(&contract->vd.node); + vchan_cookie_complete(&contract->vd); + dev_dbg(chan2dev(&vchan->vc.chan), + "Empty contract found and marked complete"); + } + } while (list_empty(&contract->demands)); + + /* Now find out what we need to do */ + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + vchan->processing = promise; + + /* ... and make it reality */ + if (promise) { + vchan->contract = contract; + vchan->pchan = pchan; + set_pchan_interrupt(priv, pchan, contract->is_cyclic, 1); + configure_pchan(pchan, promise); + } + + return 0; + +release_pchan: + release_pchan(priv, pchan); + return ret; +} + +static int sanitize_config(struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + switch (direction) { + case DMA_MEM_TO_DEV: + if ((sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->dst_maxburst) + return -EINVAL; + + if (sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->src_addr_width = sconfig->dst_addr_width; + + if (!sconfig->src_maxburst) + sconfig->src_maxburst = sconfig->dst_maxburst; + + break; + + case DMA_DEV_TO_MEM: + if ((sconfig->src_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) || + !sconfig->src_maxburst) + return -EINVAL; + + if (sconfig->dst_addr_width == DMA_SLAVE_BUSWIDTH_UNDEFINED) + sconfig->dst_addr_width = sconfig->src_addr_width; + + if (!sconfig->dst_maxburst) + sconfig->dst_maxburst = sconfig->src_maxburst; + + break; + default: + return 0; + } + + return 0; +} + +/** + * Generate a promise, to be used in a normal DMA contract. + * + * A NDMA promise contains all the information required to program the + * normal part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. 
+ * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ndma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig, + enum dma_transfer_direction direction) +{ + struct sun4i_dma_promise *promise; + int ret; + + ret = sanitize_config(sconfig, direction); + if (ret) + return NULL; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_NDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + dev_dbg(chan2dev(chan), + "src burst %d, dst burst %d, src buswidth %d, dst buswidth %d", + sconfig->src_maxburst, sconfig->dst_maxburst, + sconfig->src_addr_width, sconfig->dst_addr_width); + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a promise, to be used in a dedicated DMA contract. + * + * A DDMA promise contains all the information required to program the + * Dedicated part of the DMA Engine and get data copied. A non-executed + * promise will live in the demands list on a contract. Once it has been + * completed, it will be moved to the completed demands list for later freeing. + * All linked promises will be freed when the corresponding contract is freed + */ +static struct sun4i_dma_promise * +generate_ddma_promise(struct dma_chan *chan, dma_addr_t src, dma_addr_t dest, + size_t len, struct dma_slave_config *sconfig) +{ + struct sun4i_dma_promise *promise; + int ret; + + promise = kzalloc(sizeof(*promise), GFP_NOWAIT); + if (!promise) + return NULL; + + promise->src = src; + promise->dst = dest; + promise->len = len; + promise->cfg = SUN4I_DMA_CFG_LOADING | + SUN4I_DDMA_CFG_BYTE_COUNT_MODE_REMAIN; + + /* Source burst */ + ret = convert_burst(sconfig->src_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_BURST_LENGTH(ret); + + /* Destination burst */ + ret = convert_burst(sconfig->dst_maxburst); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_BURST_LENGTH(ret); + + /* Source bus width */ + ret = convert_buswidth(sconfig->src_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_SRC_DATA_WIDTH(ret); + + /* Destination bus width */ + ret = convert_buswidth(sconfig->dst_addr_width); + if (IS_ERR_VALUE(ret)) + goto fail; + promise->cfg |= SUN4I_DMA_CFG_DST_DATA_WIDTH(ret); + + return promise; + +fail: + kfree(promise); + return NULL; +} + +/** + * Generate a contract + * + * Contracts function as DMA descriptors. As our hardware does not support + * linked lists, we need to implement SG via software. We use a contract + * to hold all the pieces of the request and process them serially one + * after another. Each piece is represented as a promise. 
+ */ +static struct sun4i_dma_contract *generate_dma_contract(void) +{ + struct sun4i_dma_contract *contract; + + contract = kzalloc(sizeof(*contract), GFP_NOWAIT); + if (!contract) + return NULL; + + INIT_LIST_HEAD(&contract->demands); + INIT_LIST_HEAD(&contract->completed_demands); + + return contract; +} + +/** + * Get next promise on a cyclic transfer + * + * Cyclic contracts contain a series of promises which are executed on a + * loop. This function returns the next promise from a cyclic contract, + * so it can be programmed into the hardware. + */ +static struct sun4i_dma_promise * +get_next_cyclic_promise(struct sun4i_dma_contract *contract) +{ + struct sun4i_dma_promise *promise; + + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (!promise) { + list_splice_init(&contract->completed_demands, + &contract->demands); + promise = list_first_entry(&contract->demands, + struct sun4i_dma_promise, list); + } + + return promise; +} + +/** + * Free a contract and all its associated promises + */ +static void sun4i_dma_free_contract(struct virt_dma_desc *vd) +{ + struct sun4i_dma_contract *contract = to_sun4i_dma_contract(vd); + struct sun4i_dma_promise *promise, *tmp; + + /* Free all the demands and completed demands */ + list_for_each_entry_safe(promise, tmp, &contract->demands, list) + kfree(promise); + + list_for_each_entry_safe(promise, tmp, &contract->completed_demands, list) + kfree(promise); + + kfree(contract); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, + dma_addr_t src, size_t len, unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + /* + * We can only do the copy to bus aligned addresses, so + * choose the best one so we get decent performance. We also + * maximize the burst size for this same reason. 
+ */ + sconfig->src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + sconfig->src_maxburst = 8; + sconfig->dst_maxburst = 8; + + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, src, dest, len, sconfig); + else + promise = generate_ndma_promise(chan, src, dest, len, sconfig, + DMA_MEM_TO_MEM); + + if (!promise) { + kfree(contract); + return NULL; + } + + /* Configure memcpy mode */ + if (vchan->is_dedicated) { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_DDMA_DRQ_TYPE_SDRAM); + } else { + promise->cfg |= SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* Fill the contract with our only promise */ + list_add_tail(&promise->list, &contract->demands); + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf, size_t len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + dma_addr_t src, dest; + u32 endpoints; + int nr_periods, offset, plength, i; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + if (vchan->is_dedicated) { + /* + * As we are using this just for audio data, we need to use + * normal DMA. There is nothing stopping us from supporting + * dedicated DMA here as well, so if a client comes up and + * requires it, it will be simple to implement it. + */ + dev_err(chan2dev(chan), + "Cyclic transfers are only supported on Normal DMA\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + contract->is_cyclic = 1; + + /* Figure out the endpoints and the address we need */ + if (dir == DMA_MEM_TO_DEV) { + src = buf; + dest = sconfig->dst_addr; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO); + } else { + src = sconfig->src_addr; + dest = buf; + endpoints = SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(SUN4I_NDMA_ADDR_MODE_IO) | + SUN4I_DMA_CFG_DST_DRQ_TYPE(SUN4I_NDMA_DRQ_TYPE_SDRAM); + } + + /* + * We will be using half done interrupts to make two periods + * out of a promise, so we need to program the DMA engine less + * often + */ + + /* + * The engine can interrupt on half-transfer, so we can use + * this feature to program the engine half as often as if we + * didn't use it (keep in mind the hardware doesn't support + * linked lists). + * + * Say you have a set of periods (| marks the start/end, I for + * interrupt, P for programming the engine to do a new + * transfer), the easy but slow way would be to do + * + * |---|---|---|---| (periods / promises) + * P I,P I,P I,P I + * + * Using half transfer interrupts you can do + * + * |-------|-------| (promises as configured on hw) + * |---|---|---|---| (periods) + * P I I,P I I + * + * Which requires half the engine programming for the same + * functionality. 
+ */ + nr_periods = DIV_ROUND_UP(len / period_len, 2); + for (i = 0; i < nr_periods; i++) { + /* Calculate the offset in the buffer and the length needed */ + offset = i * period_len * 2; + plength = min((len - offset), (period_len * 2)); + if (dir == DMA_MEM_TO_DEV) + src = buf + offset; + else + dest = buf + offset; + + /* Make the promise */ + promise = generate_ndma_promise(chan, src, dest, + plength, sconfig, dir); + if (!promise) { + /* TODO: should we free everything? */ + return NULL; + } + promise->cfg |= endpoints; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* And add it to the vchan */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static struct dma_async_tx_descriptor * +sun4i_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_transfer_direction dir, + unsigned long flags, void *context) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct dma_slave_config *sconfig = &vchan->cfg; + struct sun4i_dma_promise *promise; + struct sun4i_dma_contract *contract; + u8 ram_type, io_mode, linear_mode; + struct scatterlist *sg; + dma_addr_t srcaddr, dstaddr; + u32 endpoints, para; + int i; + + if (!sgl) + return NULL; + + if (!is_slave_direction(dir)) { + dev_err(chan2dev(chan), "Invalid DMA direction\n"); + return NULL; + } + + contract = generate_dma_contract(); + if (!contract) + return NULL; + + if (vchan->is_dedicated) { + io_mode = SUN4I_DDMA_ADDR_MODE_IO; + linear_mode = SUN4I_DDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_DDMA_DRQ_TYPE_SDRAM; + } else { + io_mode = SUN4I_NDMA_ADDR_MODE_IO; + linear_mode = SUN4I_NDMA_ADDR_MODE_LINEAR; + ram_type = SUN4I_NDMA_DRQ_TYPE_SDRAM; + } + + if (dir == DMA_MEM_TO_DEV) + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_DST_ADDR_MODE(io_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(linear_mode); + else + endpoints = SUN4I_DMA_CFG_DST_DRQ_TYPE(ram_type) | + SUN4I_DMA_CFG_DST_ADDR_MODE(linear_mode) | + SUN4I_DMA_CFG_SRC_DRQ_TYPE(vchan->endpoint) | + SUN4I_DMA_CFG_SRC_ADDR_MODE(io_mode); + + for_each_sg(sgl, sg, sg_len, i) { + /* Figure out addresses */ + if (dir == DMA_MEM_TO_DEV) { + srcaddr = sg_dma_address(sg); + dstaddr = sconfig->dst_addr; + } else { + srcaddr = sconfig->src_addr; + dstaddr = sg_dma_address(sg); + } + + /* + * These are the magic DMA engine timings that keep SPI going. + * I haven't seen any interface on DMAEngine to configure + * timings, and so far they seem to work for everything we + * support, so I've kept them here. I don't know if other + * devices need different timings because, as usual, we only + * have the "para" bitfield meanings, but no comment on what + * the values should be when doing a certain operation :| + */ + para = SUN4I_DDMA_MAGIC_SPI_PARAMETERS; + + /* And make a suitable promise */ + if (vchan->is_dedicated) + promise = generate_ddma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig); + else + promise = generate_ndma_promise(chan, srcaddr, dstaddr, + sg_dma_len(sg), + sconfig, dir); + + if (!promise) + return NULL; /* TODO: should we free everything? 
*/ + + promise->cfg |= endpoints; + promise->para = para; + + /* Then add it to the contract */ + list_add_tail(&promise->list, &contract->demands); + } + + /* + * Once we've got all the promises ready, add the contract + * to the pending list on the vchan + */ + return vchan_tx_prep(&vchan->vc, &contract->vd, flags); +} + +static int sun4i_dma_terminate_all(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + LIST_HEAD(head); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_get_all_descriptors(&vchan->vc, &head); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + /* + * Clearing the configuration register will halt the pchan. Interrupts + * may still trigger, so don't forget to disable them. + */ + if (pchan) { + if (pchan->is_dedicated) + writel(0, pchan->base + SUN4I_DDMA_CFG_REG); + else + writel(0, pchan->base + SUN4I_NDMA_CFG_REG); + set_pchan_interrupt(priv, pchan, 0, 0); + release_pchan(priv, pchan); + } + + spin_lock_irqsave(&vchan->vc.lock, flags); + vchan_dma_desc_free_list(&vchan->vc, &head); + /* Clear these so the vchan is usable again */ + vchan->processing = NULL; + vchan->pchan = NULL; + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return 0; +} + +static int sun4i_dma_config(struct dma_chan *chan, + struct dma_slave_config *config) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + + memcpy(&vchan->cfg, config, sizeof(*config)); + + return 0; +} + +static struct dma_chan *sun4i_dma_of_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct sun4i_dma_dev *priv = ofdma->of_dma_data; + struct sun4i_dma_vchan *vchan; + struct dma_chan *chan; + u8 is_dedicated = dma_spec->args[0]; + u8 endpoint = dma_spec->args[1]; + + /* Check if type is Normal or Dedicated */ + if (is_dedicated != 0 && is_dedicated != 1) + return NULL; + + /* Make sure the endpoint looks sane */ + if ((is_dedicated && endpoint >= SUN4I_DDMA_DRQ_TYPE_LIMIT) || + (!is_dedicated && endpoint >= SUN4I_NDMA_DRQ_TYPE_LIMIT)) + return NULL; + + chan = dma_get_any_slave_channel(&priv->slave); + if (!chan) + return NULL; + + /* Assign the endpoint to the vchan */ + vchan = to_sun4i_dma_vchan(chan); + vchan->is_dedicated = is_dedicated; + vchan->endpoint = endpoint; + + return chan; +} + +static enum dma_status sun4i_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + struct sun4i_dma_pchan *pchan = vchan->pchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(chan, cookie, state); + if (!state || (ret == DMA_COMPLETE)) + return ret; + + spin_lock_irqsave(&vchan->vc.lock, flags); + vd = vchan_find_desc(&vchan->vc, cookie); + if (!vd) + goto exit; + contract = to_sun4i_dma_contract(vd); + + list_for_each_entry(promise, &contract->demands, list) + bytes += promise->len; + + /* + * The hardware is configured to return the remaining byte + * quantity. 
If possible, replace the first listed element's + * full size with the actual remaining amount + */ + promise = list_first_entry_or_null(&contract->demands, + struct sun4i_dma_promise, list); + if (promise && pchan) { + bytes -= promise->len; + if (pchan->is_dedicated) + bytes += readl(pchan->base + SUN4I_DDMA_BYTE_COUNT_REG); + else + bytes += readl(pchan->base + SUN4I_NDMA_BYTE_COUNT_REG); + } + +exit: + + dma_set_residue(state, bytes); + spin_unlock_irqrestore(&vchan->vc.lock, flags); + + return ret; +} + +static void sun4i_dma_issue_pending(struct dma_chan *chan) +{ + struct sun4i_dma_dev *priv = to_sun4i_dma_dev(chan->device); + struct sun4i_dma_vchan *vchan = to_sun4i_dma_vchan(chan); + unsigned long flags; + + spin_lock_irqsave(&vchan->vc.lock, flags); + + /* + * If there are pending transactions for this vchan, push one of + * them into the engine to get the ball rolling. + */ + if (vchan_issue_pending(&vchan->vc)) + __execute_vchan_pending(priv, vchan); + + spin_unlock_irqrestore(&vchan->vc.lock, flags); +} + +static irqreturn_t sun4i_dma_interrupt(int irq, void *dev_id) +{ + struct sun4i_dma_dev *priv = dev_id; + struct sun4i_dma_pchan *pchans = priv->pchans, *pchan; + struct sun4i_dma_vchan *vchan; + struct sun4i_dma_contract *contract; + struct sun4i_dma_promise *promise; + unsigned long pendirq, irqs, disableirqs; + int bit, i, free_room, allow_mitigation = 1; + + pendirq = readl_relaxed(priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + +handle_pending: + + disableirqs = 0; + free_room = 0; + + for_each_set_bit(bit, &pendirq, 32) { + pchan = &pchans[bit >> 1]; + vchan = pchan->vchan; + if (!vchan) /* a terminated channel may still interrupt */ + continue; + contract = vchan->contract; + + /* + * Disable the IRQ and free the pchan if it's an end + * interrupt (odd bit) + */ + if (bit & 1) { + spin_lock(&vchan->vc.lock); + + /* + * Move the promise into the completed list now that + * we're done with it + */ + list_del(&vchan->processing->list); + list_add_tail(&vchan->processing->list, + &contract->completed_demands); + + /* + * Cyclic DMA transfers are special: + * - There's always something we can dispatch + * - We need to run the callback + * - Latency is very important, as this is used by audio + * We therefore just cycle through the list and dispatch + * whatever we have here, reusing the pchan. There's + * no need to run the thread after this. + * + * For non-cyclic transfers we need to look around, + * so we can program some more work, or notify the + * client that their transfers have been completed. 
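For reference, the pending-IRQ register packs two bits per physical channel; the helpers below use invented names for illustration and only restate the indexing the loop above relies on:

/*
 * Bit 2n is the half-done interrupt and bit 2n + 1 the end-of-transfer
 * interrupt for physical channel n, hence pchan = &pchans[bit >> 1].
 */
static inline unsigned int sun4i_dma_irq_to_pchan(unsigned int bit)
{
	return bit >> 1;
}

static inline bool sun4i_dma_irq_is_end(unsigned int bit)
{
	return bit & 1;
}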
+ */ + if (contract->is_cyclic) { + promise = get_next_cyclic_promise(contract); + vchan->processing = promise; + configure_pchan(pchan, promise); + vchan_cyclic_callback(&contract->vd); + } else { + vchan->processing = NULL; + vchan->pchan = NULL; + + free_room = 1; + disableirqs |= BIT(bit); + release_pchan(priv, pchan); + } + + spin_unlock(&vchan->vc.lock); + } else { + /* Half done interrupt */ + if (contract->is_cyclic) + vchan_cyclic_callback(&contract->vd); + else + disableirqs |= BIT(bit); + } + } + + /* Disable the IRQs for events we handled */ + spin_lock(&priv->lock); + irqs = readl_relaxed(priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel_relaxed(irqs & ~disableirqs, + priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + spin_unlock(&priv->lock); + + /* Writing 1 to the pending field will clear the pending interrupt */ + writel_relaxed(pendirq, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + /* + * If a pchan was freed, we may be able to schedule something else, + * so have a look around + */ + if (free_room) { + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + vchan = &priv->vchans[i]; + spin_lock(&vchan->vc.lock); + __execute_vchan_pending(priv, vchan); + spin_unlock(&vchan->vc.lock); + } + } + + /* + * Handle newer interrupts if some showed up, but only do it once + * to avoid a too long a loop + */ + if (allow_mitigation) { + pendirq = readl_relaxed(priv->base + + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + if (pendirq) { + allow_mitigation = 0; + goto handle_pending; + } + } + + return IRQ_HANDLED; +} + +static int sun4i_dma_probe(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv; + struct resource *res; + int i, j, ret; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + priv->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->base)) + return PTR_ERR(priv->base); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) { + dev_err(&pdev->dev, "Cannot claim IRQ\n"); + return priv->irq; + } + + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "No clock specified\n"); + return PTR_ERR(priv->clk); + } + + platform_set_drvdata(pdev, priv); + spin_lock_init(&priv->lock); + + dma_cap_zero(priv->slave.cap_mask); + dma_cap_set(DMA_PRIVATE, priv->slave.cap_mask); + dma_cap_set(DMA_MEMCPY, priv->slave.cap_mask); + dma_cap_set(DMA_CYCLIC, priv->slave.cap_mask); + dma_cap_set(DMA_SLAVE, priv->slave.cap_mask); + + INIT_LIST_HEAD(&priv->slave.channels); + priv->slave.device_free_chan_resources = sun4i_dma_free_chan_resources; + priv->slave.device_tx_status = sun4i_dma_tx_status; + priv->slave.device_issue_pending = sun4i_dma_issue_pending; + priv->slave.device_prep_slave_sg = sun4i_dma_prep_slave_sg; + priv->slave.device_prep_dma_memcpy = sun4i_dma_prep_dma_memcpy; + priv->slave.device_prep_dma_cyclic = sun4i_dma_prep_dma_cyclic; + priv->slave.device_config = sun4i_dma_config; + priv->slave.device_terminate_all = sun4i_dma_terminate_all; + priv->slave.copy_align = 2; + priv->slave.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES); + priv->slave.directions = BIT(DMA_DEV_TO_MEM) | + BIT(DMA_MEM_TO_DEV); + priv->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; + + priv->slave.dev = &pdev->dev; + + priv->pchans = 
devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_CHANNELS, + sizeof(struct sun4i_dma_pchan), GFP_KERNEL); + priv->vchans = devm_kcalloc(&pdev->dev, SUN4I_DMA_NR_MAX_VCHANS, + sizeof(struct sun4i_dma_vchan), GFP_KERNEL); + if (!priv->vchans || !priv->pchans) + return -ENOMEM; + + /* + * [0..SUN4I_NDMA_NR_MAX_CHANNELS) are normal pchans, and + * [SUN4I_NDMA_NR_MAX_CHANNELS..SUN4I_DMA_NR_MAX_CHANNELS) are + * dedicated ones + */ + for (i = 0; i < SUN4I_NDMA_NR_MAX_CHANNELS; i++) + priv->pchans[i].base = priv->base + + SUN4I_NDMA_CHANNEL_REG_BASE(i); + + for (j = 0; i < SUN4I_DMA_NR_MAX_CHANNELS; i++, j++) { + priv->pchans[i].base = priv->base + + SUN4I_DDMA_CHANNEL_REG_BASE(j); + priv->pchans[i].is_dedicated = 1; + } + + for (i = 0; i < SUN4I_DMA_NR_MAX_VCHANS; i++) { + struct sun4i_dma_vchan *vchan = &priv->vchans[i]; + + spin_lock_init(&vchan->vc.lock); + vchan->vc.desc_free = sun4i_dma_free_contract; + vchan_init(&vchan->vc, &priv->slave); + } + + ret = clk_prepare_enable(priv->clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the clock\n"); + return ret; + } + + /* + * Make sure the IRQs are all disabled and accounted for. The bootloader + * likes to leave these dirty + */ + writel(0, priv->base + SUN4I_DMA_IRQ_ENABLE_REG); + writel(0xFFFFFFFF, priv->base + SUN4I_DMA_IRQ_PENDING_STATUS_REG); + + ret = devm_request_irq(&pdev->dev, priv->irq, sun4i_dma_interrupt, + 0, dev_name(&pdev->dev), priv); + if (ret) { + dev_err(&pdev->dev, "Cannot request IRQ\n"); + goto err_clk_disable; + } + + ret = dma_async_device_register(&priv->slave); + if (ret) { + dev_warn(&pdev->dev, "Failed to register DMA engine device\n"); + goto err_clk_disable; + } + + ret = of_dma_controller_register(pdev->dev.of_node, sun4i_dma_of_xlate, + priv); + if (ret) { + dev_err(&pdev->dev, "of_dma_controller_register failed\n"); + goto err_dma_unregister; + } + + dev_dbg(&pdev->dev, "Successfully probed SUN4I_DMA\n"); + + return 0; + +err_dma_unregister: + dma_async_device_unregister(&priv->slave); +err_clk_disable: + clk_disable_unprepare(priv->clk); + return ret; +} + +static int sun4i_dma_remove(struct platform_device *pdev) +{ + struct sun4i_dma_dev *priv = platform_get_drvdata(pdev); + + /* Disable IRQ so no more work is scheduled */ + disable_irq(priv->irq); + + of_dma_controller_free(pdev->dev.of_node); + dma_async_device_unregister(&priv->slave); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static const struct of_device_id sun4i_dma_match[] = { + { .compatible = "allwinner,sun4i-a10-dma" }, + { /* sentinel */ }, +}; + +static struct platform_driver sun4i_dma_driver = { + .probe = sun4i_dma_probe, + .remove = sun4i_dma_remove, + .driver = { + .name = "sun4i-dma", + .of_match_table = sun4i_dma_match, + }, +}; + +module_platform_driver(sun4i_dma_driver); + +MODULE_DESCRIPTION("Allwinner A10 Dedicated DMA Controller Driver"); +MODULE_AUTHOR("Emilio López <emilio@elopez.com.ar>"); +MODULE_LICENSE("GPL"); diff --git a/kernel/drivers/dma/sun6i-dma.c b/kernel/drivers/dma/sun6i-dma.c index 11e536586..2db12e493 100644 --- a/kernel/drivers/dma/sun6i-dma.c +++ b/kernel/drivers/dma/sun6i-dma.c @@ -891,11 +891,24 @@ static struct sun6i_dma_config sun8i_a23_dma_cfg = { .nr_max_vchans = 37, }; +/* + * The H3 has 12 physical channels, a maximum DRQ port id of 27, + * and a total of 34 usable source and destination endpoints. 
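For context, a minimal sketch (function name invented, error handling omitted, assuming <linux/of_device.h>) of how a per-SoC config such as the H3 entry below is typically resolved at probe time through the of_device_id .data pointer:

/* Illustrative only: pick the per-SoC limits for the probing device. */
static const struct sun6i_dma_config *
sun6i_dma_example_get_cfg(struct platform_device *pdev)
{
	const struct of_device_id *match =
		of_match_device(sun6i_dma_match, &pdev->dev);

	return match ? match->data : NULL;
}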
+ */ + +static struct sun6i_dma_config sun8i_h3_dma_cfg = { + .nr_max_channels = 12, + .nr_max_requests = 27, + .nr_max_vchans = 34, +}; + static const struct of_device_id sun6i_dma_match[] = { { .compatible = "allwinner,sun6i-a31-dma", .data = &sun6i_a31_dma_cfg }, { .compatible = "allwinner,sun8i-a23-dma", .data = &sun8i_a23_dma_cfg }, + { .compatible = "allwinner,sun8i-h3-dma", .data = &sun8i_h3_dma_cfg }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, sun6i_dma_match); static int sun6i_dma_probe(struct platform_device *pdev) { @@ -957,7 +970,7 @@ static int sun6i_dma_probe(struct platform_device *pdev) sdc->slave.device_issue_pending = sun6i_dma_issue_pending; sdc->slave.device_prep_slave_sg = sun6i_dma_prep_slave_sg; sdc->slave.device_prep_dma_memcpy = sun6i_dma_prep_dma_memcpy; - sdc->slave.copy_align = 4; + sdc->slave.copy_align = DMAENGINE_ALIGN_4_BYTES; sdc->slave.device_config = sun6i_dma_config; sdc->slave.device_pause = sun6i_dma_pause; sdc->slave.device_resume = sun6i_dma_resume; diff --git a/kernel/drivers/dma/tegra20-apb-dma.c b/kernel/drivers/dma/tegra20-apb-dma.c index eaf585e82..c8f79dcaa 100644 --- a/kernel/drivers/dma/tegra20-apb-dma.c +++ b/kernel/drivers/dma/tegra20-apb-dma.c @@ -155,7 +155,6 @@ struct tegra_dma_sg_req { int req_len; bool configured; bool last_sg; - bool half_done; struct list_head node; struct tegra_dma_desc *dma_desc; }; @@ -188,7 +187,7 @@ struct tegra_dma_channel { bool config_init; int id; int irq; - unsigned long chan_base_offset; + void __iomem *chan_addr; spinlock_t lock; bool busy; struct tegra_dma *tdma; @@ -203,8 +202,6 @@ struct tegra_dma_channel { /* ISR handler and tasklet for bottom half of isr handling */ dma_isr_handler isr_handler; struct tasklet_struct tasklet; - dma_async_tx_callback callback; - void *callback_param; /* Channel-slave specific configuration */ unsigned int slave_id; @@ -222,6 +219,13 @@ struct tegra_dma { void __iomem *base_addr; const struct tegra_dma_chip_data *chip_data; + /* + * Counter for managing global pausing of the DMA controller. + * Only applicable for devices that don't support individual + * channel pausing. 
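A short illustration of why the counter matters (hypothetical interleaving, not taken from the patch):

/*
 * Two channels pausing the controller concurrently:
 *
 *   chan A pause   ->  GENERAL = 0 written       count 0 -> 1
 *   chan B pause   ->  already stopped, skip     count 1 -> 2
 *   chan A resume  ->  still paused, skip        count 2 -> 1
 *   chan B resume  ->  GENERAL = ENABLE written  count 1 -> 0
 *
 * Without the counter, chan A's resume would re-enable the controller
 * while chan B still expects transfers to be held off.
 */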
+ */ + u32 global_pause_count; + /* Some register need to be cache before suspend */ u32 reg_gen; @@ -242,12 +246,12 @@ static inline u32 tdma_read(struct tegra_dma *tdma, u32 reg) static inline void tdc_write(struct tegra_dma_channel *tdc, u32 reg, u32 val) { - writel(val, tdc->tdma->base_addr + tdc->chan_base_offset + reg); + writel(val, tdc->chan_addr + reg); } static inline u32 tdc_read(struct tegra_dma_channel *tdc, u32 reg) { - return readl(tdc->tdma->base_addr + tdc->chan_base_offset + reg); + return readl(tdc->chan_addr + reg); } static inline struct tegra_dma_channel *to_tegra_dma_chan(struct dma_chan *dc) @@ -361,16 +365,32 @@ static void tegra_dma_global_pause(struct tegra_dma_channel *tdc, struct tegra_dma *tdma = tdc->tdma; spin_lock(&tdma->global_lock); - tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); - if (wait_for_burst_complete) - udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + + if (tdc->tdma->global_pause_count == 0) { + tdma_write(tdma, TEGRA_APBDMA_GENERAL, 0); + if (wait_for_burst_complete) + udelay(TEGRA_APBDMA_BURST_COMPLETE_TIME); + } + + tdc->tdma->global_pause_count++; + + spin_unlock(&tdma->global_lock); } static void tegra_dma_global_resume(struct tegra_dma_channel *tdc) { struct tegra_dma *tdma = tdc->tdma; - tdma_write(tdma, TEGRA_APBDMA_GENERAL, TEGRA_APBDMA_GENERAL_ENABLE); + spin_lock(&tdma->global_lock); + + if (WARN_ON(tdc->tdma->global_pause_count == 0)) + goto out; + + if (--tdc->tdma->global_pause_count == 0) + tdma_write(tdma, TEGRA_APBDMA_GENERAL, + TEGRA_APBDMA_GENERAL_ENABLE); + +out: spin_unlock(&tdma->global_lock); } @@ -601,7 +621,6 @@ static void handle_once_dma_done(struct tegra_dma_channel *tdc, return; tdc_start_head_req(tdc); - return; } static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, @@ -628,7 +647,6 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc, if (!st) dma_desc->dma_status = DMA_ERROR; } - return; } static void tegra_dma_tasklet(unsigned long data) @@ -720,7 +738,6 @@ static void tegra_dma_issue_pending(struct dma_chan *dc) } end: spin_unlock_irqrestore(&tdc->lock, flags); - return; } static int tegra_dma_terminate_all(struct dma_chan *dc) @@ -932,7 +949,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( struct tegra_dma_sg_req *sg_req = NULL; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!tdc->config_init) { dev_err(tdc2dev(tdc), "dma channel is not configured\n"); @@ -943,9 +959,8 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; INIT_LIST_HEAD(&req_list); @@ -1048,7 +1063,6 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( dma_addr_t mem = buf_addr; u32 burst_size; enum dma_slave_buswidth slave_bw; - int ret; if (!buf_len || !period_len) { dev_err(tdc2dev(tdc), "Invalid buffer/period len\n"); @@ -1087,12 +1101,10 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( return NULL; } - ret = get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, - &burst_size, &slave_bw); - if (ret < 0) + if (get_transfer_param(tdc, direction, &apb_ptr, &apb_seq, &csr, + &burst_size, &slave_bw) < 0) return NULL; - ahb_seq = TEGRA_APBDMA_AHBSEQ_INTR_ENB; ahb_seq |= TEGRA_APBDMA_AHBSEQ_WRAP_NONE << TEGRA_APBDMA_AHBSEQ_WRAP_SHIFT; @@ -1136,7 +1148,6 @@ static struct 
dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic( sg_req->ch_regs.apb_seq = apb_seq; sg_req->ch_regs.ahb_seq = ahb_seq; sg_req->configured = false; - sg_req->half_done = false; sg_req->last_sg = false; sg_req->dma_desc = dma_desc; sg_req->req_len = len; @@ -1377,8 +1388,9 @@ static int tegra_dma_probe(struct platform_device *pdev) for (i = 0; i < cdata->nr_channels; i++) { struct tegra_dma_channel *tdc = &tdma->channels[i]; - tdc->chan_base_offset = TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + - i * cdata->channel_reg_size; + tdc->chan_addr = tdma->base_addr + + TEGRA_APBDMA_CHANNEL_BASE_ADD_OFFSET + + (i * cdata->channel_reg_size); res = platform_get_resource(pdev, IORESOURCE_IRQ, i); if (!res) { @@ -1418,6 +1430,7 @@ static int tegra_dma_probe(struct platform_device *pdev) dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); + tdma->global_pause_count = 0; tdma->dma_dev.dev = &pdev->dev; tdma->dma_dev.device_alloc_chan_resources = tegra_dma_alloc_chan_resources; diff --git a/kernel/drivers/dma/ti-dma-crossbar.c b/kernel/drivers/dma/ti-dma-crossbar.c new file mode 100644 index 000000000..a415edbe6 --- /dev/null +++ b/kernel/drivers/dma/ti-dma-crossbar.c @@ -0,0 +1,400 @@ +/* + * Copyright (C) 2015 Texas Instruments Incorporated - http://www.ti.com + * Author: Peter Ujfalusi <peter.ujfalusi@ti.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/io.h> +#include <linux/idr.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/of_dma.h> + +#define TI_XBAR_DRA7 0 +#define TI_XBAR_AM335X 1 + +static const struct of_device_id ti_dma_xbar_match[] = { + { + .compatible = "ti,dra7-dma-crossbar", + .data = (void *)TI_XBAR_DRA7, + }, + { + .compatible = "ti,am335x-edma-crossbar", + .data = (void *)TI_XBAR_AM335X, + }, + {}, +}; + +/* Crossbar on AM335x/AM437x family */ +#define TI_AM335X_XBAR_LINES 64 + +struct ti_am335x_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + + u32 xbar_events; /* maximum number of events to select in xbar */ + u32 dma_requests; /* number of DMA requests on eDMA */ +}; + +struct ti_am335x_xbar_map { + u16 dma_line; + u16 mux_val; +}; + +static inline void ti_am335x_xbar_write(void __iomem *iomem, int event, u16 val) +{ + writeb_relaxed(val & 0x1f, iomem + event); +} + +static void ti_am335x_xbar_free(struct device *dev, void *route_data) +{ + struct ti_am335x_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_am335x_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR event %u on channel %u\n", + map->mux_val, map->dma_line); + + ti_am335x_xbar_write(xbar->iomem, map->dma_line, 0); + kfree(map); +} + +static void *ti_am335x_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_am335x_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_am335x_xbar_map *map; + + if (dma_spec->args_count != 3) + return ERR_PTR(-EINVAL); + + if (dma_spec->args[2] >= xbar->xbar_events) { + dev_err(&pdev->dev, "Invalid XBAR event number: %d\n", + dma_spec->args[2]); + return ERR_PTR(-EINVAL); + } + + if (dma_spec->args[0] >= xbar->dma_requests) { + dev_err(&pdev->dev, "Invalid DMA request line number: %d\n", + 
dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->dma_line = (u16)dma_spec->args[0]; + map->mux_val = (u16)dma_spec->args[2]; + + dma_spec->args[2] = 0; + dma_spec->args_count = 2; + + dev_dbg(&pdev->dev, "Mapping XBAR event%u to DMA%u\n", + map->mux_val, map->dma_line); + + ti_am335x_xbar_write(xbar->iomem, map->dma_line, map->mux_val); + + return map; +} + +static const struct of_device_id ti_am335x_master_match[] = { + { .compatible = "ti,edma3-tpcc", }, + {}, +}; + +static int ti_am335x_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct ti_am335x_xbar_data *xbar; + struct resource *res; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + match = of_match_node(ti_am335x_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_AM335X_XBAR_LINES); + xbar->dma_requests = TI_AM335X_XBAR_LINES; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_events)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_AM335X_XBAR_LINES); + xbar->xbar_events = TI_AM335X_XBAR_LINES; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iomem = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_am335x_xbar_free; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_am335x_xbar_write(xbar->iomem, i, 0); + + ret = of_dma_router_register(node, ti_am335x_xbar_route_allocate, + &xbar->dmarouter); + + return ret; +} + +/* Crossbar on DRA7xx family */ +#define TI_DRA7_XBAR_OUTPUTS 127 +#define TI_DRA7_XBAR_INPUTS 256 + +#define TI_XBAR_EDMA_OFFSET 0 +#define TI_XBAR_SDMA_OFFSET 1 + +struct ti_dra7_xbar_data { + void __iomem *iomem; + + struct dma_router dmarouter; + struct idr map_idr; + + u16 safe_val; /* Value to rest the crossbar lines */ + u32 xbar_requests; /* number of DMA requests connected to XBAR */ + u32 dma_requests; /* number of DMA requests forwarded to DMA */ + u32 dma_offset; +}; + +struct ti_dra7_xbar_map { + u16 xbar_in; + int xbar_out; +}; + +static inline void ti_dra7_xbar_write(void __iomem *iomem, int xbar, u16 val) +{ + writew_relaxed(val, iomem + (xbar * 2)); +} + +static void ti_dra7_xbar_free(struct device *dev, void *route_data) +{ + struct ti_dra7_xbar_data *xbar = dev_get_drvdata(dev); + struct ti_dra7_xbar_map *map = route_data; + + dev_dbg(dev, "Unmapping XBAR%u (was routed to %d)\n", + map->xbar_in, map->xbar_out); + + ti_dra7_xbar_write(xbar->iomem, 
map->xbar_out, xbar->safe_val); + idr_remove(&xbar->map_idr, map->xbar_out); + kfree(map); +} + +static void *ti_dra7_xbar_route_allocate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct platform_device *pdev = of_find_device_by_node(ofdma->of_node); + struct ti_dra7_xbar_data *xbar = platform_get_drvdata(pdev); + struct ti_dra7_xbar_map *map; + + if (dma_spec->args[0] >= xbar->xbar_requests) { + dev_err(&pdev->dev, "Invalid XBAR request number: %d\n", + dma_spec->args[0]); + return ERR_PTR(-EINVAL); + } + + /* The of_node_put() will be done in the core for the node */ + dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0); + if (!dma_spec->np) { + dev_err(&pdev->dev, "Can't get DMA master\n"); + return ERR_PTR(-EINVAL); + } + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) { + of_node_put(dma_spec->np); + return ERR_PTR(-ENOMEM); + } + + map->xbar_out = idr_alloc(&xbar->map_idr, NULL, 0, xbar->dma_requests, + GFP_KERNEL); + map->xbar_in = (u16)dma_spec->args[0]; + + dma_spec->args[0] = map->xbar_out + xbar->dma_offset; + + dev_dbg(&pdev->dev, "Mapping XBAR%u to DMA%d\n", + map->xbar_in, map->xbar_out); + + ti_dra7_xbar_write(xbar->iomem, map->xbar_out, map->xbar_in); + + return map; +} + +static const struct of_device_id ti_dra7_master_match[] = { + { + .compatible = "ti,omap4430-sdma", + .data = (void *)TI_XBAR_SDMA_OFFSET, + }, + { + .compatible = "ti,edma3", + .data = (void *)TI_XBAR_EDMA_OFFSET, + }, + {}, +}; + +static int ti_dra7_xbar_probe(struct platform_device *pdev) +{ + struct device_node *node = pdev->dev.of_node; + const struct of_device_id *match; + struct device_node *dma_node; + struct ti_dra7_xbar_data *xbar; + struct resource *res; + u32 safe_val; + void __iomem *iomem; + int i, ret; + + if (!node) + return -ENODEV; + + xbar = devm_kzalloc(&pdev->dev, sizeof(*xbar), GFP_KERNEL); + if (!xbar) + return -ENOMEM; + + idr_init(&xbar->map_idr); + + dma_node = of_parse_phandle(node, "dma-masters", 0); + if (!dma_node) { + dev_err(&pdev->dev, "Can't get DMA master node\n"); + return -ENODEV; + } + + match = of_match_node(ti_dra7_master_match, dma_node); + if (!match) { + dev_err(&pdev->dev, "DMA master is not supported\n"); + return -EINVAL; + } + + if (of_property_read_u32(dma_node, "dma-requests", + &xbar->dma_requests)) { + dev_info(&pdev->dev, + "Missing XBAR output information, using %u.\n", + TI_DRA7_XBAR_OUTPUTS); + xbar->dma_requests = TI_DRA7_XBAR_OUTPUTS; + } + of_node_put(dma_node); + + if (of_property_read_u32(node, "dma-requests", &xbar->xbar_requests)) { + dev_info(&pdev->dev, + "Missing XBAR input information, using %u.\n", + TI_DRA7_XBAR_INPUTS); + xbar->xbar_requests = TI_DRA7_XBAR_INPUTS; + } + + if (!of_property_read_u32(node, "ti,dma-safe-map", &safe_val)) + xbar->safe_val = (u16)safe_val; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + iomem = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(iomem)) + return PTR_ERR(iomem); + + xbar->iomem = iomem; + + xbar->dmarouter.dev = &pdev->dev; + xbar->dmarouter.route_free = ti_dra7_xbar_free; + xbar->dma_offset = (u32)match->data; + + platform_set_drvdata(pdev, xbar); + + /* Reset the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dra7_xbar_write(xbar->iomem, i, xbar->safe_val); + + ret = of_dma_router_register(node, ti_dra7_xbar_route_allocate, + &xbar->dmarouter); + if (ret) { + /* Restore the defaults for the crossbar */ + for (i = 0; i < xbar->dma_requests; i++) + ti_dra7_xbar_write(xbar->iomem, i, i); + } + + return ret; +} + +static int 
ti_dma_xbar_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + int ret; + + match = of_match_node(ti_dma_xbar_match, pdev->dev.of_node); + if (unlikely(!match)) + return -EINVAL; + + switch ((u32)match->data) { + case TI_XBAR_DRA7: + ret = ti_dra7_xbar_probe(pdev); + break; + case TI_XBAR_AM335X: + ret = ti_am335x_xbar_probe(pdev); + break; + default: + dev_err(&pdev->dev, "Unsupported crossbar\n"); + ret = -ENODEV; + break; + } + + return ret; +} + +static struct platform_driver ti_dma_xbar_driver = { + .driver = { + .name = "ti-dma-crossbar", + .of_match_table = of_match_ptr(ti_dma_xbar_match), + }, + .probe = ti_dma_xbar_probe, +}; + +int omap_dmaxbar_init(void) +{ + return platform_driver_register(&ti_dma_xbar_driver); +} +arch_initcall(omap_dmaxbar_init); diff --git a/kernel/drivers/dma/timb_dma.c b/kernel/drivers/dma/timb_dma.c index c4c3d93fd..559cd4073 100644 --- a/kernel/drivers/dma/timb_dma.c +++ b/kernel/drivers/dma/timb_dma.c @@ -10,10 +10,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Supports: diff --git a/kernel/drivers/dma/virt-dma.h b/kernel/drivers/dma/virt-dma.h index 181b95267..2fa47745a 100644 --- a/kernel/drivers/dma/virt-dma.h +++ b/kernel/drivers/dma/virt-dma.h @@ -47,9 +47,9 @@ struct virt_dma_desc *vchan_find_desc(struct virt_dma_chan *, dma_cookie_t); /** * vchan_tx_prep - prepare a descriptor - * vc: virtual channel allocating this descriptor - * vd: virtual descriptor to prepare - * tx_flags: flags argument passed in to prepare function + * @vc: virtual channel allocating this descriptor + * @vd: virtual descriptor to prepare + * @tx_flags: flags argument passed in to prepare function */ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan *vc, struct virt_dma_desc *vd, unsigned long tx_flags) @@ -65,7 +65,7 @@ static inline struct dma_async_tx_descriptor *vchan_tx_prep(struct virt_dma_chan /** * vchan_issue_pending - move submitted descriptors to issued list - * vc: virtual channel to update + * @vc: virtual channel to update * * vc.lock must be held by caller */ @@ -77,7 +77,7 @@ static inline bool vchan_issue_pending(struct virt_dma_chan *vc) /** * vchan_cookie_complete - report completion of a descriptor - * vd: virtual descriptor to update + * @vd: virtual descriptor to update * * vc.lock must be held by caller */ @@ -97,7 +97,7 @@ static inline void vchan_cookie_complete(struct virt_dma_desc *vd) /** * vchan_cyclic_callback - report the completion of a period - * vd: virtual descriptor + * @vd: virtual descriptor */ static inline void vchan_cyclic_callback(struct virt_dma_desc *vd) { @@ -109,7 +109,7 @@ static inline void vchan_cyclic_callback(struct virt_dma_desc *vd) /** * vchan_next_desc - peek at the next descriptor to be processed - * vc: virtual channel to obtain descriptor from + * @vc: virtual channel to obtain descriptor from * * vc.lock must be held by caller */ @@ -123,8 +123,8 @@ static inline struct virt_dma_desc *vchan_next_desc(struct virt_dma_chan *vc) /** * vchan_get_all_descriptors - obtain all submitted and issued descriptors - * vc: virtual channel to get descriptors from - * head: list of descriptors found + * @vc: virtual channel to get descriptors 
from + * @head: list of descriptors found * * vc.lock must be held by caller * diff --git a/kernel/drivers/dma/xgene-dma.c b/kernel/drivers/dma/xgene-dma.c index f52e37502..9cb93c5b6 100755..100644 --- a/kernel/drivers/dma/xgene-dma.c +++ b/kernel/drivers/dma/xgene-dma.c @@ -21,6 +21,7 @@ * NOTE: PM support is currently not available. */ +#include <linux/acpi.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/dma-mapping.h> @@ -28,6 +29,7 @@ #include <linux/dmapool.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/irq.h> #include <linux/module.h> #include <linux/of_device.h> @@ -58,7 +60,6 @@ #define XGENE_DMA_RING_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_RING_BLK_MEM_RDY 0xD074 #define XGENE_DMA_RING_BLK_MEM_RDY_VAL 0xFFFFFFFF -#define XGENE_DMA_RING_DESC_CNT(v) (((v) & 0x0001FFFE) >> 1) #define XGENE_DMA_RING_ID_GET(owner, num) (((owner) << 6) | (num)) #define XGENE_DMA_RING_DST_ID(v) ((1 << 10) | (v)) #define XGENE_DMA_RING_CMD_OFFSET 0x2C @@ -111,6 +112,7 @@ #define XGENE_DMA_MEM_RAM_SHUTDOWN 0xD070 #define XGENE_DMA_BLK_MEM_RDY 0xD074 #define XGENE_DMA_BLK_MEM_RDY_VAL 0xFFFFFFFF +#define XGENE_DMA_RING_CMD_SM_OFFSET 0x8000 /* X-Gene SoC EFUSE csr register and bit defination */ #define XGENE_SOC_JTAG1_SHADOW 0x18 @@ -124,32 +126,8 @@ #define XGENE_DMA_DESC_ELERR_POS 46 #define XGENE_DMA_DESC_RTYPE_POS 56 #define XGENE_DMA_DESC_LERR_POS 60 -#define XGENE_DMA_DESC_FLYBY_POS 4 #define XGENE_DMA_DESC_BUFLEN_POS 48 #define XGENE_DMA_DESC_HOENQ_NUM_POS 48 - -#define XGENE_DMA_DESC_NV_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_NV_BIT) -#define XGENE_DMA_DESC_IN_SET(m) \ - (((u64 *)(m))[0] |= XGENE_DMA_DESC_IN_BIT) -#define XGENE_DMA_DESC_RTYPE_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_RTYPE_POS)) -#define XGENE_DMA_DESC_BUFADDR_SET(m, v) \ - (((u64 *)(m))[0] |= (v)) -#define XGENE_DMA_DESC_BUFLEN_SET(m, v) \ - (((u64 *)(m))[0] |= ((u64)(v) << XGENE_DMA_DESC_BUFLEN_POS)) -#define XGENE_DMA_DESC_C_SET(m) \ - (((u64 *)(m))[1] |= XGENE_DMA_DESC_C_BIT) -#define XGENE_DMA_DESC_FLYBY_SET(m, v) \ - (((u64 *)(m))[2] |= ((v) << XGENE_DMA_DESC_FLYBY_POS)) -#define XGENE_DMA_DESC_MULTI_SET(m, v, i) \ - (((u64 *)(m))[2] |= ((u64)(v) << (((i) + 1) * 8))) -#define XGENE_DMA_DESC_DR_SET(m) \ - (((u64 *)(m))[2] |= XGENE_DMA_DESC_DR_BIT) -#define XGENE_DMA_DESC_DST_ADDR_SET(m, v) \ - (((u64 *)(m))[3] |= (v)) -#define XGENE_DMA_DESC_H0ENQ_NUM_SET(m, v) \ - (((u64 *)(m))[3] |= ((u64)(v) << XGENE_DMA_DESC_HOENQ_NUM_POS)) #define XGENE_DMA_DESC_ELERR_RD(m) \ (((m) >> XGENE_DMA_DESC_ELERR_POS) & 0x3) #define XGENE_DMA_DESC_LERR_RD(m) \ @@ -158,14 +136,7 @@ (((elerr) << 4) | (lerr)) /* X-Gene DMA descriptor empty s/w signature */ -#define XGENE_DMA_DESC_EMPTY_INDEX 0 #define XGENE_DMA_DESC_EMPTY_SIGNATURE ~0ULL -#define XGENE_DMA_DESC_SET_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] = \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) -#define XGENE_DMA_DESC_IS_EMPTY(m) \ - (((u64 *)(m))[XGENE_DMA_DESC_EMPTY_INDEX] == \ - XGENE_DMA_DESC_EMPTY_SIGNATURE) /* X-Gene DMA configurable parameters defines */ #define XGENE_DMA_RING_NUM 512 @@ -181,10 +152,9 @@ #define XGENE_DMA_PQ_CHANNEL 1 #define XGENE_DMA_MAX_BYTE_CNT 0x4000 /* 16 KB */ #define XGENE_DMA_MAX_64B_DESC_BYTE_CNT 0x14000 /* 80 KB */ -#define XGENE_DMA_XOR_ALIGNMENT 6 /* 64 Bytes */ #define XGENE_DMA_MAX_XOR_SRC 5 #define XGENE_DMA_16K_BUFFER_LEN_CODE 0x0 -#define XGENE_DMA_INVALID_LEN_CODE 0x7800 +#define XGENE_DMA_INVALID_LEN_CODE 0x7800000000000000ULL /* X-Gene DMA descriptor error codes */ #define 
ERR_DESC_AXI 0x01 @@ -214,10 +184,10 @@ #define ERR_DESC_SRC_INT 0xB /* X-Gene DMA flyby operation code */ -#define FLYBY_2SRC_XOR 0x8 -#define FLYBY_3SRC_XOR 0x9 -#define FLYBY_4SRC_XOR 0xA -#define FLYBY_5SRC_XOR 0xB +#define FLYBY_2SRC_XOR 0x80 +#define FLYBY_3SRC_XOR 0x90 +#define FLYBY_4SRC_XOR 0xA0 +#define FLYBY_5SRC_XOR 0xB0 /* X-Gene DMA SW descriptor flags */ #define XGENE_DMA_FLAG_64B_DESC BIT(0) @@ -238,10 +208,10 @@ dev_err(chan->dev, "%s: " fmt, chan->name, ##arg) struct xgene_dma_desc_hw { - u64 m0; - u64 m1; - u64 m2; - u64 m3; + __le64 m0; + __le64 m1; + __le64 m2; + __le64 m3; }; enum xgene_dma_ring_cfgsize { @@ -388,18 +358,11 @@ static bool is_pq_enabled(struct xgene_dma *pdma) return !(val & XGENE_DMA_PQ_DISABLE_MASK); } -static void xgene_dma_cpu_to_le64(u64 *desc, int count) -{ - int i; - - for (i = 0; i < count; i++) - desc[i] = cpu_to_le64(desc[i]); -} - -static u16 xgene_dma_encode_len(u32 len) +static u64 xgene_dma_encode_len(size_t len) { return (len < XGENE_DMA_MAX_BYTE_CNT) ? - len : XGENE_DMA_16K_BUFFER_LEN_CODE; + ((u64)len << XGENE_DMA_DESC_BUFLEN_POS) : + XGENE_DMA_16K_BUFFER_LEN_CODE; } static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) @@ -416,42 +379,50 @@ static u8 xgene_dma_encode_xor_flyby(u32 src_cnt) return flyby_type[src_cnt]; } -static u32 xgene_dma_ring_desc_cnt(struct xgene_dma_ring *ring) -{ - u32 __iomem *cmd_base = ring->cmd_base; - u32 ring_state = ioread32(&cmd_base[1]); - - return XGENE_DMA_RING_DESC_CNT(ring_state); -} - -static void xgene_dma_set_src_buffer(void *ext8, size_t *len, +static void xgene_dma_set_src_buffer(__le64 *ext8, size_t *len, dma_addr_t *paddr) { size_t nbytes = (*len < XGENE_DMA_MAX_BYTE_CNT) ? *len : XGENE_DMA_MAX_BYTE_CNT; - XGENE_DMA_DESC_BUFADDR_SET(ext8, *paddr); - XGENE_DMA_DESC_BUFLEN_SET(ext8, xgene_dma_encode_len(nbytes)); + *ext8 |= cpu_to_le64(*paddr); + *ext8 |= cpu_to_le64(xgene_dma_encode_len(nbytes)); *len -= nbytes; *paddr += nbytes; } -static void xgene_dma_invalidate_buffer(void *ext8) +static void xgene_dma_invalidate_buffer(__le64 *ext8) { - XGENE_DMA_DESC_BUFLEN_SET(ext8, XGENE_DMA_INVALID_LEN_CODE); + *ext8 |= cpu_to_le64(XGENE_DMA_INVALID_LEN_CODE); } -static void *xgene_dma_lookup_ext8(u64 *desc, int idx) +static __le64 *xgene_dma_lookup_ext8(struct xgene_dma_desc_hw *desc, int idx) { - return (idx % 2) ? 
(desc + idx - 1) : (desc + idx + 1); + switch (idx) { + case 0: + return &desc->m1; + case 1: + return &desc->m0; + case 2: + return &desc->m3; + case 3: + return &desc->m2; + default: + pr_err("Invalid dma descriptor index\n"); + } + + return NULL; } -static void xgene_dma_init_desc(void *desc, u16 dst_ring_num) +static void xgene_dma_init_desc(struct xgene_dma_desc_hw *desc, + u16 dst_ring_num) { - XGENE_DMA_DESC_C_SET(desc); /* Coherent IO */ - XGENE_DMA_DESC_IN_SET(desc); - XGENE_DMA_DESC_H0ENQ_NUM_SET(desc, dst_ring_num); - XGENE_DMA_DESC_RTYPE_SET(desc, XGENE_DMA_RING_OWNER_DMA); + desc->m0 |= cpu_to_le64(XGENE_DMA_DESC_IN_BIT); + desc->m0 |= cpu_to_le64((u64)XGENE_DMA_RING_OWNER_DMA << + XGENE_DMA_DESC_RTYPE_POS); + desc->m1 |= cpu_to_le64(XGENE_DMA_DESC_C_BIT); + desc->m3 |= cpu_to_le64((u64)dst_ring_num << + XGENE_DMA_DESC_HOENQ_NUM_POS); } static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, @@ -459,7 +430,7 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, dma_addr_t dst, dma_addr_t src, size_t len) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; int i; /* Get 1st descriptor */ @@ -467,23 +438,21 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(dst); /* Set 1st source address */ - xgene_dma_set_src_buffer(desc1 + 8, &len, &src); + xgene_dma_set_src_buffer(&desc1->m1, &len, &src); - if (len <= 0) { - desc2 = NULL; - goto skip_additional_src; - } + if (!len) + return; /* * We need to split this source buffer, * and need to use 2nd descriptor */ desc2 = &desc_sw->desc2; - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set 2nd to 5th source address */ for (i = 0; i < 4 && len; i++) @@ -496,12 +465,6 @@ static void xgene_dma_prep_cpy_desc(struct xgene_dma_chan *chan, /* Updated flag that we have prepared 64B descriptor */ desc_sw->flags |= XGENE_DMA_FLAG_64B_DESC; - -skip_additional_src: - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - if (desc2) - xgene_dma_cpu_to_le64(desc2, 4); } static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, @@ -510,7 +473,7 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, u32 src_cnt, size_t *nbytes, const u8 *scf) { - void *desc1, *desc2; + struct xgene_dma_desc_hw *desc1, *desc2; size_t len = *nbytes; int i; @@ -521,28 +484,24 @@ static void xgene_dma_prep_xor_desc(struct xgene_dma_chan *chan, xgene_dma_init_desc(desc1, chan->tx_ring.dst_ring_num); /* Set destination address */ - XGENE_DMA_DESC_DR_SET(desc1); - XGENE_DMA_DESC_DST_ADDR_SET(desc1, *dst); + desc1->m2 |= cpu_to_le64(XGENE_DMA_DESC_DR_BIT); + desc1->m3 |= cpu_to_le64(*dst); /* We have multiple source addresses, so need to set NV bit*/ - XGENE_DMA_DESC_NV_SET(desc1); + desc1->m0 |= cpu_to_le64(XGENE_DMA_DESC_NV_BIT); /* Set flyby opcode */ - XGENE_DMA_DESC_FLYBY_SET(desc1, xgene_dma_encode_xor_flyby(src_cnt)); + desc1->m2 |= cpu_to_le64(xgene_dma_encode_xor_flyby(src_cnt)); /* Set 1st to 5th source addresses */ for (i = 0; i < src_cnt; i++) { len = *nbytes; - xgene_dma_set_src_buffer((i == 0) ? (desc1 + 8) : + xgene_dma_set_src_buffer((i == 0) ? 
&desc1->m1 : xgene_dma_lookup_ext8(desc2, i - 1), &len, &src[i]); - XGENE_DMA_DESC_MULTI_SET(desc1, scf[i], i); + desc1->m2 |= cpu_to_le64((scf[i] << ((i + 1) * 8))); } - /* Hardware stores descriptor in little endian format */ - xgene_dma_cpu_to_le64(desc1, 4); - xgene_dma_cpu_to_le64(desc2, 4); - /* Update meta data */ *nbytes = len; *dst += XGENE_DMA_MAX_BYTE_CNT; @@ -589,14 +548,12 @@ static struct xgene_dma_desc_sw *xgene_dma_alloc_descriptor( struct xgene_dma_desc_sw *desc; dma_addr_t phys; - desc = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &phys); + desc = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys); if (!desc) { chan_err(chan, "Failed to allocate LDs\n"); return NULL; } - memset(desc, 0, sizeof(*desc)); - INIT_LIST_HEAD(&desc->tx_list); desc->tx.phys = phys; desc->tx.tx_submit = xgene_dma_tx_submit; @@ -692,15 +649,12 @@ static void xgene_dma_clean_running_descriptor(struct xgene_dma_chan *chan, dma_pool_free(chan->desc_pool, desc, desc->tx.phys); } -static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, - struct xgene_dma_desc_sw *desc_sw) +static void xgene_chan_xfer_request(struct xgene_dma_chan *chan, + struct xgene_dma_desc_sw *desc_sw) { + struct xgene_dma_ring *ring = &chan->tx_ring; struct xgene_dma_desc_hw *desc_hw; - /* Check if can push more descriptor to hw for execution */ - if (xgene_dma_ring_desc_cnt(ring) > (ring->slots - 2)) - return -EBUSY; - /* Get hw descriptor from DMA tx ring */ desc_hw = &ring->desc_hw[ring->head]; @@ -727,23 +681,24 @@ static int xgene_chan_xfer_request(struct xgene_dma_ring *ring, memcpy(desc_hw, &desc_sw->desc2, sizeof(*desc_hw)); } + /* Increment the pending transaction count */ + chan->pending += ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + /* Notify the hw that we have descriptor ready for execution */ iowrite32((desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? 
2 : 1, ring->cmd); - - return 0; } /** * xgene_chan_xfer_ld_pending - push any pending transactions to hw * @chan : X-Gene DMA channel * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) { struct xgene_dma_desc_sw *desc_sw, *_desc_sw; - int ret; /* * If the list of pending descriptors is empty, then we @@ -768,18 +723,13 @@ static void xgene_chan_xfer_ld_pending(struct xgene_dma_chan *chan) if (chan->pending >= chan->max_outstanding) return; - ret = xgene_chan_xfer_request(&chan->tx_ring, desc_sw); - if (ret) - return; + xgene_chan_xfer_request(chan, desc_sw); /* * Delete this element from ld pending queue and append it to * ld running queue */ list_move_tail(&desc_sw->node, &chan->ld_running); - - /* Increment the pending transaction count */ - chan->pending++; } } @@ -797,18 +747,24 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) struct xgene_dma_ring *ring = &chan->rx_ring; struct xgene_dma_desc_sw *desc_sw, *_desc_sw; struct xgene_dma_desc_hw *desc_hw; + struct list_head ld_completed; u8 status; + INIT_LIST_HEAD(&ld_completed); + + spin_lock_bh(&chan->lock); + /* Clean already completed and acked descriptors */ xgene_dma_clean_completed_descriptor(chan); - /* Run the callback for each descriptor, in order */ + /* Move all completed descriptors to ld completed queue, in order */ list_for_each_entry_safe(desc_sw, _desc_sw, &chan->ld_running, node) { /* Get subsequent hw descriptor from DMA rx ring */ desc_hw = &ring->desc_hw[ring->head]; /* Check if this descriptor has been completed */ - if (unlikely(XGENE_DMA_DESC_IS_EMPTY(desc_hw))) + if (unlikely(le64_to_cpu(desc_hw->m0) == + XGENE_DMA_DESC_EMPTY_SIGNATURE)) break; if (++ring->head == ring->slots) @@ -842,17 +798,20 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) iowrite32(-1, ring->cmd); /* Mark this hw descriptor as processed */ - XGENE_DMA_DESC_SET_EMPTY(desc_hw); - - xgene_dma_run_tx_complete_actions(chan, desc_sw); - - xgene_dma_clean_running_descriptor(chan, desc_sw); + desc_hw->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); /* * Decrement the pending transaction count * as we have processed one */ - chan->pending--; + chan->pending -= ((desc_sw->flags & + XGENE_DMA_FLAG_64B_DESC) ? 2 : 1); + + /* + * Delete this node from ld running queue and append it to + * ld completed queue for further processing + */ + list_move_tail(&desc_sw->node, &ld_completed); } /* @@ -861,6 +820,14 @@ static void xgene_dma_cleanup_descriptors(struct xgene_dma_chan *chan) * ahead and free the descriptors below. 
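A minimal sketch (helper name invented) of the ring-slot accounting chan->pending is switched to here, where a 64 B descriptor occupies two slots:

/* Illustrative only: slots consumed by one software descriptor. */
static inline u32 xgene_dma_desc_slots(struct xgene_dma_desc_sw *desc_sw)
{
	return (desc_sw->flags & XGENE_DMA_FLAG_64B_DESC) ? 2 : 1;
}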
*/ xgene_chan_xfer_ld_pending(chan); + + spin_unlock_bh(&chan->lock); + + /* Run the callback for each descriptor, in order */ + list_for_each_entry_safe(desc_sw, _desc_sw, &ld_completed, node) { + xgene_dma_run_tx_complete_actions(chan, desc_sw); + xgene_dma_clean_running_descriptor(chan, desc_sw); + } } static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) @@ -889,7 +856,7 @@ static int xgene_dma_alloc_chan_resources(struct dma_chan *dchan) * @chan: X-Gene DMA channel * @list: the list to free * - * LOCKING: must hold chan->desc_lock + * LOCKING: must hold chan->lock */ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, struct list_head *list) @@ -900,15 +867,6 @@ static void xgene_dma_free_desc_list(struct xgene_dma_chan *chan, xgene_dma_clean_descriptor(chan, desc); } -static void xgene_dma_free_tx_desc_list(struct xgene_dma_chan *chan, - struct list_head *list) -{ - struct xgene_dma_desc_sw *desc, *_desc; - - list_for_each_entry_safe(desc, _desc, list, node) - xgene_dma_clean_descriptor(chan, desc); -} - static void xgene_dma_free_chan_resources(struct dma_chan *dchan) { struct xgene_dma_chan *chan = to_dma_chan(dchan); @@ -918,11 +876,11 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) if (!chan->desc_pool) return; - spin_lock_bh(&chan->lock); - /* Process all running descriptor */ xgene_dma_cleanup_descriptors(chan); + spin_lock_bh(&chan->lock); + /* Clean all link descriptor queues */ xgene_dma_free_desc_list(chan, &chan->ld_pending); xgene_dma_free_desc_list(chan, &chan->ld_running); @@ -935,60 +893,6 @@ static void xgene_dma_free_chan_resources(struct dma_chan *dchan) chan->desc_pool = NULL; } -static struct dma_async_tx_descriptor *xgene_dma_prep_memcpy( - struct dma_chan *dchan, dma_addr_t dst, dma_addr_t src, - size_t len, unsigned long flags) -{ - struct xgene_dma_desc_sw *first = NULL, *new; - struct xgene_dma_chan *chan; - size_t copy; - - if (unlikely(!dchan || !len)) - return NULL; - - chan = to_dma_chan(dchan); - - do { - /* Allocate the link descriptor from DMA pool */ - new = xgene_dma_alloc_descriptor(chan); - if (!new) - goto fail; - - /* Create the largest transaction possible */ - copy = min_t(size_t, len, XGENE_DMA_MAX_64B_DESC_BYTE_CNT); - - /* Prepare DMA descriptor */ - xgene_dma_prep_cpy_desc(chan, new, dst, src, copy); - - if (!first) - first = new; - - new->tx.cookie = 0; - async_tx_ack(&new->tx); - - /* Update metadata */ - len -= copy; - dst += copy; - src += copy; - - /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); - } while (len); - - new->tx.flags = flags; /* client is in control of this ack */ - new->tx.cookie = -EBUSY; - list_splice(&first->tx_list, &new->tx_list); - - return &new->tx; - -fail: - if (!first) - return NULL; - - xgene_dma_free_tx_desc_list(chan, &first->tx_list); - return NULL; -} - static struct dma_async_tx_descriptor *xgene_dma_prep_sg( struct dma_chan *dchan, struct scatterlist *dst_sg, u32 dst_nents, struct scatterlist *src_sg, @@ -1093,7 +997,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1141,7 +1045,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ -1218,7 +1122,7 @@ fail: if (!first) return NULL; - xgene_dma_free_tx_desc_list(chan, &first->tx_list); + xgene_dma_free_desc_list(chan, &first->tx_list); return NULL; } @@ 
-1242,15 +1146,11 @@ static void xgene_dma_tasklet_cb(unsigned long data) { struct xgene_dma_chan *chan = (struct xgene_dma_chan *)data; - spin_lock_bh(&chan->lock); - /* Run all cleanup for descriptors which have been completed */ xgene_dma_cleanup_descriptors(chan); /* Re-enable DMA channel IRQ */ enable_irq(chan->rx_irq); - - spin_unlock_bh(&chan->lock); } static irqreturn_t xgene_dma_chan_ring_isr(int irq, void *id) @@ -1316,7 +1216,6 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) { void *ring_cfg = ring->state; u64 addr = ring->desc_paddr; - void *desc; u32 i, val; ring->slots = ring->size / XGENE_DMA_RING_WQ_DESC_SIZE; @@ -1358,8 +1257,10 @@ static void xgene_dma_setup_ring(struct xgene_dma_ring *ring) /* Set empty signature to DMA Rx ring descriptors */ for (i = 0; i < ring->slots; i++) { + struct xgene_dma_desc_hw *desc; + desc = &ring->desc_hw[i]; - XGENE_DMA_DESC_SET_EMPTY(desc); + desc->m0 = cpu_to_le64(XGENE_DMA_DESC_EMPTY_SIGNATURE); } /* Enable DMA Rx ring interrupt */ @@ -1450,15 +1351,18 @@ static int xgene_dma_create_ring_one(struct xgene_dma_chan *chan, struct xgene_dma_ring *ring, enum xgene_dma_ring_cfgsize cfgsize) { + int ret; + /* Setup DMA ring descriptor variables */ ring->pdma = chan->pdma; ring->cfgsize = cfgsize; ring->num = chan->pdma->ring_num++; ring->id = XGENE_DMA_RING_ID_GET(ring->owner, ring->buf_num); - ring->size = xgene_dma_get_ring_size(chan, cfgsize); - if (ring->size <= 0) - return ring->size; + ret = xgene_dma_get_ring_size(chan, cfgsize); + if (ret <= 0) + return ret; + ring->size = ret; /* Allocate memory for DMA ring descriptor */ ring->desc_vaddr = dma_zalloc_coherent(chan->dev, ring->size, @@ -1511,7 +1415,7 @@ static int xgene_dma_create_chan_rings(struct xgene_dma_chan *chan) tx_ring->id, tx_ring->num, tx_ring->desc_vaddr); /* Set the max outstanding request possible to this channel */ - chan->max_outstanding = rx_ring->slots; + chan->max_outstanding = tx_ring->slots; return ret; } @@ -1707,6 +1611,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma) /* Register DMA channel rx irq */ for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { chan = &pdma->chan[i]; + irq_set_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); ret = devm_request_irq(chan->dev, chan->rx_irq, xgene_dma_chan_ring_isr, 0, chan->name, chan); @@ -1717,6 +1622,7 @@ static int xgene_dma_request_irqs(struct xgene_dma *pdma) for (j = 0; j < i; j++) { chan = &pdma->chan[i]; + irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); devm_free_irq(chan->dev, chan->rx_irq, chan); } @@ -1737,6 +1643,7 @@ static void xgene_dma_free_irqs(struct xgene_dma *pdma) for (i = 0; i < XGENE_DMA_MAX_CHANNEL; i++) { chan = &pdma->chan[i]; + irq_clear_status_flags(chan->rx_irq, IRQ_DISABLE_UNLAZY); devm_free_irq(chan->dev, chan->rx_irq, chan); } } @@ -1748,7 +1655,6 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, dma_cap_zero(dma_dev->cap_mask); /* Set DMA device capability */ - dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); dma_cap_set(DMA_SG, dma_dev->cap_mask); /* Basically here, the X-Gene SoC DMA engine channel 0 supports XOR @@ -1775,19 +1681,18 @@ static void xgene_dma_set_caps(struct xgene_dma_chan *chan, dma_dev->device_free_chan_resources = xgene_dma_free_chan_resources; dma_dev->device_issue_pending = xgene_dma_issue_pending; dma_dev->device_tx_status = xgene_dma_tx_status; - dma_dev->device_prep_dma_memcpy = xgene_dma_prep_memcpy; dma_dev->device_prep_dma_sg = xgene_dma_prep_sg; if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { 
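Both the xor_align/pq_align change below and the sun6i copy_align hunk earlier move from raw shift values to enum dmaengine_alignment; as a reminder (values as defined in include/linux/dmaengine.h), the enum encodes log2 of the required alignment:

/*
 * bytes = 1 << align:
 *   DMAENGINE_ALIGN_4_BYTES  == 2  ->  4-byte alignment
 *   DMAENGINE_ALIGN_64_BYTES == 6  ->  64-byte alignment, the same value
 *                                      the old XGENE_DMA_XOR_ALIGNMENT
 *                                      macro carried
 */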
dma_dev->device_prep_dma_xor = xgene_dma_prep_xor; dma_dev->max_xor = XGENE_DMA_MAX_XOR_SRC; - dma_dev->xor_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->xor_align = DMAENGINE_ALIGN_64_BYTES; } if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) { dma_dev->device_prep_dma_pq = xgene_dma_prep_pq; dma_dev->max_pq = XGENE_DMA_MAX_XOR_SRC; - dma_dev->pq_align = XGENE_DMA_XOR_ALIGNMENT; + dma_dev->pq_align = DMAENGINE_ALIGN_64_BYTES; } } @@ -1828,8 +1733,7 @@ static int xgene_dma_async_register(struct xgene_dma *pdma, int id) /* DMA capability info */ dev_info(pdma->dev, - "%s: CAPABILITY ( %s%s%s%s)\n", dma_chan_name(&chan->dma_chan), - dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "MEMCPY " : "", + "%s: CAPABILITY ( %s%s%s)\n", dma_chan_name(&chan->dma_chan), dma_has_cap(DMA_SG, dma_dev->cap_mask) ? "SGCPY " : "", dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "XOR " : "", dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "PQ " : ""); @@ -1928,6 +1832,8 @@ static int xgene_dma_get_resources(struct platform_device *pdev, return -ENOMEM; } + pdma->csr_ring_cmd += XGENE_DMA_RING_CMD_SM_OFFSET; + /* Get efuse csr region */ res = platform_get_resource(pdev, IORESOURCE_MEM, 3); if (!res) { @@ -1982,16 +1888,18 @@ static int xgene_dma_probe(struct platform_device *pdev) return ret; pdma->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(pdma->clk)) { + if (IS_ERR(pdma->clk) && !ACPI_COMPANION(&pdev->dev)) { dev_err(&pdev->dev, "Failed to get clk\n"); return PTR_ERR(pdma->clk); } /* Enable clk before accessing registers */ - ret = clk_prepare_enable(pdma->clk); - if (ret) { - dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); - return ret; + if (!IS_ERR(pdma->clk)) { + ret = clk_prepare_enable(pdma->clk); + if (ret) { + dev_err(&pdev->dev, "Failed to enable clk %d\n", ret); + return ret; + } } /* Remove DMA RAM out of shutdown */ @@ -2036,7 +1944,8 @@ err_request_irq: err_dma_mask: err_clk_enable: - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return ret; } @@ -2060,11 +1969,20 @@ static int xgene_dma_remove(struct platform_device *pdev) xgene_dma_delete_chan_rings(chan); } - clk_disable_unprepare(pdma->clk); + if (!IS_ERR(pdma->clk)) + clk_disable_unprepare(pdma->clk); return 0; } +#ifdef CONFIG_ACPI +static const struct acpi_device_id xgene_dma_acpi_match_ptr[] = { + {"APMC0D43", 0}, + {}, +}; +MODULE_DEVICE_TABLE(acpi, xgene_dma_acpi_match_ptr); +#endif + static const struct of_device_id xgene_dma_of_match_ptr[] = { {.compatible = "apm,xgene-storm-dma",}, {}, @@ -2077,6 +1995,7 @@ static struct platform_driver xgene_dma_driver = { .driver = { .name = "X-Gene-DMA", .of_match_table = xgene_dma_of_match_ptr, + .acpi_match_table = ACPI_PTR(xgene_dma_acpi_match_ptr), }, }; diff --git a/kernel/drivers/dma/xilinx/xilinx_vdma.c b/kernel/drivers/dma/xilinx/xilinx_vdma.c index d8434d465..6f4b5017c 100644 --- a/kernel/drivers/dma/xilinx/xilinx_vdma.c +++ b/kernel/drivers/dma/xilinx/xilinx_vdma.c @@ -1349,6 +1349,7 @@ static const struct of_device_id xilinx_vdma_of_ids[] = { { .compatible = "xlnx,axi-vdma-1.00.a",}, {} }; +MODULE_DEVICE_TABLE(of, xilinx_vdma_of_ids); static struct platform_driver xilinx_vdma_driver = { .driver = { diff --git a/kernel/drivers/dma/zx296702_dma.c b/kernel/drivers/dma/zx296702_dma.c new file mode 100644 index 000000000..245d759d5 --- /dev/null +++ b/kernel/drivers/dma/zx296702_dma.c @@ -0,0 +1,951 @@ +/* + * Copyright 2015 Linaro. 
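A hedged condensation (function name invented, assuming <linux/acpi.h>, <linux/clk.h> and <linux/err.h>) of the clock handling added to xgene_dma_probe() above: ACPI-enumerated systems expose no clock node, so a missing clk is only treated as fatal when the device came from DT.

/* Illustrative only: enable the clock when present, tolerate its absence
 * on ACPI systems. */
static int xgene_dma_example_enable_clk(struct device *dev, struct clk *clk)
{
	if (IS_ERR(clk))
		return ACPI_COMPANION(dev) ? 0 : PTR_ERR(clk);

	return clk_prepare_enable(clk);
}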
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/sched.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/of_device.h> +#include <linux/of.h> +#include <linux/clk.h> +#include <linux/of_dma.h> + +#include "virt-dma.h" + +#define DRIVER_NAME "zx-dma" +#define DMA_ALIGN 4 +#define DMA_MAX_SIZE (0x10000 - PAGE_SIZE) +#define LLI_BLOCK_SIZE (4 * PAGE_SIZE) + +#define REG_ZX_SRC_ADDR 0x00 +#define REG_ZX_DST_ADDR 0x04 +#define REG_ZX_TX_X_COUNT 0x08 +#define REG_ZX_TX_ZY_COUNT 0x0c +#define REG_ZX_SRC_ZY_STEP 0x10 +#define REG_ZX_DST_ZY_STEP 0x14 +#define REG_ZX_LLI_ADDR 0x1c +#define REG_ZX_CTRL 0x20 +#define REG_ZX_TC_IRQ 0x800 +#define REG_ZX_SRC_ERR_IRQ 0x804 +#define REG_ZX_DST_ERR_IRQ 0x808 +#define REG_ZX_CFG_ERR_IRQ 0x80c +#define REG_ZX_TC_IRQ_RAW 0x810 +#define REG_ZX_SRC_ERR_IRQ_RAW 0x814 +#define REG_ZX_DST_ERR_IRQ_RAW 0x818 +#define REG_ZX_CFG_ERR_IRQ_RAW 0x81c +#define REG_ZX_STATUS 0x820 +#define REG_ZX_DMA_GRP_PRIO 0x824 +#define REG_ZX_DMA_ARB 0x828 + +#define ZX_FORCE_CLOSE BIT(31) +#define ZX_DST_BURST_WIDTH(x) (((x) & 0x7) << 13) +#define ZX_MAX_BURST_LEN 16 +#define ZX_SRC_BURST_LEN(x) (((x) & 0xf) << 9) +#define ZX_SRC_BURST_WIDTH(x) (((x) & 0x7) << 6) +#define ZX_IRQ_ENABLE_ALL (3 << 4) +#define ZX_DST_FIFO_MODE BIT(3) +#define ZX_SRC_FIFO_MODE BIT(2) +#define ZX_SOFT_REQ BIT(1) +#define ZX_CH_ENABLE BIT(0) + +#define ZX_DMA_BUSWIDTHS \ + (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \ + BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ + BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \ + BIT(DMA_SLAVE_BUSWIDTH_8_BYTES)) + +enum zx_dma_burst_width { + ZX_DMA_WIDTH_8BIT = 0, + ZX_DMA_WIDTH_16BIT = 1, + ZX_DMA_WIDTH_32BIT = 2, + ZX_DMA_WIDTH_64BIT = 3, +}; + +struct zx_desc_hw { + u32 saddr; + u32 daddr; + u32 src_x; + u32 src_zy; + u32 src_zy_step; + u32 dst_zy_step; + u32 reserved1; + u32 lli; + u32 ctr; + u32 reserved[7]; /* pack as hardware registers region size */ +} __aligned(32); + +struct zx_dma_desc_sw { + struct virt_dma_desc vd; + dma_addr_t desc_hw_lli; + size_t desc_num; + size_t size; + struct zx_desc_hw *desc_hw; +}; + +struct zx_dma_phy; + +struct zx_dma_chan { + struct dma_slave_config slave_cfg; + int id; /* Request phy chan id */ + u32 ccfg; + u32 cyclic; + struct virt_dma_chan vc; + struct zx_dma_phy *phy; + struct list_head node; + dma_addr_t dev_addr; + enum dma_status status; +}; + +struct zx_dma_phy { + u32 idx; + void __iomem *base; + struct zx_dma_chan *vchan; + struct zx_dma_desc_sw *ds_run; + struct zx_dma_desc_sw *ds_done; +}; + +struct zx_dma_dev { + struct dma_device slave; + void __iomem *base; + spinlock_t lock; /* lock for ch and phy */ + struct list_head chan_pending; + struct zx_dma_phy *phy; + struct zx_dma_chan *chans; + struct clk *clk; + struct dma_pool *pool; + u32 dma_channels; + u32 dma_requests; + int irq; +}; + +#define to_zx_dma(dmadev) container_of(dmadev, struct zx_dma_dev, slave) + +static struct zx_dma_chan *to_zx_chan(struct dma_chan *chan) +{ + return container_of(chan, struct zx_dma_chan, vc.chan); +} + +static void zx_dma_terminate_chan(struct zx_dma_phy *phy, struct 
zx_dma_dev *d) +{ + u32 val = 0; + + val = readl_relaxed(phy->base + REG_ZX_CTRL); + val &= ~ZX_CH_ENABLE; + val |= ZX_FORCE_CLOSE; + writel_relaxed(val, phy->base + REG_ZX_CTRL); + + val = 0x1 << phy->idx; + writel_relaxed(val, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(val, d->base + REG_ZX_CFG_ERR_IRQ_RAW); +} + +static void zx_dma_set_desc(struct zx_dma_phy *phy, struct zx_desc_hw *hw) +{ + writel_relaxed(hw->saddr, phy->base + REG_ZX_SRC_ADDR); + writel_relaxed(hw->daddr, phy->base + REG_ZX_DST_ADDR); + writel_relaxed(hw->src_x, phy->base + REG_ZX_TX_X_COUNT); + writel_relaxed(0, phy->base + REG_ZX_TX_ZY_COUNT); + writel_relaxed(0, phy->base + REG_ZX_SRC_ZY_STEP); + writel_relaxed(0, phy->base + REG_ZX_DST_ZY_STEP); + writel_relaxed(hw->lli, phy->base + REG_ZX_LLI_ADDR); + writel_relaxed(hw->ctr, phy->base + REG_ZX_CTRL); +} + +static u32 zx_dma_get_curr_lli(struct zx_dma_phy *phy) +{ + return readl_relaxed(phy->base + REG_ZX_LLI_ADDR); +} + +static u32 zx_dma_get_chan_stat(struct zx_dma_dev *d) +{ + return readl_relaxed(d->base + REG_ZX_STATUS); +} + +static void zx_dma_init_state(struct zx_dma_dev *d) +{ + /* set same priority */ + writel_relaxed(0x0, d->base + REG_ZX_DMA_ARB); + /* clear all irq */ + writel_relaxed(0xffffffff, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(0xffffffff, d->base + REG_ZX_CFG_ERR_IRQ_RAW); +} + +static int zx_dma_start_txd(struct zx_dma_chan *c) +{ + struct zx_dma_dev *d = to_zx_dma(c->vc.chan.device); + struct virt_dma_desc *vd = vchan_next_desc(&c->vc); + + if (!c->phy) + return -EAGAIN; + + if (BIT(c->phy->idx) & zx_dma_get_chan_stat(d)) + return -EAGAIN; + + if (vd) { + struct zx_dma_desc_sw *ds = + container_of(vd, struct zx_dma_desc_sw, vd); + /* + * fetch and remove request from vc->desc_issued + * so vc->desc_issued only contains desc pending + */ + list_del(&ds->vd.node); + c->phy->ds_run = ds; + c->phy->ds_done = NULL; + /* start dma */ + zx_dma_set_desc(c->phy, ds->desc_hw); + return 0; + } + c->phy->ds_done = NULL; + c->phy->ds_run = NULL; + return -EAGAIN; +} + +static void zx_dma_task(struct zx_dma_dev *d) +{ + struct zx_dma_phy *p; + struct zx_dma_chan *c, *cn; + unsigned pch, pch_alloc = 0; + unsigned long flags; + + /* check new dma request of running channel in vc->desc_issued */ + list_for_each_entry_safe(c, cn, &d->slave.channels, + vc.chan.device_node) { + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + if (p && p->ds_done && zx_dma_start_txd(c)) { + /* No current txd associated with this channel */ + dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx); + /* Mark this channel free */ + c->phy = NULL; + p->vchan = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + } + + /* check new channel request in d->chan_pending */ + spin_lock_irqsave(&d->lock, flags); + while (!list_empty(&d->chan_pending)) { + c = list_first_entry(&d->chan_pending, + struct zx_dma_chan, node); + p = &d->phy[c->id]; + if (!p->vchan) { + /* remove from d->chan_pending */ + list_del_init(&c->node); + pch_alloc |= 1 << c->id; + /* Mark this channel allocated */ + p->vchan = c; + c->phy = p; + } else { + dev_dbg(d->slave.dev, "pchan %u: busy!\n", c->id); + } + } + spin_unlock_irqrestore(&d->lock, flags); + + for (pch = 0; pch < d->dma_channels; pch++) { + if (pch_alloc & (1 << pch)) { + p = &d->phy[pch]; + c = 
p->vchan; + if (c) { + spin_lock_irqsave(&c->vc.lock, flags); + zx_dma_start_txd(c); + spin_unlock_irqrestore(&c->vc.lock, flags); + } + } + } +} + +static irqreturn_t zx_dma_int_handler(int irq, void *dev_id) +{ + struct zx_dma_dev *d = (struct zx_dma_dev *)dev_id; + struct zx_dma_phy *p; + struct zx_dma_chan *c; + u32 tc = readl_relaxed(d->base + REG_ZX_TC_IRQ); + u32 serr = readl_relaxed(d->base + REG_ZX_SRC_ERR_IRQ); + u32 derr = readl_relaxed(d->base + REG_ZX_DST_ERR_IRQ); + u32 cfg = readl_relaxed(d->base + REG_ZX_CFG_ERR_IRQ); + u32 i, irq_chan = 0, task = 0; + + while (tc) { + i = __ffs(tc); + tc &= ~BIT(i); + p = &d->phy[i]; + c = p->vchan; + if (c) { + unsigned long flags; + + spin_lock_irqsave(&c->vc.lock, flags); + if (c->cyclic) { + vchan_cyclic_callback(&p->ds_run->vd); + } else { + vchan_cookie_complete(&p->ds_run->vd); + p->ds_done = p->ds_run; + task = 1; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + irq_chan |= BIT(i); + } + } + + if (serr || derr || cfg) + dev_warn(d->slave.dev, "DMA ERR src 0x%x, dst 0x%x, cfg 0x%x\n", + serr, derr, cfg); + + writel_relaxed(irq_chan, d->base + REG_ZX_TC_IRQ_RAW); + writel_relaxed(serr, d->base + REG_ZX_SRC_ERR_IRQ_RAW); + writel_relaxed(derr, d->base + REG_ZX_DST_ERR_IRQ_RAW); + writel_relaxed(cfg, d->base + REG_ZX_CFG_ERR_IRQ_RAW); + + if (task) + zx_dma_task(d); + return IRQ_HANDLED; +} + +static void zx_dma_free_chan_resources(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + unsigned long flags; + + spin_lock_irqsave(&d->lock, flags); + list_del_init(&c->node); + spin_unlock_irqrestore(&d->lock, flags); + + vchan_free_chan_resources(&c->vc); + c->ccfg = 0; +} + +static enum dma_status zx_dma_tx_status(struct dma_chan *chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_phy *p; + struct virt_dma_desc *vd; + unsigned long flags; + enum dma_status ret; + size_t bytes = 0; + + ret = dma_cookie_status(&c->vc.chan, cookie, state); + if (ret == DMA_COMPLETE || !state) + return ret; + + spin_lock_irqsave(&c->vc.lock, flags); + p = c->phy; + ret = c->status; + + /* + * If the cookie is on our issue queue, then the residue is + * its total size. 
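+	 * Otherwise, if a descriptor is currently running on the physical
+	 * channel, walk the hardware LLI chain from the entry being
+	 * processed and sum the remaining per-entry transfer lengths.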
+	 */
+	vd = vchan_find_desc(&c->vc, cookie);
+	if (vd) {
+		bytes = container_of(vd, struct zx_dma_desc_sw, vd)->size;
+	} else if ((!p) || (!p->ds_run)) {
+		bytes = 0;
+	} else {
+		struct zx_dma_desc_sw *ds = p->ds_run;
+		u32 clli = 0, index = 0;
+
+		bytes = 0;
+		clli = zx_dma_get_curr_lli(p);
+		index = (clli - ds->desc_hw_lli) / sizeof(struct zx_desc_hw);
+		for (; index < ds->desc_num; index++) {
+			bytes += ds->desc_hw[index].src_x;
+			/* end of lli */
+			if (!ds->desc_hw[index].lli)
+				break;
+		}
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+	dma_set_residue(state, bytes);
+	return ret;
+}
+
+static void zx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	unsigned long flags;
+	int issue = 0;
+
+	spin_lock_irqsave(&c->vc.lock, flags);
+	/* add request to vc->desc_issued */
+	if (vchan_issue_pending(&c->vc)) {
+		spin_lock(&d->lock);
+		if (!c->phy && list_empty(&c->node)) {
+			/* if new channel, add chan_pending */
+			list_add_tail(&c->node, &d->chan_pending);
+			issue = 1;
+			dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
+		}
+		spin_unlock(&d->lock);
+	} else {
+		dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
+	}
+	spin_unlock_irqrestore(&c->vc.lock, flags);
+
+	if (issue)
+		zx_dma_task(d);
+}
+
+static void zx_dma_fill_desc(struct zx_dma_desc_sw *ds, dma_addr_t dst,
+			     dma_addr_t src, size_t len, u32 num, u32 ccfg)
+{
+	if ((num + 1) < ds->desc_num)
+		ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
+			sizeof(struct zx_desc_hw);
+	ds->desc_hw[num].saddr = src;
+	ds->desc_hw[num].daddr = dst;
+	ds->desc_hw[num].src_x = len;
+	ds->desc_hw[num].ctr = ccfg;
+}
+
+static struct zx_dma_desc_sw *zx_alloc_desc_resource(int num,
+						     struct dma_chan *chan)
+{
+	struct zx_dma_chan *c = to_zx_chan(chan);
+	struct zx_dma_desc_sw *ds;
+	struct zx_dma_dev *d = to_zx_dma(chan->device);
+	int lli_limit = LLI_BLOCK_SIZE / sizeof(struct zx_desc_hw);
+
+	if (num > lli_limit) {
+		dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n",
+			&c->vc, num, lli_limit);
+		return NULL;
+	}
+
+	ds = kzalloc(sizeof(*ds), GFP_ATOMIC);
+	if (!ds)
+		return NULL;
+
+	ds->desc_hw = dma_pool_alloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
+	if (!ds->desc_hw) {
+		dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc);
+		kfree(ds);
+		return NULL;
+	}
+	memset(ds->desc_hw, 0, sizeof(struct zx_desc_hw) * num);
+	ds->desc_num = num;
+	return ds;
+}
+
+static enum zx_dma_burst_width zx_dma_burst_width(enum dma_slave_buswidth width)
+{
+	switch (width) {
+	case DMA_SLAVE_BUSWIDTH_1_BYTE:
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+	case DMA_SLAVE_BUSWIDTH_8_BYTES:
+		return ffs(width) - 1;
+	default:
+		return ZX_DMA_WIDTH_32BIT;
+	}
+}
+
+static int zx_pre_config(struct zx_dma_chan *c, enum dma_transfer_direction dir)
+{
+	struct dma_slave_config *cfg = &c->slave_cfg;
+	enum zx_dma_burst_width src_width;
+	enum zx_dma_burst_width dst_width;
+	u32 maxburst = 0;
+
+	switch (dir) {
+	case DMA_MEM_TO_MEM:
+		c->ccfg = ZX_CH_ENABLE | ZX_SOFT_REQ
+			| ZX_SRC_BURST_LEN(ZX_MAX_BURST_LEN - 1)
+			| ZX_SRC_BURST_WIDTH(ZX_DMA_WIDTH_32BIT)
+			| ZX_DST_BURST_WIDTH(ZX_DMA_WIDTH_32BIT);
+		break;
+	case DMA_MEM_TO_DEV:
+		c->dev_addr = cfg->dst_addr;
+		/* The dst len is calculated from the src width, len and dst
+		 * width.  We need to make sure the dst len does not exceed
+		 * MAX LEN.  A trailing single transaction that does not fill
+		 * a full burst also requires identical src/dst data widths.
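+		 * That is why the destination bus width is programmed into
+		 * both the source and destination burst-width fields below.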
+ */ + dst_width = zx_dma_burst_width(cfg->dst_addr_width); + maxburst = cfg->dst_maxburst; + maxburst = maxburst < ZX_MAX_BURST_LEN ? + maxburst : ZX_MAX_BURST_LEN; + c->ccfg = ZX_DST_FIFO_MODE | ZX_CH_ENABLE + | ZX_SRC_BURST_LEN(maxburst - 1) + | ZX_SRC_BURST_WIDTH(dst_width) + | ZX_DST_BURST_WIDTH(dst_width); + break; + case DMA_DEV_TO_MEM: + c->dev_addr = cfg->src_addr; + src_width = zx_dma_burst_width(cfg->src_addr_width); + maxburst = cfg->src_maxburst; + maxburst = maxburst < ZX_MAX_BURST_LEN ? + maxburst : ZX_MAX_BURST_LEN; + c->ccfg = ZX_SRC_FIFO_MODE | ZX_CH_ENABLE + | ZX_SRC_BURST_LEN(maxburst - 1) + | ZX_SRC_BURST_WIDTH(src_width) + | ZX_DST_BURST_WIDTH(src_width); + break; + default: + return -EINVAL; + } + return 0; +} + +static struct dma_async_tx_descriptor *zx_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dst, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + size_t copy = 0; + int num = 0; + + if (!len) + return NULL; + + if (zx_pre_config(c, DMA_MEM_TO_MEM)) + return NULL; + + num = DIV_ROUND_UP(len, DMA_MAX_SIZE); + + ds = zx_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + ds->size = len; + num = 0; + + do { + copy = min_t(size_t, len, DMA_MAX_SIZE); + zx_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg); + + src += copy; + dst += copy; + len -= copy; + } while (len); + + c->cyclic = 0; + ds->desc_hw[num - 1].lli = 0; /* end of link */ + ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *zx_dma_prep_slave_sg( + struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen, + enum dma_transfer_direction dir, unsigned long flags, void *context) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + size_t len, avail, total = 0; + struct scatterlist *sg; + dma_addr_t addr, src = 0, dst = 0; + int num = sglen, i; + + if (!sgl) + return NULL; + + if (zx_pre_config(c, dir)) + return NULL; + + for_each_sg(sgl, sg, sglen, i) { + avail = sg_dma_len(sg); + if (avail > DMA_MAX_SIZE) + num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1; + } + + ds = zx_alloc_desc_resource(num, chan); + if (!ds) + return NULL; + + c->cyclic = 0; + num = 0; + for_each_sg(sgl, sg, sglen, i) { + addr = sg_dma_address(sg); + avail = sg_dma_len(sg); + total += avail; + + do { + len = min_t(size_t, avail, DMA_MAX_SIZE); + + if (dir == DMA_MEM_TO_DEV) { + src = addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = addr; + } + + zx_dma_fill_desc(ds, dst, src, len, num++, c->ccfg); + + addr += len; + avail -= len; + } while (avail); + } + + ds->desc_hw[num - 1].lli = 0; /* end of link */ + ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL; + ds->size = total; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static struct dma_async_tx_descriptor *zx_dma_prep_dma_cyclic( + struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len, + size_t period_len, enum dma_transfer_direction dir, + unsigned long flags) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_desc_sw *ds; + dma_addr_t src = 0, dst = 0; + int num_periods = buf_len / period_len; + int buf = 0, num = 0; + + if (period_len > DMA_MAX_SIZE) { + dev_err(chan->device->dev, "maximum period size exceeded\n"); + return NULL; + } + + if (zx_pre_config(c, dir)) + return NULL; + + ds = zx_alloc_desc_resource(num_periods, chan); + if (!ds) + return NULL; + c->cyclic = 1; + + while (buf < buf_len) { + 
if (dir == DMA_MEM_TO_DEV) { + src = dma_addr; + dst = c->dev_addr; + } else if (dir == DMA_DEV_TO_MEM) { + src = c->dev_addr; + dst = dma_addr; + } + zx_dma_fill_desc(ds, dst, src, period_len, num++, + c->ccfg | ZX_IRQ_ENABLE_ALL); + dma_addr += period_len; + buf += period_len; + } + + ds->desc_hw[num - 1].lli = ds->desc_hw_lli; + ds->size = buf_len; + return vchan_tx_prep(&c->vc, &ds->vd, flags); +} + +static int zx_dma_config(struct dma_chan *chan, + struct dma_slave_config *cfg) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + + if (!cfg) + return -EINVAL; + + memcpy(&c->slave_cfg, cfg, sizeof(*cfg)); + + return 0; +} + +static int zx_dma_terminate_all(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + struct zx_dma_dev *d = to_zx_dma(chan->device); + struct zx_dma_phy *p = c->phy; + unsigned long flags; + LIST_HEAD(head); + + dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc); + + /* Prevent this channel being scheduled */ + spin_lock(&d->lock); + list_del_init(&c->node); + spin_unlock(&d->lock); + + /* Clear the tx descriptor lists */ + spin_lock_irqsave(&c->vc.lock, flags); + vchan_get_all_descriptors(&c->vc, &head); + if (p) { + /* vchan is assigned to a pchan - stop the channel */ + zx_dma_terminate_chan(p, d); + c->phy = NULL; + p->vchan = NULL; + p->ds_run = NULL; + p->ds_done = NULL; + } + spin_unlock_irqrestore(&c->vc.lock, flags); + vchan_dma_desc_free_list(&c->vc, &head); + + return 0; +} + +static int zx_dma_transfer_pause(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + u32 val = 0; + + val = readl_relaxed(c->phy->base + REG_ZX_CTRL); + val &= ~ZX_CH_ENABLE; + writel_relaxed(val, c->phy->base + REG_ZX_CTRL); + + return 0; +} + +static int zx_dma_transfer_resume(struct dma_chan *chan) +{ + struct zx_dma_chan *c = to_zx_chan(chan); + u32 val = 0; + + val = readl_relaxed(c->phy->base + REG_ZX_CTRL); + val |= ZX_CH_ENABLE; + writel_relaxed(val, c->phy->base + REG_ZX_CTRL); + + return 0; +} + +static void zx_dma_free_desc(struct virt_dma_desc *vd) +{ + struct zx_dma_desc_sw *ds = + container_of(vd, struct zx_dma_desc_sw, vd); + struct zx_dma_dev *d = to_zx_dma(vd->tx.chan->device); + + dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli); + kfree(ds); +} + +static const struct of_device_id zx6702_dma_dt_ids[] = { + { .compatible = "zte,zx296702-dma", }, + {} +}; +MODULE_DEVICE_TABLE(of, zx6702_dma_dt_ids); + +static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec, + struct of_dma *ofdma) +{ + struct zx_dma_dev *d = ofdma->of_dma_data; + unsigned int request = dma_spec->args[0]; + struct dma_chan *chan; + struct zx_dma_chan *c; + + if (request >= d->dma_requests) + return NULL; + + chan = dma_get_any_slave_channel(&d->slave); + if (!chan) { + dev_err(d->slave.dev, "get channel fail in %s.\n", __func__); + return NULL; + } + c = to_zx_chan(chan); + c->id = request; + dev_info(d->slave.dev, "zx_dma: pchan %u: alloc vchan %p\n", + c->id, &c->vc); + return chan; +} + +static int zx_dma_probe(struct platform_device *op) +{ + struct zx_dma_dev *d; + struct resource *iores; + int i, ret = 0; + + iores = platform_get_resource(op, IORESOURCE_MEM, 0); + if (!iores) + return -EINVAL; + + d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL); + if (!d) + return -ENOMEM; + + d->base = devm_ioremap_resource(&op->dev, iores); + if (IS_ERR(d->base)) + return PTR_ERR(d->base); + + of_property_read_u32((&op->dev)->of_node, + "dma-channels", &d->dma_channels); + of_property_read_u32((&op->dev)->of_node, + "dma-requests", 
&d->dma_requests);
+	if (!d->dma_requests || !d->dma_channels)
+		return -EINVAL;
+
+	d->clk = devm_clk_get(&op->dev, NULL);
+	if (IS_ERR(d->clk)) {
+		dev_err(&op->dev, "no dma clk\n");
+		return PTR_ERR(d->clk);
+	}
+
+	d->irq = platform_get_irq(op, 0);
+	ret = devm_request_irq(&op->dev, d->irq, zx_dma_int_handler,
+			       0, DRIVER_NAME, d);
+	if (ret)
+		return ret;
+
+	/* A DMA memory pool for LLIs, align on 32-byte boundary */
+	d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
+			LLI_BLOCK_SIZE, 32, 0);
+	if (!d->pool)
+		return -ENOMEM;
+
+	/* init phy channel */
+	d->phy = devm_kzalloc(&op->dev,
+		d->dma_channels * sizeof(struct zx_dma_phy), GFP_KERNEL);
+	if (!d->phy)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_channels; i++) {
+		struct zx_dma_phy *p = &d->phy[i];
+
+		p->idx = i;
+		p->base = d->base + i * 0x40;
+	}
+
+	INIT_LIST_HEAD(&d->slave.channels);
+	dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
+	dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, d->slave.cap_mask);
+	d->slave.dev = &op->dev;
+	d->slave.device_free_chan_resources = zx_dma_free_chan_resources;
+	d->slave.device_tx_status = zx_dma_tx_status;
+	d->slave.device_prep_dma_memcpy = zx_dma_prep_memcpy;
+	d->slave.device_prep_slave_sg = zx_dma_prep_slave_sg;
+	d->slave.device_prep_dma_cyclic = zx_dma_prep_dma_cyclic;
+	d->slave.device_issue_pending = zx_dma_issue_pending;
+	d->slave.device_config = zx_dma_config;
+	d->slave.device_terminate_all = zx_dma_terminate_all;
+	d->slave.device_pause = zx_dma_transfer_pause;
+	d->slave.device_resume = zx_dma_transfer_resume;
+	d->slave.copy_align = DMA_ALIGN;
+	d->slave.src_addr_widths = ZX_DMA_BUSWIDTHS;
+	d->slave.dst_addr_widths = ZX_DMA_BUSWIDTHS;
+	d->slave.directions = BIT(DMA_MEM_TO_MEM) | BIT(DMA_MEM_TO_DEV)
+			| BIT(DMA_DEV_TO_MEM);
+	d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+
+	/* init virtual channel */
+	d->chans = devm_kzalloc(&op->dev,
+		d->dma_requests * sizeof(struct zx_dma_chan), GFP_KERNEL);
+	if (!d->chans)
+		return -ENOMEM;
+
+	for (i = 0; i < d->dma_requests; i++) {
+		struct zx_dma_chan *c = &d->chans[i];
+
+		c->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&c->node);
+		c->vc.desc_free = zx_dma_free_desc;
+		vchan_init(&c->vc, &d->slave);
+	}
+
+	/* Enable clock before accessing registers */
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
+		goto zx_dma_out;
+	}
+
+	zx_dma_init_state(d);
+
+	spin_lock_init(&d->lock);
+	INIT_LIST_HEAD(&d->chan_pending);
+	platform_set_drvdata(op, d);
+
+	ret = dma_async_device_register(&d->slave);
+	if (ret)
+		goto clk_dis;
+
+	ret = of_dma_controller_register((&op->dev)->of_node,
+					 zx_of_dma_simple_xlate, d);
+	if (ret)
+		goto of_dma_register_fail;
+
+	dev_info(&op->dev, "initialized\n");
+	return 0;
+
+of_dma_register_fail:
+	dma_async_device_unregister(&d->slave);
+clk_dis:
+	clk_disable_unprepare(d->clk);
+zx_dma_out:
+	return ret;
+}
+
+static int zx_dma_remove(struct platform_device *op)
+{
+	struct zx_dma_chan *c, *cn;
+	struct zx_dma_dev *d = platform_get_drvdata(op);
+
+	/* explicitly free the irq */
+	devm_free_irq(&op->dev, d->irq, d);
+
+	dma_async_device_unregister(&d->slave);
+	of_dma_controller_free((&op->dev)->of_node);
+
+	list_for_each_entry_safe(c, cn, &d->slave.channels,
+				 vc.chan.device_node) {
+		list_del(&c->vc.chan.device_node);
+	}
+	clk_disable_unprepare(d->clk);
+	dmam_pool_destroy(d->pool);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int zx_dma_suspend_dev(struct device *dev)
+{
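+	/* Refuse to suspend while any physical channel is still busy. */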
+	struct zx_dma_dev *d = dev_get_drvdata(dev);
+	u32 stat = 0;
+
+	stat = zx_dma_get_chan_stat(d);
+	if (stat) {
+		dev_warn(d->slave.dev,
+			 "chan %d is running, unable to suspend\n", stat);
+		return -1;
+	}
+	clk_disable_unprepare(d->clk);
+	return 0;
+}
+
+static int zx_dma_resume_dev(struct device *dev)
+{
+	struct zx_dma_dev *d = dev_get_drvdata(dev);
+	int ret = 0;
+
+	ret = clk_prepare_enable(d->clk);
+	if (ret < 0) {
+		dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
+		return ret;
+	}
+	zx_dma_init_state(d);
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(zx_dma_pmops, zx_dma_suspend_dev, zx_dma_resume_dev);
+
+static struct platform_driver zx_pdma_driver = {
+	.driver = {
+		.name = DRIVER_NAME,
+		.pm = &zx_dma_pmops,
+		.of_match_table = zx6702_dma_dt_ids,
+	},
+	.probe = zx_dma_probe,
+	.remove = zx_dma_remove,
+};
+
+module_platform_driver(zx_pdma_driver);
+
+MODULE_DESCRIPTION("ZTE ZX296702 DMA Driver");
+MODULE_AUTHOR("Jun Nie <jun.nie@linaro.org>");
+MODULE_LICENSE("GPL v2");
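For context, the zx296702 controller added above is consumed through the generic dmaengine slave API rather than any driver-private interface: a client looks up a channel via its device-tree "dmas"/"dma-names" properties (zx_of_dma_simple_xlate() maps the single specifier cell to a request line), programs a dma_slave_config, then prepares, submits and issues descriptors. The following is a minimal, illustrative client sketch under those assumptions; the channel name "tx", the FIFO address and the demo_* names are hypothetical and are not part of the patch above.

/* Illustrative dmaengine client sketch only -- not part of the zx296702 patch. */
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>

static void demo_dma_done(void *param)
{
	/* Runs from the DMA completion path (virt-dma tasklet). */
	complete(param);
}

static int demo_start_tx(struct device *dev, dma_addr_t buf, size_t len)
{
	struct dma_slave_config cfg = {
		.direction	= DMA_MEM_TO_DEV,
		.dst_addr	= 0x09100040,	/* hypothetical device FIFO */
		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst	= 8,		/* within ZX_MAX_BURST_LEN */
	};
	struct dma_async_tx_descriptor *desc;
	DECLARE_COMPLETION_ONSTACK(done);
	struct dma_chan *chan;
	dma_cookie_t cookie;
	int ret = -EIO;

	/* "tx" must match an entry in the client's dma-names property. */
	chan = dma_request_slave_channel(dev, "tx");
	if (!chan)
		return -ENODEV;

	if (dmaengine_slave_config(chan, &cfg))
		goto out;

	desc = dmaengine_prep_slave_single(chan, buf, len, DMA_MEM_TO_DEV,
					   DMA_PREP_INTERRUPT);
	if (!desc)
		goto out;

	desc->callback = demo_dma_done;
	desc->callback_param = &done;

	cookie = dmaengine_submit(desc);
	if (dma_submit_error(cookie))
		goto out;

	dma_async_issue_pending(chan);	/* reaches zx_dma_issue_pending() */
	wait_for_completion(&done);
	ret = 0;
out:
	dma_release_channel(chan);
	return ret;
}

Note that in this driver zx_dma_config() only caches the dma_slave_config; it is zx_pre_config(), called when a transfer is prepared, that turns the cached widths and maxburst into the channel control word.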